diff options
author | Ronald S. Bultje <rbultje@google.com> | 2013-06-25 11:26:49 -0700 |
---|---|---|
committer | Ronald S. Bultje <rbultje@google.com> | 2013-06-25 12:57:28 -0700 |
commit | c24d922396f4dcd50216be3cdfd784ea31c47564 (patch) | |
tree | da9b0b652199acb1d99b7f28a752d2d76d24c998 /vp9/common/vp9_rtcd_defs.sh | |
parent | 9d95993115db450a3a9d4387d564ae191e19f90e (diff) | |
download | libvpx-c24d922396f4dcd50216be3cdfd784ea31c47564.tar libvpx-c24d922396f4dcd50216be3cdfd784ea31c47564.tar.gz libvpx-c24d922396f4dcd50216be3cdfd784ea31c47564.tar.bz2 libvpx-c24d922396f4dcd50216be3cdfd784ea31c47564.zip |
Add averaging-SAD functions for 8-point comp-inter motion search.
Makes first 50 frames of bus @ 1500kbps encode from 3min22.7 to 3min18.2,
i.e. 2.3% faster. In addition, use the sub_pixel_avg functions to calc
the variance of the averaging predictor. This is slightly suboptimal
because the function is subpixel-position-aware, but it will (at least
for the SSE2 version) not actually use a bilinear filter for a full-pixel
position, thus leading to approximately the same performance compared to
if we implemented an actual average-aware full-pixel variance function.
That gains another 0.3 seconds (i.e. encode time goes to 3min17.4), thus
leading to a total gain of 2.7%.
Change-Id: I3f059d2b04243921868cfed2568d4fa65d7b5acd
Diffstat (limited to 'vp9/common/vp9_rtcd_defs.sh')
-rw-r--r-- | vp9/common/vp9_rtcd_defs.sh | 40 |
1 files changed, 39 insertions, 1 deletions
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index b956e6af9..d71def519 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -369,7 +369,6 @@ specialize vp9_sad8x16 mmx sse2 prototype unsigned int vp9_sad8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" specialize vp9_sad8x8 mmx sse2 -# TODO(jingning): need to covert these functions into mmx/sse2 form prototype unsigned int vp9_sad8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" specialize vp9_sad8x4 sse2 @@ -379,6 +378,45 @@ specialize vp9_sad4x8 sse prototype unsigned int vp9_sad4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" specialize vp9_sad4x4 mmx sse +prototype unsigned int vp9_sad64x64_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad64x64_avg sse2 + +prototype unsigned int vp9_sad32x64_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad32x64_avg sse2 + +prototype unsigned int vp9_sad64x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad64x32_avg sse2 + +prototype unsigned int vp9_sad32x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad32x16_avg sse2 + +prototype unsigned int vp9_sad16x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad16x32_avg sse2 + +prototype unsigned int vp9_sad32x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad32x32_avg sse2 + +prototype unsigned int vp9_sad16x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad16x16_avg sse2 + +prototype unsigned int vp9_sad16x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad16x8_avg sse2 + +prototype unsigned int vp9_sad8x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad8x16_avg sse2 + +prototype unsigned int vp9_sad8x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad8x8_avg sse2 + +prototype unsigned int vp9_sad8x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad8x4_avg sse2 + +prototype unsigned int vp9_sad4x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad4x8_avg sse + +prototype unsigned int vp9_sad4x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad4x4_avg sse + prototype unsigned int vp9_variance_halfpixvar16x16_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" specialize vp9_variance_halfpixvar16x16_h sse2 vp9_variance_halfpixvar16x16_h_sse2=vp9_variance_halfpixvar16x16_h_wmt |