From c24d922396f4dcd50216be3cdfd784ea31c47564 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Tue, 25 Jun 2013 11:26:49 -0700 Subject: Add averaging-SAD functions for 8-point comp-inter motion search. Makes first 50 frames of bus @ 1500kbps encode from 3min22.7 to 3min18.2, i.e. 2.3% faster. In addition, use the sub_pixel_avg functions to calc the variance of the averaging predictor. This is slightly suboptimal because the function is subpixel-position-aware, but it will (at least for the SSE2 version) not actually use a bilinear filter for a full-pixel position, thus leading to approximately the same performance compared to if we implemented an actual average-aware full-pixel variance function. That gains another 0.3 seconds (i.e. encode time goes to 3min17.4), thus leading to a total gain of 2.7%. Change-Id: I3f059d2b04243921868cfed2568d4fa65d7b5acd --- vp9/common/vp9_rtcd_defs.sh | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'vp9/common/vp9_rtcd_defs.sh') diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index b956e6af9..d71def519 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -369,7 +369,6 @@ specialize vp9_sad8x16 mmx sse2 prototype unsigned int vp9_sad8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" specialize vp9_sad8x8 mmx sse2 -# TODO(jingning): need to covert these functions into mmx/sse2 form prototype unsigned int vp9_sad8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" specialize vp9_sad8x4 sse2 @@ -379,6 +378,45 @@ specialize vp9_sad4x8 sse prototype unsigned int vp9_sad4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" specialize vp9_sad4x4 mmx sse +prototype unsigned int vp9_sad64x64_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad64x64_avg sse2 + +prototype unsigned int vp9_sad32x64_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad32x64_avg sse2 + +prototype unsigned int vp9_sad64x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad64x32_avg sse2 + +prototype unsigned int vp9_sad32x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad32x16_avg sse2 + +prototype unsigned int vp9_sad16x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad16x32_avg sse2 + +prototype unsigned int vp9_sad32x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad32x32_avg sse2 + +prototype unsigned int vp9_sad16x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad16x16_avg sse2 + +prototype unsigned int vp9_sad16x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad16x8_avg sse2 + +prototype unsigned int vp9_sad8x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad8x16_avg sse2 + +prototype unsigned int vp9_sad8x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad8x8_avg sse2 + +prototype unsigned int vp9_sad8x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad8x4_avg sse2 + +prototype unsigned int vp9_sad4x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad4x8_avg sse + +prototype unsigned int vp9_sad4x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" +specialize vp9_sad4x4_avg sse + prototype unsigned int vp9_variance_halfpixvar16x16_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" specialize vp9_variance_halfpixvar16x16_h sse2 vp9_variance_halfpixvar16x16_h_sse2=vp9_variance_halfpixvar16x16_h_wmt -- cgit v1.2.3