From a5210082013a5f9ac868f918a02ecf3766bc107c Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Mon, 2 Mar 2015 10:28:12 -0800 Subject: Scale the normalization factor depending on the block size Change-Id: I0a26994bf65ea224e496b09af2ce71e1a4210433 --- vp9/encoder/vp9_avg.c | 6 ++++-- vp9/encoder/x86/vp9_avg_intrin_sse2.c | 15 ++++++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'vp9') diff --git a/vp9/encoder/vp9_avg.c b/vp9/encoder/vp9_avg.c index 50c8bca0b..90d113c32 100644 --- a/vp9/encoder/vp9_avg.c +++ b/vp9/encoder/vp9_avg.c @@ -32,12 +32,13 @@ unsigned int vp9_avg_4x4_c(const uint8_t *s, int p) { void vp9_int_pro_row_c(int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height) { int idx; + const int norm_factor = MAX(8, height >> 1); for (idx = 0; idx < 16; ++idx) { int i; hbuf[idx] = 0; for (i = 0; i < height; ++i) hbuf[idx] += ref[i * ref_stride]; - hbuf[idx] /= 32; + hbuf[idx] /= norm_factor; ++ref; } } @@ -45,9 +46,10 @@ void vp9_int_pro_row_c(int16_t *hbuf, uint8_t const *ref, int16_t vp9_int_pro_col_c(uint8_t const *ref, const int width) { int idx; int16_t sum = 0; + const int norm_factor = MAX(8, width >> 1); for (idx = 0; idx < width; ++idx) sum += ref[idx]; - return sum / 32; + return sum / norm_factor; } int vp9_vector_var_c(int16_t const *ref, int16_t const *src, diff --git a/vp9/encoder/x86/vp9_avg_intrin_sse2.c b/vp9/encoder/x86/vp9_avg_intrin_sse2.c index 482fa3da3..f49949940 100644 --- a/vp9/encoder/x86/vp9_avg_intrin_sse2.c +++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c @@ -90,8 +90,16 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref, s0 = _mm_adds_epu16(s0, t0); s1 = _mm_adds_epu16(s1, t1); - s0 = _mm_srai_epi16(s0, 5); - s1 = _mm_srai_epi16(s1, 5); + if (height == 64) { + s0 = _mm_srai_epi16(s0, 5); + s1 = _mm_srai_epi16(s1, 5); + } else if (height == 32) { + s0 = _mm_srai_epi16(s0, 4); + s1 = _mm_srai_epi16(s1, 4); + } else { + s0 = _mm_srai_epi16(s0, 3); + s1 = _mm_srai_epi16(s1, 3); + } _mm_store_si128((__m128i *)hbuf, s0); hbuf += 8; @@ -104,6 +112,7 @@ int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) { __m128i s0 = _mm_sad_epu8(src_line, zero); __m128i s1; int i; + const int norm_factor = 3 + (width >> 5); for (i = 16; i < width; i += 16) { ref += 16; @@ -115,7 +124,7 @@ int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) { s1 = _mm_srli_si128(s0, 8); s0 = _mm_adds_epu16(s0, s1); - return (_mm_extract_epi16(s0, 0)) >> 5; + return _mm_extract_epi16(s0, 0) >> norm_factor; } int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, -- cgit v1.2.3