summary | refs | log | tree | commit | diff
path: root/vp9/encoder
diff options
context:
space:
mode:
author: Jingning Han <jingning@google.com> 2015-03-16 12:03:31 -0700
committer: Jingning Han <jingning@google.com> 2015-03-16 12:07:15 -0700
commit: 2cfddec3328e86d3901b2a26fe5c2ba882916406 (patch)
tree: ac364bb4a4995d960ae84525dd4dfeb4ba14f016 /vp9/encoder
parent: 7cf383d17f41d570deb4d5d2c9153b080483fcc2 (diff)
download: libvpx-2cfddec3328e86d3901b2a26fe5c2ba882916406.tar
libvpx-2cfddec3328e86d3901b2a26fe5c2ba882916406.tar.gz
libvpx-2cfddec3328e86d3901b2a26fe5c2ba882916406.tar.bz2
libvpx-2cfddec3328e86d3901b2a26fe5c2ba882916406.zip
Refactor column integral projection computation
Move the scaling factor out of the column projection function and apply it at the call sites instead. This avoids recomputing the same scaling factor on every call. Profiling shows that the share of overall cycles spent in vp9_int_pro_col_sse2 drops from 2.29% to 1.88%.

Change-Id: I5ac4e324ab2d7f33ba2de66dd2a12e04e04dfd66
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- vp9/encoder/vp9_mcomp.c 5
-rw-r--r-- vp9/encoder/x86/vp9_avg_intrin_sse2.c 3
2 files changed, 4 insertions, 4 deletions
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 88c5ca0c3..9602eb568 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1805,6 +1805,7 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
MV *tmp_mv = &xd->mi[0].src_mi->mbmi.mv[0].as_mv;
unsigned int best_sad, tmp_sad, this_sad[4];
MV this_mv;
+ const int norm_factor = 3 + (bw >> 5);
#if CONFIG_VP9_HIGHBITDEPTH
tmp_mv->row = 0;
@@ -1822,7 +1823,7 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
for (idx = 0; idx < search_height; ++idx) {
- vbuf[idx] = vp9_int_pro_col(ref_buf, bw);
+ vbuf[idx] = vp9_int_pro_col(ref_buf, bw) >> norm_factor;
ref_buf += ref_stride;
}
@@ -1834,7 +1835,7 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
src_buf = x->plane[0].src.buf;
for (idx = 0; idx < bh; ++idx) {
- src_vbuf[idx] = vp9_int_pro_col(src_buf, bw);
+ src_vbuf[idx] = vp9_int_pro_col(src_buf, bw) >> norm_factor;
src_buf += src_stride;
}
diff --git a/vp9/encoder/x86/vp9_avg_intrin_sse2.c b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
index 618b5f73d..4e80b255e 100644
--- a/vp9/encoder/x86/vp9_avg_intrin_sse2.c
+++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
@@ -112,7 +112,6 @@ int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {
__m128i s0 = _mm_sad_epu8(src_line, zero);
__m128i s1;
int i;
- const int norm_factor = 3 + (width >> 5);
for (i = 16; i < width; i += 16) {
ref += 16;
@@ -124,7 +123,7 @@ int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {
s1 = _mm_srli_si128(s0, 8);
s0 = _mm_adds_epu16(s0, s1);
- return _mm_extract_epi16(s0, 0) >> norm_factor;
+ return _mm_extract_epi16(s0, 0);
}
int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src,