summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author James Zern <jzern@google.com> 2015-12-22 16:45:01 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com> 2015-12-22 16:45:01 +0000
commit cedb1db5946cfa6baeb344355fa07bf58d2b6a2c (patch)
tree 833891eaa81acc13df9bc056b78fdec0a7140667
parent a097963f80d327a895796992a4782a1c02339dce (diff)
parent db1130750295a0b70375a84ba916e15b77d8db15 (diff)
download libvpx-cedb1db5946cfa6baeb344355fa07bf58d2b6a2c.tar
libvpx-cedb1db5946cfa6baeb344355fa07bf58d2b6a2c.tar.gz
libvpx-cedb1db5946cfa6baeb344355fa07bf58d2b6a2c.tar.bz2
libvpx-cedb1db5946cfa6baeb344355fa07bf58d2b6a2c.zip
Merge "Code clean of highbd_tm_predictor_4x4"
-rw-r--r-- test/vp9_intrapred_test.cc 12
-rw-r--r-- vpx_dsp/vpx_dsp_rtcd_defs.pl 2
-rw-r--r-- vpx_dsp/x86/highbd_intrapred_sse2.asm 53
3 files changed, 34 insertions, 33 deletions
diff --git a/test/vp9_intrapred_test.cc b/test/vp9_intrapred_test.cc
index 70f51cdc6..2bebdcbd9 100644
--- a/test/vp9_intrapred_test.cc
+++ b/test/vp9_intrapred_test.cc
@@ -155,7 +155,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
&vpx_highbd_v_predictor_16x16_c, 16, 8),
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32, 8),
- make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 8)));
@@ -176,7 +176,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
&vpx_highbd_v_predictor_16x16_c, 16, 8),
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32, 8),
- make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 8)));
@@ -211,7 +211,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32,
10),
- make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 10)));
@@ -233,7 +233,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
&vpx_highbd_v_predictor_16x16_c, 16, 10),
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32, 10),
- make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 10)));
@@ -268,7 +268,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32,
12),
- make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 12)));
@@ -290,7 +290,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
&vpx_highbd_v_predictor_16x16_c, 16, 12),
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32, 12),
- make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 12)));
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 23065df79..a2a067457 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -291,7 +291,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_v_predictor_4x4/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse_x86inc";
+ specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse2_x86inc";
diff --git a/vpx_dsp/x86/highbd_intrapred_sse2.asm b/vpx_dsp/x86/highbd_intrapred_sse2.asm
index c8ed613b3..233958a52 100644
--- a/vpx_dsp/x86/highbd_intrapred_sse2.asm
+++ b/vpx_dsp/x86/highbd_intrapred_sse2.asm
@@ -257,43 +257,44 @@ cglobal highbd_v_predictor_32x32, 3, 4, 4, dst, stride, above
jnz .loop
REP_RET
-INIT_MMX sse
-cglobal highbd_tm_predictor_4x4, 5, 6, 5, dst, stride, above, left, bps, one
+INIT_XMM sse2
+cglobal highbd_tm_predictor_4x4, 5, 5, 6, dst, stride, above, left, bps
movd m1, [aboveq-2]
movq m0, [aboveq]
- pshufw m1, m1, 0x0
+ pshuflw m1, m1, 0x0
+ movlhps m0, m0 ; t1 t2 t3 t4 t1 t2 t3 t4
+ movlhps m1, m1 ; tl tl tl tl tl tl tl tl
; Get the values to compute the maximum value at this bit depth
- mov oned, 1
- movd m3, oned
+ pcmpeqw m3, m3
movd m4, bpsd
- pshufw m3, m3, 0x0
- DEFINE_ARGS dst, stride, line, left
- mov lineq, -2
- mova m2, m3
+ psubw m0, m1 ; t1-tl t2-tl t3-tl t4-tl
psllw m3, m4
- add leftq, 8
- psubw m3, m2 ; max possible value
- pxor m4, m4 ; min possible value
- psubw m0, m1
-.loop:
- movq m1, [leftq+lineq*4]
- movq m2, [leftq+lineq*4+2]
- pshufw m1, m1, 0x0
- pshufw m2, m2, 0x0
- paddw m1, m0
+ pcmpeqw m2, m2
+ pxor m4, m4 ; min possible value
+ pxor m3, m2 ; max possible value
+ mova m1, [leftq]
+ pshuflw m2, m1, 0x0
+ pshuflw m5, m1, 0x55
+ movlhps m2, m5 ; l1 l1 l1 l1 l2 l2 l2 l2
paddw m2, m0
;Clamp to the bit-depth
- pminsw m1, m3
pminsw m2, m3
- pmaxsw m1, m4
pmaxsw m2, m4
;Store the values
- movq [dstq ], m1
- movq [dstq+strideq*2], m2
+ movq [dstq ], m2
+ movhpd [dstq+strideq*2], m2
lea dstq, [dstq+strideq*4]
- inc lineq
- jnz .loop
- REP_RET
+ pshuflw m2, m1, 0xaa
+ pshuflw m5, m1, 0xff
+ movlhps m2, m5
+ paddw m2, m0
+ ;Clamp to the bit-depth
+ pminsw m2, m3
+ pmaxsw m2, m4
+ ;Store the values
+ movq [dstq ], m2
+ movhpd [dstq+strideq*2], m2
+ RET
INIT_XMM sse2
cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one