summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--test/vp9_intrapred_test.cc12
-rw-r--r--vpx_dsp/vpx_dsp_rtcd_defs.pl2
-rw-r--r--vpx_dsp/x86/highbd_intrapred_sse2.asm16
3 files changed, 13 insertions, 17 deletions
diff --git a/test/vp9_intrapred_test.cc b/test/vp9_intrapred_test.cc
index bc85daff8..70f51cdc6 100644
--- a/test/vp9_intrapred_test.cc
+++ b/test/vp9_intrapred_test.cc
@@ -141,7 +141,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
&vpx_highbd_tm_predictor_16x16_c, 16, 8),
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
&vpx_highbd_tm_predictor_32x32_c, 32, 8),
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 8),
@@ -162,7 +162,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
#else
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
::testing::Values(
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 8),
@@ -194,7 +194,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
&vpx_highbd_tm_predictor_32x32_c, 32,
10),
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 10),
@@ -218,7 +218,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
#else
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
::testing::Values(
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 10),
@@ -251,7 +251,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
&vpx_highbd_tm_predictor_32x32_c, 32,
12),
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 12),
@@ -275,7 +275,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
#else
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
::testing::Values(
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 12),
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 3f4fe1127..23065df79 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -294,7 +294,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse_x86inc";
add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse_x86inc";
+ specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_top_predictor_4x4/;
diff --git a/vpx_dsp/x86/highbd_intrapred_sse2.asm b/vpx_dsp/x86/highbd_intrapred_sse2.asm
index 9e6dcb50c..c8ed613b3 100644
--- a/vpx_dsp/x86/highbd_intrapred_sse2.asm
+++ b/vpx_dsp/x86/highbd_intrapred_sse2.asm
@@ -17,24 +17,20 @@ pw_16: times 4 dd 16
pw_32: times 4 dd 32
SECTION .text
-INIT_MMX sse
+INIT_XMM sse2
cglobal highbd_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset
GET_GOT goffsetq
movq m0, [aboveq]
movq m2, [leftq]
- DEFINE_ARGS dst, stride, one
- mov oned, 0x0001
- pxor m1, m1
- movd m3, oned
- pshufw m3, m3, 0x0
paddw m0, m2
- pmaddwd m0, m3
- packssdw m0, m1
- pmaddwd m0, m3
+ pshuflw m1, m0, 0xe
+ paddw m0, m1
+ pshuflw m1, m0, 0x1
+ paddw m0, m1
paddw m0, [GLOBAL(pw_4)]
psraw m0, 3
- pshufw m0, m0, 0x0
+ pshuflw m0, m0, 0x0
movq [dstq ], m0
movq [dstq+strideq*2], m0
lea dstq, [dstq+strideq*4]