summaryrefslogtreecommitdiff
path: root/vpx_dsp
diff options
context:
space:
mode:
authorJames Zern <jzern@google.com>2016-01-25 20:57:15 +0000
committerGerrit Code Review <noreply-gerritcodereview@google.com>2016-01-25 20:57:15 +0000
commit3a2ad10de2d67a663d23476392a7bc648cb2e8e7 (patch)
treeff9e0277cef9aae34b6070f22d386455f6b0068a /vpx_dsp
parent9e612763f0eeab6c231afe6a2821ebed529715c9 (diff)
parent789dbb3131380f95fc1507de61fe180a12db44f3 (diff)
downloadlibvpx-3a2ad10de2d67a663d23476392a7bc648cb2e8e7.tar
libvpx-3a2ad10de2d67a663d23476392a7bc648cb2e8e7.tar.gz
libvpx-3a2ad10de2d67a663d23476392a7bc648cb2e8e7.tar.bz2
libvpx-3a2ad10de2d67a663d23476392a7bc648cb2e8e7.zip
Merge "Code clean of sad4xNx4D_sse"
Diffstat (limited to 'vpx_dsp')
-rw-r--r--vpx_dsp/vpx_dsp_rtcd_defs.pl4
-rw-r--r--vpx_dsp/x86/sad4d_sse2.asm56
2 files changed, 34 insertions, 26 deletions
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 6c6f15e51..ff994e30e 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1156,10 +1156,10 @@ add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const
specialize qw/vpx_sad8x4x4d msa/, "$sse2_x86inc";
add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad4x8x4d msa/, "$sse_x86inc";
+specialize qw/vpx_sad4x8x4d msa/, "$sse2_x86inc";
add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad4x4x4d msa/, "$sse_x86inc";
+specialize qw/vpx_sad4x4x4d msa/, "$sse2_x86inc";
#
# Structured Similarity (SSIM)
diff --git a/vpx_dsp/x86/sad4d_sse2.asm b/vpx_dsp/x86/sad4d_sse2.asm
index a2f0ae79e..3f6e55ce9 100644
--- a/vpx_dsp/x86/sad4d_sse2.asm
+++ b/vpx_dsp/x86/sad4d_sse2.asm
@@ -20,33 +20,41 @@ SECTION .text
movd m4, [ref2q+%3]
movd m7, [ref3q+%3]
movd m5, [ref4q+%3]
- punpckldq m0, [srcq +%4]
- punpckldq m6, [ref1q+%5]
- punpckldq m4, [ref2q+%5]
- punpckldq m7, [ref3q+%5]
- punpckldq m5, [ref4q+%5]
+ movd m1, [srcq +%4]
+ movd m2, [ref1q+%5]
+ punpckldq m0, m1
+ punpckldq m6, m2
+ movd m1, [ref2q+%5]
+ movd m2, [ref3q+%5]
+ movd m3, [ref4q+%5]
+ punpckldq m4, m1
+ punpckldq m7, m2
+ punpckldq m5, m3
+ movlhps m0, m0
+ movlhps m6, m4
+ movlhps m7, m5
psadbw m6, m0
- psadbw m4, m0
psadbw m7, m0
- psadbw m5, m0
- punpckldq m6, m4
- punpckldq m7, m5
%else
movd m1, [ref1q+%3]
+ movd m5, [ref1q+%5]
movd m2, [ref2q+%3]
+ movd m4, [ref2q+%5]
+ punpckldq m1, m5
+ punpckldq m2, m4
movd m3, [ref3q+%3]
+ movd m5, [ref3q+%5]
+ punpckldq m3, m5
movd m4, [ref4q+%3]
- punpckldq m0, [srcq +%4]
- punpckldq m1, [ref1q+%5]
- punpckldq m2, [ref2q+%5]
- punpckldq m3, [ref3q+%5]
- punpckldq m4, [ref4q+%5]
+ movd m5, [ref4q+%5]
+ punpckldq m4, m5
+ movd m5, [srcq +%4]
+ punpckldq m0, m5
+ movlhps m0, m0
+ movlhps m1, m2
+ movlhps m3, m4
psadbw m1, m0
- psadbw m2, m0
psadbw m3, m0
- psadbw m4, m0
- punpckldq m1, m2
- punpckldq m3, m4
paddd m6, m1
paddd m7, m3
%endif
@@ -170,7 +178,7 @@ SECTION .text
; void vpx_sadNxNx4d_sse2(uint8_t *src, int src_stride,
; uint8_t *ref[4], int ref_stride,
; uint32_t res[4]);
-; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8
+; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16, 8x8, 8x4, 4x8 and 4x4
%macro SADNXN4D 2
%if UNIX64
cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
@@ -192,7 +200,7 @@ cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
%endrep
PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0
-%if mmsize == 16
+%if %1 > 4
pslldq m5, 4
pslldq m7, 4
por m4, m5
@@ -207,8 +215,10 @@ cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
RET
%else
movifnidn r4, r4mp
- movq [r4+0], m6
- movq [r4+8], m7
+ pshufd m6, m6, 0x08
+ pshufd m7, m7, 0x08
+ movq [r4+0], m6
+ movq [r4+8], m7
RET
%endif
%endmacro
@@ -225,7 +235,5 @@ SADNXN4D 16, 8
SADNXN4D 8, 16
SADNXN4D 8, 8
SADNXN4D 8, 4
-
-INIT_MMX sse
SADNXN4D 4, 8
SADNXN4D 4, 4