author    Yunqing Wang <yunqingwang@google.com>  2015-02-04 12:02:06 -0800
committer Yunqing Wang <yunqingwang@google.com>  2015-02-05 11:24:03 -0800
commit    789ae447f8d00bb69f5e58ad314b9686c53f4f6d (patch)
tree      17fb5486214205319c4a06050b6343ed7eb28a44 /vp9
parent    b3b7645a2f8571d3c4ca4ef8168ff1704c5cd5b0 (diff)
Fix high bit depth assembly function bugs
The high bit depth build failed when building for a 32-bit target. The bugs were in the vp9_highbd_subpel_variance.asm and vp9_highbd_sad4d_sse2.asm functions. This patch fixes the bugs and makes the 32-bit build work.

Change-Id: Idc8e5e1b7965bb70d4afba140c6583c5d9666b75
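Background on the vp9_highbd_sad4d_sse2.asm fix: the non-UNIX64 cglobal declaration requested 8 general-purpose registers so a dedicated "one" register could hold a packed constant, but x86inc exposes at most seven usable GPRs on a 32-bit target, so the declaration could not assemble. The patch drops to 7 registers and instead borrows srcq around a push/pop pair to build the constant. A minimal standalone sketch of that trick, assuming 32-bit NASM with SSE2 (the set_packed_ones label is hypothetical, not part of libvpx):

    SECTION .text
    global set_packed_ones          ; hypothetical helper for illustration
    set_packed_ones:
        push   eax                  ; borrow a live register
        mov    eax, 0x00010001      ; two 16-bit "1" lanes in one dword
        movd   xmm1, eax            ; low dword of xmm1 = 0x00010001
        pshufd xmm1, xmm1, 0x0      ; broadcast that dword to all 4 lanes
        pop    eax                  ; restore the borrowed register
        ret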
Diffstat (limited to 'vp9')
-rw-r--r--  vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm      | 19
-rw-r--r--  vp9/encoder/x86/vp9_highbd_subpel_variance.asm | 66
2 files changed, 50 insertions(+), 35 deletions(-)
diff --git a/vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm b/vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm
index 986efb11f..f79a59f02 100644
--- a/vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm
+++ b/vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm
@@ -215,13 +215,20 @@ SECTION .text
; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8
%macro HIGH_SADNXN4D 2
%if UNIX64
-cglobal highbd_sad%1x%2x4d, 5, 9, 8, src, src_stride, ref1, ref_stride, \
- res, ref2, ref3, ref4, one
+cglobal highbd_sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
+ res, ref2, ref3, ref4
%else
-cglobal highbd_sad%1x%2x4d, 4, 8, 8, src, src_stride, ref1, ref_stride, \
- ref2, ref3, ref4, one
+cglobal highbd_sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
+ ref2, ref3, ref4
%endif
+; set m1
+ push srcq
+ mov srcd, 0x00010001
+ movd m1, srcd
+ pshufd m1, m1, 0x0
+ pop srcq
+
movsxdifnidn src_strideq, src_strided
movsxdifnidn ref_strideq, ref_strided
mov ref2q, [ref1q+gprsize*1]
@@ -236,10 +243,6 @@ cglobal highbd_sad%1x%2x4d, 4, 8, 8, src, src_stride, ref1, ref_stride, \
shl ref4q, 1
shl ref1q, 1
- mov oned, 0x00010001
- movd m1, oned
- pshufd m1, m1, 0x0
-
HIGH_PROCESS_%1x2x4 1, 0, 0, src_strideq, ref_strideq, 1
%rep (%2-4)/2
HIGH_PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 1
diff --git a/vp9/encoder/x86/vp9_highbd_subpel_variance.asm b/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
index aebe63b74..987729f96 100644
--- a/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
+++ b/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
@@ -199,6 +199,9 @@ SECTION .text
%if %1 < 16
sar h, 1
%endif
+%if %2 == 1 ; avg
+ shl sec_str, 1
+%endif
; FIXME(rbultje) replace by jumptable?
test x_offsetd, x_offsetd
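Every remaining hunk in this file applies one pattern. sec_str is a named argument, and on a 32-bit target x86inc can map it to a stack slot rather than a register; a memory operand is not legal as the scaled index of lea (or inside the pavgw addresses below), which is what broke the build. Doubling sec_str once at entry, via the shl above, turns an element count into a byte pitch for 16-bit pixels and lets every advance become a plain add, which does accept a memory source. A hedged sketch of the resulting stepping pattern (hypothetical loop; the m0/m2/secq/sec_str/h names follow this file's conventions):

    .next_rows:
        pavgw  m0, [secq]       ; first second-pred row
        add    secq, sec_str    ; +1 row; legal even if sec_str is a stack slot
        pavgw  m2, [secq]       ; second row
        add    secq, sec_str    ; +1 row; two rows total per iteration
        dec    h
        jg     .next_rows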
@@ -223,7 +226,7 @@ SECTION .text
lea srcq, [srcq + src_strideq*2]
lea dstq, [dstq + dst_strideq*2]
%if %2 == 1 ; avg
- lea secq, [secq + sec_str*2]
+ add secq, sec_str
%endif
%else ; %1 < 16
movu m0, [srcq]
@@ -232,14 +235,15 @@ SECTION .text
mova m3, [dstq + dst_strideq*2]
%if %2 == 1 ; avg
pavgw m0, [secq]
- pavgw m2, [secq + sec_str*2]
+ add secq, sec_str
+ pavgw m2, [secq]
%endif
SUM_SSE m0, m1, m2, m3, m6, m7
lea srcq, [srcq + src_strideq*4]
lea dstq, [dstq + dst_strideq*4]
%if %2 == 1 ; avg
- lea secq, [secq + sec_str*4]
+ add secq, sec_str
%endif
%endif
dec h
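A quick bookkeeping check that the split advance preserves the old per-iteration displacement in this %1 < 16 path (comment-only sketch, units as assumed above):

    ; before: pavgw m2, [secq + sec_str*2]   ; sec_str counted elements
    ;         lea   secq, [secq + sec_str*4] ; advance = 4*sec_str bytes
    ; after:  shl sec_str, 1 once at entry   ; sec_str now counts bytes per row
    ;         add secq, sec_str  (mid-block) ; +2*orig bytes
    ;         add secq, sec_str  (block end) ; +2*orig bytes, net 4*orig as before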
@@ -270,7 +274,7 @@ SECTION .text
lea srcq, [srcq + src_strideq*2]
lea dstq, [dstq + dst_strideq*2]
%if %2 == 1 ; avg
- lea secq, [secq + sec_str*2]
+ add secq, sec_str
%endif
%else ; %1 < 16
movu m0, [srcq]
@@ -282,14 +286,15 @@ SECTION .text
pavgw m1, m5
%if %2 == 1 ; avg
pavgw m0, [secq]
- pavgw m1, [secq+sec_str*2]
+ add secq, sec_str
+ pavgw m1, [secq]
%endif
SUM_SSE m0, m2, m1, m3, m6, m7
lea srcq, [srcq + src_strideq*4]
lea dstq, [dstq + dst_strideq*4]
%if %2 == 1 ; avg
- lea secq, [secq + sec_str*4]
+ add secq, sec_str
%endif
%endif
dec h
@@ -358,7 +363,7 @@ SECTION .text
lea srcq, [srcq + src_strideq*2]
lea dstq, [dstq + dst_strideq*2]
%if %2 == 1 ; avg
- lea secq, [secq + sec_str*2]
+ add secq, sec_str
%endif
%else ; %1 < 16
movu m0, [srcq]
@@ -379,14 +384,15 @@ SECTION .text
psrlw m0, 4
%if %2 == 1 ; avg
pavgw m0, [secq]
- pavgw m1, [secq+sec_str*2]
+ add secq, sec_str
+ pavgw m1, [secq]
%endif
SUM_SSE m0, m2, m1, m3, m6, m7
lea srcq, [srcq + src_strideq*4]
lea dstq, [dstq + dst_strideq*4]
%if %2 == 1 ; avg
- lea secq, [secq + sec_str*4]
+ add secq, sec_str
%endif
%endif
dec h
@@ -423,7 +429,7 @@ SECTION .text
lea srcq, [srcq + src_strideq*2]
lea dstq, [dstq + dst_strideq*2]
%if %2 == 1 ; avg
- lea secq, [secq + sec_str*2]
+ add secq, sec_str
%endif
%else ; %1 < 16
movu m0, [srcq]
@@ -436,14 +442,15 @@ SECTION .text
pavgw m1, m5
%if %2 == 1 ; avg
pavgw m0, [secq]
- pavgw m1, [secq+sec_str*2]
+ add secq, sec_str
+ pavgw m1, [secq]
%endif
SUM_SSE m0, m2, m1, m3, m6, m7
lea srcq, [srcq + src_strideq*4]
lea dstq, [dstq + dst_strideq*4]
%if %2 == 1 ; avg
- lea secq, [secq + sec_str*4]
+ add secq, sec_str
%endif
%endif
dec h
@@ -485,7 +492,7 @@ SECTION .text
lea srcq, [srcq + src_strideq*2]
lea dstq, [dstq + dst_strideq*2]
%if %2 == 1 ; avg
- lea secq, [secq + sec_str*2]
+ add secq, sec_str
%endif
%else ; %1 < 16
movu m0, [srcq]
@@ -505,7 +512,8 @@ SECTION .text
mova m5, [dstq + dst_strideq*2]
%if %2 == 1 ; avg
pavgw m0, [secq]
- pavgw m2, [secq+sec_str*2]
+ add secq, sec_str
+ pavgw m2, [secq]
%endif
SUM_SSE m0, m4, m2, m5, m6, m7
mova m0, m3
@@ -513,7 +521,7 @@ SECTION .text
lea srcq, [srcq + src_strideq*4]
lea dstq, [dstq + dst_strideq*4]
%if %2 == 1 ; avg
- lea secq, [secq + sec_str*4]
+ add secq, sec_str
%endif
%endif
dec h
@@ -590,7 +598,7 @@ SECTION .text
lea srcq, [srcq + src_strideq*2]
lea dstq, [dstq + dst_strideq*2]
%if %2 == 1 ; avg
- lea secq, [secq + sec_str*2]
+ add secq, sec_str
%endif
%else ; %1 < 16
movu m0, [srcq]
@@ -620,7 +628,8 @@ SECTION .text
mova m3, [dstq+dst_strideq*2]
%if %2 == 1 ; avg
pavgw m0, [secq]
- pavgw m4, [secq+sec_str*2]
+ add secq, sec_str
+ pavgw m4, [secq]
%endif
SUM_SSE m0, m2, m4, m3, m6, m7
mova m0, m5
@@ -628,7 +637,7 @@ SECTION .text
lea srcq, [srcq + src_strideq*4]
lea dstq, [dstq + dst_strideq*4]
%if %2 == 1 ; avg
- lea secq, [secq + sec_str*4]
+ add secq, sec_str
%endif
%endif
dec h
@@ -698,7 +707,7 @@ SECTION .text
lea srcq, [srcq+src_strideq*2]
lea dstq, [dstq+dst_strideq*2]
%if %2 == 1 ; avg
- lea secq, [secq + sec_str*2]
+ add secq, sec_str
%endif
%else ; %1 < 16
movu m0, [srcq]
@@ -719,14 +728,15 @@ SECTION .text
psrlw m0, 4
%if %2 == 1 ; avg
pavgw m0, [secq]
- pavgw m1, [secq+sec_str*2]
+ add secq, sec_str
+ pavgw m1, [secq]
%endif
SUM_SSE m0, m4, m1, m5, m6, m7
lea srcq, [srcq+src_strideq*4]
lea dstq, [dstq+dst_strideq*4]
%if %2 == 1 ; avg
- lea secq, [secq + sec_str*4]
+ add secq, sec_str
%endif
%endif
dec h
@@ -815,7 +825,7 @@ SECTION .text
lea srcq, [srcq+src_strideq*2]
lea dstq, [dstq+dst_strideq*2]
%if %2 == 1 ; avg
- lea secq, [secq + sec_str*2]
+ add secq, sec_str
%endif
%else ; %1 < 16
movu m0, [srcq]
@@ -847,7 +857,8 @@ SECTION .text
pavgw m2, m3
%if %2 == 1 ; avg
pavgw m0, [secq]
- pavgw m2, [secq+sec_str*2]
+ add secq, sec_str
+ pavgw m2, [secq]
%endif
SUM_SSE m0, m4, m2, m5, m6, m7
mova m0, m3
@@ -855,7 +866,7 @@ SECTION .text
lea srcq, [srcq+src_strideq*4]
lea dstq, [dstq+dst_strideq*4]
%if %2 == 1 ; avg
- lea secq, [secq + sec_str*4]
+ add secq, sec_str
%endif
%endif
dec h
@@ -969,7 +980,7 @@ SECTION .text
INC_SRC_BY_SRC_STRIDE
lea dstq, [dstq + dst_strideq * 2]
%if %2 == 1 ; avg
- lea secq, [secq + sec_str*2]
+ add secq, sec_str
%endif
%else ; %1 < 16
movu m0, [srcq]
@@ -1013,7 +1024,8 @@ SECTION .text
mova m3, [dstq+dst_strideq*2]
%if %2 == 1 ; avg
pavgw m0, [secq]
- pavgw m4, [secq+sec_str*2]
+ add secq, sec_str
+ pavgw m4, [secq]
%endif
SUM_SSE m0, m2, m4, m3, m6, m7
mova m0, m5
@@ -1021,7 +1033,7 @@ SECTION .text
INC_SRC_BY_SRC_2STRIDE
lea dstq, [dstq + dst_strideq * 4]
%if %2 == 1 ; avg
- lea secq, [secq + sec_str*4]
+ add secq, sec_str
%endif
%endif
dec h