summary | refs | log | tree | commit | diff
path: root/vp9
diff options
context:
space:
mode:
author: Johann <johannkoenig@google.com> 2017-02-16 12:44:49 -0800
committer: Johann Koenig <johannkoenig@google.com> 2017-02-24 01:33:35 +0000
commit: 3c16bbb73bab1ee3785f9de70d71795da489a2c4 (patch)
tree: 859ad5bf04db3687b807311e5f8febe768af6d8c /vp9
parent: f62dcc9c334eb5060293cbf0cfd7de82bb4ea78c (diff)
download: libvpx-3c16bbb73bab1ee3785f9de70d71795da489a2c4.tar
libvpx-3c16bbb73bab1ee3785f9de70d71795da489a2c4.tar.gz
libvpx-3c16bbb73bab1ee3785f9de70d71795da489a2c4.tar.bz2
libvpx-3c16bbb73bab1ee3785f9de70d71795da489a2c4.zip
block error sse2: use tran_low_t
Change-Id: Ib04990e4a7bda9fbf501f294da2057a2b2595deb
Diffstat (limited to 'vp9')
-rw-r--r-- vp9/common/vp9_rtcd_defs.pl 2
-rw-r--r-- vp9/encoder/x86/vp9_error_sse2.asm 20
2 files changed, 9 insertions, 13 deletions
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index f0ab4f69c..c89750425 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -125,7 +125,7 @@ if (vpx_config("CONFIG_VP9_TEMPORAL_DENOISING") eq "yes") {
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
- specialize qw/vp9_block_error avx2/;
+ specialize qw/vp9_block_error avx2 sse2/;
add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
specialize qw/vp9_highbd_block_error sse2/;
diff --git a/vp9/encoder/x86/vp9_error_sse2.asm b/vp9/encoder/x86/vp9_error_sse2.asm
index 5186d3087..0a472ec74 100644
--- a/vp9/encoder/x86/vp9_error_sse2.asm
+++ b/vp9/encoder/x86/vp9_error_sse2.asm
@@ -15,8 +15,6 @@
SECTION .text
-%if CONFIG_VP9_HIGHBITDEPTH
-%else
; int64_t vp9_block_error(int16_t *coeff, int16_t *dqcoeff, intptr_t block_size,
; int64_t *ssz)
@@ -25,14 +23,14 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
pxor m4, m4 ; sse accumulator
pxor m6, m6 ; ssz accumulator
pxor m5, m5 ; dedicated zero register
- lea uqcq, [uqcq+sizeq*2]
- lea dqcq, [dqcq+sizeq*2]
- neg sizeq
.loop:
- mova m2, [uqcq+sizeq*2]
- mova m0, [dqcq+sizeq*2]
- mova m3, [uqcq+sizeq*2+mmsize]
- mova m1, [dqcq+sizeq*2+mmsize]
+ LOAD_TRAN_LOW 2, uqcq, 0
+ LOAD_TRAN_LOW 0, dqcq, 0
+ LOAD_TRAN_LOW 3, uqcq, 8
+ LOAD_TRAN_LOW 1, dqcq, 8
+ INCREMENT_ELEMENTS_TRAN_LOW uqcq, 16
+ INCREMENT_ELEMENTS_TRAN_LOW dqcq, 16
+ sub sizeq, 16
psubw m0, m2
psubw m1, m3
; individual errors are max. 15bit+sign, so squares are 30bit, and
@@ -58,8 +56,7 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
punpckhdq m3, m5
paddq m6, m7
paddq m6, m3
- add sizeq, mmsize
- jl .loop
+ jg .loop
; accumulate horizontally and store in return value
movhlps m5, m4
@@ -77,7 +74,6 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
movd edx, m5
%endif
RET
-%endif ; CONFIG_VP9_HIGHBITDEPTH
; Compute the sum of squared difference between two tran_low_t vectors.
; Vectors are converted (if necessary) to int16_t for calculations.