summaryrefslogtreecommitdiff
path: root/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'vp9')
-rw-r--r--vp9/encoder/vp9_variance.c3
-rw-r--r--vp9/encoder/x86/vp9_variance_impl_sse2.asm69
-rw-r--r--vp9/encoder/x86/vp9_variance_sse2.c15
-rw-r--r--vp9/vp9cx.mk1
4 files changed, 17 insertions, 71 deletions
diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c
index eb5ae2e41..afbb191ad 100644
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -103,8 +103,9 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
unsigned int i, sum = 0;
- for (i = 0; i < 256; i++)
+ for (i = 0; i < 256; ++i) {
sum += src_ptr[i] * src_ptr[i];
+ }
return sum;
}
diff --git a/vp9/encoder/x86/vp9_variance_impl_sse2.asm b/vp9/encoder/x86/vp9_variance_impl_sse2.asm
deleted file mode 100644
index 6278f2a78..000000000
--- a/vp9/encoder/x86/vp9_variance_impl_sse2.asm
+++ /dev/null
@@ -1,69 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-;unsigned int vp9_get_mb_ss_sse2
-;(
-; short *src_ptr
-;)
-global sym(vp9_get_mb_ss_sse2) PRIVATE
-sym(vp9_get_mb_ss_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 1
- GET_GOT rbx
- push rsi
- push rdi
- sub rsp, 16
- ; end prolog
-
-
- mov rax, arg(0) ;[src_ptr]
- mov rcx, 8
- pxor xmm4, xmm4
-
-.NEXTROW:
- movdqa xmm0, [rax]
- movdqa xmm1, [rax+16]
- movdqa xmm2, [rax+32]
- movdqa xmm3, [rax+48]
- pmaddwd xmm0, xmm0
- pmaddwd xmm1, xmm1
- pmaddwd xmm2, xmm2
- pmaddwd xmm3, xmm3
-
- paddd xmm0, xmm1
- paddd xmm2, xmm3
- paddd xmm4, xmm0
- paddd xmm4, xmm2
-
- add rax, 0x40
- dec rcx
- ja .NEXTROW
-
- movdqa xmm3,xmm4
- psrldq xmm4,8
- paddd xmm4,xmm3
- movdqa xmm3,xmm4
- psrldq xmm4,4
- paddd xmm4,xmm3
- movq rax,xmm4
-
-
- ; begin epilog
- add rsp, 16
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c
index 1cc4bbca6..b4d2b0ac4 100644
--- a/vp9/encoder/x86/vp9_variance_sse2.c
+++ b/vp9/encoder/x86/vp9_variance_sse2.c
@@ -19,6 +19,21 @@ typedef unsigned int (*variance_fn_t) (const unsigned char *src, int src_stride,
const unsigned char *ref, int ref_stride,
unsigned int *sse, int *sum);
+unsigned int vp9_get_mb_ss_sse2(const int16_t *src) {
+ __m128i vsum = _mm_setzero_si128();
+ int i;
+
+ for (i = 0; i < 32; ++i) {
+ const __m128i v = _mm_loadu_si128((const __m128i *)src);
+ vsum = _mm_add_epi32(vsum, _mm_madd_epi16(v, v));
+ src += 8;
+ }
+
+ vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8));
+ vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4));
+ return _mm_cvtsi128_si32(vsum);
+}
+
#define READ64(p, stride, i) \
_mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const uint32_t *)(p + i * stride)), \
_mm_cvtsi32_si128(*(const uint32_t *)(p + (i + 1) * stride)))
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 312f71700..e450f7b7f 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -93,7 +93,6 @@ VP9_CX_SRCS-yes += encoder/vp9_temporal_filter.h
VP9_CX_SRCS-yes += encoder/vp9_mbgraph.c
VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_sad4d_intrin_avx2.c