summaryrefslogtreecommitdiff
path: root/vp9
diff options
context:
space:
mode:
authorFrank Galligan <fgalligan@google.com>2015-01-13 11:15:24 -0800
committerFrank Galligan <fgalligan@google.com>2015-01-13 15:08:13 -0800
commit74d40cd507594fe775644d52a8f03b23f3ddf8c9 (patch)
tree575424bd9ceb276339dc717185d33b4427e4b2a8 /vp9
parent0337bae9b3cdb2869073911b0efc2b4f3cfcf595 (diff)
downloadlibvpx-74d40cd507594fe775644d52a8f03b23f3ddf8c9.tar
libvpx-74d40cd507594fe775644d52a8f03b23f3ddf8c9.tar.gz
libvpx-74d40cd507594fe775644d52a8f03b23f3ddf8c9.tar.bz2
libvpx-74d40cd507594fe775644d52a8f03b23f3ddf8c9.zip
Add 64x variance Neon functions
Add optimized Neon functions of: vp9_variance32x64 vp9_variance64x32 vp9_variance64x64 On Nexus 7 speed -5 and -6 saw about a 4% increase in perf. Speeds -7 and -8 saw about a 6% increase in perf. Tested on Nexus 7, built with ndk r10d, gcc 4.9. Change-Id: I5a81f13c9897eb927fa39662530f5524a0f768fa
Diffstat (limited to 'vp9')
-rw-r--r--vp9/common/vp9_rtcd_defs.pl6
-rw-r--r--vp9/encoder/arm/neon/vp9_variance_neon.c27
2 files changed, 30 insertions, 3 deletions
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 88f85a86d..b59e6ebe7 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -798,16 +798,16 @@ add_proto qw/unsigned int vp9_variance16x32/, "const uint8_t *src_ptr, int sourc
specialize qw/vp9_variance16x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance64x32 avx2/, "$sse2_x86inc";
+specialize qw/vp9_variance64x32 avx2 neon/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance32x64/, "$sse2_x86inc";
+specialize qw/vp9_variance32x64 neon/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance32x32 avx2 neon/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance64x64 avx2/, "$sse2_x86inc";
+specialize qw/vp9_variance64x64 avx2 neon/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance16x16 avx2 neon/, "$sse2_x86inc";
diff --git a/vp9/encoder/arm/neon/vp9_variance_neon.c b/vp9/encoder/arm/neon/vp9_variance_neon.c
index 816fbda1f..567b7deb1 100644
--- a/vp9/encoder/arm/neon/vp9_variance_neon.c
+++ b/vp9/encoder/arm/neon/vp9_variance_neon.c
@@ -10,6 +10,7 @@
#include <arm_neon.h>
#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
@@ -28,6 +29,8 @@ enum { kHeight16PlusOne = 17 };
enum { kWidth32 = 32 };
enum { kHeight32 = 32 };
enum { kHeight32PlusOne = 33 };
+enum { kWidth64 = 64 };
+enum { kHeight64 = 64 };
enum { kPixelStepOne = 1 };
enum { kAlign16 = 16 };
@@ -208,6 +211,30 @@ unsigned int vp9_variance32x32_neon(const uint8_t *a, int a_stride,
return *sse - (((int64_t)sum * sum) / (kWidth32 * kHeight32));
}
+unsigned int vp9_variance32x64_neon(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_neon_w8(a, a_stride, b, b_stride, kWidth32, kHeight64, sse, &sum);
+ return *sse - (((int64_t)sum * sum) >> 11); // >> 11 = / 32 * 64
+}
+
+unsigned int vp9_variance64x32_neon(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_neon_w8(a, a_stride, b, b_stride, kWidth64, kHeight32, sse, &sum);
+ return *sse - (((int64_t)sum * sum) >> 11); // >> 11 = / 64 * 32
+}
+
+unsigned int vp9_variance64x64_neon(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_neon_w8(a, a_stride, b, b_stride, kWidth64, kHeight64, sse, &sum);
+ return *sse - (((int64_t)sum * sum) >> 12); // >> 12 = / 64 * 64
+}
+
unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src,
int src_stride,
int xoffset,