summaryrefslogtreecommitdiff
path: root/vp9
diff options
context:
space:
mode:
authorJames Zern <jzern@google.com>2015-06-18 22:24:54 +0000
committerGerrit Code Review <noreply-gerritcodereview@google.com>2015-06-18 22:24:55 +0000
commit41d8545ab645959466d4127c46780f28a88e0c73 (patch)
tree477693a73fcf74e2bc8f52e8c9d54f2dad3e2a87 /vp9
parente77f859d723d7c903a8cbcd312534036ae24b1b0 (diff)
parent6e44bf20f72aab0e3a930d25fd0745cfde9a149b (diff)
downloadlibvpx-41d8545ab645959466d4127c46780f28a88e0c73.tar
libvpx-41d8545ab645959466d4127c46780f28a88e0c73.tar.gz
libvpx-41d8545ab645959466d4127c46780f28a88e0c73.tar.bz2
libvpx-41d8545ab645959466d4127c46780f28a88e0c73.zip
Merge "vp9_reconintra_neon: add DC 4x4 predictors"
Diffstat (limited to 'vp9')
-rw-r--r--vp9/common/arm/neon/vp9_reconintra_neon.c69
-rw-r--r--vp9/common/vp9_rtcd_defs.pl8
2 files changed, 73 insertions, 4 deletions
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.c b/vp9/common/arm/neon/vp9_reconintra_neon.c
index a053e4f10..65a29366f 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon.c
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.c
@@ -15,6 +15,75 @@
#include "vpx/vpx_integer.h"
//------------------------------------------------------------------------------
+// DC 4x4
+
+// 'do_above' and 'do_left' facilitate branch removal when inlined.
+static INLINE void dc_4x4(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left,
+ int do_above, int do_left) {
+ uint16x8_t sum_top;
+ uint16x8_t sum_left;
+ uint8x8_t dc0;
+
+ if (do_above) {
+ const uint8x8_t A = vld1_u8(above); // top row
+ const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top
+ const uint16x4_t p1 = vpadd_u16(p0, p0);
+ sum_top = vcombine_u16(p1, p1);
+ }
+
+ if (do_left) {
+ const uint8x8_t L = vld1_u8(left); // left border
+ const uint16x4_t p0 = vpaddl_u8(L); // cascading summation of the left
+ const uint16x4_t p1 = vpadd_u16(p0, p0);
+ sum_left = vcombine_u16(p1, p1);
+ }
+
+ if (do_above && do_left) {
+ const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
+ dc0 = vrshrn_n_u16(sum, 3);
+ } else if (do_above) {
+ dc0 = vrshrn_n_u16(sum_top, 2);
+ } else if (do_left) {
+ dc0 = vrshrn_n_u16(sum_left, 2);
+ } else {
+ dc0 = vdup_n_u8(0x80);
+ }
+
+ {
+ const uint8x8_t dc = vdup_lane_u8(dc0, 0);
+ int i;
+ for (i = 0; i < 4; ++i) {
+ vst1_lane_u32((uint32_t*)(dst + i * stride), vreinterpret_u32_u8(dc), 0);
+ }
+ }
+}
+
+void vp9_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_4x4(dst, stride, above, left, 1, 1);
+}
+
+void vp9_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ dc_4x4(dst, stride, NULL, left, 0, 1);
+}
+
+void vp9_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+ dc_4x4(dst, stride, above, NULL, 1, 0);
+}
+
+void vp9_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+ dc_4x4(dst, stride, NULL, NULL, 0, 0);
+}
+
+//------------------------------------------------------------------------------
// DC 8x8
// 'do_above' and 'do_left' facilitate branch removal when inlined.
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index baa8982f4..ef1c496ad 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -84,16 +84,16 @@ add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, cons
specialize qw/vp9_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_predictor_4x4 dspr2 msa/, "$sse_x86inc";
+specialize qw/vp9_dc_predictor_4x4 dspr2 msa neon/, "$sse_x86inc";
add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_top_predictor_4x4 msa/, "$sse_x86inc";
+specialize qw/vp9_dc_top_predictor_4x4 msa neon/, "$sse_x86inc";
add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_left_predictor_4x4 msa/, "$sse_x86inc";
+specialize qw/vp9_dc_left_predictor_4x4 msa neon/, "$sse_x86inc";
add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_128_predictor_4x4 msa/, "$sse_x86inc";
+specialize qw/vp9_dc_128_predictor_4x4 msa neon/, "$sse_x86inc";
add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc";