summaryrefslogtreecommitdiff
path: root/vpx_dsp
diff options
context:
space:
mode:
Diffstat (limited to 'vpx_dsp')
-rw-r--r--vpx_dsp/arm/avg_neon.c1
-rw-r--r--vpx_dsp/arm/fwd_txfm_neon.c1
-rw-r--r--vpx_dsp/arm/hadamard_neon.c1
-rw-r--r--vpx_dsp/arm/idct16x16_add_neon.c1
-rw-r--r--vpx_dsp/arm/idct32x32_135_add_neon.c1
-rw-r--r--vpx_dsp/arm/idct32x32_34_add_neon.c1
-rw-r--r--vpx_dsp/arm/idct32x32_add_neon.c1
-rw-r--r--vpx_dsp/arm/idct4x4_1_add_neon.c1
-rw-r--r--vpx_dsp/arm/idct4x4_add_neon.c1
-rw-r--r--vpx_dsp/arm/idct8x8_add_neon.c1
-rw-r--r--vpx_dsp/arm/idct_neon.h52
-rw-r--r--vpx_dsp/arm/mem_neon.h71
-rw-r--r--vpx_dsp/vpx_dsp.mk1
13 files changed, 82 insertions, 52 deletions
diff --git a/vpx_dsp/arm/avg_neon.c b/vpx_dsp/arm/avg_neon.c
index cca9a9324..257e8ffee 100644
--- a/vpx_dsp/arm/avg_neon.c
+++ b/vpx_dsp/arm/avg_neon.c
@@ -16,6 +16,7 @@
#include "vpx/vpx_integer.h"
#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
static INLINE unsigned int horizontal_add_u16x8(const uint16x8_t v_16x8) {
const uint32x4_t a = vpaddlq_u16(v_16x8);
diff --git a/vpx_dsp/arm/fwd_txfm_neon.c b/vpx_dsp/arm/fwd_txfm_neon.c
index b26920504..c449b4660 100644
--- a/vpx_dsp/arm/fwd_txfm_neon.c
+++ b/vpx_dsp/arm/fwd_txfm_neon.c
@@ -14,6 +14,7 @@
#include "vpx_dsp/txfm_common.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
void vpx_fdct8x8_neon(const int16_t *input, tran_low_t *final_output,
int stride) {
diff --git a/vpx_dsp/arm/hadamard_neon.c b/vpx_dsp/arm/hadamard_neon.c
index ebeafed31..79bedd848 100644
--- a/vpx_dsp/arm/hadamard_neon.c
+++ b/vpx_dsp/arm/hadamard_neon.c
@@ -13,6 +13,7 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"
static void hadamard8x8_one_pass(int16x8_t *a0, int16x8_t *a1, int16x8_t *a2,
diff --git a/vpx_dsp/arm/idct16x16_add_neon.c b/vpx_dsp/arm/idct16x16_add_neon.c
index 828fb5f6c..5c5963d27 100644
--- a/vpx_dsp/arm/idct16x16_add_neon.c
+++ b/vpx_dsp/arm/idct16x16_add_neon.c
@@ -12,6 +12,7 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/txfm_common.h"
static INLINE void wrap_low_4x2(const int32x4_t *const t32, int16x4_t *const d0,
diff --git a/vpx_dsp/arm/idct32x32_135_add_neon.c b/vpx_dsp/arm/idct32x32_135_add_neon.c
index b39825991..021211bc9 100644
--- a/vpx_dsp/arm/idct32x32_135_add_neon.c
+++ b/vpx_dsp/arm/idct32x32_135_add_neon.c
@@ -13,6 +13,7 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"
#include "vpx_dsp/txfm_common.h"
diff --git a/vpx_dsp/arm/idct32x32_34_add_neon.c b/vpx_dsp/arm/idct32x32_34_add_neon.c
index fc0c4cd84..f3c336fa3 100644
--- a/vpx_dsp/arm/idct32x32_34_add_neon.c
+++ b/vpx_dsp/arm/idct32x32_34_add_neon.c
@@ -13,6 +13,7 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"
#include "vpx_dsp/txfm_common.h"
diff --git a/vpx_dsp/arm/idct32x32_add_neon.c b/vpx_dsp/arm/idct32x32_add_neon.c
index 91418c9e6..9f4589ea9 100644
--- a/vpx_dsp/arm/idct32x32_add_neon.c
+++ b/vpx_dsp/arm/idct32x32_add_neon.c
@@ -13,6 +13,7 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"
#include "vpx_dsp/txfm_common.h"
diff --git a/vpx_dsp/arm/idct4x4_1_add_neon.c b/vpx_dsp/arm/idct4x4_1_add_neon.c
index d1eae24a2..21d21b033 100644
--- a/vpx_dsp/arm/idct4x4_1_add_neon.c
+++ b/vpx_dsp/arm/idct4x4_1_add_neon.c
@@ -12,6 +12,7 @@
#include <assert.h>
#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/inv_txfm.h"
static INLINE void idct4x4_1_add_kernel(uint8_t **dest, const int stride,
diff --git a/vpx_dsp/arm/idct4x4_add_neon.c b/vpx_dsp/arm/idct4x4_add_neon.c
index bff98cbc1..e44ba6e75 100644
--- a/vpx_dsp/arm/idct4x4_add_neon.c
+++ b/vpx_dsp/arm/idct4x4_add_neon.c
@@ -13,6 +13,7 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/txfm_common.h"
void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest,
diff --git a/vpx_dsp/arm/idct8x8_add_neon.c b/vpx_dsp/arm/idct8x8_add_neon.c
index 279da67d7..1121ade27 100644
--- a/vpx_dsp/arm/idct8x8_add_neon.c
+++ b/vpx_dsp/arm/idct8x8_add_neon.c
@@ -13,6 +13,7 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"
#include "vpx_dsp/txfm_common.h"
diff --git a/vpx_dsp/arm/idct_neon.h b/vpx_dsp/arm/idct_neon.h
index 27c784edc..0fc1de8e4 100644
--- a/vpx_dsp/arm/idct_neon.h
+++ b/vpx_dsp/arm/idct_neon.h
@@ -41,58 +41,6 @@ DECLARE_ALIGNED(16, static const int32_t, kCospi32[16]) = {
};
//------------------------------------------------------------------------------
-// Helper functions used to load tran_low_t into int16, narrowing if necessary.
-
-static INLINE int16x8x2_t load_tran_low_to_s16x2q(const tran_low_t *buf) {
-#if CONFIG_VP9_HIGHBITDEPTH
- const int32x4x2_t v0 = vld2q_s32(buf);
- const int32x4x2_t v1 = vld2q_s32(buf + 8);
- const int16x4_t s0 = vmovn_s32(v0.val[0]);
- const int16x4_t s1 = vmovn_s32(v0.val[1]);
- const int16x4_t s2 = vmovn_s32(v1.val[0]);
- const int16x4_t s3 = vmovn_s32(v1.val[1]);
- int16x8x2_t res;
- res.val[0] = vcombine_s16(s0, s2);
- res.val[1] = vcombine_s16(s1, s3);
- return res;
-#else
- return vld2q_s16(buf);
-#endif
-}
-
-static INLINE int16x8_t load_tran_low_to_s16q(const tran_low_t *buf) {
-#if CONFIG_VP9_HIGHBITDEPTH
- const int32x4_t v0 = vld1q_s32(buf);
- const int32x4_t v1 = vld1q_s32(buf + 4);
- const int16x4_t s0 = vmovn_s32(v0);
- const int16x4_t s1 = vmovn_s32(v1);
- return vcombine_s16(s0, s1);
-#else
- return vld1q_s16(buf);
-#endif
-}
-
-static INLINE int16x4_t load_tran_low_to_s16d(const tran_low_t *buf) {
-#if CONFIG_VP9_HIGHBITDEPTH
- const int32x4_t v0 = vld1q_s32(buf);
- return vmovn_s32(v0);
-#else
- return vld1_s16(buf);
-#endif
-}
-
-static INLINE void store_s16q_to_tran_low(tran_low_t *buf, const int16x8_t a) {
-#if CONFIG_VP9_HIGHBITDEPTH
- const int32x4_t v0 = vmovl_s16(vget_low_s16(a));
- const int32x4_t v1 = vmovl_s16(vget_high_s16(a));
- vst1q_s32(buf, v0);
- vst1q_s32(buf + 4, v1);
-#else
- vst1q_s16(buf, a);
-#endif
-}
-
-//------------------------------------------------------------------------------
// Use saturating add/sub to avoid overflow in 2nd pass in high bit-depth
static INLINE int16x8_t final_add(const int16x8_t a, const int16x8_t b) {
#if CONFIG_VP9_HIGHBITDEPTH
diff --git a/vpx_dsp/arm/mem_neon.h b/vpx_dsp/arm/mem_neon.h
new file mode 100644
index 000000000..ef6e9decd
--- /dev/null
+++ b/vpx_dsp/arm/mem_neon.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_DSP_ARM_MEM_NEON_H_
+#define VPX_DSP_ARM_MEM_NEON_H_
+
+#include <arm_neon.h>
+#include <assert.h>
+#include <string.h>
+
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
+// Helper functions used to load tran_low_t into int16, narrowing if necessary.
+static INLINE int16x8x2_t load_tran_low_to_s16x2q(const tran_low_t *buf) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int32x4x2_t v0 = vld2q_s32(buf);
+ const int32x4x2_t v1 = vld2q_s32(buf + 8);
+ const int16x4_t s0 = vmovn_s32(v0.val[0]);
+ const int16x4_t s1 = vmovn_s32(v0.val[1]);
+ const int16x4_t s2 = vmovn_s32(v1.val[0]);
+ const int16x4_t s3 = vmovn_s32(v1.val[1]);
+ int16x8x2_t res;
+ res.val[0] = vcombine_s16(s0, s2);
+ res.val[1] = vcombine_s16(s1, s3);
+ return res;
+#else
+ return vld2q_s16(buf);
+#endif
+}
+
+static INLINE int16x8_t load_tran_low_to_s16q(const tran_low_t *buf) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int32x4_t v0 = vld1q_s32(buf);
+ const int32x4_t v1 = vld1q_s32(buf + 4);
+ const int16x4_t s0 = vmovn_s32(v0);
+ const int16x4_t s1 = vmovn_s32(v1);
+ return vcombine_s16(s0, s1);
+#else
+ return vld1q_s16(buf);
+#endif
+}
+
+static INLINE int16x4_t load_tran_low_to_s16d(const tran_low_t *buf) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int32x4_t v0 = vld1q_s32(buf);
+ return vmovn_s32(v0);
+#else
+ return vld1_s16(buf);
+#endif
+}
+
+static INLINE void store_s16q_to_tran_low(tran_low_t *buf, const int16x8_t a) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int32x4_t v0 = vmovl_s16(vget_low_s16(a));
+ const int32x4_t v1 = vmovl_s16(vget_high_s16(a));
+ vst1q_s32(buf, v0);
+ vst1q_s32(buf + 4, v1);
+#else
+ vst1q_s16(buf, a);
+#endif
+}
+#endif // VPX_DSP_ARM_MEM_NEON_H_
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk
index 8d1ecbe8c..afa14accc 100644
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -352,6 +352,7 @@ endif # CONFIG_VP9_HIGHBITDEPTH
endif # CONFIG_ENCODERS || CONFIG_POSTPROC || CONFIG_VP9_POSTPROC
# Neon utilities
+DSP_SRCS-$(HAVE_NEON) += arm/mem_neon.h
DSP_SRCS-$(HAVE_NEON) += arm/transpose_neon.h
# PPC VSX utilities