diff options
author | Johann <johannkoenig@google.com> | 2017-05-12 18:11:31 -0700 |
---|---|---|
committer | James Zern <jzern@google.com> | 2017-05-15 08:29:43 -0700 |
commit | 1088b4f87ca5cd8e2b3242060cade0d8bbbcef7a (patch) | |
tree | 0a8c338b5fb646a9e8f81a2ed0fbc93570bbd0fe /vpx_dsp/arm/mem_neon.h | |
parent | 0d88e15454b632d92404dd6a7181c58d9985e2a2 (diff) | |
download | libvpx-1088b4f87ca5cd8e2b3242060cade0d8bbbcef7a.tar libvpx-1088b4f87ca5cd8e2b3242060cade0d8bbbcef7a.tar.gz libvpx-1088b4f87ca5cd8e2b3242060cade0d8bbbcef7a.tar.bz2 libvpx-1088b4f87ca5cd8e2b3242060cade0d8bbbcef7a.zip |
move neon load/stores to a new file
Move the tran_low_t helper functions to a new file. Additional
load/store functions will be added here.
Change-Id: I52bf652c344c585ea2f3e1230886be93f5caefc3
Diffstat (limited to 'vpx_dsp/arm/mem_neon.h')
-rw-r--r-- | vpx_dsp/arm/mem_neon.h | 71 |
1 files changed, 71 insertions, 0 deletions
diff --git a/vpx_dsp/arm/mem_neon.h b/vpx_dsp/arm/mem_neon.h
new file mode 100644
index 000000000..ef6e9decd
--- /dev/null
+++ b/vpx_dsp/arm/mem_neon.h
@@ -0,0 +1,71 @@
+/*
+ *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_DSP_ARM_MEM_NEON_H_
+#define VPX_DSP_ARM_MEM_NEON_H_
+
+#include <arm_neon.h>
+#include <assert.h>
+#include <string.h>
+
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
+// Helper functions used to load tran_low_t into int16, narrowing if necessary.
+static INLINE int16x8x2_t load_tran_low_to_s16x2q(const tran_low_t *buf) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int32x4x2_t v0 = vld2q_s32(buf);
+  const int32x4x2_t v1 = vld2q_s32(buf + 8);
+  const int16x4_t s0 = vmovn_s32(v0.val[0]);
+  const int16x4_t s1 = vmovn_s32(v0.val[1]);
+  const int16x4_t s2 = vmovn_s32(v1.val[0]);
+  const int16x4_t s3 = vmovn_s32(v1.val[1]);
+  int16x8x2_t res;
+  res.val[0] = vcombine_s16(s0, s2);
+  res.val[1] = vcombine_s16(s1, s3);
+  return res;
+#else
+  return vld2q_s16(buf);
+#endif
+}
+
+static INLINE int16x8_t load_tran_low_to_s16q(const tran_low_t *buf) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int32x4_t v0 = vld1q_s32(buf);
+  const int32x4_t v1 = vld1q_s32(buf + 4);
+  const int16x4_t s0 = vmovn_s32(v0);
+  const int16x4_t s1 = vmovn_s32(v1);
+  return vcombine_s16(s0, s1);
+#else
+  return vld1q_s16(buf);
+#endif
+}
+
+static INLINE int16x4_t load_tran_low_to_s16d(const tran_low_t *buf) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int32x4_t v0 = vld1q_s32(buf);
+  return vmovn_s32(v0);
+#else
+  return vld1_s16(buf);
+#endif
+}
+
+static INLINE void store_s16q_to_tran_low(tran_low_t *buf, const int16x8_t a) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int32x4_t v0 = vmovl_s16(vget_low_s16(a));
+  const int32x4_t v1 = vmovl_s16(vget_high_s16(a));
+  vst1q_s32(buf, v0);
+  vst1q_s32(buf + 4, v1);
+#else
+  vst1q_s16(buf, a);
+#endif
+}
+#endif  // VPX_DSP_ARM_MEM_NEON_H_