diff options
-rw-r--r-- | vp9/common/vp9_idct.h | 63 | ||||
-rw-r--r-- | vp9/common/x86/vp9_idct_intrin_sse2.c | 4 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_dct32x32_sse2_impl.h | 2 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_dct_sse2.c | 3 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_dct_ssse3.c | 1 | ||||
-rw-r--r-- | vpx_dsp/arm/fwd_txfm_neon.c | 3 | ||||
-rw-r--r-- | vpx_dsp/fwd_txfm.h | 2 | ||||
-rw-r--r-- | vpx_dsp/mips/fwd_txfm_msa.h | 2 | ||||
-rw-r--r-- | vpx_dsp/txfm_common.h | 66 | ||||
-rw-r--r-- | vpx_dsp/vpx_dsp.mk | 2 | ||||
-rw-r--r-- | vpx_dsp/x86/fwd_txfm_impl_sse2.h | 3 | ||||
-rw-r--r-- | vpx_dsp/x86/highbd_quantize_intrin_sse2.c | 1 | ||||
-rw-r--r-- | vpx_dsp/x86/txfm_common_sse2.h | 29 |
13 files changed, 111 insertions, 70 deletions
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h index cbce2dd89..2aa8ee978 100644 --- a/vp9/common/vp9_idct.h +++ b/vp9/common/vp9_idct.h @@ -14,6 +14,7 @@ #include <assert.h> #include "./vpx_config.h" +#include "vpx_dsp/txfm_common.h" #include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_enums.h" @@ -22,68 +23,6 @@ extern "C" { #endif -// Constants and Macros used by all idct/dct functions -#define DCT_CONST_BITS 14 -#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1)) - -#define UNIT_QUANT_SHIFT 2 -#define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT) - -#define pair_set_epi16(a, b) \ - _mm_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \ - (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a)) - -#define dual_set_epi16(a, b) \ - _mm_set_epi16((int16_t)(b), (int16_t)(b), (int16_t)(b), (int16_t)(b), \ - (int16_t)(a), (int16_t)(a), (int16_t)(a), (int16_t)(a)) - -#define octa_set_epi16(a, b, c, d, e, f, g, h) \ - _mm_setr_epi16((int16_t)(a), (int16_t)(b), (int16_t)(c), (int16_t)(d), \ - (int16_t)(e), (int16_t)(f), (int16_t)(g), (int16_t)(h)) - -// Constants: -// for (int i = 1; i< 32; ++i) -// printf("static const int cospi_%d_64 = %.0f;\n", i, -// round(16384 * cos(i*M_PI/64))); -// Note: sin(k*Pi/64) = cos((32-k)*Pi/64) -static const tran_high_t cospi_1_64 = 16364; -static const tran_high_t cospi_2_64 = 16305; -static const tran_high_t cospi_3_64 = 16207; -static const tran_high_t cospi_4_64 = 16069; -static const tran_high_t cospi_5_64 = 15893; -static const tran_high_t cospi_6_64 = 15679; -static const tran_high_t cospi_7_64 = 15426; -static const tran_high_t cospi_8_64 = 15137; -static const tran_high_t cospi_9_64 = 14811; -static const tran_high_t cospi_10_64 = 14449; -static const tran_high_t cospi_11_64 = 14053; -static const tran_high_t cospi_12_64 = 13623; -static const tran_high_t cospi_13_64 = 13160; -static const tran_high_t cospi_14_64 = 12665; -static const tran_high_t cospi_15_64 = 12140; -static const tran_high_t cospi_16_64 = 11585; -static const tran_high_t cospi_17_64 = 11003; -static const tran_high_t cospi_18_64 = 10394; -static const tran_high_t cospi_19_64 = 9760; -static const tran_high_t cospi_20_64 = 9102; -static const tran_high_t cospi_21_64 = 8423; -static const tran_high_t cospi_22_64 = 7723; -static const tran_high_t cospi_23_64 = 7005; -static const tran_high_t cospi_24_64 = 6270; -static const tran_high_t cospi_25_64 = 5520; -static const tran_high_t cospi_26_64 = 4756; -static const tran_high_t cospi_27_64 = 3981; -static const tran_high_t cospi_28_64 = 3196; -static const tran_high_t cospi_29_64 = 2404; -static const tran_high_t cospi_30_64 = 1606; -static const tran_high_t cospi_31_64 = 804; - -// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3 -static const tran_high_t sinpi_1_9 = 5283; -static const tran_high_t sinpi_2_9 = 9929; -static const tran_high_t sinpi_3_9 = 13377; -static const tran_high_t sinpi_4_9 = 15212; - static INLINE tran_low_t check_range(tran_high_t input) { #if CONFIG_COEFFICIENT_RANGE_CHECKING // For valid VP9 input streams, intermediate stage coefficients should always diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c index ce010df3b..086cadf4e 100644 --- a/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -9,9 +9,9 @@ */ #include "./vp9_rtcd.h" -#include "vpx_ports/mem.h" #include "vp9/common/x86/vp9_idct_intrin_sse2.h" -#include "vp9/common/vp9_idct.h" +#include "vpx_dsp/x86/txfm_common_sse2.h" +#include "vpx_ports/mem.h" #define RECON_AND_STORE4X4(dest, in_x) \ { \ diff --git a/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h b/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h index 02773d851..f0707eaa0 100644 --- a/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h +++ b/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h @@ -11,6 +11,8 @@ #include <emmintrin.h> // SSE2 #include "vp9/encoder/vp9_dct.h" +#include "vpx_dsp/txfm_common.h" +#include "vpx_dsp/x86/txfm_common_sse2.h" #include "vpx_ports/mem.h" #if DCT_HIGH_BIT_DEPTH diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c index 04d2f96ff..fe2619838 100644 --- a/vp9/encoder/x86/vp9_dct_sse2.c +++ b/vp9/encoder/x86/vp9_dct_sse2.c @@ -13,8 +13,9 @@ #include "./vp9_rtcd.h" #include "./vpx_dsp_rtcd.h" -#include "vp9/common/vp9_idct.h" // for cospi constants #include "vp9/encoder/x86/vp9_dct_sse2.h" +#include "vpx_dsp/txfm_common.h" +#include "vpx_dsp/x86/txfm_common_sse2.h" #include "vpx_ports/mem.h" void vp9_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) { diff --git a/vp9/encoder/x86/vp9_dct_ssse3.c b/vp9/encoder/x86/vp9_dct_ssse3.c index 96038fee1..0e7d2a140 100644 --- a/vp9/encoder/x86/vp9_dct_ssse3.c +++ b/vp9/encoder/x86/vp9_dct_ssse3.c @@ -18,6 +18,7 @@ #include "./vp9_rtcd.h" #include "vp9/common/x86/vp9_idct_intrin_sse2.h" +#include "vpx_dsp/x86/txfm_common_sse2.h" void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride, int16_t* coeff_ptr, intptr_t n_coeffs, diff --git a/vpx_dsp/arm/fwd_txfm_neon.c b/vpx_dsp/arm/fwd_txfm_neon.c index 66faea531..6258b610a 100644 --- a/vpx_dsp/arm/fwd_txfm_neon.c +++ b/vpx_dsp/arm/fwd_txfm_neon.c @@ -9,8 +9,9 @@ */ #include <arm_neon.h> + #include "./vpx_config.h" -#include "vp9/common/vp9_idct.h" +#include "vpx_dsp/txfm_common.h" void vp9_fdct8x8_neon(const int16_t *input, int16_t *final_output, int stride) { int i; diff --git a/vpx_dsp/fwd_txfm.h b/vpx_dsp/fwd_txfm.h index fa54403d0..509fe7fe3 100644 --- a/vpx_dsp/fwd_txfm.h +++ b/vpx_dsp/fwd_txfm.h @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "vp9/common/vp9_idct.h" +#include "vpx_dsp/txfm_common.h" static INLINE tran_high_t fdct_round_shift(tran_high_t input) { tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); diff --git a/vpx_dsp/mips/fwd_txfm_msa.h b/vpx_dsp/mips/fwd_txfm_msa.h index 31ccf3bcb..d4c68ec2c 100644 --- a/vpx_dsp/mips/fwd_txfm_msa.h +++ b/vpx_dsp/mips/fwd_txfm_msa.h @@ -12,7 +12,7 @@ #define VPX_DSP_MIPS_FWD_TXFM_MSA_H_ #include "vpx_dsp/mips/txfm_macros_msa.h" -#include "vp9/common/vp9_idct.h" +#include "vpx_dsp/txfm_common.h" #define VP9_FDCT4(in0, in1, in2, in3, out0, out1, out2, out3) { \ v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m; \ diff --git a/vpx_dsp/txfm_common.h b/vpx_dsp/txfm_common.h new file mode 100644 index 000000000..442e6a57b --- /dev/null +++ b/vpx_dsp/txfm_common.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_DSP_TXFM_COMMON_H_ +#define VPX_DSP_TXFM_COMMON_H_ + +#include "vpx_dsp/vpx_dsp_common.h" + +// Constants and Macros used by all idct/dct functions +#define DCT_CONST_BITS 14 +#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1)) + +#define UNIT_QUANT_SHIFT 2 +#define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT) + +// Constants: +// for (int i = 1; i< 32; ++i) +// printf("static const int cospi_%d_64 = %.0f;\n", i, +// round(16384 * cos(i*M_PI/64))); +// Note: sin(k*Pi/64) = cos((32-k)*Pi/64) +static const tran_high_t cospi_1_64 = 16364; +static const tran_high_t cospi_2_64 = 16305; +static const tran_high_t cospi_3_64 = 16207; +static const tran_high_t cospi_4_64 = 16069; +static const tran_high_t cospi_5_64 = 15893; +static const tran_high_t cospi_6_64 = 15679; +static const tran_high_t cospi_7_64 = 15426; +static const tran_high_t cospi_8_64 = 15137; +static const tran_high_t cospi_9_64 = 14811; +static const tran_high_t cospi_10_64 = 14449; +static const tran_high_t cospi_11_64 = 14053; +static const tran_high_t cospi_12_64 = 13623; +static const tran_high_t cospi_13_64 = 13160; +static const tran_high_t cospi_14_64 = 12665; +static const tran_high_t cospi_15_64 = 12140; +static const tran_high_t cospi_16_64 = 11585; +static const tran_high_t cospi_17_64 = 11003; +static const tran_high_t cospi_18_64 = 10394; +static const tran_high_t cospi_19_64 = 9760; +static const tran_high_t cospi_20_64 = 9102; +static const tran_high_t cospi_21_64 = 8423; +static const tran_high_t cospi_22_64 = 7723; +static const tran_high_t cospi_23_64 = 7005; +static const tran_high_t cospi_24_64 = 6270; +static const tran_high_t cospi_25_64 = 5520; +static const tran_high_t cospi_26_64 = 4756; +static const tran_high_t cospi_27_64 = 3981; +static const tran_high_t cospi_28_64 = 3196; +static const tran_high_t cospi_29_64 = 2404; +static const tran_high_t cospi_30_64 = 1606; +static const tran_high_t cospi_31_64 = 804; + +// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3 +static const tran_high_t sinpi_1_9 = 5283; +static const tran_high_t sinpi_2_9 = 9929; +static const tran_high_t sinpi_3_9 = 13377; +static const tran_high_t sinpi_4_9 = 15212; + +#endif // VPX_DSP_TXFM_COMMON_H_ diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk index 2dfb8154f..1f31651bf 100644 --- a/vpx_dsp/vpx_dsp.mk +++ b/vpx_dsp/vpx_dsp.mk @@ -61,6 +61,8 @@ ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_SSE2) += x86/highbd_loopfilter_sse2.c endif # CONFIG_VP9_HIGHBITDEPTH +DSP_SRCS-yes += txfm_common.h +DSP_SRCS-$(HAVE_SSE2) += x86/txfm_common_sse2.h DSP_SRCS-$(HAVE_MSA) += mips/txfm_macros_msa.h # forward transform ifeq ($(CONFIG_VP9_ENCODER),yes) diff --git a/vpx_dsp/x86/fwd_txfm_impl_sse2.h b/vpx_dsp/x86/fwd_txfm_impl_sse2.h index 87d0e609c..abe06aae2 100644 --- a/vpx_dsp/x86/fwd_txfm_impl_sse2.h +++ b/vpx_dsp/x86/fwd_txfm_impl_sse2.h @@ -11,8 +11,9 @@ #include <emmintrin.h> // SSE2 #include "./vpx_dsp_rtcd.h" -#include "vp9/common/vp9_idct.h" // for cospi constants #include "vp9/encoder/x86/vp9_dct_sse2.h" +#include "vpx_dsp/txfm_common.h" +#include "vpx_dsp/x86/txfm_common_sse2.h" #include "vpx_ports/mem.h" // TODO(jingning) The high bit-depth functions need rework for performance. diff --git a/vpx_dsp/x86/highbd_quantize_intrin_sse2.c b/vpx_dsp/x86/highbd_quantize_intrin_sse2.c index 06c748d34..341283feb 100644 --- a/vpx_dsp/x86/highbd_quantize_intrin_sse2.c +++ b/vpx_dsp/x86/highbd_quantize_intrin_sse2.c @@ -15,7 +15,6 @@ #include "vpx_ports/mem.h" #if CONFIG_VP9_HIGHBITDEPTH -// from vp9_idct.h: typedef int32_t tran_low_t; void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t count, int skip_block, diff --git a/vpx_dsp/x86/txfm_common_sse2.h b/vpx_dsp/x86/txfm_common_sse2.h new file mode 100644 index 000000000..536b20687 --- /dev/null +++ b/vpx_dsp/x86/txfm_common_sse2.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_DSP_X86_TXFM_COMMON_SSE2_H_ +#define VPX_DSP_X86_TXFM_COMMON_SSE2_H_ + +#include <emmintrin.h> +#include "vpx/vpx_integer.h" + +#define pair_set_epi16(a, b) \ + _mm_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \ + (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a)) + +#define dual_set_epi16(a, b) \ + _mm_set_epi16((int16_t)(b), (int16_t)(b), (int16_t)(b), (int16_t)(b), \ + (int16_t)(a), (int16_t)(a), (int16_t)(a), (int16_t)(a)) + +#define octa_set_epi16(a, b, c, d, e, f, g, h) \ + _mm_setr_epi16((int16_t)(a), (int16_t)(b), (int16_t)(c), (int16_t)(d), \ + (int16_t)(e), (int16_t)(f), (int16_t)(g), (int16_t)(h)) + +#endif // VPX_DSP_X86_TXFM_COMMON_SSE2_H_ |