diff options
author | Scott LaVarnway <slavarnway@google.com> | 2017-10-26 09:45:06 -0700 |
---|---|---|
committer | Scott LaVarnway <slavarnway@google.com> | 2017-10-26 11:41:48 -0700 |
commit | 3bf02ad74af5602c153a318b04cc311acdc7584d (patch) | |
tree | d7271cf0c5a0ed39e024e2c9f7160fd93f7b7f07 /vpx_dsp | |
parent | 037e596f045f54369ba59b59757b8a7dfad9527c (diff) | |
download | libvpx-3bf02ad74af5602c153a318b04cc311acdc7584d.tar libvpx-3bf02ad74af5602c153a318b04cc311acdc7584d.tar.gz libvpx-3bf02ad74af5602c153a318b04cc311acdc7584d.tar.bz2 libvpx-3bf02ad74af5602c153a318b04cc311acdc7584d.zip |
vpx: hadamard: use ptrdiff_t instead of int for stride
Passing the stride as ptrdiff_t instead of int eliminates the following
sign-extension instruction from the x86 (64-bit) intrinsic code:
    movslq %esi,%rax
Change-Id: I8f5ebd40726f998708a668b0f52ea7a0576befae
Diffstat (limited to 'vpx_dsp')
-rw-r--r-- | vpx_dsp/arm/hadamard_neon.c | 4 |
-rw-r--r-- | vpx_dsp/avg.c | 6 |
-rw-r--r-- | vpx_dsp/mips/avg_msa.c | 6 |
-rw-r--r-- | vpx_dsp/ppc/hadamard_vsx.c | 4 |
-rw-r--r-- | vpx_dsp/vpx_dsp_rtcd_defs.pl | 8 |
-rw-r--r-- | vpx_dsp/x86/avg_intrin_avx2.c | 4 |
-rw-r--r-- | vpx_dsp/x86/avg_intrin_sse2.c | 4 |
7 files changed, 19 insertions, 17 deletions
diff --git a/vpx_dsp/arm/hadamard_neon.c b/vpx_dsp/arm/hadamard_neon.c index 79bedd848..523a63c6f 100644 --- a/vpx_dsp/arm/hadamard_neon.c +++ b/vpx_dsp/arm/hadamard_neon.c @@ -47,7 +47,7 @@ static void hadamard8x8_one_pass(int16x8_t *a0, int16x8_t *a1, int16x8_t *a2, *a7 = vaddq_s16(c1, c5); } -void vpx_hadamard_8x8_neon(const int16_t *src_diff, int src_stride, +void vpx_hadamard_8x8_neon(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { int16x8_t a0 = vld1q_s16(src_diff); int16x8_t a1 = vld1q_s16(src_diff + src_stride); @@ -76,7 +76,7 @@ void vpx_hadamard_8x8_neon(const int16_t *src_diff, int src_stride, store_s16q_to_tran_low(coeff + 56, a7); } -void vpx_hadamard_16x16_neon(const int16_t *src_diff, int src_stride, +void vpx_hadamard_16x16_neon(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { int i; diff --git a/vpx_dsp/avg.c b/vpx_dsp/avg.c index e4cd6cca7..a7ac6d953 100644 --- a/vpx_dsp/avg.c +++ b/vpx_dsp/avg.c @@ -34,7 +34,7 @@ unsigned int vpx_avg_4x4_c(const uint8_t *s, int p) { // src_diff: first pass, 9 bit, dynamic range [-255, 255] // second pass, 12 bit, dynamic range [-2040, 2040] -static void hadamard_col8(const int16_t *src_diff, int src_stride, +static void hadamard_col8(const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff) { int16_t b0 = src_diff[0 * src_stride] + src_diff[1 * src_stride]; int16_t b1 = src_diff[0 * src_stride] - src_diff[1 * src_stride]; @@ -66,7 +66,7 @@ static void hadamard_col8(const int16_t *src_diff, int src_stride, // The order of the output coeff of the hadamard is not important. For // optimization purposes the final transpose may be skipped. 
-void vpx_hadamard_8x8_c(const int16_t *src_diff, int src_stride, +void vpx_hadamard_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { int idx; int16_t buffer[64]; @@ -92,7 +92,7 @@ void vpx_hadamard_8x8_c(const int16_t *src_diff, int src_stride, } // In place 16x16 2D Hadamard transform -void vpx_hadamard_16x16_c(const int16_t *src_diff, int src_stride, +void vpx_hadamard_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { int idx; for (idx = 0; idx < 4; ++idx) { diff --git a/vpx_dsp/mips/avg_msa.c b/vpx_dsp/mips/avg_msa.c index 48b841969..d0ac7b8e2 100644 --- a/vpx_dsp/mips/avg_msa.c +++ b/vpx_dsp/mips/avg_msa.c @@ -56,7 +56,8 @@ uint32_t vpx_avg_4x4_msa(const uint8_t *src, int32_t src_stride) { return sum_out; } -void vpx_hadamard_8x8_msa(const int16_t *src, int src_stride, int16_t *dst) { +void vpx_hadamard_8x8_msa(const int16_t *src, ptrdiff_t src_stride, + int16_t *dst) { v8i16 src0, src1, src2, src3, src4, src5, src6, src7; v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; @@ -80,7 +81,8 @@ void vpx_hadamard_8x8_msa(const int16_t *src, int src_stride, int16_t *dst) { ST_SH8(src0, src1, src2, src3, src4, src5, src6, src7, dst, 8); } -void vpx_hadamard_16x16_msa(const int16_t *src, int src_stride, int16_t *dst) { +void vpx_hadamard_16x16_msa(const int16_t *src, ptrdiff_t src_stride, + int16_t *dst) { v8i16 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; v8i16 src11, src12, src13, src14, src15, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; v8i16 tmp6, tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; diff --git a/vpx_dsp/ppc/hadamard_vsx.c b/vpx_dsp/ppc/hadamard_vsx.c index 435e3eb5b..e279b3047 100644 --- a/vpx_dsp/ppc/hadamard_vsx.c +++ b/vpx_dsp/ppc/hadamard_vsx.c @@ -42,7 +42,7 @@ static void vpx_hadamard_s16_8x8_one_pass(int16x8_t v[8]) { v[7] = vec_add(c1, c5); } -void vpx_hadamard_8x8_vsx(const int16_t *src_diff, int src_stride, +void vpx_hadamard_8x8_vsx(const int16_t *src_diff, 
ptrdiff_t src_stride, tran_low_t *coeff) { int16x8_t v[8]; @@ -71,7 +71,7 @@ void vpx_hadamard_8x8_vsx(const int16_t *src_diff, int src_stride, store_tran_low(v[7], 0, coeff + 56); } -void vpx_hadamard_16x16_vsx(const int16_t *src_diff, int src_stride, +void vpx_hadamard_16x16_vsx(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { int i; const uint16x8_t ones = vec_splat_u16(1); diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 16b1f235a..bb54503fe 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -765,19 +765,19 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { specialize qw/vpx_minmax_8x8 sse2 neon msa/; if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { - add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, tran_low_t *coeff"; + add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; specialize qw/vpx_hadamard_8x8 sse2 neon vsx/, "$ssse3_x86_64"; - add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, int src_stride, tran_low_t *coeff"; + add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; specialize qw/vpx_hadamard_16x16 avx2 sse2 neon vsx/; add_proto qw/int vpx_satd/, "const tran_low_t *coeff, int length"; specialize qw/vpx_satd sse2 neon/; } else { - add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff"; + add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff"; specialize qw/vpx_hadamard_8x8 sse2 neon msa vsx/, "$ssse3_x86_64"; - add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff"; + add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff"; specialize qw/vpx_hadamard_16x16 avx2 sse2 neon msa vsx/; add_proto qw/int vpx_satd/, "const int16_t *coeff, int length"; diff 
--git a/vpx_dsp/x86/avg_intrin_avx2.c b/vpx_dsp/x86/avg_intrin_avx2.c index 3fc00f6df..4dc759bb5 100644 --- a/vpx_dsp/x86/avg_intrin_avx2.c +++ b/vpx_dsp/x86/avg_intrin_avx2.c @@ -91,7 +91,7 @@ static void hadamard_col8x2_avx2(__m256i *in, int iter) { } } -static void hadamard_8x8x2_avx2(int16_t const *src_diff, int src_stride, +static void hadamard_8x8x2_avx2(int16_t const *src_diff, ptrdiff_t src_stride, int16_t *coeff) { __m256i src[8]; src[0] = _mm256_loadu_si256((const __m256i *)src_diff); @@ -131,7 +131,7 @@ static void hadamard_8x8x2_avx2(int16_t const *src_diff, int src_stride, _mm256_permute2x128_si256(src[6], src[7], 0x31)); } -void vpx_hadamard_16x16_avx2(int16_t const *src_diff, int src_stride, +void vpx_hadamard_16x16_avx2(int16_t const *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { int idx; #if CONFIG_VP9_HIGHBITDEPTH diff --git a/vpx_dsp/x86/avg_intrin_sse2.c b/vpx_dsp/x86/avg_intrin_sse2.c index 4e89e07e5..a235ba41d 100644 --- a/vpx_dsp/x86/avg_intrin_sse2.c +++ b/vpx_dsp/x86/avg_intrin_sse2.c @@ -214,7 +214,7 @@ static void hadamard_col8_sse2(__m128i *in, int iter) { } } -void vpx_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride, +void vpx_hadamard_8x8_sse2(int16_t const *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { __m128i src[8]; src[0] = _mm_load_si128((const __m128i *)src_diff); @@ -246,7 +246,7 @@ void vpx_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride, store_tran_low(src[7], coeff); } -void vpx_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride, +void vpx_hadamard_16x16_sse2(int16_t const *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { int idx; for (idx = 0; idx < 4; ++idx) { |