diff options
28 files changed, 395 insertions, 414 deletions
diff --git a/vp9/common/arm/neon/vp9_iht4x4_add_neon.c b/vp9/common/arm/neon/vp9_iht4x4_add_neon.c index 45040d745..dd1ea03b6 100644 --- a/vp9/common/arm/neon/vp9_iht4x4_add_neon.c +++ b/vp9/common/arm/neon/vp9_iht4x4_add_neon.c @@ -134,8 +134,8 @@ static INLINE void IADST4x4_1D(int16x4_t *d3s16, int16x4_t *d4s16, *q9s16 = vcombine_s16(d18s16, d19s16); } -void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, - int dest_stride, int tx_type) { +void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { uint8x8_t d26u8, d27u8; int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16; uint32x2_t d26u32, d27u32; @@ -151,7 +151,7 @@ void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, switch (tx_type) { case 0: // idct_idct is not supported. Fall back to C - vp9_iht4x4_16_add_c(input, dest, dest_stride, tx_type); + vp9_iht4x4_16_add_c(input, dest, stride, tx_type); return; case 1: // iadst_idct // generate constants @@ -203,11 +203,11 @@ void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, q9s16 = vrshrq_n_s16(q9s16, 4); d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 0); - dest += dest_stride; + dest += stride; d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 1); - dest += dest_stride; + dest += stride; d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 0); - dest += dest_stride; + dest += stride; d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 1); q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32)); @@ -217,10 +217,10 @@ void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 1); - dest -= dest_stride; + dest -= stride; vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 0); - dest -= dest_stride; + dest -= stride; vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 1); - dest -= dest_stride; + dest -= stride; vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 0); } diff --git a/vp9/common/arm/neon/vp9_iht8x8_add_neon.c b/vp9/common/arm/neon/vp9_iht8x8_add_neon.c index 5f54e6b90..1c739861c 100644 --- a/vp9/common/arm/neon/vp9_iht8x8_add_neon.c +++ b/vp9/common/arm/neon/vp9_iht8x8_add_neon.c @@ -406,8 +406,8 @@ static INLINE void IADST8X8_1D(int16x8_t *q8s16, int16x8_t *q9s16, *q15s16 = vsubq_s16(q5s16, q4s16); } -void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, - int dest_stride, int tx_type) { +void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { int i; uint8_t *d1, *d2; uint8x8_t d0u8, d1u8, d2u8, d3u8; @@ -429,7 +429,7 @@ void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, switch (tx_type) { case 0: // idct_idct is not supported. Fall back to C - vp9_iht8x8_64_add_c(input, dest, dest_stride, tx_type); + vp9_iht8x8_64_add_c(input, dest, stride, tx_type); return; case 1: // iadst_idct // generate IDCT constants @@ -508,13 +508,13 @@ void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, } d0u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; + d1 += stride; d1u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; + d1 += stride; d2u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; + d1 += stride; d3u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; + d1 += stride; q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u64(d0u64)); q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u64(d1u64)); @@ -529,12 +529,12 @@ void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); - d2 += dest_stride; + d2 += stride; vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); - d2 += dest_stride; + d2 += stride; vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); - d2 += dest_stride; + d2 += stride; vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); - d2 += dest_stride; + d2 += stride; } } diff --git a/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c index 2d4839174..f6b29265e 100644 --- a/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c @@ -21,8 +21,8 @@ #include "vpx_ports/mem.h" #if HAVE_DSPR2 -void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, - int dest_stride, int tx_type) { +void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, int stride, + int tx_type) { int i, j; DECLARE_ALIGNED(32, int16_t, out[4 * 4]); int16_t *outptr = out; @@ -37,7 +37,7 @@ void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, switch (tx_type) { case DCT_DCT: // DCT in both horizontal and vertical vpx_idct4_rows_dspr2(input, outptr); - vpx_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride); + vpx_idct4_columns_add_blk_dspr2(&out[0], dest, stride); break; case ADST_DCT: // ADST in vertical, DCT in horizontal vpx_idct4_rows_dspr2(input, outptr); @@ -48,8 +48,8 @@ void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, iadst4_dspr2(outptr, temp_out); for (j = 0; j < 4; ++j) - dest[j * dest_stride + i] = clip_pixel( - ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]); + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) + + dest[j * stride + i]); outptr += 4; } @@ -66,7 +66,7 @@ void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, temp_in[i * 4 + j] = out[j * 4 + i]; } } - vpx_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride); + vpx_idct4_columns_add_blk_dspr2(&temp_in[0], dest, stride); break; case ADST_ADST: // ADST in both directions for (i = 0; i < 4; ++i) { @@ -80,8 +80,8 @@ void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, iadst4_dspr2(temp_in, temp_out); for (j = 0; j < 4; ++j) - dest[j * dest_stride + i] = clip_pixel( - ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]); + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) + + dest[j * stride + i]); } break; default: printf("vp9_short_iht4x4_add_dspr2 : Invalid tx_type\n"); break; diff --git a/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c index 86896f04c..b945e307e 100644 --- a/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c @@ -20,8 +20,8 @@ #include "vpx_ports/mem.h" #if HAVE_DSPR2 -void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, - int dest_stride, int tx_type) { +void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, int stride, + int tx_type) { int i, j; DECLARE_ALIGNED(32, int16_t, out[8 * 8]); int16_t *outptr = out; @@ -34,7 +34,7 @@ void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, switch (tx_type) { case DCT_DCT: // DCT in both horizontal and vertical idct8_rows_dspr2(input, outptr, 8); - idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride); + idct8_columns_add_blk_dspr2(&out[0], dest, stride); break; case ADST_DCT: // ADST in vertical, DCT in horizontal idct8_rows_dspr2(input, outptr, 8); @@ -43,8 +43,8 @@ void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, iadst8_dspr2(&out[i * 8], temp_out); for (j = 0; j < 8; ++j) - dest[j * dest_stride + i] = clip_pixel( - ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]); + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) + + dest[j * stride + i]); } break; case DCT_ADST: // DCT in vertical, ADST in horizontal @@ -59,7 +59,7 @@ void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, temp_in[i * 8 + j] = out[j * 8 + i]; } } - idct8_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride); + idct8_columns_add_blk_dspr2(&temp_in[0], dest, stride); break; case ADST_ADST: // ADST in both directions for (i = 0; i < 8; ++i) { @@ -74,8 +74,8 @@ void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, iadst8_dspr2(temp_in, temp_out); for (j = 0; j < 8; ++j) - dest[j * dest_stride + i] = clip_pixel( - ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]); + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) + + dest[j * stride + i]); } break; default: printf("vp9_short_iht8x8_add_dspr2 : Invalid tx_type\n"); break; diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index abef06763..088b004f5 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -48,16 +48,16 @@ specialize qw/vp9_filter_by_weight8x8 sse2 msa/; if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Force C versions if CONFIG_EMULATE_HARDWARE is 1 if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") { - add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; + add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type"; - add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; + add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type"; add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type"; } else { - add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; + add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type"; specialize qw/vp9_iht4x4_16_add sse2/; - add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; + add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type"; specialize qw/vp9_iht8x8_64_add sse2/; add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type"; @@ -66,16 +66,16 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { } else { # Force C versions if CONFIG_EMULATE_HARDWARE is 1 if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") { - add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; + add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type"; - add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; + add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type"; add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type"; } else { - add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; + add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type"; specialize qw/vp9_iht4x4_16_add sse2 neon dspr2 msa/; - add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; + add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type"; specialize qw/vp9_iht8x8_64_add sse2 neon dspr2 msa/; add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type"; @@ -101,9 +101,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # # Note as optimized versions of these functions are added we need to add a check to ensure # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only. - add_proto qw/void vp9_highbd_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd"; + add_proto qw/void vp9_highbd_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type, int bd"; - add_proto qw/void vp9_highbd_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd"; + add_proto qw/void vp9_highbd_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type, int bd"; add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type, int bd"; } diff --git a/vpx_dsp/arm/highbd_idct4x4_add_neon.c b/vpx_dsp/arm/highbd_idct4x4_add_neon.c index 96625b98b..1b2b5714f 100644 --- a/vpx_dsp/arm/highbd_idct4x4_add_neon.c +++ b/vpx_dsp/arm/highbd_idct4x4_add_neon.c @@ -15,7 +15,7 @@ #include "vpx_dsp/inv_txfm.h" void vpx_highbd_idct4x4_1_add_neon(const tran_low_t *input, uint8_t *dest8, - int dest_stride, int bd) { + int stride, int bd) { int i; const int16x8_t max = vdupq_n_s16((1 << bd) - 1); const tran_low_t out0 = dct_const_round_shift(input[0] * cospi_16_64); @@ -29,15 +29,15 @@ void vpx_highbd_idct4x4_1_add_neon(const tran_low_t *input, uint8_t *dest8, for (i = 0; i < 2; i++) { d0 = vld1_u16(dest); - d1 = vld1_u16(dest + dest_stride); + d1 = vld1_u16(dest + stride); a = vreinterpretq_s16_u16(vcombine_u16(d0, d1)); a = vaddq_s16(dc, a); a = vminq_s16(a, max); b = vqshluq_n_s16(a, 0); vst1_u16(dest, vget_low_u16(b)); - dest += dest_stride; + dest += stride; vst1_u16(dest, vget_high_u16(b)); - dest += dest_stride; + dest += stride; } } @@ -105,7 +105,7 @@ static INLINE void idct4x4_16_kernel_bd12(const int32x4_t cospis, } void vpx_highbd_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest8, - int dest_stride, int bd) { + int stride, int bd) { DECLARE_ALIGNED(16, static const int32_t, kCospi32[4]) = { 0, 15137, 11585, 6270 }; const int16x8_t max = vdupq_n_s16((1 << bd) - 1); @@ -149,11 +149,11 @@ void vpx_highbd_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest8, } d0 = vreinterpret_s16_u16(vld1_u16(dst)); - dst += dest_stride; + dst += stride; d1 = vreinterpret_s16_u16(vld1_u16(dst)); - dst += dest_stride; + dst += stride; d2 = vreinterpret_s16_u16(vld1_u16(dst)); - dst += dest_stride; + dst += stride; d3 = vreinterpret_s16_u16(vld1_u16(dst)); d01 = vcombine_s16(d0, d1); d32 = vcombine_s16(d3, d2); @@ -168,10 +168,10 @@ void vpx_highbd_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest8, d32_u16 = vqshluq_n_s16(d32, 0); vst1_u16(dest, vget_low_u16(d01_u16)); - dest += dest_stride; + dest += stride; vst1_u16(dest, vget_high_u16(d01_u16)); - dest += dest_stride; + dest += stride; vst1_u16(dest, vget_high_u16(d32_u16)); - dest += dest_stride; + dest += stride; vst1_u16(dest, vget_low_u16(d32_u16)); } diff --git a/vpx_dsp/arm/idct16x16_1_add_neon.asm b/vpx_dsp/arm/idct16x16_1_add_neon.asm index e3c0c5210..d648840df 100644 --- a/vpx_dsp/arm/idct16x16_1_add_neon.asm +++ b/vpx_dsp/arm/idct16x16_1_add_neon.asm @@ -15,12 +15,11 @@ AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vpx_idct16x16_1_add_neon(int16_t *input, uint8_t *dest, -; int dest_stride) +;void vpx_idct16x16_1_add_neon(int16_t *input, uint8_t *dest, int stride) ; ; r0 int16_t input ; r1 uint8_t *dest -; r2 int dest_stride) +; r2 int stride) |vpx_idct16x16_1_add_neon| PROC ldrsh r0, [r0] diff --git a/vpx_dsp/arm/idct16x16_1_add_neon.c b/vpx_dsp/arm/idct16x16_1_add_neon.c index f1e49ff51..266f97610 100644 --- a/vpx_dsp/arm/idct16x16_1_add_neon.c +++ b/vpx_dsp/arm/idct16x16_1_add_neon.c @@ -15,7 +15,7 @@ #include "vpx_ports/mem.h" void vpx_idct16x16_1_add_neon(const tran_low_t *input, uint8_t *dest, - int dest_stride) { + int stride) { uint8x8_t d2u8, d3u8, d30u8, d31u8; uint64x1_t d2u64, d3u64, d4u64, d5u64; uint16x8_t q0u16, q9u16, q10u16, q11u16, q12u16; @@ -33,10 +33,10 @@ void vpx_idct16x16_1_add_neon(const tran_low_t *input, uint8_t *dest, for (j = 0; j < 2; j++) { d2u64 = vld1_u64((const uint64_t *)d1); d3u64 = vld1_u64((const uint64_t *)(d1 + 8)); - d1 += dest_stride; + d1 += stride; d4u64 = vld1_u64((const uint64_t *)d1); d5u64 = vld1_u64((const uint64_t *)(d1 + 8)); - d1 += dest_stride; + d1 += stride; q9u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d2u64)); q10u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d3u64)); @@ -50,10 +50,10 @@ void vpx_idct16x16_1_add_neon(const tran_low_t *input, uint8_t *dest, vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); vst1_u64((uint64_t *)(d2 + 8), vreinterpret_u64_u8(d3u8)); - d2 += dest_stride; + d2 += stride; vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d30u8)); vst1_u64((uint64_t *)(d2 + 8), vreinterpret_u64_u8(d31u8)); - d2 += dest_stride; + d2 += stride; } } } diff --git a/vpx_dsp/arm/idct16x16_add_neon.asm b/vpx_dsp/arm/idct16x16_add_neon.asm index 27ad17ac2..ea6b099d3 100644 --- a/vpx_dsp/arm/idct16x16_add_neon.asm +++ b/vpx_dsp/arm/idct16x16_add_neon.asm @@ -287,14 +287,14 @@ idct16x16_256_add_neon_pass1 ; int16_t *pass1_output, ; int16_t skip_adding, ; uint8_t *dest, -; int dest_stride) +; int stride) ; ; r0 const int16_t *src ; r1 int16_t *output ; r2 int16_t *pass1_output ; r3 int16_t skip_adding ; r4 uint8_t *dest -; r5 int dest_stride +; r5 int stride ; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output ; will be stored back into q8-q15 registers. This function will touch q0-q7 @@ -601,7 +601,7 @@ idct16x16_256_add_neon_pass2 ldr r7, [sp, #28] ; dest used to save element 0-7 mov r9, r7 ; save dest pointer for later use - ldr r8, [sp, #32] ; load dest_stride + ldr r8, [sp, #32] ; load stride ; stage 7 ; load the data in pass1 @@ -615,8 +615,8 @@ idct16x16_256_add_neon_pass2 vadd.s16 q13, q1, q14 ; step2[1] + step2[14] vrshr.s16 q12, q12, #6 ; ROUND_POWER_OF_TWO vrshr.s16 q13, q13, #6 ; ROUND_POWER_OF_TWO - vaddw.u8 q12, q12, d12 ; + dest[j * dest_stride + i] - vaddw.u8 q13, q13, d13 ; + dest[j * dest_stride + i] + vaddw.u8 q12, q12, d12 ; + dest[j * stride + i] + vaddw.u8 q13, q13, d13 ; + dest[j * stride + i] vqmovun.s16 d12, q12 ; clip pixel vqmovun.s16 d13, q13 ; clip pixel vst1.64 {d12}, [r9], r8 ; store the data @@ -629,8 +629,8 @@ idct16x16_256_add_neon_pass2 vadd.s16 q13, q11, q4 ; step2[3] + step2[12] vrshr.s16 q12, q12, #6 ; ROUND_POWER_OF_TWO vrshr.s16 q13, q13, #6 ; ROUND_POWER_OF_TWO - vaddw.u8 q12, q12, d12 ; + dest[j * dest_stride + i] - vaddw.u8 q13, q13, d13 ; + dest[j * dest_stride + i] + vaddw.u8 q12, q12, d12 ; + dest[j * stride + i] + vaddw.u8 q13, q13, d13 ; + dest[j * stride + i] vqmovun.s16 d12, q12 ; clip pixel vqmovun.s16 d13, q13 ; clip pixel vst1.64 {d12}, [r9], r8 ; store the data @@ -647,8 +647,8 @@ idct16x16_256_add_neon_pass2 vadd.s16 q13, q1, q2 ; step2[5] + step2[10] vrshr.s16 q12, q12, #6 ; ROUND_POWER_OF_TWO vrshr.s16 q13, q13, #6 ; ROUND_POWER_OF_TWO - vaddw.u8 q12, q12, d12 ; + dest[j * dest_stride + i] - vaddw.u8 q13, q13, d13 ; + dest[j * dest_stride + i] + vaddw.u8 q12, q12, d12 ; + dest[j * stride + i] + vaddw.u8 q13, q13, d13 ; + dest[j * stride + i] vqmovun.s16 d12, q12 ; clip pixel vqmovun.s16 d13, q13 ; clip pixel vst1.64 {d12}, [r9], r8 ; store the data @@ -661,8 +661,8 @@ idct16x16_256_add_neon_pass2 vadd.s16 q13, q11, q8 ; step2[7] + step2[8] vrshr.s16 q12, q12, #6 ; ROUND_POWER_OF_TWO vrshr.s16 q13, q13, #6 ; ROUND_POWER_OF_TWO - vaddw.u8 q12, q12, d12 ; + dest[j * dest_stride + i] - vaddw.u8 q13, q13, d13 ; + dest[j * dest_stride + i] + vaddw.u8 q12, q12, d12 ; + dest[j * stride + i] + vaddw.u8 q13, q13, d13 ; + dest[j * stride + i] vqmovun.s16 d12, q12 ; clip pixel vqmovun.s16 d13, q13 ; clip pixel vst1.64 {d12}, [r9], r8 ; store the data @@ -674,42 +674,42 @@ idct16x16_256_add_neon_pass2 ; store the data output 8,9,10,11,12,13,14,15 vrshr.s16 q8, q8, #6 ; ROUND_POWER_OF_TWO - vaddw.u8 q8, q8, d12 ; + dest[j * dest_stride + i] + vaddw.u8 q8, q8, d12 ; + dest[j * stride + i] vqmovun.s16 d12, q8 ; clip pixel vst1.64 {d12}, [r9], r8 ; store the data vld1.64 {d12}, [r7], r8 ; load destinatoin data vrshr.s16 q9, q9, #6 - vaddw.u8 q9, q9, d13 ; + dest[j * dest_stride + i] + vaddw.u8 q9, q9, d13 ; + dest[j * stride + i] vqmovun.s16 d13, q9 ; clip pixel vst1.64 {d13}, [r9], r8 ; store the data vld1.64 {d13}, [r7], r8 ; load destinatoin data vrshr.s16 q2, q2, #6 - vaddw.u8 q2, q2, d12 ; + dest[j * dest_stride + i] + vaddw.u8 q2, q2, d12 ; + dest[j * stride + i] vqmovun.s16 d12, q2 ; clip pixel vst1.64 {d12}, [r9], r8 ; store the data vld1.64 {d12}, [r7], r8 ; load destinatoin data vrshr.s16 q3, q3, #6 - vaddw.u8 q3, q3, d13 ; + dest[j * dest_stride + i] + vaddw.u8 q3, q3, d13 ; + dest[j * stride + i] vqmovun.s16 d13, q3 ; clip pixel vst1.64 {d13}, [r9], r8 ; store the data vld1.64 {d13}, [r7], r8 ; load destinatoin data vrshr.s16 q4, q4, #6 - vaddw.u8 q4, q4, d12 ; + dest[j * dest_stride + i] + vaddw.u8 q4, q4, d12 ; + dest[j * stride + i] vqmovun.s16 d12, q4 ; clip pixel vst1.64 {d12}, [r9], r8 ; store the data vld1.64 {d12}, [r7], r8 ; load destinatoin data vrshr.s16 q5, q5, #6 - vaddw.u8 q5, q5, d13 ; + dest[j * dest_stride + i] + vaddw.u8 q5, q5, d13 ; + dest[j * stride + i] vqmovun.s16 d13, q5 ; clip pixel vst1.64 {d13}, [r9], r8 ; store the data vld1.64 {d13}, [r7], r8 ; load destinatoin data vrshr.s16 q14, q14, #6 - vaddw.u8 q14, q14, d12 ; + dest[j * dest_stride + i] + vaddw.u8 q14, q14, d12 ; + dest[j * stride + i] vqmovun.s16 d12, q14 ; clip pixel vst1.64 {d12}, [r9], r8 ; store the data vld1.64 {d12}, [r7], r8 ; load destinatoin data vrshr.s16 q15, q15, #6 - vaddw.u8 q15, q15, d13 ; + dest[j * dest_stride + i] + vaddw.u8 q15, q15, d13 ; + dest[j * stride + i] vqmovun.s16 d13, q15 ; clip pixel vst1.64 {d13}, [r9], r8 ; store the data b end_idct16x16_pass2 @@ -789,14 +789,14 @@ end_idct16x16_pass2 ; int16_t *pass1_output, ; int16_t skip_adding, ; uint8_t *dest, -; int dest_stride) +; int stride) ; ; r0 const tran_low_t *src ; r1 int16_t *output ; r2 int16_t *pass1_output ; r3 int16_t skip_adding ; r4 uint8_t *dest -; r5 int dest_stride +; r5 int stride |vpx_idct16x16_256_add_neon_pass2_tran_low| PROC LOAD_TRAN_LOW_TO_S16X2 d16, d17, d18, d19, r0 diff --git a/vpx_dsp/arm/idct16x16_add_neon.c b/vpx_dsp/arm/idct16x16_add_neon.c index 4e22b5520..f4eb24615 100644 --- a/vpx_dsp/arm/idct16x16_add_neon.c +++ b/vpx_dsp/arm/idct16x16_add_neon.c @@ -270,7 +270,7 @@ static void idct16x16_256_add_neon_pass2(const int16x8_t s0, const int16x8_t s1, const int16x8_t s6, const int16x8_t s7, int16_t *out, int16_t *pass1_output, int16_t skip_adding, uint8_t *dest, - int dest_stride) { + int stride) { uint8_t *d; uint8x8_t d12u8, d13u8; int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; @@ -522,9 +522,9 @@ static void idct16x16_256_add_neon_pass2(const int16x8_t s0, const int16x8_t s1, q1s16 = vld1q_s16(pass1_output); pass1_output += 8; d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; + dest += stride; d13s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; + dest += stride; q12s16 = vaddq_s16(q0s16, q15s16); q13s16 = vaddq_s16(q1s16, q14s16); @@ -537,9 +537,9 @@ static void idct16x16_256_add_neon_pass2(const int16x8_t s0, const int16x8_t s1, d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); d13u8 = vqmovun_s16(vreinterpretq_s16_u16(q13u16)); vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; + d += stride; vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d13u8)); - d += dest_stride; + d += stride; q14s16 = vsubq_s16(q1s16, q14s16); q15s16 = vsubq_s16(q0s16, q15s16); @@ -548,9 +548,9 @@ static void idct16x16_256_add_neon_pass2(const int16x8_t s0, const int16x8_t s1, q11s16 = vld1q_s16(pass1_output); pass1_output += 8; d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; + dest += stride; d13s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; + dest += stride; q12s16 = vaddq_s16(q10s16, q5s16); q13s16 = vaddq_s16(q11s16, q4s16); q12s16 = vrshrq_n_s16(q12s16, 6); @@ -562,9 +562,9 @@ static void idct16x16_256_add_neon_pass2(const int16x8_t s0, const int16x8_t s1, d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); d13u8 = vqmovun_s16(vreinterpretq_s16_u16(q13u16)); vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; + d += stride; vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d13u8)); - d += dest_stride; + d += stride; q4s16 = vsubq_s16(q11s16, q4s16); q5s16 = vsubq_s16(q10s16, q5s16); @@ -573,9 +573,9 @@ static void idct16x16_256_add_neon_pass2(const int16x8_t s0, const int16x8_t s1, q1s16 = vld1q_s16(pass1_output); pass1_output += 8; d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; + dest += stride; d13s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; + dest += stride; q12s16 = vaddq_s16(q0s16, q3s16); q13s16 = vaddq_s16(q1s16, q2s16); q12s16 = vrshrq_n_s16(q12s16, 6); @@ -587,9 +587,9 @@ static void idct16x16_256_add_neon_pass2(const int16x8_t s0, const int16x8_t s1, d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); d13u8 = vqmovun_s16(vreinterpretq_s16_u16(q13u16)); vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; + d += stride; vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d13u8)); - d += dest_stride; + d += stride; q2s16 = vsubq_s16(q1s16, q2s16); q3s16 = vsubq_s16(q0s16, q3s16); @@ -597,9 +597,9 @@ static void idct16x16_256_add_neon_pass2(const int16x8_t s0, const int16x8_t s1, pass1_output += 8; q11s16 = vld1q_s16(pass1_output); d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; + dest += stride; d13s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; + dest += stride; q12s16 = vaddq_s16(q10s16, q9s16); q13s16 = vaddq_s16(q11s16, q8s16); q12s16 = vrshrq_n_s16(q12s16, 6); @@ -611,69 +611,69 @@ static void idct16x16_256_add_neon_pass2(const int16x8_t s0, const int16x8_t s1, d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); d13u8 = vqmovun_s16(vreinterpretq_s16_u16(q13u16)); vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; + d += stride; vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d13u8)); - d += dest_stride; + d += stride; q8s16 = vsubq_s16(q11s16, q8s16); q9s16 = vsubq_s16(q10s16, q9s16); // store the data out 8,9,10,11,12,13,14,15 d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; + dest += stride; q8s16 = vrshrq_n_s16(q8s16, 6); q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_s64(d12s64)); d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; + d += stride; d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; + dest += stride; q9s16 = vrshrq_n_s16(q9s16, 6); q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_s64(d12s64)); d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; + d += stride; d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; + dest += stride; q2s16 = vrshrq_n_s16(q2s16, 6); q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2s16), vreinterpret_u8_s64(d12s64)); d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16)); vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; + d += stride; d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; + dest += stride; q3s16 = vrshrq_n_s16(q3s16, 6); q3u16 = vaddw_u8(vreinterpretq_u16_s16(q3s16), vreinterpret_u8_s64(d12s64)); d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q3u16)); vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; + d += stride; d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; + dest += stride; q4s16 = vrshrq_n_s16(q4s16, 6); q4u16 = vaddw_u8(vreinterpretq_u16_s16(q4s16), vreinterpret_u8_s64(d12s64)); d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q4u16)); vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; + d += stride; d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; + dest += stride; q5s16 = vrshrq_n_s16(q5s16, 6); q5u16 = vaddw_u8(vreinterpretq_u16_s16(q5s16), vreinterpret_u8_s64(d12s64)); d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q5u16)); vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; + d += stride; d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; + dest += stride; q14s16 = vrshrq_n_s16(q14s16, 6); q14u16 = vaddw_u8(vreinterpretq_u16_s16(q14s16), vreinterpret_u8_s64(d12s64)); d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q14u16)); vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; + d += stride; d12s64 = vld1_s64((int64_t *)dest); q15s16 = vrshrq_n_s16(q15s16, 6); @@ -803,7 +803,7 @@ static void idct16x16_256_add_neon_pass2(const int16x8_t s0, const int16x8_t s1, void vpx_idct16x16_256_add_neon_pass2(const int16_t *src, int16_t *out, int16_t *pass1_output, int16_t skip_adding, uint8_t *dest, - int dest_stride) { + int stride) { int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; int16x8x2_t q0x2s16; @@ -833,7 +833,7 @@ void vpx_idct16x16_256_add_neon_pass2(const int16_t *src, int16_t *out, idct16x16_256_add_neon_pass2(q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16, out, pass1_output, skip_adding, - dest, dest_stride); + dest, stride); } #if CONFIG_VP9_HIGHBITDEPTH @@ -841,7 +841,7 @@ void vpx_idct16x16_256_add_neon_pass2_tran_low(const tran_low_t *src, int16_t *out, int16_t *pass1_output, int16_t skip_adding, - uint8_t *dest, int dest_stride) { + uint8_t *dest, int stride) { int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; int16x8x2_t q0x2s16; @@ -871,7 +871,7 @@ void vpx_idct16x16_256_add_neon_pass2_tran_low(const tran_low_t *src, idct16x16_256_add_neon_pass2(q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16, out, pass1_output, skip_adding, - dest, dest_stride); + dest, stride); } #endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/vpx_dsp/arm/idct16x16_neon.c b/vpx_dsp/arm/idct16x16_neon.c index bcbbf4b6d..47366bcb7 100644 --- a/vpx_dsp/arm/idct16x16_neon.c +++ b/vpx_dsp/arm/idct16x16_neon.c @@ -15,7 +15,7 @@ void vpx_idct16x16_256_add_neon_pass1(const int16_t *input, int16_t *output); void vpx_idct16x16_256_add_neon_pass2(const int16_t *src, int16_t *output, int16_t *pass1_output, int16_t skip_adding, uint8_t *dest, - int dest_stride); + int stride); #if CONFIG_VP9_HIGHBITDEPTH void vpx_idct16x16_256_add_neon_pass1_tran_low(const tran_low_t *input, int16_t *output); @@ -23,7 +23,7 @@ void vpx_idct16x16_256_add_neon_pass2_tran_low(const tran_low_t *src, int16_t *output, int16_t *pass1_output, int16_t skip_adding, - uint8_t *dest, int dest_stride); + uint8_t *dest, int stride); #else #define vpx_idct16x16_256_add_neon_pass1_tran_low \ vpx_idct16x16_256_add_neon_pass1 @@ -42,7 +42,7 @@ extern void vpx_pop_neon(int64_t *store); #endif // HAVE_NEON_ASM void vpx_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest, - int dest_stride) { + int stride) { #if HAVE_NEON_ASM int64_t store_reg[8]; #endif @@ -63,7 +63,7 @@ void vpx_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest, // with result in pass1(pass1_output) to calculate final result in stage 7 // which will be saved into row_idct_output. vpx_idct16x16_256_add_neon_pass2_tran_low(input + 1, row_idct_output, - pass1_output, 0, dest, dest_stride); + pass1_output, 0, dest, stride); /* Parallel idct on the lower 8 rows */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the @@ -73,9 +73,8 @@ void vpx_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest, // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines // with result in pass1(pass1_output) to calculate final result in stage 7 // which will be saved into row_idct_output. - vpx_idct16x16_256_add_neon_pass2_tran_low(input + 8 * 16 + 1, - row_idct_output + 8, pass1_output, - 0, dest, dest_stride); + vpx_idct16x16_256_add_neon_pass2_tran_low( + input + 8 * 16 + 1, row_idct_output + 8, pass1_output, 0, dest, stride); /* Parallel idct on the left 8 columns */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the @@ -86,7 +85,7 @@ void vpx_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest, // with result in pass1(pass1_output) to calculate final result in stage 7. // Then add the result to the destination data. vpx_idct16x16_256_add_neon_pass2(row_idct_output + 1, row_idct_output, - pass1_output, 1, dest, dest_stride); + pass1_output, 1, dest, stride); /* Parallel idct on the right 8 columns */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the @@ -98,7 +97,7 @@ void vpx_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest, // Then add the result to the destination data. vpx_idct16x16_256_add_neon_pass2(row_idct_output + 8 * 16 + 1, row_idct_output + 8, pass1_output, 1, - dest + 8, dest_stride); + dest + 8, stride); #if HAVE_NEON_ASM // restore d8-d15 register values. @@ -107,7 +106,7 @@ void vpx_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest, } void vpx_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest, - int dest_stride) { + int stride) { #if HAVE_NEON_ASM int64_t store_reg[8]; #endif @@ -140,7 +139,7 @@ void vpx_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest, // with result in pass1(pass1_output) to calculate final result in stage 7. // Then add the result to the destination data. vpx_idct16x16_256_add_neon_pass2(row_idct_output + 1, row_idct_output, - pass1_output, 1, dest, dest_stride); + pass1_output, 1, dest, stride); /* Parallel idct on the right 8 columns */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the @@ -152,7 +151,7 @@ void vpx_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest, // Then add the result to the destination data. vpx_idct16x16_256_add_neon_pass2(row_idct_output + 8 * 16 + 1, row_idct_output + 8, pass1_output, 1, - dest + 8, dest_stride); + dest + 8, stride); #if HAVE_NEON_ASM // restore d8-d15 register values. diff --git a/vpx_dsp/arm/idct32x32_1_add_neon.c b/vpx_dsp/arm/idct32x32_1_add_neon.c index 6be4b0122..165178db8 100644 --- a/vpx_dsp/arm/idct32x32_1_add_neon.c +++ b/vpx_dsp/arm/idct32x32_1_add_neon.c @@ -90,7 +90,7 @@ static INLINE void ST_16x8(uint8_t *d, int d_stride, uint8x16_t *q8u8, } void vpx_idct32x32_1_add_neon(const tran_low_t *input, uint8_t *dest, - int dest_stride) { + int stride) { uint8x16_t q0u8, q8u8, q9u8, q10u8, q11u8, q12u8, q13u8, q14u8, q15u8; int i, j, dest_stride8; uint8_t *d; @@ -100,19 +100,19 @@ void vpx_idct32x32_1_add_neon(const tran_low_t *input, uint8_t *dest, out = dct_const_round_shift(out * cospi_16_64); a1 = ROUND_POWER_OF_TWO(out, 6); - dest_stride8 = dest_stride * 8; + dest_stride8 = stride * 8; if (a1 >= 0) { // diff_positive_32_32 a1 = a1 < 0 ? 0 : a1 > 255 ? 255 : a1; q0u8 = vdupq_n_u8((uint8_t)a1); for (i = 0; i < 2; i++, dest += 16) { // diff_positive_32_32_loop d = dest; for (j = 0; j < 4; j++) { - LD_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, &q12u8, &q13u8, - &q14u8, &q15u8); + LD_16x8(d, stride, &q8u8, &q9u8, &q10u8, &q11u8, &q12u8, &q13u8, &q14u8, + &q15u8); ADD_DIFF_16x8(q0u8, &q8u8, &q9u8, &q10u8, &q11u8, &q12u8, &q13u8, &q14u8, &q15u8); - ST_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, &q12u8, &q13u8, - &q14u8, &q15u8); + ST_16x8(d, stride, &q8u8, &q9u8, &q10u8, &q11u8, &q12u8, &q13u8, &q14u8, + &q15u8); d += dest_stride8; } } @@ -123,12 +123,12 @@ void vpx_idct32x32_1_add_neon(const tran_low_t *input, uint8_t *dest, for (i = 0; i < 2; i++, dest += 16) { // diff_negative_32_32_loop d = dest; for (j = 0; j < 4; j++) { - LD_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, &q12u8, &q13u8, - &q14u8, &q15u8); + LD_16x8(d, stride, &q8u8, &q9u8, &q10u8, &q11u8, &q12u8, &q13u8, &q14u8, + &q15u8); SUB_DIFF_16x8(q0u8, &q8u8, &q9u8, &q10u8, &q11u8, &q12u8, &q13u8, &q14u8, &q15u8); - ST_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, &q12u8, &q13u8, - &q14u8, &q15u8); + ST_16x8(d, stride, &q8u8, &q9u8, &q10u8, &q11u8, &q12u8, &q13u8, &q14u8, + &q15u8); d += dest_stride8; } } diff --git a/vpx_dsp/arm/idct4x4_1_add_neon.asm b/vpx_dsp/arm/idct4x4_1_add_neon.asm index cbfab361a..d83421e9e 100644 --- a/vpx_dsp/arm/idct4x4_1_add_neon.asm +++ b/vpx_dsp/arm/idct4x4_1_add_neon.asm @@ -15,12 +15,11 @@ AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vpx_idct4x4_1_add_neon(int16_t *input, uint8_t *dest, -; int dest_stride) +;void vpx_idct4x4_1_add_neon(int16_t *input, uint8_t *dest, int stride) ; ; r0 int16_t input ; r1 uint8_t *dest -; r2 int dest_stride) +; r2 int stride) |vpx_idct4x4_1_add_neon| PROC ldrsh r0, [r0] diff --git a/vpx_dsp/arm/idct4x4_1_add_neon.c b/vpx_dsp/arm/idct4x4_1_add_neon.c index 21b138ce7..7fd1f2dfa 100644 --- a/vpx_dsp/arm/idct4x4_1_add_neon.c +++ b/vpx_dsp/arm/idct4x4_1_add_neon.c @@ -15,7 +15,7 @@ #include "vpx_dsp/inv_txfm.h" void vpx_idct4x4_1_add_neon(const tran_low_t *input, uint8_t *dest, - int dest_stride) { + int stride) { int i; const int16_t out0 = dct_const_round_shift((int16_t)input[0] * cospi_16_64); const int16_t out1 = dct_const_round_shift(out0 * cospi_16_64); @@ -26,16 +26,16 @@ void vpx_idct4x4_1_add_neon(const tran_low_t *input, uint8_t *dest, uint8x8_t b; assert(!((intptr_t)dest % sizeof(uint32_t))); - assert(!(dest_stride % sizeof(uint32_t))); + assert(!(stride % sizeof(uint32_t))); for (i = 0; i < 2; i++) { d = vld1_lane_u32((const uint32_t *)dest, d, 0); - d = vld1_lane_u32((const uint32_t *)(dest + dest_stride), d, 1); + d = vld1_lane_u32((const uint32_t *)(dest + stride), d, 1); a = vaddw_u8(vreinterpretq_u16_s16(dc), vreinterpret_u8_u32(d)); b = vqmovun_s16(vreinterpretq_s16_u16(a)); vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(b), 0); - dest += dest_stride; + dest += stride; vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(b), 1); - dest += dest_stride; + dest += stride; } } diff --git a/vpx_dsp/arm/idct4x4_add_neon.asm b/vpx_dsp/arm/idct4x4_add_neon.asm index 1550785a9..184d21894 100644 --- a/vpx_dsp/arm/idct4x4_add_neon.asm +++ b/vpx_dsp/arm/idct4x4_add_neon.asm @@ -18,11 +18,11 @@ INCLUDE vpx_dsp/arm/idct_neon.asm.S AREA Block, CODE, READONLY ; name this block of code -;void vpx_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int dest_stride) +;void vpx_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int stride) ; ; r0 int16_t input ; r1 uint8_t *dest -; r2 int dest_stride) +; r2 int stride) |vpx_idct4x4_16_add_neon| PROC @@ -167,7 +167,7 @@ vld1.32 {d27[1]}, [r1], r2 vld1.32 {d27[0]}, [r1] ; no post-increment - ; ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i] + ; ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * stride + i] vaddw.u8 q8, q8, d26 vaddw.u8 q9, q9, d27 diff --git a/vpx_dsp/arm/idct4x4_add_neon.c b/vpx_dsp/arm/idct4x4_add_neon.c index 5ccc95ce0..bff98cbc1 100644 --- a/vpx_dsp/arm/idct4x4_add_neon.c +++ b/vpx_dsp/arm/idct4x4_add_neon.c @@ -16,7 +16,7 @@ #include "vpx_dsp/txfm_common.h" void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, - int dest_stride) { + int stride) { const uint8_t *dst = dest; const int16x4_t cospis = vld1_s16(kCospi); uint32x2_t dest01_u32 = vdup_n_u32(0); @@ -26,7 +26,7 @@ void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, uint16x8_t d01_u16, d32_u16; assert(!((intptr_t)dest % sizeof(uint32_t))); - assert(!(dest_stride % sizeof(uint32_t))); + assert(!(stride % sizeof(uint32_t))); // Rows a0 = load_tran_low_to_s16q(input); @@ -40,11 +40,11 @@ void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, a1 = vrshrq_n_s16(a1, 4); dest01_u32 = vld1_lane_u32((const uint32_t *)dst, dest01_u32, 0); - dst += dest_stride; + dst += stride; dest01_u32 = vld1_lane_u32((const uint32_t *)dst, dest01_u32, 1); - dst += dest_stride; + dst += stride; dest32_u32 = vld1_lane_u32((const uint32_t *)dst, dest32_u32, 1); - dst += dest_stride; + dst += stride; dest32_u32 = vld1_lane_u32((const uint32_t *)dst, dest32_u32, 0); d01_u16 = @@ -55,10 +55,10 @@ void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, d32 = vqmovun_s16(vreinterpretq_s16_u16(d32_u16)); vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d01), 0); - dest += dest_stride; + dest += stride; vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d01), 1); - dest += dest_stride; + dest += stride; vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d32), 1); - dest += dest_stride; + dest += stride; vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d32), 0); } diff --git a/vpx_dsp/arm/idct8x8_1_add_neon.asm b/vpx_dsp/arm/idct8x8_1_add_neon.asm index e4531c6e9..29f678a03 100644 --- a/vpx_dsp/arm/idct8x8_1_add_neon.asm +++ b/vpx_dsp/arm/idct8x8_1_add_neon.asm @@ -15,12 +15,11 @@ AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vpx_idct8x8_1_add_neon(int16_t *input, uint8_t *dest, -; int dest_stride) +;void vpx_idct8x8_1_add_neon(int16_t *input, uint8_t *dest, int stride) ; ; r0 int16_t input ; r1 uint8_t *dest -; r2 int dest_stride) +; r2 int stride) |vpx_idct8x8_1_add_neon| PROC ldrsh r0, [r0] diff --git a/vpx_dsp/arm/idct8x8_1_add_neon.c b/vpx_dsp/arm/idct8x8_1_add_neon.c index c5ce4c396..0a6af4f45 100644 --- a/vpx_dsp/arm/idct8x8_1_add_neon.c +++ b/vpx_dsp/arm/idct8x8_1_add_neon.c @@ -15,7 +15,7 @@ #include "vpx_ports/mem.h" void vpx_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest, - int dest_stride) { + int stride) { int i; const int16_t out0 = dct_const_round_shift(input[0] * cospi_16_64); const int16_t out1 = dct_const_round_shift(out0 * cospi_16_64); @@ -28,13 +28,13 @@ void vpx_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest, for (i = 0; i < 2; i++) { d0 = vld1_u8(dst); - dst += dest_stride; + dst += stride; d1 = vld1_u8(dst); - dst += dest_stride; + dst += stride; d2 = vld1_u8(dst); - dst += dest_stride; + dst += stride; d3 = vld1_u8(dst); - dst += dest_stride; + dst += stride; d0_u16 = vaddw_u8(dc_u16, d0); d1_u16 = vaddw_u8(dc_u16, d1); @@ -47,12 +47,12 @@ void vpx_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest, d3 = vqmovun_s16(vreinterpretq_s16_u16(d3_u16)); vst1_u8(dest, d0); - dest += dest_stride; + dest += stride; vst1_u8(dest, d1); - dest += dest_stride; + dest += stride; vst1_u8(dest, d2); - dest += dest_stride; + dest += stride; vst1_u8(dest, d3); - dest += dest_stride; + dest += stride; } } diff --git a/vpx_dsp/arm/idct8x8_add_neon.asm b/vpx_dsp/arm/idct8x8_add_neon.asm index b58bf0ccd..2bfbcc5a5 100644 --- a/vpx_dsp/arm/idct8x8_add_neon.asm +++ b/vpx_dsp/arm/idct8x8_add_neon.asm @@ -200,11 +200,11 @@ MEND AREA Block, CODE, READONLY ; name this block of code -;void vpx_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride) +;void vpx_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int stride) ; ; r0 int16_t input ; r1 uint8_t *dest -; r2 int dest_stride) +; r2 int stride) |vpx_idct8x8_64_add_neon| PROC push {r4-r9} @@ -270,7 +270,7 @@ vld1.64 {d6}, [r1], r2 vld1.64 {d7}, [r1] - ; ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i] + ; ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * stride + i] vaddw.u8 q8, q8, d0 vaddw.u8 q9, q9, d1 vaddw.u8 q10, q10, d2 @@ -305,11 +305,11 @@ bx lr ENDP ; |vpx_idct8x8_64_add_neon| -;void vpx_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride) +;void vpx_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int stride) ; ; r0 int16_t input ; r1 uint8_t *dest -; r2 int dest_stride) +; r2 int stride) |vpx_idct8x8_12_add_neon| PROC push {r4-r9} @@ -469,7 +469,7 @@ vld1.64 {d6}, [r1], r2 vld1.64 {d7}, [r1] - ; ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i] + ; ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * stride + i] vaddw.u8 q8, q8, d0 vaddw.u8 q9, q9, d1 vaddw.u8 q10, q10, d2 diff --git a/vpx_dsp/arm/idct8x8_add_neon.c b/vpx_dsp/arm/idct8x8_add_neon.c index f35cd8913..7db229cf5 100644 --- a/vpx_dsp/arm/idct8x8_add_neon.c +++ b/vpx_dsp/arm/idct8x8_add_neon.c @@ -138,7 +138,7 @@ static INLINE void IDCT8x8_1D(const int16x4_t cospis0, const int16x4_t cospis1, static INLINE void add8x8(int16x8_t a0, int16x8_t a1, int16x8_t a2, int16x8_t a3, int16x8_t a4, int16x8_t a5, int16x8_t a6, int16x8_t a7, uint8_t *dest, - const int dest_stride) { + const int stride) { const uint8_t *dst = dest; uint8x8_t d0, d1, d2, d3, d4, d5, d6, d7; uint16x8_t d0_u16, d1_u16, d2_u16, d3_u16, d4_u16, d5_u16, d6_u16, d7_u16; @@ -153,19 +153,19 @@ static INLINE void add8x8(int16x8_t a0, int16x8_t a1, int16x8_t a2, a7 = vrshrq_n_s16(a7, 5); d0 = vld1_u8(dst); - dst += dest_stride; + dst += stride; d1 = vld1_u8(dst); - dst += dest_stride; + dst += stride; d2 = vld1_u8(dst); - dst += dest_stride; + dst += stride; d3 = vld1_u8(dst); - dst += dest_stride; + dst += stride; d4 = vld1_u8(dst); - dst += dest_stride; + dst += stride; d5 = vld1_u8(dst); - dst += dest_stride; + dst += stride; d6 = vld1_u8(dst); - dst += dest_stride; + dst += stride; d7 = vld1_u8(dst); d0_u16 = vaddw_u8(vreinterpretq_u16_s16(a0), d0); @@ -187,24 +187,24 @@ static INLINE void add8x8(int16x8_t a0, int16x8_t a1, int16x8_t a2, d7 = vqmovun_s16(vreinterpretq_s16_u16(d7_u16)); vst1_u8(dest, d0); - dest += dest_stride; + dest += stride; vst1_u8(dest, d1); - dest += dest_stride; + dest += stride; vst1_u8(dest, d2); - dest += dest_stride; + dest += stride; vst1_u8(dest, d3); - dest += dest_stride; + dest += stride; vst1_u8(dest, d4); - dest += dest_stride; + dest += stride; vst1_u8(dest, d5); - dest += dest_stride; + dest += stride; vst1_u8(dest, d6); - dest += dest_stride; + dest += stride; vst1_u8(dest, d7); } void vpx_idct8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, - int dest_stride) { + int stride) { const int16x8_t cospis = vld1q_s16(kCospi); const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28 @@ -223,7 +223,7 @@ void vpx_idct8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, IDCT8x8_1D(cospis0, cospis1, &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7); transpose_s16_8x8(&a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7); IDCT8x8_1D(cospis0, cospis1, &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7); - add8x8(a0, a1, a2, a3, a4, a5, a6, a7, dest, dest_stride); + add8x8(a0, a1, a2, a3, a4, a5, a6, a7, dest, stride); } static INLINE void IDCT8x4_1D(const int16x4_t cospis0, const int16x4_t cospisd0, @@ -281,7 +281,7 @@ static INLINE void IDCT8x4_1D(const int16x4_t cospis0, const int16x4_t cospisd0, } void vpx_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest, - int dest_stride) { + int stride) { const int16x8_t cospis = vld1q_s16(kCospi); const int16x8_t cospisd = vaddq_s16(cospis, cospis); const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 @@ -341,5 +341,5 @@ void vpx_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest, transpose_s16_4x8(b8, b9, b10, b11, b4, b5, b6, b7, &a0, &a1, &a2, &a3); IDCT8x4_1D(cospis0, cospisd0, cospisd1, &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7); - add8x8(a0, a1, a2, a3, a4, a5, a6, a7, dest, dest_stride); + add8x8(a0, a1, a2, a3, a4, a5, a6, a7, dest, stride); } diff --git a/vpx_dsp/inv_txfm.c b/vpx_dsp/inv_txfm.c index f3f543ddf..0f9aff189 100644 --- a/vpx_dsp/inv_txfm.c +++ b/vpx_dsp/inv_txfm.c @@ -67,7 +67,7 @@ void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { } } -void vpx_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { +void vpx_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int stride) { int i; tran_high_t a1, e1; tran_low_t tmp[4]; @@ -84,10 +84,10 @@ void vpx_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { for (i = 0; i < 4; i++) { e1 = ip[0] >> 1; a1 = ip[0] - e1; - dest[dest_stride * 0] = clip_pixel_add(dest[dest_stride * 0], a1); - dest[dest_stride * 1] = clip_pixel_add(dest[dest_stride * 1], e1); - dest[dest_stride * 2] = clip_pixel_add(dest[dest_stride * 2], e1); - dest[dest_stride * 3] = clip_pixel_add(dest[dest_stride * 3], e1); + dest[stride * 0] = clip_pixel_add(dest[stride * 0], a1); + dest[stride * 1] = clip_pixel_add(dest[stride * 1], e1); + dest[stride * 2] = clip_pixel_add(dest[stride * 2], e1); + dest[stride * 3] = clip_pixel_add(dest[stride * 3], e1); ip++; dest++; } @@ -138,8 +138,7 @@ void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { } } -void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, - int dest_stride) { +void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i; tran_high_t a1; tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); @@ -152,7 +151,7 @@ void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, dest[1] = clip_pixel_add(dest[1], a1); dest[2] = clip_pixel_add(dest[2], a1); dest[3] = clip_pixel_add(dest[3], a1); - dest += dest_stride; + dest += stride; } } @@ -1324,7 +1323,7 @@ void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, } void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, - int dest_stride, int bd) { + int stride, int bd) { int i; tran_high_t a1, e1; tran_low_t tmp[4]; @@ -1343,14 +1342,10 @@ void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, for (i = 0; i < 4; i++) { e1 = ip[0] >> 1; a1 = ip[0] - e1; - dest[dest_stride * 0] = - highbd_clip_pixel_add(dest[dest_stride * 0], a1, bd); - dest[dest_stride * 1] = - highbd_clip_pixel_add(dest[dest_stride * 1], e1, bd); - dest[dest_stride * 2] = - highbd_clip_pixel_add(dest[dest_stride * 2], e1, bd); - dest[dest_stride * 3] = - highbd_clip_pixel_add(dest[dest_stride * 3], e1, bd); + dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd); + dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], e1, bd); + dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], e1, bd); + dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], e1, bd); ip++; dest++; } @@ -1413,7 +1408,7 @@ void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, } void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, - int dest_stride, int bd) { + int stride, int bd) { int i; tran_high_t a1; tran_low_t out = @@ -1428,7 +1423,7 @@ void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); - dest += dest_stride; + dest += stride; } } diff --git a/vpx_dsp/mips/inv_txfm_dspr2.h b/vpx_dsp/mips/inv_txfm_dspr2.h index edd54aec5..27881f0db 100644 --- a/vpx_dsp/mips/inv_txfm_dspr2.h +++ b/vpx_dsp/mips/inv_txfm_dspr2.h @@ -57,18 +57,15 @@ extern "C" { out; \ }) -void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, - int dest_stride); +void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int stride); void vpx_idct4_rows_dspr2(const int16_t *input, int16_t *output); -void vpx_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, - int dest_stride); +void vpx_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, int stride); void iadst4_dspr2(const int16_t *input, int16_t *output); void idct8_rows_dspr2(const int16_t *input, int16_t *output, uint32_t no_rows); -void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, - int dest_stride); +void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, int stride); void iadst8_dspr2(const int16_t *input, int16_t *output); void idct16_rows_dspr2(const int16_t *input, int16_t *output, uint32_t no_rows); -void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int dest_stride); +void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int stride); void iadst16_dspr2(const int16_t *input, int16_t *output); #endif // #if HAVE_DSPR2 diff --git a/vpx_dsp/mips/itrans16_dspr2.c b/vpx_dsp/mips/itrans16_dspr2.c index 0ec0c2059..224181d6e 100644 --- a/vpx_dsp/mips/itrans16_dspr2.c +++ b/vpx_dsp/mips/itrans16_dspr2.c @@ -389,7 +389,7 @@ void idct16_rows_dspr2(const int16_t *input, int16_t *output, } } -void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int dest_stride) { +void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int stride) { int i; int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7; int step1_8, step1_9, step1_10, step1_11; @@ -712,14 +712,14 @@ void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int dest_stride) { "add %[load6], %[step1_1], %[step1_6] \n\t" "add %[load6], %[load6], %[step1_14] \n\t" "sb %[load5], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load8], 0(%[dest_pix]) \n\t" "addi %[load6], %[load6], 32 \n\t" "sra %[load6], %[load6], 6 \n\t" "add %[load8], %[load8], %[load6] \n\t" "lbux %[load6], %[load8](%[cm]) \n\t" "sb %[load6], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load7], 0(%[dest_pix]) \n\t" "add %[load5], %[step1_2], %[step1_5] \n\t" @@ -731,14 +731,14 @@ void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int dest_stride) { "add %[load6], %[step1_3], %[step1_4] \n\t" "add %[load6], %[load6], %[step1_12] \n\t" "sb %[load5], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load8], 0(%[dest_pix]) \n\t" "addi %[load6], %[load6], 32 \n\t" "sra %[load6], %[load6], 6 \n\t" "add %[load8], %[load8], %[load6] \n\t" "lbux %[load6], %[load8](%[cm]) \n\t" "sb %[load6], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load7], 0(%[dest_pix]) \n\t" "sub %[load5], %[step1_3], %[step1_4] \n\t" @@ -750,14 +750,14 @@ void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int dest_stride) { "sub %[load6], %[step1_2], %[step1_5] \n\t" "add %[load6], %[load6], %[step1_10] \n\t" "sb %[load5], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load8], 0(%[dest_pix]) \n\t" "addi %[load6], %[load6], 32 \n\t" "sra %[load6], %[load6], 6 \n\t" "add %[load8], %[load8], %[load6] \n\t" "lbux %[load6], %[load8](%[cm]) \n\t" "sb %[load6], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "sub %[load5], %[step1_1], %[step1_6] \n\t" "lbu %[load7], 0(%[dest_pix]) \n\t" @@ -769,14 +769,14 @@ void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int dest_stride) { "sub %[load6], %[step1_0], %[step1_7] \n\t" "add %[load6], %[load6], %[step1_8] \n\t" "sb %[load5], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load8], 0(%[dest_pix]) \n\t" "addi %[load6], %[load6], 32 \n\t" "sra %[load6], %[load6], 6 \n\t" "add %[load8], %[load8], %[load6] \n\t" "lbux %[load6], %[load8](%[cm]) \n\t" "sb %[load6], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load7], 0(%[dest_pix]) \n\t" "sub %[load5], %[step1_0], %[step1_7] \n\t" @@ -788,14 +788,14 @@ void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int dest_stride) { "sub %[load6], %[step1_1], %[step1_6] \n\t" "sub %[load6], %[load6], %[step1_9] \n\t" "sb %[load5], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load8], 0(%[dest_pix]) \n\t" "addi %[load6], %[load6], 32 \n\t" "sra %[load6], %[load6], 6 \n\t" "add %[load8], %[load8], %[load6] \n\t" "lbux %[load6], %[load8](%[cm]) \n\t" "sb %[load6], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load7], 0(%[dest_pix]) \n\t" "sub %[load5], %[step1_2], %[step1_5] \n\t" @@ -807,14 +807,14 @@ void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int dest_stride) { "sub %[load6], %[step1_3], %[step1_4] \n\t" "sub %[load6], %[load6], %[step1_11] \n\t" "sb %[load5], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load8], 0(%[dest_pix]) \n\t" "addi %[load6], %[load6], 32 \n\t" "sra %[load6], %[load6], 6 \n\t" "add %[load8], %[load8], %[load6] \n\t" "lbux %[load6], %[load8](%[cm]) \n\t" "sb %[load6], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load7], 0(%[dest_pix]) \n\t" "add %[load5], %[step1_3], %[step1_4] \n\t" @@ -826,14 +826,14 @@ void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int dest_stride) { "add %[load6], %[step1_2], %[step1_5] \n\t" "sub %[load6], %[load6], %[step1_13] \n\t" "sb %[load5], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load8], 0(%[dest_pix]) \n\t" "addi %[load6], %[load6], 32 \n\t" "sra %[load6], %[load6], 6 \n\t" "add %[load8], %[load8], %[load6] \n\t" "lbux %[load6], %[load8](%[cm]) \n\t" "sb %[load6], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load7], 0(%[dest_pix]) \n\t" "add %[load5], %[step1_1], %[step1_6] \n\t" @@ -845,7 +845,7 @@ void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int dest_stride) { "add %[load6], %[step1_0], %[step1_7] \n\t" "sub %[load6], %[load6], %[step1_15] \n\t" "sb %[load5], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load8], 0(%[dest_pix]) \n\t" "addi %[load6], %[load6], 32 \n\t" "sra %[load6], %[load6], 6 \n\t" @@ -856,7 +856,7 @@ void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int dest_stride) { : [load5] "=&r"(load5), [load6] "=&r"(load6), [load7] "=&r"(load7), [load8] "=&r"(load8), [dest_pix] "+r"(dest_pix) : - [cm] "r"(cm), [dest_stride] "r"(dest_stride), [step1_0] "r"(step1_0), + [cm] "r"(cm), [stride] "r"(stride), [step1_0] "r"(step1_0), [step1_1] "r"(step1_1), [step1_2] "r"(step1_2), [step1_3] "r"(step1_3), [step1_4] "r"(step1_4), [step1_5] "r"(step1_5), [step1_6] "r"(step1_6), [step1_7] "r"(step1_7), [step1_8] "r"(step1_8), [step1_9] "r"(step1_9), @@ -869,7 +869,7 @@ void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int dest_stride) { } void vpx_idct16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, - int dest_stride) { + int stride) { DECLARE_ALIGNED(32, int16_t, out[16 * 16]); uint32_t pos = 45; @@ -880,11 +880,11 @@ void vpx_idct16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, idct16_rows_dspr2(input, out, 16); // Then transform columns and add to dest - idct16_cols_add_blk_dspr2(out, dest, dest_stride); + idct16_cols_add_blk_dspr2(out, dest, stride); } void vpx_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest, - int dest_stride) { + int stride) { DECLARE_ALIGNED(32, int16_t, out[16 * 16]); int16_t *outptr = out; uint32_t i; @@ -924,11 +924,11 @@ void vpx_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest, } // Then transform columns - idct16_cols_add_blk_dspr2(out, dest, dest_stride); + idct16_cols_add_blk_dspr2(out, dest, stride); } void vpx_idct16x16_1_add_dspr2(const int16_t *input, uint8_t *dest, - int dest_stride) { + int stride) { uint32_t pos = 45; int32_t out; int32_t r; @@ -975,13 +975,13 @@ void vpx_idct16x16_1_add_dspr2(const int16_t *input, uint8_t *dest, "sw %[vector_2], 4(%[dest]) \n\t" "sw %[vector_3], 8(%[dest]) \n\t" "sw %[vector_4], 12(%[dest]) \n\t" - "add %[dest], %[dest], %[dest_stride] \n\t" + "add %[dest], %[dest], %[stride] \n\t" : [t1] "=&r"(t1), [t2] "=&r"(t2), [t3] "=&r"(t3), [t4] "=&r"(t4), [vector_1] "=&r"(vector_1), [vector_2] "=&r"(vector_2), [vector_3] "=&r"(vector_3), [vector_4] "=&r"(vector_4), [dest] "+&r"(dest) - : [dest_stride] "r"(dest_stride), [vector_a1] "r"(vector_a1)); + : [stride] "r"(stride), [vector_a1] "r"(vector_a1)); } } else { /* use quad-byte @@ -1005,13 +1005,13 @@ void vpx_idct16x16_1_add_dspr2(const int16_t *input, uint8_t *dest, "sw %[vector_2], 4(%[dest]) \n\t" "sw %[vector_3], 8(%[dest]) \n\t" "sw %[vector_4], 12(%[dest]) \n\t" - "add %[dest], %[dest], %[dest_stride] \n\t" + "add %[dest], %[dest], %[stride] \n\t" : [t1] "=&r"(t1), [t2] "=&r"(t2), [t3] "=&r"(t3), [t4] "=&r"(t4), [vector_1] "=&r"(vector_1), [vector_2] "=&r"(vector_2), [vector_3] "=&r"(vector_3), [vector_4] "=&r"(vector_4), [dest] "+&r"(dest) - : [dest_stride] "r"(dest_stride), [vector_a1] "r"(vector_a1)); + : [stride] "r"(stride), [vector_a1] "r"(vector_a1)); } } } diff --git a/vpx_dsp/mips/itrans32_cols_dspr2.c b/vpx_dsp/mips/itrans32_cols_dspr2.c index ce25d55c9..ab9ffd281 100644 --- a/vpx_dsp/mips/itrans32_cols_dspr2.c +++ b/vpx_dsp/mips/itrans32_cols_dspr2.c @@ -13,8 +13,7 @@ #include "vpx_dsp/txfm_common.h" #if HAVE_DSPR2 -void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, - int dest_stride) { +void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int stride) { int16_t step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6; int16_t step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13; int16_t step1_14, step1_15, step1_16, step1_17, step1_18, step1_19; @@ -49,7 +48,7 @@ void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, for (i = 0; i < 32; ++i) { dest_pix = dest + i; - dest_pix1 = dest + i + 31 * dest_stride; + dest_pix1 = dest + i + 31 * stride; __asm__ __volatile__( "lh %[load1], 2(%[input]) \n\t" @@ -738,14 +737,14 @@ void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, "lbux %[temp0], %[temp2](%[cm]) \n\t" "add %[temp1], %[step1_1], %[step2_30] \n\t" "sb %[temp0], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix]) \n\t" "addi %[temp1], %[temp1], 32 \n\t" "sra %[temp1], %[temp1], 6 \n\t" "add %[temp3], %[temp3], %[temp1] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp2], 0(%[dest_pix]) \n\t" "add %[temp0], %[step1_2], %[step2_29] \n\t" @@ -755,18 +754,18 @@ void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, "lbux %[temp0], %[temp2](%[cm]) \n\t" "add %[temp1], %[step1_3], %[step2_28] \n\t" "sb %[temp0], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix]) \n\t" "addi %[temp1], %[temp1], 32 \n\t" "sra %[temp1], %[temp1], 6 \n\t" "add %[temp3], %[temp3], %[temp1] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dest_pix] "+r"(dest_pix) - : [cm] "r"(cm), [dest_stride] "r"(dest_stride), [step1_0] "r"(step1_0), + : [cm] "r"(cm), [stride] "r"(stride), [step1_0] "r"(step1_0), [step1_1] "r"(step1_1), [step1_2] "r"(step1_2), [step1_3] "r"(step1_3), [step2_28] "r"(step2_28), [step2_29] "r"(step2_29), [step2_30] "r"(step2_30), @@ -782,29 +781,29 @@ void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, "add %[temp2], %[temp2], %[step3_15] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "sb %[temp0], 0(%[dest_pix1]) \n\t" - "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t" + "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix1]) \n\t" "add %[temp3], %[temp3], %[step3_14] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix1]) \n\t" - "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t" + "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp2], 0(%[dest_pix1]) \n\t" "add %[temp2], %[temp2], %[step3_13] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "sb %[temp0], 0(%[dest_pix1]) \n\t" - "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t" + "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix1]) \n\t" "add %[temp3], %[temp3], %[step3_12] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix1]) \n\t" - "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t" + "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dest_pix1] "+r"(dest_pix1) - : [cm] "r"(cm), [dest_stride] "r"(dest_stride), - [step3_12] "r"(step3_12), [step3_13] "r"(step3_13), - [step3_14] "r"(step3_14), [step3_15] "r"(step3_15)); + : [cm] "r"(cm), [stride] "r"(stride), [step3_12] "r"(step3_12), + [step3_13] "r"(step3_13), [step3_14] "r"(step3_14), + [step3_15] "r"(step3_15)); __asm__ __volatile__( "lbu %[temp2], 0(%[dest_pix]) \n\t" @@ -815,14 +814,14 @@ void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, "lbux %[temp0], %[temp2](%[cm]) \n\t" "add %[temp1], %[step1_5], %[step1_26] \n\t" "sb %[temp0], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix]) \n\t" "addi %[temp1], %[temp1], 32 \n\t" "sra %[temp1], %[temp1], 6 \n\t" "add %[temp3], %[temp3], %[temp1] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp2], 0(%[dest_pix]) \n\t" "add %[temp0], %[step1_6], %[step1_25] \n\t" @@ -832,18 +831,18 @@ void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, "lbux %[temp0], %[temp2](%[cm]) \n\t" "add %[temp1], %[step1_7], %[step1_24] \n\t" "sb %[temp0], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix]) \n\t" "addi %[temp1], %[temp1], 32 \n\t" "sra %[temp1], %[temp1], 6 \n\t" "add %[temp3], %[temp3], %[temp1] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dest_pix] "+r"(dest_pix) - : [cm] "r"(cm), [dest_stride] "r"(dest_stride), [step1_4] "r"(step1_4), + : [cm] "r"(cm), [stride] "r"(stride), [step1_4] "r"(step1_4), [step1_5] "r"(step1_5), [step1_6] "r"(step1_6), [step1_7] "r"(step1_7), [step1_24] "r"(step1_24), [step1_25] "r"(step1_25), [step1_26] "r"(step1_26), @@ -859,29 +858,29 @@ void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, "add %[temp2], %[temp2], %[step3_15] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "sb %[temp0], 0(%[dest_pix1]) \n\t" - "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t" + "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix1]) \n\t" "add %[temp3], %[temp3], %[step3_14] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix1]) \n\t" - "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t" + "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp2], 0(%[dest_pix1]) \n\t" "add %[temp2], %[temp2], %[step3_13] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "sb %[temp0], 0(%[dest_pix1]) \n\t" - "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t" + "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix1]) \n\t" "add %[temp3], %[temp3], %[step3_12] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix1]) \n\t" - "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t" + "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dest_pix1] "+r"(dest_pix1) - : [cm] "r"(cm), [dest_stride] "r"(dest_stride), - [step3_12] "r"(step3_12), [step3_13] "r"(step3_13), - [step3_14] "r"(step3_14), [step3_15] "r"(step3_15)); + : [cm] "r"(cm), [stride] "r"(stride), [step3_12] "r"(step3_12), + [step3_13] "r"(step3_13), [step3_14] "r"(step3_14), + [step3_15] "r"(step3_15)); __asm__ __volatile__( "lbu %[temp2], 0(%[dest_pix]) \n\t" @@ -892,14 +891,14 @@ void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, "lbux %[temp0], %[temp2](%[cm]) \n\t" "add %[temp1], %[step1_9], %[step1_22] \n\t" "sb %[temp0], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix]) \n\t" "addi %[temp1], %[temp1], 32 \n\t" "sra %[temp1], %[temp1], 6 \n\t" "add %[temp3], %[temp3], %[temp1] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp2], 0(%[dest_pix]) \n\t" "add %[temp0], %[step1_10], %[step1_21] \n\t" @@ -909,18 +908,18 @@ void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, "lbux %[temp0], %[temp2](%[cm]) \n\t" "add %[temp1], %[step1_11], %[step1_20] \n\t" "sb %[temp0], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix]) \n\t" "addi %[temp1], %[temp1], 32 \n\t" "sra %[temp1], %[temp1], 6 \n\t" "add %[temp3], %[temp3], %[temp1] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dest_pix] "+r"(dest_pix) - : [cm] "r"(cm), [dest_stride] "r"(dest_stride), [step1_8] "r"(step1_8), + : [cm] "r"(cm), [stride] "r"(stride), [step1_8] "r"(step1_8), [step1_9] "r"(step1_9), [step1_10] "r"(step1_10), [step1_11] "r"(step1_11), [step1_20] "r"(step1_20), [step1_21] "r"(step1_21), [step1_22] "r"(step1_22), @@ -936,29 +935,29 @@ void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, "add %[temp2], %[temp2], %[step3_15] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "sb %[temp0], 0(%[dest_pix1]) \n\t" - "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t" + "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix1]) \n\t" "add %[temp3], %[temp3], %[step3_14] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix1]) \n\t" - "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t" + "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp2], 0(%[dest_pix1]) \n\t" "add %[temp2], %[temp2], %[step3_13] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "sb %[temp0], 0(%[dest_pix1]) \n\t" - "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t" + "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix1]) \n\t" "add %[temp3], %[temp3], %[step3_12] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix1]) \n\t" - "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t" + "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dest_pix1] "+r"(dest_pix1) - : [cm] "r"(cm), [dest_stride] "r"(dest_stride), - [step3_12] "r"(step3_12), [step3_13] "r"(step3_13), - [step3_14] "r"(step3_14), [step3_15] "r"(step3_15)); + : [cm] "r"(cm), [stride] "r"(stride), [step3_12] "r"(step3_12), + [step3_13] "r"(step3_13), [step3_14] "r"(step3_14), + [step3_15] "r"(step3_15)); __asm__ __volatile__( "lbu %[temp2], 0(%[dest_pix]) \n\t" @@ -969,14 +968,14 @@ void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, "lbux %[temp0], %[temp2](%[cm]) \n\t" "add %[temp1], %[step1_13], %[step2_18] \n\t" "sb %[temp0], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix]) \n\t" "addi %[temp1], %[temp1], 32 \n\t" "sra %[temp1], %[temp1], 6 \n\t" "add %[temp3], %[temp3], %[temp1] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp2], 0(%[dest_pix]) \n\t" "add %[temp0], %[step1_14], %[step2_17] \n\t" @@ -986,7 +985,7 @@ void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, "lbux %[temp0], %[temp2](%[cm]) \n\t" "add %[temp1], %[step1_15], %[step2_16] \n\t" "sb %[temp0], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix]) \n\t" "addi %[temp1], %[temp1], 32 \n\t" "sra %[temp1], %[temp1], 6 \n\t" @@ -996,11 +995,11 @@ void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dest_pix] "+r"(dest_pix) - : [cm] "r"(cm), [dest_stride] "r"(dest_stride), - [step1_12] "r"(step1_12), [step1_13] "r"(step1_13), - [step1_14] "r"(step1_14), [step1_15] "r"(step1_15), - [step2_16] "r"(step2_16), [step2_17] "r"(step2_17), - [step2_18] "r"(step2_18), [step2_19] "r"(step2_19)); + : [cm] "r"(cm), [stride] "r"(stride), [step1_12] "r"(step1_12), + [step1_13] "r"(step1_13), [step1_14] "r"(step1_14), + [step1_15] "r"(step1_15), [step2_16] "r"(step2_16), + [step2_17] "r"(step2_17), [step2_18] "r"(step2_18), + [step2_19] "r"(step2_19)); step3_12 = ROUND_POWER_OF_TWO((step1_15 - step2_16), 6); step3_13 = ROUND_POWER_OF_TWO((step1_14 - step2_17), 6); @@ -1012,18 +1011,18 @@ void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, "add %[temp2], %[temp2], %[step3_15] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "sb %[temp0], 0(%[dest_pix1]) \n\t" - "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t" + "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix1]) \n\t" "add %[temp3], %[temp3], %[step3_14] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix1]) \n\t" - "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t" + "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp2], 0(%[dest_pix1]) \n\t" "add %[temp2], %[temp2], %[step3_13] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "sb %[temp0], 0(%[dest_pix1]) \n\t" - "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t" + "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix1]) \n\t" "add %[temp3], %[temp3], %[step3_12] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" @@ -1031,9 +1030,9 @@ void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dest_pix1] "+r"(dest_pix1) - : [cm] "r"(cm), [dest_stride] "r"(dest_stride), - [step3_12] "r"(step3_12), [step3_13] "r"(step3_13), - [step3_14] "r"(step3_14), [step3_15] "r"(step3_15)); + : [cm] "r"(cm), [stride] "r"(stride), [step3_12] "r"(step3_12), + [step3_13] "r"(step3_13), [step3_14] "r"(step3_14), + [step3_15] "r"(step3_15)); input += 32; } diff --git a/vpx_dsp/mips/itrans32_dspr2.c b/vpx_dsp/mips/itrans32_dspr2.c index d71c5ffed..33e07f22a 100644 --- a/vpx_dsp/mips/itrans32_dspr2.c +++ b/vpx_dsp/mips/itrans32_dspr2.c @@ -836,7 +836,7 @@ static void idct32_rows_dspr2(const int16_t *input, int16_t *output, } void vpx_idct32x32_1024_add_dspr2(const int16_t *input, uint8_t *dest, - int dest_stride) { + int stride) { DECLARE_ALIGNED(32, int16_t, out[32 * 32]); int16_t *outptr = out; uint32_t pos = 45; @@ -850,7 +850,7 @@ void vpx_idct32x32_1024_add_dspr2(const int16_t *input, uint8_t *dest, idct32_rows_dspr2(input, outptr, 32); // Columns - vpx_idct32_cols_add_blk_dspr2(out, dest, dest_stride); + vpx_idct32_cols_add_blk_dspr2(out, dest, stride); } void vpx_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest, diff --git a/vpx_dsp/mips/itrans4_dspr2.c b/vpx_dsp/mips/itrans4_dspr2.c index 516ea80f4..a9973b1f0 100644 --- a/vpx_dsp/mips/itrans4_dspr2.c +++ b/vpx_dsp/mips/itrans4_dspr2.c @@ -96,7 +96,7 @@ void vpx_idct4_rows_dspr2(const int16_t *input, int16_t *output) { } void vpx_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, - int dest_stride) { + int stride) { int16_t step_0, step_1, step_2, step_3; int Temp0, Temp1, Temp2, Temp3; const int const_2_power_13 = 8192; @@ -175,7 +175,7 @@ void vpx_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, "add %[Temp0], %[step_1], %[step_2] \n\t" "lbux %[Temp2], %[Temp1](%[cm]) \n\t" "sb %[Temp2], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "addi %[Temp0], %[Temp0], 8 \n\t" "sra %[Temp0], %[Temp0], 4 \n\t" @@ -184,7 +184,7 @@ void vpx_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, "sub %[Temp0], %[step_1], %[step_2] \n\t" "lbux %[Temp2], %[Temp1](%[cm]) \n\t" "sb %[Temp2], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "addi %[Temp0], %[Temp0], 8 \n\t" "sra %[Temp0], %[Temp0], 4 \n\t" @@ -193,7 +193,7 @@ void vpx_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, "sub %[Temp0], %[step_0], %[step_3] \n\t" "lbux %[Temp2], %[Temp1](%[cm]) \n\t" "sb %[Temp2], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "addi %[Temp0], %[Temp0], 8 \n\t" "sra %[Temp0], %[Temp0], 4 \n\t" @@ -209,14 +209,13 @@ void vpx_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, : [const_2_power_13] "r"(const_2_power_13), [cospi_8_64] "r"(cospi_8_64), [cospi_16_64] "r"(cospi_16_64), [cospi_24_64] "r"(cospi_24_64), [input] "r"(input), [cm] "r"(cm), - [dest_stride] "r"(dest_stride)); + [stride] "r"(stride)); input += 4; } } -void vpx_idct4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, - int dest_stride) { +void vpx_idct4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, int stride) { DECLARE_ALIGNED(32, int16_t, out[4 * 4]); int16_t *outptr = out; uint32_t pos = 45; @@ -230,11 +229,10 @@ void vpx_idct4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, vpx_idct4_rows_dspr2(input, outptr); // Columns - vpx_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride); + vpx_idct4_columns_add_blk_dspr2(&out[0], dest, stride); } -void vpx_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest, - int dest_stride) { +void vpx_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest, int stride) { int a1, absa1; int r; int32_t out; @@ -271,10 +269,10 @@ void vpx_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest, "lw %[t2], 0(%[dest]) \n\t" "subu_s.qb %[vector_a], %[t2], %[vector_a1] \n\t" "sw %[vector_a], 0(%[dest]) \n\t" - "add %[dest], %[dest], %[dest_stride] \n\t" + "add %[dest], %[dest], %[stride] \n\t" : [t2] "=&r"(t2), [vector_a] "=&r"(vector_a), [dest] "+&r"(dest) - : [dest_stride] "r"(dest_stride), [vector_a1] "r"(vector_a1)); + : [stride] "r"(stride), [vector_a1] "r"(vector_a1)); } } else { /* use quad-byte @@ -288,10 +286,10 @@ void vpx_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest, "lw %[t2], 0(%[dest]) \n\t" "addu_s.qb %[vector_a], %[t2], %[vector_a1] \n\t" "sw %[vector_a], 0(%[dest]) \n\t" - "add %[dest], %[dest], %[dest_stride] \n\t" + "add %[dest], %[dest], %[stride] \n\t" : [t2] "=&r"(t2), [vector_a] "=&r"(vector_a), [dest] "+&r"(dest) - : [dest_stride] "r"(dest_stride), [vector_a1] "r"(vector_a1)); + : [stride] "r"(stride), [vector_a1] "r"(vector_a1)); } } } diff --git a/vpx_dsp/mips/itrans8_dspr2.c b/vpx_dsp/mips/itrans8_dspr2.c index 08a6c78b6..4a09d3301 100644 --- a/vpx_dsp/mips/itrans8_dspr2.c +++ b/vpx_dsp/mips/itrans8_dspr2.c @@ -192,8 +192,7 @@ void idct8_rows_dspr2(const int16_t *input, int16_t *output, uint32_t no_rows) { } } -void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, - int dest_stride) { +void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, int stride) { int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7; int Temp0, Temp1, Temp2, Temp3; int i; @@ -358,7 +357,7 @@ void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, "add %[Temp0], %[step1_1], %[step1_6] \n\t" "lbux %[Temp2], %[Temp1](%[cm]) \n\t" "sb %[Temp2], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[Temp1], 0(%[dest_pix]) \n\t" "addi %[Temp0], %[Temp0], 16 \n\t" @@ -367,7 +366,7 @@ void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, "add %[Temp0], %[step1_2], %[step1_5] \n\t" "lbux %[Temp2], %[Temp1](%[cm]) \n\t" "sb %[Temp2], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[Temp1], 0(%[dest_pix]) \n\t" "addi %[Temp0], %[Temp0], 16 \n\t" @@ -376,7 +375,7 @@ void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, "add %[Temp0], %[step1_3], %[step1_4] \n\t" "lbux %[Temp2], %[Temp1](%[cm]) \n\t" "sb %[Temp2], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[Temp1], 0(%[dest_pix]) \n\t" "addi %[Temp0], %[Temp0], 16 \n\t" @@ -385,7 +384,7 @@ void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, "sub %[Temp0], %[step1_3], %[step1_4] \n\t" "lbux %[Temp2], %[Temp1](%[cm]) \n\t" "sb %[Temp2], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[Temp1], 0(%[dest_pix]) \n\t" "addi %[Temp0], %[Temp0], 16 \n\t" @@ -394,7 +393,7 @@ void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, "sub %[Temp0], %[step1_2], %[step1_5] \n\t" "lbux %[Temp2], %[Temp1](%[cm]) \n\t" "sb %[Temp2], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[Temp1], 0(%[dest_pix]) \n\t" "addi %[Temp0], %[Temp0], 16 \n\t" @@ -403,7 +402,7 @@ void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, "sub %[Temp0], %[step1_1], %[step1_6] \n\t" "lbux %[Temp2], %[Temp1](%[cm]) \n\t" "sb %[Temp2], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[Temp1], 0(%[dest_pix]) \n\t" "addi %[Temp0], %[Temp0], 16 \n\t" @@ -412,7 +411,7 @@ void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, "sub %[Temp0], %[step1_0], %[step1_7] \n\t" "lbux %[Temp2], %[Temp1](%[cm]) \n\t" "sb %[Temp2], 0(%[dest_pix]) \n\t" - "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" + "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[Temp1], 0(%[dest_pix]) \n\t" "addi %[Temp0], %[Temp0], 16 \n\t" @@ -432,14 +431,13 @@ void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, [cospi_4_64] "r"(cospi_4_64), [cospi_12_64] "r"(cospi_12_64), [cospi_20_64] "r"(cospi_20_64), [cospi_8_64] "r"(cospi_8_64), [cospi_24_64] "r"(cospi_24_64), [input] "r"(input), [cm] "r"(cm), - [dest_stride] "r"(dest_stride)); + [stride] "r"(stride)); input += 8; } } -void vpx_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, - int dest_stride) { +void vpx_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, int stride) { DECLARE_ALIGNED(32, int16_t, out[8 * 8]); int16_t *outptr = out; uint32_t pos = 45; @@ -451,11 +449,10 @@ void vpx_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, idct8_rows_dspr2(input, outptr, 8); // Then transform columns and add to dest - idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride); + idct8_columns_add_blk_dspr2(&out[0], dest, stride); } -void vpx_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest, - int dest_stride) { +void vpx_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest, int stride) { DECLARE_ALIGNED(32, int16_t, out[8 * 8]); int16_t *outptr = out; uint32_t pos = 45; @@ -490,11 +487,10 @@ void vpx_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest, : [outptr] "r"(outptr)); // Then transform columns and add to dest - idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride); + idct8_columns_add_blk_dspr2(&out[0], dest, stride); } -void vpx_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest, - int dest_stride) { +void vpx_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest, int stride) { uint32_t pos = 45; int32_t out; int32_t r; @@ -533,11 +529,11 @@ void vpx_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest, "subu_s.qb %[vector_2], %[t2], %[vector_a1] \n\t" "sw %[vector_1], 0(%[dest]) \n\t" "sw %[vector_2], 4(%[dest]) \n\t" - "add %[dest], %[dest], %[dest_stride] \n\t" + "add %[dest], %[dest], %[stride] \n\t" : [t1] "=&r"(t1), [t2] "=&r"(t2), [vector_1] "=&r"(vector_1), [vector_2] "=&r"(vector_2), [dest] "+&r"(dest) - : [dest_stride] "r"(dest_stride), [vector_a1] "r"(vector_a1)); + : [stride] "r"(stride), [vector_a1] "r"(vector_a1)); } } else { /* use quad-byte @@ -555,11 +551,11 @@ void vpx_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest, "addu_s.qb %[vector_2], %[t2], %[vector_a1] \n\t" "sw %[vector_1], 0(%[dest]) \n\t" "sw %[vector_2], 4(%[dest]) \n\t" - "add %[dest], %[dest], %[dest_stride] \n\t" + "add %[dest], %[dest], %[stride] \n\t" : [t1] "=&r"(t1), [t2] "=&r"(t2), [vector_1] "=&r"(vector_1), [vector_2] "=&r"(vector_2), [dest] "+r"(dest) - : [dest_stride] "r"(dest_stride), [vector_a1] "r"(vector_a1)); + : [stride] "r"(stride), [vector_a1] "r"(vector_a1)); } } } diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 6d116559d..02efb44e1 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -612,192 +612,192 @@ if (vpx_config("CONFIG_VP9") eq "yes") { if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Note as optimized versions of these functions are added we need to add a check to ensure # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only. - add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_iwht4x4_16_add sse2/; - add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct4x4_1_add neon/; - add_proto qw/void vpx_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + add_proto qw/void vpx_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + add_proto qw/void vpx_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + add_proto qw/void vpx_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct32x32_1_add sse2/; - add_proto qw/void vpx_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + add_proto qw/void vpx_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + add_proto qw/void vpx_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; # Force C versions if CONFIG_EMULATE_HARDWARE is 1 if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") { - add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; } else { - add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct4x4_16_add neon sse2/; - add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct4x4_1_add neon sse2/; - add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct8x8_64_add neon sse2/, "$ssse3_x86_64"; - add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct8x8_12_add neon sse2/, "$ssse3_x86_64"; - add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct8x8_1_add neon sse2/; - add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct16x16_256_add neon sse2/; - add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct16x16_10_add neon sse2/; - add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct16x16_1_add neon sse2/; - add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct32x32_1024_add sse2/, "$ssse3_x86_64"; - add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct32x32_135_add neon sse2/, "$ssse3_x86_64"; # Need to add 135 eob idct32x32 implementations. $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2; - add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct32x32_34_add neon sse2/, "$ssse3_x86_64"; - add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct32x32_1_add neon sse2/; - add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct4x4_16_add neon sse2/; - add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct8x8_64_add sse2/; - add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct8x8_12_add sse2/; - add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct16x16_256_add sse2/; - add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct16x16_10_add sse2/; } # CONFIG_EMULATE_HARDWARE } else { # Force C versions if CONFIG_EMULATE_HARDWARE is 1 if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") { - add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; } else { - add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct4x4_1_add sse2 neon dspr2 msa/; - add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct4x4_16_add sse2 neon dspr2 msa/; - add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct8x8_1_add sse2 neon dspr2 msa/; - add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64"; - add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64"; - add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct16x16_1_add sse2 neon dspr2 msa/; - add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct16x16_256_add sse2 neon dspr2 msa/; - add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct16x16_10_add sse2 neon dspr2 msa/; - add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct32x32_1024_add sse2 neon dspr2 msa/, "$ssse3_x86_64"; - add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct32x32_135_add sse2 neon dspr2 msa/, "$ssse3_x86_64"; $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2; $vpx_idct32x32_135_add_dspr2=vpx_idct32x32_1024_add_dspr2; $vpx_idct32x32_135_add_msa=vpx_idct32x32_1024_add_msa; - add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct32x32_34_add sse2 neon dspr2 msa/, "$ssse3_x86_64"; - add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct32x32_1_add sse2 neon dspr2 msa/; - add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_iwht4x4_1_add msa/; - add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_iwht4x4_16_add msa sse2/; } # CONFIG_EMULATE_HARDWARE } # CONFIG_VP9_HIGHBITDEPTH |