diff options
Diffstat (limited to 'vpx_dsp')
-rw-r--r-- | vpx_dsp/inv_txfm.c | 29 | ||||
-rw-r--r-- | vpx_dsp/vpx_dsp_rtcd_defs.pl | 7 |
2 files changed, 36 insertions, 0 deletions
diff --git a/vpx_dsp/inv_txfm.c b/vpx_dsp/inv_txfm.c
index 5cfc8e0bb..555205e65 100644
--- a/vpx_dsp/inv_txfm.c
+++ b/vpx_dsp/inv_txfm.c
@@ -2082,6 +2082,35 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
   output[15] = HIGHBD_WRAPLOW(-x1, bd);
 }
 
+void vpx_highbd_idct16x16_38_add_c(const tran_low_t *input, uint8_t *dest8,
+                                   int stride, int bd) {
+  int i, j;
+  tran_low_t out[16 * 16] = { 0 };
+  tran_low_t *outptr = out;
+  tran_low_t temp_in[16], temp_out[16];
+  uint16_t *const dest = CONVERT_TO_SHORTPTR(dest8);
+
+  // First transform rows. Since all non-zero dct coefficients are in
+  // upper-left 8x8 area, we only need to calculate first 8 rows here.
+  for (i = 0; i < 8; ++i) {
+    vpx_highbd_idct16_c(input, outptr, bd);
+    input += 16;
+    outptr += 16;
+  }
+
+  // Then transform columns
+  for (i = 0; i < 16; ++i) {
+    uint16_t *destT = dest;
+    for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
+    vpx_highbd_idct16_c(temp_in, temp_out, bd);
+    for (j = 0; j < 16; ++j) {
+      destT[i] = highbd_clip_pixel_add(destT[i],
+                                       ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
+      destT += stride;
+    }
+  }
+}
+
 void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
                                    int stride, int bd) {
   int i, j;
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 47616d638..0f8f50899 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -673,6 +673,8 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
 
     add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
 
+    add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+
     add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
   } else {
     add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
@@ -730,6 +732,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
     add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
     specialize qw/vpx_highbd_idct16x16_256_add neon sse2/;
 
+    add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+    specialize qw/vpx_highbd_idct16x16_38_add neon sse2/;
+    $vpx_highbd_idct16x16_38_add_neon=vpx_highbd_idct16x16_256_add_neon;
+    $vpx_highbd_idct16x16_38_add_sse2=vpx_highbd_idct16x16_256_add_sse2;
+
     add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
     specialize qw/vpx_highbd_idct16x16_10_add sse2/;
   } # CONFIG_EMULATE_HARDWARE