diff options
author | Linfeng Zhang <linfengz@google.com> | 2017-02-14 15:39:37 -0800 |
---|---|---|
committer | Linfeng Zhang <linfengz@google.com> | 2017-02-14 17:25:52 -0800 |
commit | e07e74fb0f548a2ceb72da4d3264541ffc807db4 (patch) | |
tree | d96a84babc48f5fd1737eb1d75193cbaf60ce54a /vpx_dsp | |
parent | 429e6528097850b08b675e1fa8d75eef59a10e32 (diff) | |
download | libvpx-e07e74fb0f548a2ceb72da4d3264541ffc807db4.tar libvpx-e07e74fb0f548a2ceb72da4d3264541ffc807db4.tar.gz libvpx-e07e74fb0f548a2ceb72da4d3264541ffc807db4.tar.bz2 libvpx-e07e74fb0f548a2ceb72da4d3264541ffc807db4.zip |
Add vpx_highbd_idct16x16_38_add_c()
When eob is less than or equal to 38 for high-bitdepth 16x16 idct,
call this function.
BUG=webm:1301
Change-Id: I09167f89d29c401f9c36710b0fd2d02644052060
Diffstat (limited to 'vpx_dsp')
-rw-r--r-- | vpx_dsp/inv_txfm.c | 29 | ||||
-rw-r--r-- | vpx_dsp/vpx_dsp_rtcd_defs.pl | 7 |
2 files changed, 36 insertions, 0 deletions
diff --git a/vpx_dsp/inv_txfm.c b/vpx_dsp/inv_txfm.c
index 5cfc8e0bb..555205e65 100644
--- a/vpx_dsp/inv_txfm.c
+++ b/vpx_dsp/inv_txfm.c
@@ -2082,6 +2082,35 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
   output[15] = HIGHBD_WRAPLOW(-x1, bd);
 }

+void vpx_highbd_idct16x16_38_add_c(const tran_low_t *input, uint8_t *dest8,
+                                   int stride, int bd) {
+  int i, j;
+  tran_low_t out[16 * 16] = { 0 };
+  tran_low_t *outptr = out;
+  tran_low_t temp_in[16], temp_out[16];
+  uint16_t *const dest = CONVERT_TO_SHORTPTR(dest8);
+
+  // First transform rows. Since all non-zero dct coefficients are in
+  // upper-left 8x8 area, we only need to calculate first 8 rows here.
+  for (i = 0; i < 8; ++i) {
+    vpx_highbd_idct16_c(input, outptr, bd);
+    input += 16;
+    outptr += 16;
+  }
+
+  // Then transform columns
+  for (i = 0; i < 16; ++i) {
+    uint16_t *destT = dest;
+    for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
+    vpx_highbd_idct16_c(temp_in, temp_out, bd);
+    for (j = 0; j < 16; ++j) {
+      destT[i] = highbd_clip_pixel_add(destT[i],
+                                       ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
+      destT += stride;
+    }
+  }
+}
+
 void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
                                    int stride, int bd) {
   int i, j;
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 47616d638..0f8f50899 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -673,6 +673,8 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {

     add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";

+    add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+
     add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
   } else {
     add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
@@ -730,6 +732,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
     add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
     specialize qw/vpx_highbd_idct16x16_256_add neon sse2/;

+    add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+    specialize qw/vpx_highbd_idct16x16_38_add neon sse2/;
+    $vpx_highbd_idct16x16_38_add_neon=vpx_highbd_idct16x16_256_add_neon;
+    $vpx_highbd_idct16x16_38_add_sse2=vpx_highbd_idct16x16_256_add_sse2;
+
     add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
     specialize qw/vpx_highbd_idct16x16_10_add sse2/;
   } # CONFIG_EMULATE_HARDWARE