diff options
author | Deb Mukherjee <debargha@google.com> | 2014-09-02 16:34:09 -0700 |
---|---|---|
committer | Deb Mukherjee <debargha@google.com> | 2014-09-11 19:56:33 -0700 |
commit | 10783d4f3a6188a01f9a993c56ab0013acd97214 (patch) | |
tree | 068914b26183a52c2cf7a084cfca333a94cf952f /vp9/common/vp9_rtcd_defs.pl | |
parent | 1e4136d35d0323f64817a0d9ae333ba9f6f731c3 (diff) | |
download | libvpx-10783d4f3a6188a01f9a993c56ab0013acd97214.tar libvpx-10783d4f3a6188a01f9a993c56ab0013acd97214.tar.gz libvpx-10783d4f3a6188a01f9a993c56ab0013acd97214.tar.bz2 libvpx-10783d4f3a6188a01f9a993c56ab0013acd97214.zip |
Adds high bitdepth transform functions and tests
Adds various high bitdepth transform functions and tests.
Most of the changes are related to using typedefs tran_low_t
and tran_high_t for the final transform coefficients and intermediate
stages of the transform computation respectively rather than fixed
types int16_t/int. When vp9_highbitdepth configure flag is off,
these map to int16_t/int32_t, but when the flag is on, they map
to int32_t/int64_t to make space for needed extra precision.
Change-Id: I3c56de79e15b904d6f655b62ffae170729befdd8
Diffstat (limited to 'vp9/common/vp9_rtcd_defs.pl')
-rw-r--r-- | vp9/common/vp9_rtcd_defs.pl | 407 |
1 files changed, 325 insertions, 82 deletions
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 1e030cf8d..32bcf9a77 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -6,6 +6,7 @@ print <<EOF #include "vpx/vpx_integer.h" #include "vp9/common/vp9_enums.h" +#include "vp9/common/vp9_idct.h" struct macroblockd; @@ -329,68 +330,177 @@ $vp9_convolve8_avg_vert_neon_asm=vp9_convolve8_avg_vert_neon; # # dct # -add_proto qw/void vp9_idct4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct4x4_1_add sse2 neon_asm dspr2/; -$vp9_idct4x4_1_add_neon_asm=vp9_idct4x4_1_add_neon; +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct4x4_1_add/; + + add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct4x4_16_add/; + + add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct8x8_1_add/; + + add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct8x8_64_add/; + + add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct8x8_12_add/; + + add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct16x16_1_add/; -add_proto qw/void vp9_idct4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct4x4_16_add sse2 neon_asm dspr2/; -$vp9_idct4x4_16_add_neon_asm=vp9_idct4x4_16_add_neon; + add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct16x16_256_add/; -add_proto qw/void vp9_idct8x8_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct8x8_1_add sse2 neon_asm 
dspr2/; -$vp9_idct8x8_1_add_neon_asm=vp9_idct8x8_1_add_neon; + add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct16x16_10_add/; -add_proto qw/void vp9_idct8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct8x8_64_add sse2 neon_asm dspr2/, "$ssse3_x86_64"; -$vp9_idct8x8_64_add_neon_asm=vp9_idct8x8_64_add_neon; + add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct32x32_1024_add/; -add_proto qw/void vp9_idct8x8_12_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct8x8_12_add sse2 neon_asm dspr2/, "$ssse3_x86_64"; -$vp9_idct8x8_12_add_neon_asm=vp9_idct8x8_12_add_neon; + add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct32x32_34_add/; -add_proto qw/void vp9_idct16x16_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/; -$vp9_idct16x16_1_add_neon_asm=vp9_idct16x16_1_add_neon; + add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct32x32_1_add/; -add_proto qw/void vp9_idct16x16_256_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/; -$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon; + add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; + specialize qw/vp9_iht4x4_16_add/; + + add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; + specialize qw/vp9_iht8x8_64_add/; + + add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type"; + specialize qw/vp9_iht16x16_256_add/; + + # dct and add + + add_proto 
qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_iwht4x4_1_add/; + + add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_iwht4x4_16_add/; +} else { + add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct4x4_1_add sse2 neon_asm dspr2/; + $vp9_idct4x4_1_add_neon_asm=vp9_idct4x4_1_add_neon; -add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/; -$vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon; + add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct4x4_16_add sse2 neon_asm dspr2/; + $vp9_idct4x4_16_add_neon_asm=vp9_idct4x4_16_add_neon; -add_proto qw/void vp9_idct32x32_1024_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/; -$vp9_idct32x32_1024_add_neon_asm=vp9_idct32x32_1024_add_neon; + add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct8x8_1_add sse2 neon_asm dspr2/; + $vp9_idct8x8_1_add_neon_asm=vp9_idct8x8_1_add_neon; -add_proto qw/void vp9_idct32x32_34_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/; -$vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon; + add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct8x8_64_add sse2 neon_asm dspr2/, "$ssse3_x86_64"; + $vp9_idct8x8_64_add_neon_asm=vp9_idct8x8_64_add_neon; -add_proto qw/void vp9_idct32x32_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct32x32_1_add sse2 neon_asm dspr2/; 
-$vp9_idct32x32_1_add_neon_asm=vp9_idct32x32_1_add_neon; + add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct8x8_12_add sse2 neon_asm dspr2/, "$ssse3_x86_64"; + $vp9_idct8x8_12_add_neon_asm=vp9_idct8x8_12_add_neon; -add_proto qw/void vp9_iht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"; -specialize qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/; -$vp9_iht4x4_16_add_neon_asm=vp9_iht4x4_16_add_neon; + add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/; + $vp9_idct16x16_1_add_neon_asm=vp9_idct16x16_1_add_neon; -add_proto qw/void vp9_iht8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"; -specialize qw/vp9_iht8x8_64_add sse2 neon_asm dspr2/; -$vp9_iht8x8_64_add_neon_asm=vp9_iht8x8_64_add_neon; + add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/; + $vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon; + + add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/; + $vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon; + + add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/; + $vp9_idct32x32_1024_add_neon_asm=vp9_idct32x32_1024_add_neon; + + add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/; + $vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon; + + add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_idct32x32_1_add sse2 
neon_asm dspr2/; + $vp9_idct32x32_1_add_neon_asm=vp9_idct32x32_1_add_neon; + + add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; + specialize qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/; + $vp9_iht4x4_16_add_neon_asm=vp9_iht4x4_16_add_neon; + + add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; + specialize qw/vp9_iht8x8_64_add sse2 neon_asm dspr2/; + $vp9_iht8x8_64_add_neon_asm=vp9_iht8x8_64_add_neon; + + add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type"; + specialize qw/vp9_iht16x16_256_add sse2 dspr2/; + + # dct and add + + add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_iwht4x4_1_add/; + + add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vp9_iwht4x4_16_add/; +} -add_proto qw/void vp9_iht16x16_256_add/, "const int16_t *input, uint8_t *output, int pitch, int tx_type"; -specialize qw/vp9_iht16x16_256_add sse2 dspr2/; + +# High bitdepth functions +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { +# +# dct +# +add_proto qw/void vp9_high_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; +specialize qw/vp9_high_idct4x4_1_add/; + +add_proto qw/void vp9_high_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; +specialize qw/vp9_high_idct4x4_16_add/; + +add_proto qw/void vp9_high_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; +specialize qw/vp9_high_idct8x8_1_add/; + +add_proto qw/void vp9_high_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; +specialize qw/vp9_high_idct8x8_64_add/; + +add_proto qw/void vp9_high_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; +specialize 
qw/vp9_high_idct8x8_10_add/; + +add_proto qw/void vp9_high_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; +specialize qw/vp9_high_idct16x16_1_add/; + +add_proto qw/void vp9_high_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; +specialize qw/vp9_high_idct16x16_256_add/; + +add_proto qw/void vp9_high_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; +specialize qw/vp9_high_idct16x16_10_add/; + +add_proto qw/void vp9_high_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; +specialize qw/vp9_high_idct32x32_1024_add/; + +add_proto qw/void vp9_high_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; +specialize qw/vp9_high_idct32x32_34_add/; + +add_proto qw/void vp9_high_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; +specialize qw/vp9_high_idct32x32_1_add/; + +add_proto qw/void vp9_high_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd"; +specialize qw/vp9_high_iht4x4_16_add/; + +add_proto qw/void vp9_high_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd"; +specialize qw/vp9_high_iht8x8_64_add/; + +add_proto qw/void vp9_high_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type, int bd"; +specialize qw/vp9_high_iht16x16_256_add/; # dct and add -add_proto qw/void vp9_iwht4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_iwht4x4_1_add/; +add_proto qw/void vp9_high_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; +specialize qw/vp9_high_iwht4x4_1_add/; -add_proto qw/void vp9_iwht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_iwht4x4_16_add/; +add_proto qw/void vp9_high_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, 
int dest_stride, int bd"; +specialize qw/vp9_high_iwht4x4_16_add/; +} # # Encoder functions below this point. @@ -706,23 +816,42 @@ add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *"; specialize qw/vp9_get_mb_ss/, "$sse2_x86inc"; # ENCODEMB INVOKE -add_proto qw/int64_t vp9_block_error/, "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz"; -specialize qw/vp9_block_error avx2/, "$sse2_x86inc"; - add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; specialize qw/vp9_subtract_block neon/, "$sse2_x86inc"; -add_proto qw/void vp9_quantize_fp/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; -specialize qw/vp9_quantize_fp neon/, "$ssse3_x86_64"; +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { +# the transform coefficients are held in 32-bit +# values, so the assembler code for vp9_block_error can no longer be used. 
+ add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; + specialize qw/vp9_block_error/; -add_proto qw/void vp9_quantize_fp_32x32/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; -specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64"; + add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp9_quantize_fp/; -add_proto qw/void vp9_quantize_b/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; -specialize qw/vp9_quantize_b/, "$ssse3_x86_64"; + add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp9_quantize_fp_32x32/; -add_proto qw/void vp9_quantize_b_32x32/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, 
const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; -specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64"; + add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp9_quantize_b/; + + add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp9_quantize_b_32x32/; +} else { + add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; + specialize qw/vp9_block_error avx2/; + + add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp9_quantize_fp neon/, "$ssse3_x86_64"; + + add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t 
*eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64"; + + add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp9_quantize_b/, "$ssse3_x86_64"; + + add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp9_quantize_b_32x32/; +} # # Structured Similarity (SSIM) @@ -736,44 +865,86 @@ if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") { } # fdct functions -add_proto qw/void vp9_fht4x4/, "const int16_t *input, int16_t *output, int stride, int tx_type"; -specialize qw/vp9_fht4x4 sse2/; -add_proto qw/void vp9_fht8x8/, "const int16_t *input, int16_t *output, int stride, int tx_type"; -specialize qw/vp9_fht8x8 sse2/; +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + specialize qw/vp9_fht4x4/; + + add_proto qw/void vp9_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + specialize qw/vp9_fht8x8/; + + add_proto qw/void vp9_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + specialize qw/vp9_fht16x16/; + + add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_fwht4x4/; + + add_proto qw/void vp9_fdct4x4_1/, "const int16_t *input, tran_low_t *output, 
int stride"; + specialize qw/vp9_fdct4x4_1/; -add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int stride, int tx_type"; -specialize qw/vp9_fht16x16 sse2/; + add_proto qw/void vp9_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_fdct4x4/; -add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride"; -specialize qw/vp9_fwht4x4/, "$mmx_x86inc"; + add_proto qw/void vp9_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_fdct8x8_1/; -add_proto qw/void vp9_fdct4x4_1/, "const int16_t *input, int16_t *output, int stride"; -specialize qw/vp9_fdct4x4_1 sse2/; + add_proto qw/void vp9_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_fdct8x8/; -add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride"; -specialize qw/vp9_fdct4x4 sse2/; + add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_fdct16x16_1/; -add_proto qw/void vp9_fdct8x8_1/, "const int16_t *input, int16_t *output, int stride"; -specialize qw/vp9_fdct8x8_1 sse2 neon/; + add_proto qw/void vp9_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_fdct16x16/; -add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stride"; -specialize qw/vp9_fdct8x8 sse2 neon/, "$ssse3_x86_64"; + add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_fdct32x32_1/; -add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, int16_t *output, int stride"; -specialize qw/vp9_fdct16x16_1 sse2/; + add_proto qw/void vp9_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_fdct32x32/; -add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride"; -specialize qw/vp9_fdct16x16 sse2/; + add_proto qw/void vp9_fdct32x32_rd/, "const int16_t 
*input, tran_low_t *output, int stride"; + specialize qw/vp9_fdct32x32_rd/; +} else { + add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + specialize qw/vp9_fht4x4 sse2/; + + add_proto qw/void vp9_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + specialize qw/vp9_fht8x8 sse2/; + + add_proto qw/void vp9_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + specialize qw/vp9_fht16x16 sse2/; + + add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_fwht4x4/, "$mmx_x86inc"; + + add_proto qw/void vp9_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_fdct4x4_1 sse2/; + + add_proto qw/void vp9_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_fdct4x4 sse2/; -add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, int16_t *output, int stride"; -specialize qw/vp9_fdct32x32_1 sse2/; + add_proto qw/void vp9_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_fdct8x8_1 sse2 neon/; -add_proto qw/void vp9_fdct32x32/, "const int16_t *input, int16_t *output, int stride"; -specialize qw/vp9_fdct32x32 sse2 avx2/; + add_proto qw/void vp9_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_fdct8x8 sse2 neon/, "$ssse3_x86_64"; -add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, int16_t *output, int stride"; -specialize qw/vp9_fdct32x32_rd sse2 avx2/; + add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_fdct16x16_1 sse2/; + + add_proto qw/void vp9_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_fdct16x16 sse2/; + + add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_fdct32x32_1 sse2/; + + add_proto qw/void 
vp9_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_fdct32x32 sse2 avx2/; + + add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_fdct32x32_rd sse2 avx2/; +} # # Motion search @@ -1369,7 +1540,79 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/unsigned int vp9_high_12_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; specialize qw/vp9_high_12_mse8x8/; + # ENCODEMB INVOKE + + add_proto qw/int64_t vp9_high_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd"; + specialize qw/vp9_high_block_error/; + + add_proto qw/void vp9_high_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd"; + specialize qw/vp9_high_subtract_block/; + + add_proto qw/void vp9_high_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp9_high_quantize_fp/; + + add_proto qw/void vp9_high_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp9_high_quantize_fp_32x32/; + + add_proto qw/void vp9_high_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t 
*round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp9_high_quantize_b/; + + add_proto qw/void vp9_high_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp9_high_quantize_b_32x32/; + + # + # Structured Similarity (SSIM) + # + if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") { + add_proto qw/void vp9_high_ssim_parms_8x8/, "uint16_t *s, int sp, uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr"; + specialize qw/vp9_high_ssim_parms_8x8/; + + add_proto qw/void vp9_high_ssim_parms_8x8_shift/, "uint16_t *s, int sp, uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr, unsigned int bd, unsigned int shift"; + specialize qw/vp9_high_ssim_parms_8x8_shift/; + } + + # fdct functions + add_proto qw/void vp9_high_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + specialize qw/vp9_high_fht4x4/; + + add_proto qw/void vp9_high_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + specialize qw/vp9_high_fht8x8/; + + add_proto qw/void vp9_high_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + specialize qw/vp9_high_fht16x16/; + + add_proto qw/void vp9_high_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_high_fwht4x4/; + + add_proto qw/void vp9_high_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride"; + specialize 
qw/vp9_high_fdct4x4/; + + add_proto qw/void vp9_high_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_high_fdct8x8_1/; + + add_proto qw/void vp9_high_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_high_fdct8x8/; + + add_proto qw/void vp9_high_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_high_fdct16x16_1/; + + add_proto qw/void vp9_high_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_high_fdct16x16/; + + add_proto qw/void vp9_high_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_high_fdct32x32_1/; + + add_proto qw/void vp9_high_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_high_fdct32x32/; + + add_proto qw/void vp9_high_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vp9_high_fdct32x32_rd/; + + add_proto qw/void vp9_high_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; + specialize qw/vp9_high_temporal_filter_apply/; + } +# End vp9_high encoder functions } # end encoder functions |