diff options
-rw-r--r-- | test/avg_test.cc | 12 | ||||
-rw-r--r-- | test/hadamard_test.cc | 4 | ||||
-rw-r--r-- | vp9/common/vp9_rtcd_defs.pl | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 36 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 2 | ||||
-rw-r--r-- | vpx_dsp/avg.c | 33 | ||||
-rw-r--r-- | vpx_dsp/vpx_dsp_rtcd_defs.pl | 24 |
8 files changed, 74 insertions, 44 deletions
diff --git a/test/avg_test.cc b/test/avg_test.cc index bf88d83db..5ef402ab1 100644 --- a/test/avg_test.cc +++ b/test/avg_test.cc @@ -315,11 +315,13 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values(make_tuple(16, 16, 1, 8, &vpx_avg_8x8_c), make_tuple(16, 16, 1, 4, &vpx_avg_4x4_c))); +#if !CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P(C, SatdTest, ::testing::Values(make_tuple(16, &vpx_satd_c), make_tuple(64, &vpx_satd_c), make_tuple(256, &vpx_satd_c), make_tuple(1024, &vpx_satd_c))); +#endif #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( @@ -345,12 +347,14 @@ INSTANTIATE_TEST_CASE_P( make_tuple(64, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c))); +#if !CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P(SSE2, SatdTest, ::testing::Values(make_tuple(16, &vpx_satd_sse2), make_tuple(64, &vpx_satd_sse2), make_tuple(256, &vpx_satd_sse2), make_tuple(1024, &vpx_satd_sse2))); #endif +#endif #if HAVE_NEON INSTANTIATE_TEST_CASE_P( @@ -376,12 +380,14 @@ INSTANTIATE_TEST_CASE_P( make_tuple(64, &vpx_int_pro_col_neon, &vpx_int_pro_col_c))); +#if !CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P(NEON, SatdTest, ::testing::Values(make_tuple(16, &vpx_satd_neon), make_tuple(64, &vpx_satd_neon), make_tuple(256, &vpx_satd_neon), make_tuple(1024, &vpx_satd_neon))); -#endif +#endif // !CONFIG_VP9_HIGHBITDEPTH +#endif // HAVE_NEON #if HAVE_MSA INSTANTIATE_TEST_CASE_P( @@ -407,11 +413,13 @@ INSTANTIATE_TEST_CASE_P( make_tuple(64, &vpx_int_pro_col_msa, &vpx_int_pro_col_c))); +#if !CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P(MSA, SatdTest, ::testing::Values(make_tuple(16, &vpx_satd_msa), make_tuple(64, &vpx_satd_msa), make_tuple(256, &vpx_satd_msa), make_tuple(1024, &vpx_satd_msa))); -#endif +#endif // !CONFIG_VP9_HIGHBITDEPTH +#endif // HAVE_MSA } // namespace diff --git a/test/hadamard_test.cc b/test/hadamard_test.cc index 317feba5f..72e8ede75 100644 --- a/test/hadamard_test.cc +++ b/test/hadamard_test.cc @@ -144,6 +144,7 @@ TEST_P(Hadamard8x8Test, VaryStride) { } } +#if !CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test, ::testing::Values(&vpx_hadamard_8x8_c)); @@ -166,6 +167,7 @@ INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test, INSTANTIATE_TEST_CASE_P(MSA, Hadamard8x8Test, ::testing::Values(&vpx_hadamard_8x8_msa)); #endif // HAVE_MSA +#endif // !CONFIG_VP9_HIGHBITDEPTH class Hadamard16x16Test : public HadamardTestBase {}; @@ -210,6 +212,7 @@ TEST_P(Hadamard16x16Test, VaryStride) { } } +#if !CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test, ::testing::Values(&vpx_hadamard_16x16_c)); @@ -227,4 +230,5 @@ INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test, INSTANTIATE_TEST_CASE_P(MSA, Hadamard16x16Test, ::testing::Values(&vpx_hadamard_16x16_msa)); #endif // HAVE_MSA +#endif // !CONFIG_VP9_HIGHBITDEPTH } // namespace diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 088b004f5..ecdce7c34 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -132,6 +132,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/int64_t vp9_highbd_block_error_8bit/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; specialize qw/vp9_highbd_block_error_8bit sse2 avx/; + add_proto qw/int64_t vp9_block_error_fp/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size"; + specialize qw/vp9_block_error_fp/; + add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 59b90be14..7c809e155 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -1815,7 +1815,9 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, } #if CONFIG_VP9_HIGHBITDEPTH - { + // TODO(jingning): Implement integral projection functions for high bit-depth + // setting and remove this part of code. + if (xd->bd != 8) { unsigned int this_sad; tmp_mv->row = 0; tmp_mv->col = 0; diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index aa6b208be..ca41b2dc3 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -590,21 +590,6 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, *out_dist_sum += dist << 4; } -#if CONFIG_VP9_HIGHBITDEPTH -static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, - int *skippable, int64_t *sse, BLOCK_SIZE bsize, - TX_SIZE tx_size) { - MACROBLOCKD *xd = &x->e_mbd; - unsigned int var_y, sse_y; - - (void)tx_size; - model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist, &var_y, - &sse_y); - *sse = INT_MAX; - *skippable = 0; - return; -} -#else static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, int *skippable, int64_t *sse, BLOCK_SIZE bsize, TX_SIZE tx_size) { @@ -624,6 +609,20 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, const int bw = 4 * num_4x4_w; const int bh = 4 * num_4x4_h; +#if CONFIG_VP9_HIGHBITDEPTH + // TODO(jingning): Implement the high bit-depth Hadamard transforms and + // remove this check condition. + if (xd->bd != 8) { + unsigned int var_y, sse_y; + (void)tx_size; + model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist, + &var_y, &sse_y); + *sse = INT_MAX; + *skippable = 0; + return; + } +#endif + (void)cpi; // The max tx_size passed in is TX_16X16. @@ -648,14 +647,14 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, switch (tx_size) { case TX_16X16: - vpx_hadamard_16x16(src_diff, diff_stride, (int16_t *)coeff); + vpx_hadamard_16x16(src_diff, diff_stride, coeff); vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_8X8: - vpx_hadamard_8x8(src_diff, diff_stride, (int16_t *)coeff); + vpx_hadamard_8x8(src_diff, diff_stride, coeff); vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, @@ -699,7 +698,7 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, if (*eob == 1) this_rdc->rate += (int)abs(qcoeff[0]); else if (*eob > 1) - this_rdc->rate += vpx_satd((const int16_t *)qcoeff, step << 4); + this_rdc->rate += vpx_satd(qcoeff, step << 4); this_rdc->dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> 2; } @@ -711,7 +710,6 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, this_rdc->rate <<= (2 + VP9_PROB_COST_SHIFT); this_rdc->rate += (eob_cost << VP9_PROB_COST_SHIFT); } -#endif static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize, MACROBLOCK *x, MACROBLOCKD *xd, diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 27d4e9d6d..ffacc0fa2 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -321,7 +321,7 @@ int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, return error; } -int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, +int64_t vp9_block_error_fp_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size) { int i; int64_t error = 0; diff --git a/vpx_dsp/avg.c b/vpx_dsp/avg.c index 4d9abb8de..e4cd6cca7 100644 --- a/vpx_dsp/avg.c +++ b/vpx_dsp/avg.c @@ -67,9 +67,10 @@ static void hadamard_col8(const int16_t *src_diff, int src_stride, // The order of the output coeff of the hadamard is not important. For // optimization purposes the final transpose may be skipped. void vpx_hadamard_8x8_c(const int16_t *src_diff, int src_stride, - int16_t *coeff) { + tran_low_t *coeff) { int idx; int16_t buffer[64]; + int16_t buffer2[64]; int16_t *tmp_buf = &buffer[0]; for (idx = 0; idx < 8; ++idx) { hadamard_col8(src_diff, src_stride, tmp_buf); // src_diff: 9 bit @@ -80,17 +81,19 @@ void vpx_hadamard_8x8_c(const int16_t *src_diff, int src_stride, tmp_buf = &buffer[0]; for (idx = 0; idx < 8; ++idx) { - hadamard_col8(tmp_buf, 8, coeff); // tmp_buf: 12 bit - // dynamic range [-2040, 2040] - coeff += 8; // coeff: 15 bit - // dynamic range [-16320, 16320] + hadamard_col8(tmp_buf, 8, buffer2 + 8 * idx); // tmp_buf: 12 bit + // dynamic range [-2040, 2040] + // buffer2: 15 bit + // dynamic range [-16320, 16320] ++tmp_buf; } + + for (idx = 0; idx < 64; ++idx) coeff[idx] = (tran_low_t)buffer2[idx]; } // In place 16x16 2D Hadamard transform void vpx_hadamard_16x16_c(const int16_t *src_diff, int src_stride, - int16_t *coeff) { + tran_low_t *coeff) { int idx; for (idx = 0; idx < 4; ++idx) { // src_diff: 9 bit, dynamic range [-255, 255] @@ -101,15 +104,15 @@ void vpx_hadamard_16x16_c(const int16_t *src_diff, int src_stride, // coeff: 15 bit, dynamic range [-16320, 16320] for (idx = 0; idx < 64; ++idx) { - int16_t a0 = coeff[0]; - int16_t a1 = coeff[64]; - int16_t a2 = coeff[128]; - int16_t a3 = coeff[192]; + tran_low_t a0 = coeff[0]; + tran_low_t a1 = coeff[64]; + tran_low_t a2 = coeff[128]; + tran_low_t a3 = coeff[192]; - int16_t b0 = (a0 + a1) >> 1; // (a0 + a1): 16 bit, [-32640, 32640] - int16_t b1 = (a0 - a1) >> 1; // b0-b3: 15 bit, dynamic range - int16_t b2 = (a2 + a3) >> 1; // [-16320, 16320] - int16_t b3 = (a2 - a3) >> 1; + tran_low_t b0 = (a0 + a1) >> 1; // (a0 + a1): 16 bit, [-32640, 32640] + tran_low_t b1 = (a0 - a1) >> 1; // b0-b3: 15 bit, dynamic range + tran_low_t b2 = (a2 + a3) >> 1; // [-16320, 16320] + tran_low_t b3 = (a2 - a3) >> 1; coeff[0] = b0 + b2; // 16 bit, [-32640, 32640] coeff[64] = b1 + b3; @@ -122,7 +125,7 @@ void vpx_hadamard_16x16_c(const int16_t *src_diff, int src_stride, // coeff: 16 bits, dynamic range [-32640, 32640]. // length: value range {16, 64, 256, 1024}. -int vpx_satd_c(const int16_t *coeff, int length) { +int vpx_satd_c(const tran_low_t *coeff, int length) { int i; int satd = 0; for (i = 0; i < length; ++i) satd += abs(coeff[i]); diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index e7f4010de..3cb2011b8 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -885,14 +885,26 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { add_proto qw/void vpx_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; specialize qw/vpx_minmax_8x8 sse2 neon msa/; - add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff"; - specialize qw/vpx_hadamard_8x8 sse2 neon msa/, "$ssse3_x86_64"; - add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff"; - specialize qw/vpx_hadamard_16x16 sse2 neon msa/; + if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, tran_low_t *coeff"; + specialize qw/vpx_hadamard_8x8/; + + add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, int src_stride, tran_low_t *coeff"; + specialize qw/vpx_hadamard_16x16/; + + add_proto qw/int vpx_satd/, "const tran_low_t *coeff, int length"; + specialize qw/vpx_satd/; + } else { + add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff"; + specialize qw/vpx_hadamard_8x8 sse2 neon msa/, "$ssse3_x86_64"; + + add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff"; + specialize qw/vpx_hadamard_16x16 sse2 neon msa/; - add_proto qw/int vpx_satd/, "const int16_t *coeff, int length"; - specialize qw/vpx_satd sse2 neon msa/; + add_proto qw/int vpx_satd/, "const int16_t *coeff, int length"; + specialize qw/vpx_satd sse2 neon msa/; + } add_proto qw/void vpx_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, const int ref_stride, const int height"; specialize qw/vpx_int_pro_row sse2 neon msa/; |