diff options
author | Jingning Han <jingning@google.com> | 2017-01-26 15:00:04 -0800 |
---|---|---|
committer | James Zern <jzern@google.com> | 2017-01-31 23:17:09 -0800 |
commit | 969957f9f2a124861145a0d18781b855e98caa54 (patch) | |
tree | 36beed91af1ea8f5191fe7f30d69d01b9a0f5dd4 /vpx_dsp/avg.c | |
parent | 9efc42f4f89eeb05aba384e9179281ece3be6429 (diff) | |
download | libvpx-969957f9f2a124861145a0d18781b855e98caa54.tar libvpx-969957f9f2a124861145a0d18781b855e98caa54.tar.gz libvpx-969957f9f2a124861145a0d18781b855e98caa54.tar.bz2 libvpx-969957f9f2a124861145a0d18781b855e98caa54.zip |
Fix real-time compression regression in hbd mode
This commit resolves the compression performance regression in the
real-time encoding setting when high bit-depth mode is enabled.
The current solution temporarily disables the SIMD implementations
of vpx_satd, hadamard8x8, and hadamard16x16 in high bit-depth mode.
The commit makes the coding results bit-wise identical between the
regular coding pipeline and the high bit-depth pipeline at profile 0.
BUG=webm:1365
Change-Id: Icfb900821733749685370460a1a5a7e07f76f4bf
Diffstat (limited to 'vpx_dsp/avg.c')
-rw-r--r-- | vpx_dsp/avg.c | 33 |
1 files changed, 18 insertions, 15 deletions
diff --git a/vpx_dsp/avg.c b/vpx_dsp/avg.c index 4d9abb8de..e4cd6cca7 100644 --- a/vpx_dsp/avg.c +++ b/vpx_dsp/avg.c @@ -67,9 +67,10 @@ static void hadamard_col8(const int16_t *src_diff, int src_stride, // The order of the output coeff of the hadamard is not important. For // optimization purposes the final transpose may be skipped. void vpx_hadamard_8x8_c(const int16_t *src_diff, int src_stride, - int16_t *coeff) { + tran_low_t *coeff) { int idx; int16_t buffer[64]; + int16_t buffer2[64]; int16_t *tmp_buf = &buffer[0]; for (idx = 0; idx < 8; ++idx) { hadamard_col8(src_diff, src_stride, tmp_buf); // src_diff: 9 bit @@ -80,17 +81,19 @@ void vpx_hadamard_8x8_c(const int16_t *src_diff, int src_stride, tmp_buf = &buffer[0]; for (idx = 0; idx < 8; ++idx) { - hadamard_col8(tmp_buf, 8, coeff); // tmp_buf: 12 bit - // dynamic range [-2040, 2040] - coeff += 8; // coeff: 15 bit - // dynamic range [-16320, 16320] + hadamard_col8(tmp_buf, 8, buffer2 + 8 * idx); // tmp_buf: 12 bit + // dynamic range [-2040, 2040] + // buffer2: 15 bit + // dynamic range [-16320, 16320] ++tmp_buf; } + + for (idx = 0; idx < 64; ++idx) coeff[idx] = (tran_low_t)buffer2[idx]; } // In place 16x16 2D Hadamard transform void vpx_hadamard_16x16_c(const int16_t *src_diff, int src_stride, - int16_t *coeff) { + tran_low_t *coeff) { int idx; for (idx = 0; idx < 4; ++idx) { // src_diff: 9 bit, dynamic range [-255, 255] @@ -101,15 +104,15 @@ void vpx_hadamard_16x16_c(const int16_t *src_diff, int src_stride, // coeff: 15 bit, dynamic range [-16320, 16320] for (idx = 0; idx < 64; ++idx) { - int16_t a0 = coeff[0]; - int16_t a1 = coeff[64]; - int16_t a2 = coeff[128]; - int16_t a3 = coeff[192]; + tran_low_t a0 = coeff[0]; + tran_low_t a1 = coeff[64]; + tran_low_t a2 = coeff[128]; + tran_low_t a3 = coeff[192]; - int16_t b0 = (a0 + a1) >> 1; // (a0 + a1): 16 bit, [-32640, 32640] - int16_t b1 = (a0 - a1) >> 1; // b0-b3: 15 bit, dynamic range - int16_t b2 = (a2 + a3) >> 1; // [-16320, 16320] - int16_t b3 = (a2 - a3) >> 
1; + tran_low_t b0 = (a0 + a1) >> 1; // (a0 + a1): 16 bit, [-32640, 32640] + tran_low_t b1 = (a0 - a1) >> 1; // b0-b3: 15 bit, dynamic range + tran_low_t b2 = (a2 + a3) >> 1; // [-16320, 16320] + tran_low_t b3 = (a2 - a3) >> 1; coeff[0] = b0 + b2; // 16 bit, [-32640, 32640] coeff[64] = b1 + b3; @@ -122,7 +125,7 @@ void vpx_hadamard_16x16_c(const int16_t *src_diff, int src_stride, // coeff: 16 bits, dynamic range [-32640, 32640]. // length: value range {16, 64, 256, 1024}. -int vpx_satd_c(const int16_t *coeff, int length) { +int vpx_satd_c(const tran_low_t *coeff, int length) { int i; int satd = 0; for (i = 0; i < length; ++i) satd += abs(coeff[i]); |