summaryrefslogtreecommitdiff
path: root/vpx_dsp/avg.c
diff options
context:
space:
mode:
author Jingning Han <jingning@google.com> 2017-01-26 15:00:04 -0800
committer James Zern <jzern@google.com> 2017-01-31 23:17:09 -0800
commit 969957f9f2a124861145a0d18781b855e98caa54 (patch)
tree 36beed91af1ea8f5191fe7f30d69d01b9a0f5dd4 /vpx_dsp/avg.c
parent 9efc42f4f89eeb05aba384e9179281ece3be6429 (diff)
downloadlibvpx-969957f9f2a124861145a0d18781b855e98caa54.tar
libvpx-969957f9f2a124861145a0d18781b855e98caa54.tar.gz
libvpx-969957f9f2a124861145a0d18781b855e98caa54.tar.bz2
libvpx-969957f9f2a124861145a0d18781b855e98caa54.zip
Fix real-time compression regression in hbd mode
This commit resolves the compression performance regression in the real-time encoding setting when high bit-depth mode is enabled. The current solution temporarily disables the SIMD implementations of vpx_satd, hadamard8x8, and hadamard16x16 in high bit-depth mode. The commit makes the coding results bit-wise identical between the regular coding pipeline and the high bit-depth pipeline at profile 0. BUG=webm:1365 Change-Id: Icfb900821733749685370460a1a5a7e07f76f4bf
Diffstat (limited to 'vpx_dsp/avg.c')
-rw-r--r--vpx_dsp/avg.c33
1 files changed, 18 insertions, 15 deletions
diff --git a/vpx_dsp/avg.c b/vpx_dsp/avg.c
index 4d9abb8de..e4cd6cca7 100644
--- a/vpx_dsp/avg.c
+++ b/vpx_dsp/avg.c
@@ -67,9 +67,10 @@ static void hadamard_col8(const int16_t *src_diff, int src_stride,
// The order of the output coeff of the hadamard is not important. For
// optimization purposes the final transpose may be skipped.
void vpx_hadamard_8x8_c(const int16_t *src_diff, int src_stride,
- int16_t *coeff) {
+ tran_low_t *coeff) {
int idx;
int16_t buffer[64];
+ int16_t buffer2[64];
int16_t *tmp_buf = &buffer[0];
for (idx = 0; idx < 8; ++idx) {
hadamard_col8(src_diff, src_stride, tmp_buf); // src_diff: 9 bit
@@ -80,17 +81,19 @@ void vpx_hadamard_8x8_c(const int16_t *src_diff, int src_stride,
tmp_buf = &buffer[0];
for (idx = 0; idx < 8; ++idx) {
- hadamard_col8(tmp_buf, 8, coeff); // tmp_buf: 12 bit
- // dynamic range [-2040, 2040]
- coeff += 8; // coeff: 15 bit
- // dynamic range [-16320, 16320]
+ hadamard_col8(tmp_buf, 8, buffer2 + 8 * idx); // tmp_buf: 12 bit
+ // dynamic range [-2040, 2040]
+ // buffer2: 15 bit
+ // dynamic range [-16320, 16320]
++tmp_buf;
}
+
+ for (idx = 0; idx < 64; ++idx) coeff[idx] = (tran_low_t)buffer2[idx];
}
// In place 16x16 2D Hadamard transform
void vpx_hadamard_16x16_c(const int16_t *src_diff, int src_stride,
- int16_t *coeff) {
+ tran_low_t *coeff) {
int idx;
for (idx = 0; idx < 4; ++idx) {
// src_diff: 9 bit, dynamic range [-255, 255]
@@ -101,15 +104,15 @@ void vpx_hadamard_16x16_c(const int16_t *src_diff, int src_stride,
// coeff: 15 bit, dynamic range [-16320, 16320]
for (idx = 0; idx < 64; ++idx) {
- int16_t a0 = coeff[0];
- int16_t a1 = coeff[64];
- int16_t a2 = coeff[128];
- int16_t a3 = coeff[192];
+ tran_low_t a0 = coeff[0];
+ tran_low_t a1 = coeff[64];
+ tran_low_t a2 = coeff[128];
+ tran_low_t a3 = coeff[192];
- int16_t b0 = (a0 + a1) >> 1; // (a0 + a1): 16 bit, [-32640, 32640]
- int16_t b1 = (a0 - a1) >> 1; // b0-b3: 15 bit, dynamic range
- int16_t b2 = (a2 + a3) >> 1; // [-16320, 16320]
- int16_t b3 = (a2 - a3) >> 1;
+ tran_low_t b0 = (a0 + a1) >> 1; // (a0 + a1): 16 bit, [-32640, 32640]
+ tran_low_t b1 = (a0 - a1) >> 1; // b0-b3: 15 bit, dynamic range
+ tran_low_t b2 = (a2 + a3) >> 1; // [-16320, 16320]
+ tran_low_t b3 = (a2 - a3) >> 1;
coeff[0] = b0 + b2; // 16 bit, [-32640, 32640]
coeff[64] = b1 + b3;
@@ -122,7 +125,7 @@ void vpx_hadamard_16x16_c(const int16_t *src_diff, int src_stride,
// coeff: 16 bits, dynamic range [-32640, 32640].
// length: value range {16, 64, 256, 1024}.
-int vpx_satd_c(const int16_t *coeff, int length) {
+int vpx_satd_c(const tran_low_t *coeff, int length) {
int i;
int satd = 0;
for (i = 0; i < length; ++i) satd += abs(coeff[i]);