Fix real-time compression regression in hbd mode

This commit resolves the compression performance regression in the real-time encoding setting when high bit-depth mode is enabled. The current solution temporarily disables the SIMD implementations of vpx_satd, hadamard8x8, and hadamard16x16 in high bit-depth mode. The commit makes the coding results bit-wise identical between the regular coding pipeline and the high bit-depth pipeline at profile 0.
BUG=webm:1365
Change-Id: Icfb900821733749685370460a1a5a7e07f76f4bf
author: Jingning Han <jingning@google.com> 2017-01-26 15:00:04 -0800
committer: James Zern <jzern@google.com> 2017-01-31 23:17:09 -0800
commit: 969957f9f2a124861145a0d18781b855e98caa54 (patch)
tree: 36beed91af1ea8f5191fe7f30d69d01b9a0f5dd4 /vpx_dsp/avg.c
parent: 9efc42f4f89eeb05aba384e9179281ece3be6429 (diff)
download: libvpx-969957f9f2a124861145a0d18781b855e98caa54.tar
libvpx-969957f9f2a124861145a0d18781b855e98caa54.tar.gz
libvpx-969957f9f2a124861145a0d18781b855e98caa54.tar.bz2
libvpx-969957f9f2a124861145a0d18781b855e98caa54.zip
1 files changed, 18 insertions, 15 deletions
diff --git a/vpx_dsp/avg.c b/vpx_dsp/avg.c
index 4d9abb8de..e4cd6cca7 100644
--- a/vpx_dsp/avg.c
+++ b/vpx_dsp/avg.c
@@ -67,9 +67,10 @@ static void hadamard_col8(const int16_t *src_diff, int src_stride,
 // The order of the output coeff of the hadamard is not important. For
 // optimization purposes the final transpose may be skipped.
 void vpx_hadamard_8x8_c(const int16_t *src_diff, int src_stride,
-                        int16_t *coeff) {
+                        tran_low_t *coeff) {
   int idx;
   int16_t buffer[64];
+  int16_t buffer2[64];
   int16_t *tmp_buf = &buffer[0];
   for (idx = 0; idx < 8; ++idx) {
     hadamard_col8(src_diff, src_stride, tmp_buf);  // src_diff: 9 bit
@@ -80,17 +81,19 @@ void vpx_hadamard_8x8_c(const int16_t *src_diff, int src_stride,
 
   tmp_buf = &buffer[0];
   for (idx = 0; idx < 8; ++idx) {
-    hadamard_col8(tmp_buf, 8, coeff);  // tmp_buf: 12 bit
-                                       // dynamic range [-2040, 2040]
-    coeff += 8;                        // coeff: 15 bit
-                                       // dynamic range [-16320, 16320]
+    hadamard_col8(tmp_buf, 8, buffer2 + 8 * idx);  // tmp_buf: 12 bit
+    // dynamic range [-2040, 2040]
+    // buffer2: 15 bit
+    // dynamic range [-16320, 16320]
     ++tmp_buf;
   }
+
+  for (idx = 0; idx < 64; ++idx) coeff[idx] = (tran_low_t)buffer2[idx];
 }
 
 // In place 16x16 2D Hadamard transform
 void vpx_hadamard_16x16_c(const int16_t *src_diff, int src_stride,
-                          int16_t *coeff) {
+                          tran_low_t *coeff) {
   int idx;
   for (idx = 0; idx < 4; ++idx) {
     // src_diff: 9 bit, dynamic range [-255, 255]
@@ -101,15 +104,15 @@ void vpx_hadamard_16x16_c(const int16_t *src_diff, int src_stride,
 
   // coeff: 15 bit, dynamic range [-16320, 16320]
   for (idx = 0; idx < 64; ++idx) {
-    int16_t a0 = coeff[0];
-    int16_t a1 = coeff[64];
-    int16_t a2 = coeff[128];
-    int16_t a3 = coeff[192];
+    tran_low_t a0 = coeff[0];
+    tran_low_t a1 = coeff[64];
+    tran_low_t a2 = coeff[128];
+    tran_low_t a3 = coeff[192];
 
-    int16_t b0 = (a0 + a1) >> 1;  // (a0 + a1): 16 bit, [-32640, 32640]
-    int16_t b1 = (a0 - a1) >> 1;  // b0-b3: 15 bit, dynamic range
-    int16_t b2 = (a2 + a3) >> 1;  // [-16320, 16320]
-    int16_t b3 = (a2 - a3) >> 1;
+    tran_low_t b0 = (a0 + a1) >> 1;  // (a0 + a1): 16 bit, [-32640, 32640]
+    tran_low_t b1 = (a0 - a1) >> 1;  // b0-b3: 15 bit, dynamic range
+    tran_low_t b2 = (a2 + a3) >> 1;  // [-16320, 16320]
+    tran_low_t b3 = (a2 - a3) >> 1;
 
     coeff[0] = b0 + b2;  // 16 bit, [-32640, 32640]
     coeff[64] = b1 + b3;
@@ -122,7 +125,7 @@ void vpx_hadamard_16x16_c(const int16_t *src_diff, int src_stride,
 
 // coeff: 16 bits, dynamic range [-32640, 32640].
 // length: value range {16, 64, 256, 1024}.
-int vpx_satd_c(const int16_t *coeff, int length) {
+int vpx_satd_c(const tran_low_t *coeff, int length) {
   int i;
   int satd = 0;
   for (i = 0; i < length; ++i) satd += abs(coeff[i]);
author	Jingning Han <jingning@google.com>	2017-01-26 15:00:04 -0800
committer	James Zern <jzern@google.com>	2017-01-31 23:17:09 -0800
commit	969957f9f2a124861145a0d18781b855e98caa54 (patch)
tree	36beed91af1ea8f5191fe7f30d69d01b9a0f5dd4 /vpx_dsp/avg.c
parent	9efc42f4f89eeb05aba384e9179281ece3be6429 (diff)
download	libvpx-969957f9f2a124861145a0d18781b855e98caa54.tar libvpx-969957f9f2a124861145a0d18781b855e98caa54.tar.gz libvpx-969957f9f2a124861145a0d18781b855e98caa54.tar.bz2 libvpx-969957f9f2a124861145a0d18781b855e98caa54.zip