Make the quantizer exact.

This replaces the approximate division-by-multiplication in the quantizer with an exact one that costs just one extra add and one extra shift. The asm versions have not been updated in this patch and have therefore been disabled, because the new method requires different multipliers that are not compatible with the old method.

Change-Id: I53ac887af0f969d906e464c88b1f4be69c6b1206
author: Timothy B. Terriberry <tterribe@xiph.org> 2010-06-28 17:15:09 -0700
committer: Timothy B. Terriberry <tterribe@xiph.org> 2010-07-23 08:48:01 -0700
commit: e04e293522a3cf3761eae3690b8efbc2aa69848b (patch)
tree: 5cb27f30b97c8296bcd2caa06bde2c344f895aac
parent: 08eed049d4f08943079483cdd5d5d9f865457a67 (diff)
download: libvpx-e04e293522a3cf3761eae3690b8efbc2aa69848b.tar
libvpx-e04e293522a3cf3761eae3690b8efbc2aa69848b.tar.gz
libvpx-e04e293522a3cf3761eae3690b8efbc2aa69848b.tar.bz2
libvpx-e04e293522a3cf3761eae3690b8efbc2aa69848b.zip
7 files changed, 44 insertions, 12 deletions
diff --git a/vp8/encoder/arm/csystemdependent.c b/vp8/encoder/arm/csystemdependent.c
index 4521bfc31..bfceab16c 100644
--- a/vp8/encoder/arm/csystemdependent.c
+++ b/vp8/encoder/arm/csystemdependent.c
@@ -63,7 +63,7 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
     cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_neon;
 
     cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
-    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;
+    /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;*/
 #elif HAVE_ARMV6
     cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
     cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index b55bc51cb..19d307d26 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -33,6 +33,7 @@ typedef struct
 
     // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
     short(*quant)[4];
+    short(*quant_shift)[4];
     short(*zbin)[4];
     short(*zrun_zbin_boost);
     short(*round)[4];
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 32cef1db1..a05b33268 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -103,6 +103,18 @@ static const int qzbin_factors[129] =
     80,
 };
 
+static void vp8cx_invert_quant(short *quant, short *shift, short d) /* derive the multiplier/shift pair the quantizers use in place of dividing by d */
+{
+    unsigned t;
+    int l;
+    t = d; /* NOTE(review): assumes d >= 1; d == 0 would divide by zero below -- confirm against callers */
+    for(l = 0; t > 1; l++) /* count how many halvings bring t down to 1, i.e. l = floor(log2(d)) for d >= 1 */
+        t>>=1;
+    t = 1 + (1<<(16+l))/d; /* t = 1 + floor(2^(16+l) / d) */
+    *quant = (short)(t - (1<<16)); /* store t - 2^16; the quantizers add x back after the (x * quant) >> 16 multiply */
+    *shift = l; /* right shift the quantizers apply after that add */
+}
+
 void vp8cx_init_quantizer(VP8_COMP *cpi)
 {
     int r, c;
@@ -116,21 +128,24 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
     {
         // dc values
         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
-        cpi->Y1quant[Q][0][0] = (1 << 16) / quant_val;
+        vp8cx_invert_quant(cpi->Y1quant[Q][0] + 0,
+                           cpi->Y1quant_shift[Q][0] + 0, quant_val);
         cpi->Y1zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
         cpi->Y1round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
         cpi->common.Y1dequant[Q][0][0] = quant_val;
         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
-        cpi->Y2quant[Q][0][0] = (1 << 16) / quant_val;
+        vp8cx_invert_quant(cpi->Y2quant[Q][0] + 0,
+                           cpi->Y2quant_shift[Q][0] + 0, quant_val);
         cpi->Y2zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
         cpi->Y2round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
         cpi->common.Y2dequant[Q][0][0] = quant_val;
         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
-        cpi->UVquant[Q][0][0] = (1 << 16) / quant_val;
+        vp8cx_invert_quant(cpi->UVquant[Q][0] + 0,
+                           cpi->UVquant_shift[Q][0] + 0, quant_val);
         cpi->UVzbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
         cpi->UVround[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
         cpi->common.UVdequant[Q][0][0] = quant_val;
@@ -144,21 +159,24 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
             c = (rc & 3);
 
             quant_val = vp8_ac_yquant(Q);
-            cpi->Y1quant[Q][r][c] = (1 << 16) / quant_val;
+            vp8cx_invert_quant(cpi->Y1quant[Q][r] + c,
+                               cpi->Y1quant_shift[Q][r] + c, quant_val);
             cpi->Y1zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
             cpi->Y1round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
             cpi->common.Y1dequant[Q][r][c] = quant_val;
             cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 
             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
-            cpi->Y2quant[Q][r][c] = (1 << 16) / quant_val;
+            vp8cx_invert_quant(cpi->Y2quant[Q][r] + c,
+                               cpi->Y2quant_shift[Q][r] + c, quant_val);
             cpi->Y2zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
             cpi->Y2round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
             cpi->common.Y2dequant[Q][r][c] = quant_val;
             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 
             quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
-            cpi->UVquant[Q][r][c] = (1 << 16) / quant_val;
+            vp8cx_invert_quant(cpi->UVquant[Q][r] + c,
+                               cpi->UVquant_shift[Q][r] + c, quant_val);
             cpi->UVzbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
             cpi->UVround[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
             cpi->common.UVdequant[Q][r][c] = quant_val;
@@ -198,6 +216,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
     for (i = 0; i < 16; i++)
     {
         x->block[i].quant = cpi->Y1quant[QIndex];
+        x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
         x->block[i].zbin = cpi->Y1zbin[QIndex];
         x->block[i].round = cpi->Y1round[QIndex];
         x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
@@ -211,6 +230,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
     for (i = 16; i < 24; i++)
     {
         x->block[i].quant = cpi->UVquant[QIndex];
+        x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
         x->block[i].zbin = cpi->UVzbin[QIndex];
         x->block[i].round = cpi->UVround[QIndex];
         x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
@@ -221,6 +241,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
     // Y2
     zbin_extra = (cpi->common.Y2dequant[QIndex][0][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
     x->block[24].quant = cpi->Y2quant[QIndex];
+    x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
     x->block[24].zbin = cpi->Y2zbin[QIndex];
     x->block[24].round = cpi->Y2round[QIndex];
     x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index dd98a09d1..54646f421 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -286,6 +286,7 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
     for (i = 0; i < 25; i++)
     {
         z->block[i].quant           = x->block[i].quant;
+        z->block[i].quant_shift     = x->block[i].quant_shift;
         z->block[i].zbin            = x->block[i].zbin;
         z->block[i].zrun_zbin_boost   = x->block[i].zrun_zbin_boost;
         z->block[i].round           = x->block[i].round;
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index fcde2205d..f76d2efcd 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -234,14 +234,17 @@ typedef struct
 {
 
     DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][4][4]);
 
     DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][4][4]);
 
     DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][4][4]);
 
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 181870c11..877002b08 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -25,6 +25,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
     short *zbin_ptr   = &b->zbin[0][0];
     short *round_ptr  = &b->round[0][0];
     short *quant_ptr  = &b->quant[0][0];
+    short *quant_shift_ptr = &b->quant_shift[0][0];
     short *qcoeff_ptr = d->qcoeff;
     short *dqcoeff_ptr = d->dqcoeff;
     short *dequant_ptr = &d->dequant[0][0];
@@ -45,7 +46,9 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
 
         if (x >= zbin)
         {
-            y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+            x += round_ptr[rc];
+            y  = (((x * quant_ptr[rc]) >> 16) + x)
+                 >> quant_shift_ptr[rc];                // quantize (x)
             x  = (y ^ sz) - sz;                         // get the sign back
             qcoeff_ptr[rc] = x;                          // write to destination
             dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
@@ -69,6 +72,7 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
     short *zbin_ptr   = &b->zbin[0][0];
     short *round_ptr  = &b->round[0][0];
     short *quant_ptr  = &b->quant[0][0];
+    short *quant_shift_ptr = &b->quant_shift[0][0];
     short *qcoeff_ptr = d->qcoeff;
     short *dqcoeff_ptr = d->dqcoeff;
     short *dequant_ptr = &d->dequant[0][0];
@@ -95,7 +99,9 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
 
         if (x >= zbin)
         {
-            y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+            x += round_ptr[rc];
+            y  = (((x * quant_ptr[rc]) >> 16) + x)
+                 >> quant_shift_ptr[rc];                // quantize (x)
             x  = (y ^ sz) - sz;                         // get the sign back
             qcoeff_ptr[rc]  = x;                         // write to destination
             dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 11ef4197b..be226e040 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -238,7 +238,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.encodemb.submby                = vp8_subtract_mby_mmx;
         cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_mmx;
 
-        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_mmx;
+        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_mmx;*/
     }
 
 #endif
@@ -285,8 +285,8 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_xmm;
         /* cpi->rtcd.encodemb.sub* not implemented for wmt */
 
-        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse;
-        cpi->rtcd.quantize.quantb            = vp8_regular_quantize_b_sse2;
+        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse;
+        cpi->rtcd.quantize.quantb            = vp8_regular_quantize_b_sse2;*/
     }
 
 #endif
author	Timothy B. Terriberry <tterribe@xiph.org>	2010-06-28 17:15:09 -0700
committer	Timothy B. Terriberry <tterribe@xiph.org>	2010-07-23 08:48:01 -0700
commit	e04e293522a3cf3761eae3690b8efbc2aa69848b (patch)
tree	5cb27f30b97c8296bcd2caa06bde2c344f895aac
parent	08eed049d4f08943079483cdd5d5d9f865457a67 (diff)
download	libvpx-e04e293522a3cf3761eae3690b8efbc2aa69848b.tar libvpx-e04e293522a3cf3761eae3690b8efbc2aa69848b.tar.gz libvpx-e04e293522a3cf3761eae3690b8efbc2aa69848b.tar.bz2 libvpx-e04e293522a3cf3761eae3690b8efbc2aa69848b.zip