diff options
author | Yaowu Xu <yaowu@google.com> | 2010-06-28 22:03:43 -0700 |
---|---|---|
committer | Yaowu Xu <yaowu@google.com> | 2010-06-28 22:10:48 -0700 |
commit | b62d093efa8bc100462995ffd8d067fe1f49612c (patch) | |
tree | a4ca48f7a4158b65ecc7963da722760017e6019f /vp8/encoder | |
parent | f1a3b1e0d94dec2d40008f36fdfad99338484b9a (diff) | |
download | libvpx-b62d093efa8bc100462995ffd8d067fe1f49612c.tar libvpx-b62d093efa8bc100462995ffd8d067fe1f49612c.tar.gz libvpx-b62d093efa8bc100462995ffd8d067fe1f49612c.tar.bz2 libvpx-b62d093efa8bc100462995ffd8d067fe1f49612c.zip |
Improve the accuracy of forward walsh-hadamard transform
Besides slightly improving the round-trip error, this change
also fixes a sign bias in the forward transform, so that the
round-trip errors are evenly distributed between +1s and
-1s. The old bias seemed to work well with the DC sign bias
in the old fdct, which no longer exists in the improved fdct.
Change-Id: I8635e7be16c69e69a8669eca5438550d23089cef
Diffstat (limited to 'vp8/encoder')
-rw-r--r-- | vp8/encoder/dct.c | 57 | ||||
-rw-r--r-- | vp8/encoder/x86/x86_csystemdependent.c | 2 |
2 files changed, 30 insertions, 29 deletions
diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c index 58e36109c..2827aa5a4 100644 --- a/vp8/encoder/dct.c +++ b/vp8/encoder/dct.c @@ -69,17 +69,18 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch) short *ip = input; short *op = output; + for (i = 0; i < 4; i++) { - a1 = ip[0] + ip[3]; - b1 = ip[1] + ip[2]; - c1 = ip[1] - ip[2]; - d1 = ip[0] - ip[3]; - - op[0] = a1 + b1; - op[1] = c1 + d1; - op[2] = a1 - b1; - op[3] = d1 - c1; + a1 = ((ip[0] + ip[2])<<2); + d1 = ((ip[1] + ip[3])<<2); + c1 = ((ip[1] - ip[3])<<2); + b1 = ((ip[0] - ip[2])<<2); + + op[0] = a1 + d1 + (a1!=0); + op[1] = b1 + c1; + op[2] = b1 - c1; + op[3] = a1 - d1; ip += pitch / 2; op += 4; } @@ -89,25 +90,25 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch) for (i = 0; i < 4; i++) { - a1 = ip[0] + ip[12]; - b1 = ip[4] + ip[8]; - c1 = ip[4] - ip[8]; - d1 = ip[0] - ip[12]; - - a2 = a1 + b1; - b2 = c1 + d1; - c2 = a1 - b1; - d2 = d1 - c1; - - a2 += (a2 > 0); - b2 += (b2 > 0); - c2 += (c2 > 0); - d2 += (d2 > 0); - - op[0] = (a2) >> 1; - op[4] = (b2) >> 1; - op[8] = (c2) >> 1; - op[12] = (d2) >> 1; + a1 = ip[0] + ip[8]; + d1 = ip[4] + ip[12]; + c1 = ip[4] - ip[12]; + b1 = ip[0] - ip[8]; + + a2 = a1 + d1; + b2 = b1 + c1; + c2 = b1 - c1; + d2 = a1 - d1; + + a2 += a2<0; + b2 += b2<0; + c2 += c2<0; + d2 += d2<0; + + op[0] = (a2+3) >> 3; + op[4] = (b2+3) >> 3; + op[8] = (c2+3) >> 3; + op[12]= (d2+3) >> 3; ip++; op++; diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c index 4d0515662..11ef4197b 100644 --- a/vp8/encoder/x86/x86_csystemdependent.c +++ b/vp8/encoder/x86/x86_csystemdependent.c @@ -278,7 +278,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_sse2; cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_sse2; - cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_sse2; + cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c ; cpi->rtcd.encodemb.berr = vp8_block_error_xmm; 
cpi->rtcd.encodemb.mberr = vp8_mbblock_error_xmm; |