summaryrefslogtreecommitdiff
path: root/vp8/common/x86/idct_blk_mmx.c
diff options
context:
space:
mode:
author: Scott LaVarnway <slavarnway@google.com> 2011-12-15 14:23:36 -0500
committer: Scott LaVarnway <slavarnway@google.com> 2011-12-15 14:23:41 -0500
commit: a53d5a4c442a84cacbd8225fac72db3789b3e10c (patch)
tree: 85b3431c866bcfcf3a7ce9412ca26857b5dafd7a /vp8/common/x86/idct_blk_mmx.c
parent: c8df1656bd94928059204242e778bd5b8b9dc7aa (diff)
downloadlibvpx-a53d5a4c442a84cacbd8225fac72db3789b3e10c.tar
libvpx-a53d5a4c442a84cacbd8225fac72db3789b3e10c.tar.gz
libvpx-a53d5a4c442a84cacbd8225fac72db3789b3e10c.tar.bz2
libvpx-a53d5a4c442a84cacbd8225fac72db3789b3e10c.zip
Moved dequant idct into common
These functions are now used by the encoder. This is WIP with the goal of creating a common idct/add for the encoder and decoder. A boost of 1.8% was seen for the HD rt test clip used. [Tero] Added needed changes to ARM side. Change-Id: Ibbb8000be09034203d7adffc457d3c3f8b06a5bf
Diffstat (limited to 'vp8/common/x86/idct_blk_mmx.c')
-rw-r--r-- vp8/common/x86/idct_blk_mmx.c 127
1 files changed, 127 insertions, 0 deletions
diff --git a/vp8/common/x86/idct_blk_mmx.c b/vp8/common/x86/idct_blk_mmx.c
new file mode 100644
index 000000000..49cebd6f5
--- /dev/null
+++ b/vp8/common/x86/idct_blk_mmx.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_config.h"
+#include "vp8/common/idct.h"
+#include "vp8/common/dequantize.h"
+
+/* Implemented outside this file; only the prototype is needed here. */
+extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
+
+/*
+ * Thin wrapper around vp8_dequantize_b_impl_mmx().
+ *
+ * Forwards the block's qcoeff, dqcoeff and dequant buffers, each viewed
+ * as an array of short, to the implementation routine in that order.
+ */
+void vp8_dequantize_b_mmx(BLOCKD *d)
+{
+    vp8_dequantize_b_impl_mmx((short *) d->qcoeff,
+                              (short *) d->dqcoeff,
+                              (short *) d->dequant);
+}
+
+/*
+ * Process a 4x4 grid of sub-blocks (16 in total), in row-major order.
+ *
+ * For each sub-block the matching entry of eobs selects the path:
+ *   eobs > 1  : vp8_dequant_idct_add_mmx() on the sub-block;
+ *   eobs == 1 : vp8_dc_only_idct_add_mmx() with q[0]*dq[0], after which the
+ *               first two coefficients of the sub-block are cleared;
+ *   otherwise : the sub-block is skipped.
+ *
+ * q      coefficients, 16 shorts per sub-block, 64 per row of sub-blocks
+ * dq     dequantization factors; the same table is used for every sub-block
+ * dst    destination pixels; sub-blocks are 4 bytes apart horizontally and
+ *        4*stride bytes apart vertically
+ * stride distance in bytes between destination rows
+ * eobs   one entry per sub-block (presumably end-of-block counts -- confirm
+ *        against the caller)
+ */
+void vp8_dequant_idct_add_y_block_mmx
+            (short *q, short *dq,
+             unsigned char *dst, int stride, char *eobs)
+{
+    int row;
+
+    for (row = 0; row < 4; row++)
+    {
+        int col;
+
+        for (col = 0; col < 4; col++)
+        {
+            short *coeffs = q + 16 * col;
+            unsigned char *pix = dst + 4 * col;
+
+            if (eobs[col] > 1)
+            {
+                vp8_dequant_idct_add_mmx (coeffs, dq, pix, stride);
+            }
+            else if (eobs[col] == 1)
+            {
+                vp8_dc_only_idct_add_mmx (coeffs[0]*dq[0], pix, stride,
+                                          pix, stride);
+
+                /* Clear the first two coefficients with short stores.
+                 * Writing them through an int pointer would violate the
+                 * effective-type (strict aliasing) rules and assume int
+                 * alignment of the coefficient buffer. */
+                coeffs[0] = 0;
+                coeffs[1] = 0;
+            }
+        }
+
+        q    += 64;
+        dst  += 4*stride;
+        eobs += 4;
+    }
+}
+
+/*
+ * Process two 2x2 grids of sub-blocks: first the one written to dstu, then
+ * the one written to dstv. q and eobs are consumed continuously across both
+ * grids (4 sub-blocks for dstu followed by 4 for dstv).
+ *
+ * For each sub-block the matching entry of eobs selects the path:
+ *   eobs > 1  : vp8_dequant_idct_add_mmx() on the sub-block;
+ *   eobs == 1 : vp8_dc_only_idct_add_mmx() with q[0]*dq[0], after which the
+ *               first two coefficients of the sub-block are cleared;
+ *   otherwise : the sub-block is skipped.
+ *
+ * q      coefficients, 16 shorts per sub-block, 32 per row of sub-blocks
+ * dq     dequantization factors; the same table is used for every sub-block
+ * dstu   destination pixels for the first four sub-blocks
+ * dstv   destination pixels for the last four sub-blocks
+ * stride distance in bytes between destination rows (shared by both planes)
+ * eobs   one entry per sub-block (presumably end-of-block counts -- confirm
+ *        against the caller)
+ */
+void vp8_dequant_idct_add_uv_block_mmx
+            (short *q, short *dq,
+             unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+    unsigned char *plane[2];
+    int p;
+
+    plane[0] = dstu;
+    plane[1] = dstv;
+
+    for (p = 0; p < 2; p++)
+    {
+        unsigned char *dst = plane[p];
+        int row;
+
+        for (row = 0; row < 2; row++)
+        {
+            int col;
+
+            for (col = 0; col < 2; col++)
+            {
+                short *coeffs = q + 16 * col;
+                unsigned char *pix = dst + 4 * col;
+
+                if (eobs[col] > 1)
+                {
+                    vp8_dequant_idct_add_mmx (coeffs, dq, pix, stride);
+                }
+                else if (eobs[col] == 1)
+                {
+                    vp8_dc_only_idct_add_mmx (coeffs[0]*dq[0], pix, stride,
+                                              pix, stride);
+
+                    /* Clear the first two coefficients with short stores.
+                     * Writing them through an int pointer would violate
+                     * the effective-type (strict aliasing) rules and
+                     * assume int alignment of the coefficient buffer. */
+                    coeffs[0] = 0;
+                    coeffs[1] = 0;
+                }
+            }
+
+            q    += 32;
+            dst  += 4*stride;
+            eobs += 2;
+        }
+    }
+}