author    John Koleszar <jkoleszar@google.com>    2010-05-18 11:58:33 -0400
committer John Koleszar <jkoleszar@google.com>    2010-05-18 11:58:33 -0400
commit    0ea50ce9cb4b65eee6afa1d041fe8beb5abda667 (patch)
tree      1f3b9019f28bc56fd3156f96e5a9653a983ee61b /vp8/encoder/encodemb.c
Initial WebM release
Diffstat (limited to 'vp8/encoder/encodemb.c')
-rw-r--r--  vp8/encoder/encodemb.c  1129
1 file changed, 1129 insertions, 0 deletions
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
new file mode 100644
index 000000000..d82513318
--- /dev/null
+++ b/vp8/encoder/encodemb.c
@@ -0,0 +1,1129 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "encodemb.h"
+#include "reconinter.h"
+#include "quantize.h"
+#include "invtrans.h"
+#include "recon.h"
+#include "reconintra.h"
+#include "dct.h"
+#include "vpx_mem/vpx_mem.h"
+
+#if CONFIG_RUNTIME_CPU_DETECT
+#define IF_RTCD(x) (x)
+#else
+#define IF_RTCD(x) NULL
+#endif
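+
+/* With runtime CPU detection enabled, IF_RTCD passes the RTCD function
+ * table through so dispatch happens via the table; otherwise it collapses
+ * to NULL and the invoked code falls back to its compile-time defaults.
+ */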
+void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch)
+{
+ unsigned char *src_ptr = (*(be->base_src) + be->src);
+ short *diff_ptr = be->src_diff;
+ unsigned char *pred_ptr = bd->predictor;
+ int src_stride = be->src_stride;
+
+ int r, c;
+
+ for (r = 0; r < 4; r++)
+ {
+ for (c = 0; c < 4; c++)
+ {
+ diff_ptr[c] = src_ptr[c] - pred_ptr[c];
+ }
+
+ diff_ptr += pitch;
+ pred_ptr += pitch;
+ src_ptr += src_stride;
+ }
+}
+
+void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
+{
+ short *udiff = diff + 256;
+ short *vdiff = diff + 320;
+ unsigned char *upred = pred + 256;
+ unsigned char *vpred = pred + 320;
+
+ int r, c;
+
+ for (r = 0; r < 8; r++)
+ {
+ for (c = 0; c < 8; c++)
+ {
+ udiff[c] = usrc[c] - upred[c];
+ }
+
+ udiff += 8;
+ upred += 8;
+ usrc += stride;
+ }
+
+ for (r = 0; r < 8; r++)
+ {
+ for (c = 0; c < 8; c++)
+ {
+ vdiff[c] = vsrc[c] - vpred[c];
+ }
+
+ vdiff += 8;
+ vpred += 8;
+ vsrc += stride;
+ }
+}
+
+void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride)
+{
+ int r, c;
+
+ for (r = 0; r < 16; r++)
+ {
+ for (c = 0; c < 16; c++)
+ {
+ diff[c] = src[c] - pred[c];
+ }
+
+ diff += 16;
+ pred += 16;
+ src += stride;
+ }
+}
+
+static void vp8_subtract_mb(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
+{
+ ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);
+ ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
+}
+
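+/* The sixteen luma 4x4 transforms each leave their DC term at
+ * x->coeff[i * 16] (the coeff buffer holds the 16-coefficient blocks back
+ * to back). This helper gathers those DC terms into block 24, which then
+ * receives the second-order 4x4 Walsh-Hadamard transform.
+ */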
+void vp8_build_dcblock(MACROBLOCK *x)
+{
+ short *src_diff_ptr = &x->src_diff[384];
+ int i;
+
+ for (i = 0; i < 16; i++)
+ {
+ src_diff_ptr[i] = x->coeff[i * 16];
+ }
+}
+
+void vp8_transform_mbuv(MACROBLOCK *x)
+{
+ int i;
+
+ for (i = 16; i < 24; i += 2)
+ {
+ x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
+ }
+}
+
+void vp8_transform_mbuvrd(MACROBLOCK *x)
+{
+ int i;
+
+ for (i = 16; i < 24; i += 2)
+ {
+ x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
+ }
+}
+
+void vp8_transform_intra_mby(MACROBLOCK *x)
+{
+ int i;
+
+ for (i = 0; i < 16; i += 2)
+ {
+ x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
+ }
+
+ // build dc block from 16 y dc values
+ vp8_build_dcblock(x);
+
+ // do 2nd order transform on the dc block
+ x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
+
+}
+
+void vp8_transform_intra_mbyrd(MACROBLOCK *x)
+{
+ int i;
+
+ for (i = 0; i < 16; i += 2)
+ {
+ x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
+ }
+
+ // build dc block from 16 y dc values
+ vp8_build_dcblock(x);
+
+ // do 2nd order transform on the dc block
+ x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
+}
+
+void vp8_transform_mb(MACROBLOCK *x)
+{
+ int i;
+
+ for (i = 0; i < 16; i += 2)
+ {
+ x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
+ }
+
+ // build dc block from 16 y dc values
+ if (x->e_mbd.mbmi.mode != SPLITMV)
+ vp8_build_dcblock(x);
+
+ for (i = 16; i < 24; i += 2)
+ {
+ x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
+ }
+
+ // do 2nd order transform on the dc block
+ if (x->e_mbd.mbmi.mode != SPLITMV)
+ x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
+
+}
+
+void vp8_transform_mby(MACROBLOCK *x)
+{
+ int i;
+
+ for (i = 0; i < 16; i += 2)
+ {
+ x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
+ }
+
+ // build dc block from 16 y dc values
+ if (x->e_mbd.mbmi.mode != SPLITMV)
+ {
+ vp8_build_dcblock(x);
+ x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
+ }
+}
+
+void vp8_transform_mbrd(MACROBLOCK *x)
+{
+ int i;
+
+ for (i = 0; i < 16; i += 2)
+ {
+ x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
+ }
+
+ // build dc block from 16 y dc values
+ if (x->e_mbd.mbmi.mode != SPLITMV)
+ vp8_build_dcblock(x);
+
+ for (i = 16; i < 24; i += 2)
+ {
+ x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
+ }
+
+ // do 2nd order transform on the dc block
+ if (x->e_mbd.mbmi.mode != SPLITMV)
+ x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
+}
+
+void vp8_stuff_inter16x16(MACROBLOCK *x)
+{
+ vp8_build_inter_predictors_mb_s(&x->e_mbd);
+ /*
+ // recon = copy from predictors to destination
+ {
+ BLOCKD *b = &x->e_mbd.block[0];
+ unsigned char *pred_ptr = b->predictor;
+ unsigned char *dst_ptr = *(b->base_dst) + b->dst;
+ int stride = b->dst_stride;
+
+ int i;
+ for(i=0;i<16;i++)
+ vpx_memcpy(dst_ptr+i*stride,pred_ptr+16*i,16);
+
+ b = &x->e_mbd.block[16];
+ pred_ptr = b->predictor;
+ dst_ptr = *(b->base_dst) + b->dst;
+ stride = b->dst_stride;
+
+ for(i=0;i<8;i++)
+ vpx_memcpy(dst_ptr+i*stride,pred_ptr+8*i,8);
+
+ b = &x->e_mbd.block[20];
+ pred_ptr = b->predictor;
+ dst_ptr = *(b->base_dst) + b->dst;
+ stride = b->dst_stride;
+
+ for(i=0;i<8;i++)
+ vpx_memcpy(dst_ptr+i*stride,pred_ptr+8*i,8);
+ }
+ */
+}
+
+#if !(CONFIG_REALTIME_ONLY)
+extern const TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2];
+extern const TOKENEXTRA *vp8_dct_value_tokens_ptr;
+extern int vp8_dct_value_cost[DCT_MAX_VALUE*2];
+extern int *vp8_dct_value_cost_ptr;
+
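+/* Rate estimate for one block: walk the quantized coefficients in zig-zag
+ * order, summing the token cost conditioned on the coefficient band and the
+ * previous token's class, plus the cost of each token's extra bits, and
+ * finish with the end-of-block token cost if the block terminates early.
+ */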
+static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
+{
+ int c = !type; /* start at coef 0, unless Y with Y2 */
+ int eob = b->eob;
+    int pt;  /* surrounding block/prev coef predictor */
+ int cost = 0;
+ short *qcoeff_ptr = b->qcoeff;
+
+ VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+
+# define QC( I) ( qcoeff_ptr [vp8_default_zig_zag1d[I]] )
+
+ for (; c < eob; c++)
+ {
+ int v = QC(c);
+ int t = vp8_dct_value_tokens_ptr[v].Token;
+ cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [t];
+ cost += vp8_dct_value_cost_ptr[v];
+ pt = vp8_prev_token_class[t];
+ }
+
+# undef QC
+
+ if (c < 16)
+ cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN];
+
+ return cost;
+}
+
+static int mbycost_coeffs(MACROBLOCK *mb)
+{
+ int cost = 0;
+ int b;
+ TEMP_CONTEXT t;
+ int type = 0;
+
+ MACROBLOCKD *x = &mb->e_mbd;
+
+ vp8_setup_temp_context(&t, x->above_context[Y1CONTEXT], x->left_context[Y1CONTEXT], 4);
+
+ if (x->mbmi.mode == SPLITMV)
+ type = 3;
+
+ for (b = 0; b < 16; b++)
+ cost += cost_coeffs(mb, x->block + b, type,
+ t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
+
+ return cost;
+}
+
+#define RDFUNC(RM,DM,R,D,target_rd) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
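+/* Illustrative arithmetic only (the numbers are made up): with RM = 300,
+ * R = 100 and D = 50, RDFUNC gives ((128 + 100 * 300) >> 8) + DM * 50
+ * = 117 + DM * 50, i.e. the usual rate-distortion cost R * lambda + D with
+ * lambda folded into RM / 256. The target_rd argument is unused.
+ */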
+
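+/* Exhaustive rounding search for one block. The candidates are nonzero
+ * coefficients whose dequantized value overshoots the original by less than
+ * one quantizer step (they were rounded up in magnitude); each may either
+ * stay or be nudged one step toward zero, so every combination in
+ * [1, 2^n) is evaluated and the one with the lowest RDFUNC cost wins.
+ */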
+void vp8_optimize_b(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, const VP8_ENCODER_RTCD *rtcd)
+{
+ BLOCK *b = &x->block[i];
+ BLOCKD *bd = &x->e_mbd.block[i];
+ short *dequant_ptr = &bd->dequant[0][0];
+ int nzpos[16] = {0};
+ short saved_qcoefs[16];
+ short saved_dqcoefs[16];
+ int baserate, baseerror, baserd;
+ int rate, error, thisrd;
+ int k;
+ int nzcoefcount = 0;
+ int nc, bestnc = 0;
+ int besteob;
+
+    // count potential coefficients to be optimized
+ for (k = !type; k < 16; k++)
+ {
+ int qcoef = abs(bd->qcoeff[k]);
+ int coef = abs(b->coeff[k]);
+ int dq = dequant_ptr[k];
+
+ if (qcoef && (qcoef * dq > coef) && (qcoef * dq < coef + dq))
+ {
+ nzpos[nzcoefcount] = k;
+ nzcoefcount++;
+ }
+ }
+
+ // if nothing here, do nothing for this block.
+ if (!nzcoefcount)
+ {
+ *a = *l = (bd->eob != !type);
+ return;
+ }
+
+ // save a copy of quantized coefficients
+ vpx_memcpy(saved_qcoefs, bd->qcoeff, 32);
+ vpx_memcpy(saved_dqcoefs, bd->dqcoeff, 32);
+
+ besteob = bd->eob;
+ baserate = cost_coeffs(x, bd, type, a, l);
+ baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2;
+ baserd = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100);
+
+ for (nc = 1; nc < (1 << nzcoefcount); nc++)
+ {
+ //reset coefficients
+ vpx_memcpy(bd->qcoeff, saved_qcoefs, 32);
+ vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32);
+
+ for (k = 0; k < nzcoefcount; k++)
+ {
+ int pos = nzpos[k];
+
+ if ((nc & (1 << k)))
+ {
+ int cur_qcoef = bd->qcoeff[pos];
+
+ if (cur_qcoef < 0)
+ {
+ bd->qcoeff[pos]++;
+ bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
+ }
+ else
+ {
+ bd->qcoeff[pos]--;
+ bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
+ }
+ }
+ }
+
+ {
+ int eob = -1;
+ int rc;
+ int m;
+
+ for (m = 0; m < 16; m++)
+ {
+ rc = vp8_default_zig_zag1d[m];
+
+ if (bd->qcoeff[rc])
+ eob = m;
+ }
+
+ bd->eob = eob + 1;
+ }
+
+ rate = cost_coeffs(x, bd, type, a, l);
+ error = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2;
+ thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100);
+
+ if (thisrd < baserd)
+ {
+ baserd = thisrd;
+ bestnc = nc;
+ besteob = bd->eob;
+ }
+ }
+
+ //reset coefficients
+ vpx_memcpy(bd->qcoeff, saved_qcoefs, 32);
+ vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32);
+
+ if (bestnc)
+ {
+ for (k = 0; k < nzcoefcount; k++)
+ {
+ int pos = nzpos[k];
+
+ if (bestnc & (1 << k))
+ {
+ int cur_qcoef = bd->qcoeff[pos];
+
+ if (cur_qcoef < 0)
+ {
+ bd->qcoeff[pos]++;
+ bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
+ }
+ else
+ {
+ bd->qcoeff[pos]--;
+ bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
+ }
+ }
+ }
+
+#if 0
+ {
+ int eob = -1;
+ int rc;
+ int m;
+
+ for (m = 0; m < 16; m++)
+ {
+ rc = vp8_default_zig_zag1d[m];
+
+ if (bd->qcoeff[rc])
+ eob = m;
+ }
+
+ bd->eob = eob + 1;
+ }
+#endif
+ }
+
+#if 1
+ bd->eob = besteob;
+#endif
+#if 0
+ {
+ int eob = -1;
+ int rc;
+ int m;
+
+ for (m = 0; m < 16; m++)
+ {
+ rc = vp8_default_zig_zag1d[m];
+
+ if (bd->qcoeff[rc])
+ eob = m;
+ }
+
+ bd->eob = eob + 1;
+ }
+
+#endif
+ *a = *l = (bd->eob != !type);
+ return;
+}
+
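+/* Mirror image of vp8_optimize_b: the candidates here are coefficients whose
+ * dequantized value undershoots the original (rounded down in magnitude),
+ * and the search nudges them one step away from zero instead. The entropy
+ * contexts are deliberately left untouched so the other half of the search
+ * can still be run on the same block.
+ */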
+void vp8_optimize_bplus(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, const VP8_ENCODER_RTCD *rtcd)
+{
+ BLOCK *b = &x->block[i];
+ BLOCKD *bd = &x->e_mbd.block[i];
+ short *dequant_ptr = &bd->dequant[0][0];
+ int nzpos[16] = {0};
+ short saved_qcoefs[16];
+ short saved_dqcoefs[16];
+ int baserate, baseerror, baserd;
+ int rate, error, thisrd;
+ int k;
+ int nzcoefcount = 0;
+ int nc, bestnc = 0;
+ int besteob;
+
+    // count potential coefficients to be optimized
+ for (k = !type; k < 16; k++)
+ {
+ int qcoef = abs(bd->qcoeff[k]);
+ int coef = abs(b->coeff[k]);
+ int dq = dequant_ptr[k];
+
+ if (qcoef && (qcoef * dq < coef) && (coef < (qcoef * dq + dq)))
+ {
+ nzpos[nzcoefcount] = k;
+ nzcoefcount++;
+ }
+ }
+
+ // if nothing here, do nothing for this block.
+ if (!nzcoefcount)
+ {
+        // do not update the context; we still need to do the other half.
+        //*a = *l = (bd->eob != !type);
+ return;
+ }
+
+ // save a copy of quantized coefficients
+ vpx_memcpy(saved_qcoefs, bd->qcoeff, 32);
+ vpx_memcpy(saved_dqcoefs, bd->dqcoeff, 32);
+
+ besteob = bd->eob;
+ baserate = cost_coeffs(x, bd, type, a, l);
+ baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2;
+ baserd = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100);
+
+ for (nc = 1; nc < (1 << nzcoefcount); nc++)
+ {
+ //reset coefficients
+ vpx_memcpy(bd->qcoeff, saved_qcoefs, 32);
+ vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32);
+
+ for (k = 0; k < nzcoefcount; k++)
+ {
+ int pos = nzpos[k];
+
+ if ((nc & (1 << k)))
+ {
+ int cur_qcoef = bd->qcoeff[pos];
+
+ if (cur_qcoef < 0)
+ {
+ bd->qcoeff[pos]--;
+ bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
+ }
+ else
+ {
+ bd->qcoeff[pos]++;
+ bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
+ }
+ }
+ }
+
+ {
+ int eob = -1;
+ int rc;
+ int m;
+
+ for (m = 0; m < 16; m++)
+ {
+ rc = vp8_default_zig_zag1d[m];
+
+ if (bd->qcoeff[rc])
+ eob = m;
+ }
+
+ bd->eob = eob + 1;
+ }
+
+ rate = cost_coeffs(x, bd, type, a, l);
+ error = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2;
+ thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100);
+
+ if (thisrd < baserd)
+ {
+ baserd = thisrd;
+ bestnc = nc;
+ besteob = bd->eob;
+ }
+ }
+
+ //reset coefficients
+ vpx_memcpy(bd->qcoeff, saved_qcoefs, 32);
+ vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32);
+
+ if (bestnc)
+ {
+ for (k = 0; k < nzcoefcount; k++)
+ {
+ int pos = nzpos[k];
+
+ if (bestnc & (1 << k))
+ {
+ int cur_qcoef = bd->qcoeff[pos];
+
+ if (cur_qcoef < 0)
+ {
+ bd->qcoeff[pos]++;
+ bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
+ }
+ else
+ {
+ bd->qcoeff[pos]--;
+ bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
+ }
+ }
+ }
+ }
+
+ bd->eob = besteob;
+    // do not update the context; we still need to do the other half.
+    //*a = *l = (bd->eob != !type);
+ return;
+}
+
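+/* Greedy per-coefficient variant (used for the second-order Y2 block): each
+ * nonzero coefficient is nudged one step toward zero in turn and the change
+ * is kept only when it does not raise the RD cost. Note the error is scaled
+ * by >> 4 here versus >> 2 in the functions above.
+ */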
+void vp8_optimize_y2b(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, const VP8_ENCODER_RTCD *rtcd)
+{
+
+ BLOCK *b = &x->block[i];
+ BLOCKD *bd = &x->e_mbd.block[i];
+ short *dequant_ptr = &bd->dequant[0][0];
+
+ int baserate, baseerror, baserd;
+ int rate, error, thisrd;
+ int k;
+
+ if (bd->eob == 0)
+ return;
+
+ baserate = cost_coeffs(x, bd, type, a, l);
+ baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 4;
+ baserd = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100);
+
+ for (k = 0; k < 16; k++)
+ {
+ int cur_qcoef = bd->qcoeff[k];
+
+ if (!cur_qcoef)
+ continue;
+
+ if (cur_qcoef < 0)
+ {
+ bd->qcoeff[k]++;
+ bd->dqcoeff[k] = bd->qcoeff[k] * dequant_ptr[k];
+ }
+ else
+ {
+ bd->qcoeff[k]--;
+ bd->dqcoeff[k] = bd->qcoeff[k] * dequant_ptr[k];
+ }
+
+ if (bd->qcoeff[k] == 0)
+ {
+ int eob = -1;
+ int rc;
+ int l;
+
+ for (l = 0; l < 16; l++)
+ {
+ rc = vp8_default_zig_zag1d[l];
+
+ if (bd->qcoeff[rc])
+ eob = l;
+ }
+
+ bd->eob = eob + 1;
+ }
+
+ rate = cost_coeffs(x, bd, type, a, l);
+ error = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 4;
+ thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100);
+
+ if (thisrd > baserd)
+ {
+ bd->qcoeff[k] = cur_qcoef;
+ bd->dqcoeff[k] = cur_qcoef * dequant_ptr[k];
+ }
+ else
+ {
+ baserd = thisrd;
+ }
+
+ }
+
+ {
+ int eob = -1;
+ int rc;
+
+ for (k = 0; k < 16; k++)
+ {
+ rc = vp8_default_zig_zag1d[k];
+
+ if (bd->qcoeff[rc])
+ eob = k;
+ }
+
+ bd->eob = eob + 1;
+ }
+
+ return;
+}
+
+
+void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
+{
+ int cost = 0;
+ int b;
+ TEMP_CONTEXT t, t2;
+ int type = 0;
+
+ vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT], x->e_mbd.left_context[Y1CONTEXT], 4);
+
+ if (x->e_mbd.mbmi.mode == SPLITMV || x->e_mbd.mbmi.mode == B_PRED)
+ type = 3;
+
+ for (b = 0; b < 16; b++)
+ {
+ //vp8_optimize_bplus(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
+ vp8_optimize_b(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
+ }
+
+ vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2);
+ vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2);
+
+ for (b = 16; b < 20; b++)
+ {
+ //vp8_optimize_bplus(x, b, vp8_block2type[b], t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
+ vp8_optimize_b(x, b, vp8_block2type[b], t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
+ }
+
+ for (b = 20; b < 24; b++)
+ {
+ //vp8_optimize_bplus(x, b, vp8_block2type[b], t2.a + vp8_block2above[b], t2.l + vp8_block2left[b]);
+ vp8_optimize_b(x, b, vp8_block2type[b], t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd);
+ }
+}
+
+
+
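+/* Joint version of the rounding search across all sixteen luma blocks at
+ * once: candidates are collected macroblock-wide and every combination is
+ * tried, so the candidate count is capped at 15 (2^15 states) to keep the
+ * search bounded. As the name admits, this is far too slow for normal use.
+ */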
+void vp8_super_slow_yquant_optimization(MACROBLOCK *x, int type, const VP8_ENCODER_RTCD *rtcd)
+{
+ BLOCK *b = &x->block[0];
+ BLOCKD *bd = &x->e_mbd.block[0];
+ short *dequant_ptr = &bd->dequant[0][0];
+ struct
+ {
+ int block;
+ int pos;
+ } nzpos[256];
+ short saved_qcoefs[256];
+ short saved_dqcoefs[256];
+ short *coef_ptr = x->coeff;
+ short *qcoef_ptr = x->e_mbd.qcoeff;
+ short *dqcoef_ptr = x->e_mbd.dqcoeff;
+
+ int baserate, baseerror, baserd;
+ int rate, error, thisrd;
+ int i, k;
+ int nzcoefcount = 0;
+ int nc, bestnc = 0;
+ int besteob;
+
+    // this code relies on assumptions about the macroblock coeff buffer layout
+ for (i = 0; i < 16; i++)
+ {
+        // count potential coefficients to be optimized
+ for (k = !type; k < 16; k++)
+ {
+ int qcoef = abs(qcoef_ptr[i*16 + k]);
+ int coef = abs(coef_ptr[i*16 + k]);
+ int dq = dequant_ptr[k];
+
+ if (qcoef && (qcoef * dq > coef) && (qcoef * dq < coef + dq))
+ {
+ nzpos[nzcoefcount].block = i;
+ nzpos[nzcoefcount].pos = k;
+ nzcoefcount++;
+ }
+ }
+ }
+
+    // if nothing here, or too many candidates for an exhaustive search, do nothing for this macroblock.
+ if (!nzcoefcount || nzcoefcount > 15)
+ {
+ return;
+ }
+
+    /******************************************************************************
+     Looking from each coefficient's perspective, each identified coefficient above
+     could take 2 values: rounded-down(x) and rounded-up(x). Therefore the total
+     number of different states is less than 2**nzcoefcount.
+     ******************************************************************************/
+    // save the quantized coefficients and dequantized coefficients
+ vpx_memcpy(saved_qcoefs, x->e_mbd.qcoeff, 256);
+ vpx_memcpy(saved_dqcoefs, x->e_mbd.dqcoeff, 256);
+
+ baserate = mbycost_coeffs(x);
+ baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(x, !type);
+ baserd = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100);
+
+ for (nc = 1; nc < (1 << nzcoefcount); nc++)
+ {
+ //reset coefficients
+ vpx_memcpy(x->e_mbd.qcoeff, saved_qcoefs, 256);
+ vpx_memcpy(x->e_mbd.dqcoeff, saved_dqcoefs, 256);
+
+ for (k = 0; k < nzcoefcount; k++)
+ {
+ int bk = nzpos[k].block;
+ int pos = nzpos[k].pos;
+ int mbkpos = bk * 16 + pos;
+
+ if ((nc & (1 << k)))
+ {
+ int cur_qcoef = x->e_mbd.qcoeff[mbkpos];
+
+ if (cur_qcoef < 0)
+ {
+ x->e_mbd.qcoeff[mbkpos]++;
+ x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos];
+ }
+ else
+ {
+ x->e_mbd.qcoeff[mbkpos]--;
+ x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos];
+ }
+ }
+ }
+
+ for (i = 0; i < 16; i++)
+ {
+ BLOCKD *bd = &x->e_mbd.block[i];
+ {
+ int eob = -1;
+ int rc;
+ int l;
+
+ for (l = 0; l < 16; l++)
+ {
+ rc = vp8_default_zig_zag1d[l];
+
+ if (bd->qcoeff[rc])
+ eob = l;
+ }
+
+ bd->eob = eob + 1;
+ }
+ }
+
+ rate = mbycost_coeffs(x);
+        error = ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(x, !type);
+ thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100);
+
+ if (thisrd < baserd)
+ {
+ baserd = thisrd;
+ bestnc = nc;
+ besteob = bd->eob;
+ }
+ }
+
+ //reset coefficients
+ vpx_memcpy(x->e_mbd.qcoeff, saved_qcoefs, 256);
+ vpx_memcpy(x->e_mbd.dqcoeff, saved_dqcoefs, 256);
+
+ if (bestnc)
+ {
+ for (k = 0; k < nzcoefcount; k++)
+ {
+ int bk = nzpos[k].block;
+ int pos = nzpos[k].pos;
+ int mbkpos = bk * 16 + pos;
+
+            if ((bestnc & (1 << k)))
+ {
+ int cur_qcoef = x->e_mbd.qcoeff[mbkpos];
+
+ if (cur_qcoef < 0)
+ {
+ x->e_mbd.qcoeff[mbkpos]++;
+ x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos];
+ }
+ else
+ {
+ x->e_mbd.qcoeff[mbkpos]--;
+ x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos];
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < 16; i++)
+ {
+ BLOCKD *bd = &x->e_mbd.block[i];
+ {
+ int eob = -1;
+ int rc;
+ int l;
+
+ for (l = 0; l < 16; l++)
+ {
+ rc = vp8_default_zig_zag1d[l];
+
+ if (bd->qcoeff[rc])
+ eob = l;
+ }
+
+ bd->eob = eob + 1;
+ }
+ }
+
+ return;
+}
+
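+/* Decide whether the macroblock can be signalled as coefficient-skipped.
+ * In 16x16 modes the luma DC terms live in the Y2 block, so a luma eob of 1
+ * (DC position only, which is not coded for these blocks) still counts as
+ * empty; in B_PRED/SPLITMV every block must be completely empty.
+ */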
+static void vp8_find_mb_skip_coef(MACROBLOCK *x)
+{
+ int i;
+
+ x->e_mbd.mbmi.mb_skip_coeff = 1;
+
+ if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV)
+ {
+ for (i = 0; i < 16; i++)
+ {
+ x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
+ }
+
+ for (i = 16; i < 25; i++)
+ {
+ x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
+ }
+ }
+ else
+ {
+ for (i = 0; i < 24; i++)
+ {
+ x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
+ }
+ }
+}
+
+
+void vp8_optimize_mb_slow(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
+{
+ int cost = 0;
+ int b;
+ TEMP_CONTEXT t, t2;
+ int type = 0;
+
+
+ vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT], x->e_mbd.left_context[Y1CONTEXT], 4);
+
+ if (x->e_mbd.mbmi.mode == SPLITMV || x->e_mbd.mbmi.mode == B_PRED)
+ type = 3;
+
+ vp8_super_slow_yquant_optimization(x, type, rtcd);
+ /*
+ for(b=0;b<16;b++)
+ {
+ vp8_optimize_b(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
+ }
+ */
+
+ vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2);
+
+ for (b = 16; b < 20; b++)
+ {
+ vp8_optimize_b(x, b, vp8_block2type[b], t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
+ }
+
+ vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2);
+
+ for (b = 20; b < 24; b++)
+ {
+ vp8_optimize_b(x, b, vp8_block2type[b], t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd);
+ }
+}
+
+
+void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
+{
+ int cost = 0;
+ int b;
+ TEMP_CONTEXT t;
+ int type = 0;
+
+ if (!x->e_mbd.above_context[Y1CONTEXT])
+ return;
+
+ if (!x->e_mbd.left_context[Y1CONTEXT])
+ return;
+
+ vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT], x->e_mbd.left_context[Y1CONTEXT], 4);
+
+ if (x->e_mbd.mbmi.mode == SPLITMV || x->e_mbd.mbmi.mode == B_PRED)
+ type = 3;
+
+ for (b = 0; b < 16; b++)
+ {
+ vp8_optimize_b(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
+ }
+
+}
+
+void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
+{
+ int cost = 0;
+ int b;
+ TEMP_CONTEXT t, t2;
+ int type = 0;
+
+ if (!x->e_mbd.above_context[UCONTEXT])
+ return;
+
+ if (!x->e_mbd.left_context[UCONTEXT])
+ return;
+
+ if (!x->e_mbd.above_context[VCONTEXT])
+ return;
+
+ if (!x->e_mbd.left_context[VCONTEXT])
+ return;
+
+
+ vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2);
+ vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2);
+
+ for (b = 16; b < 20; b++)
+ {
+ vp8_optimize_b(x, b, vp8_block2type[b],
+ t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
+
+ }
+
+ for (b = 20; b < 24; b++)
+ {
+ vp8_optimize_b(x, b, vp8_block2type[b],
+ t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd);
+ }
+
+}
+#endif
+
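+/* Full inter macroblock encode: build the motion-compensated prediction,
+ * subtract it to form the residual, forward transform, quantize, optionally
+ * run the coefficient optimization above, then inverse transform and
+ * reconstruct so the encoder's reference frame matches the decoder's.
+ */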
+void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
+{
+ vp8_build_inter_predictors_mb(&x->e_mbd);
+
+ vp8_subtract_mb(rtcd, x);
+
+ vp8_transform_mb(x);
+
+ vp8_quantize_mb(x);
+
+#if !(CONFIG_REALTIME_ONLY)
+#if 1
+
+ if (x->optimize && x->rddiv > 1)
+ {
+ vp8_optimize_mb(x, rtcd);
+ vp8_find_mb_skip_coef(x);
+ }
+
+#endif
+#endif
+
+ vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
+
+ vp8_recon16x16mb(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
+}
+
+
+/* this function is used by the first pass only */
+void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
+{
+ vp8_build_inter_predictors_mby(&x->e_mbd);
+
+ ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);
+
+ vp8_transform_mby(x);
+
+ vp8_quantize_mby(x);
+
+ vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
+
+ vp8_recon16x16mby(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
+}
+
+
+void vp8_encode_inter16x16uv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
+{
+ vp8_build_inter_predictors_mbuv(&x->e_mbd);
+
+ ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
+
+ vp8_transform_mbuv(x);
+
+ vp8_quantize_mbuv(x);
+
+ vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
+
+ vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
+}
+
+
+void vp8_encode_inter16x16uvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
+{
+ vp8_build_inter_predictors_mbuv(&x->e_mbd);
+ ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
+
+ vp8_transform_mbuvrd(x);
+
+ vp8_quantize_mbuvrd(x);
+
+}