/*
 *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include "vpx_ports/config.h"
#include "encodemb.h"
#include "reconinter.h"
#include "quantize.h"
#include "invtrans.h"
#include "recon.h"
#include "reconintra.h"
#include "dct.h"
#include "vpx_mem/vpx_mem.h"

/*
 * IF_RTCD(x) passes the run-time dispatch table through when run-time CPU
 * detection is compiled in, and replaces it with NULL otherwise.
 */
#if CONFIG_RUNTIME_CPU_DETECT
#define IF_RTCD(x) (x)
#else
#define IF_RTCD(x) NULL
#endif

/*
 * Compute the 4x4 residual (source - predictor) for one block.
 *
 * be    : encoder-side block; supplies the source pointer, source stride and
 *         the destination residual buffer (be->src_diff).
 * bd    : decoder-side block; supplies the predictor.
 * pitch : row stride, in elements, of both the residual and the predictor.
 */
void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch)
{
    unsigned char *src_ptr = (*(be->base_src) + be->src);
    short *diff_ptr = be->src_diff;
    unsigned char *pred_ptr = bd->predictor;
    int src_stride = be->src_stride;

    int r, c;

    for (r = 0; r < 4; r++)
    {
        for (c = 0; c < 4; c++)
        {
            diff_ptr[c] = src_ptr[c] - pred_ptr[c];
        }

        diff_ptr += pitch;
        pred_ptr += pitch;
        src_ptr  += src_stride;
    }
}

/*
 * Compute the two 8x8 chroma residuals of a macroblock.
 *
 * The U residual is written at diff + 256 and the V residual at diff + 320;
 * the matching predictors are read from the same offsets inside pred. Both
 * residual and predictor rows are packed 8 elements apart, while the source
 * planes are walked with the caller-supplied stride.
 */
void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
{
    short *udiff = diff + 256;
    short *vdiff = diff + 320;
    unsigned char *upred = pred + 256;
    unsigned char *vpred = pred + 320;

    int r, c;

    for (r = 0; r < 8; r++)
    {
        for (c = 0; c < 8; c++)
        {
            udiff[c] = usrc[c] - upred[c];
        }

        udiff += 8;
        upred += 8;
        usrc  += stride;
    }

    for (r = 0; r < 8; r++)
    {
        for (c = 0; c < 8; c++)
        {
            vdiff[c] = vsrc[c] - vpred[c];
        }

        vdiff += 8;
        vpred += 8;
        vsrc  += stride;
    }
}

/*
 * Compute the 16x16 luma residual of a macroblock.
 *
 * diff and pred are packed 16 elements per row; src is walked with stride.
 */
void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride)
{
    int r, c;

    for (r = 0; r < 16; r++)
    {
        for (c = 0; c < 16; c++)
        {
            diff[c] = src[c] - pred[c];
        }

        diff += 16;
        pred += 16;
        src  += stride;
    }
}

/* Subtract the predictor from the source for luma and then chroma, via the
 * run-time dispatched submby / submbuv implementations. */
static void vp8_subtract_mb(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
    ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);
    ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
}

/*
 * Gather the first coefficient of each of the 16 luma blocks
 * (x->coeff[i * 16]) into x->src_diff[384..399], which is then used as the
 * input of the second-order transform.
 * NOTE(review): presumably x->block[24].src_diff points at x->src_diff + 384;
 * confirm against the block setup code.
 */
void vp8_build_dcblock(MACROBLOCK *x)
{
    short *src_diff_ptr = &x->src_diff[384];
    int i;

    for (i = 0; i < 16; i++)
    {
        src_diff_ptr[i] = x->coeff[i * 16];
    }
}

/* Forward-transform the chroma blocks (16..23), two 4x4 blocks per call,
 * with an input pitch argument of 16. */
void vp8_transform_mbuv(MACROBLOCK *x)
{
    int i;

    for (i = 16; i < 24; i += 2)
    {
        x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
    }
}

/* Same as vp8_transform_mbuv but using the x->short_fdct8x4rd transform. */
void vp8_transform_mbuvrd(MACROBLOCK *x)
{
    int i;

    for (i = 16; i < 24; i += 2)
    {
        x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
    }
}

/* Forward-transform the 16 luma blocks of an intra macroblock, then build
 * and transform the DC block (block 24). */
void vp8_transform_intra_mby(MACROBLOCK *x)
{
    int i;

    for (i = 0; i < 16; i += 2)
    {
        x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
    }

    /* build dc block from 16 y dc values */
    vp8_build_dcblock(x);

    /* do 2nd order transform on the dc block */
    x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
}

/* Same as vp8_transform_intra_mby but using the x->short_fdct8x4rd
 * transform for the luma blocks. */
void vp8_transform_intra_mbyrd(MACROBLOCK *x)
{
    int i;

    for (i = 0; i < 16; i += 2)
    {
        x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
    }

    /* build dc block from 16 y dc values */
    vp8_build_dcblock(x);

    /* do 2nd order transform on the dc block */
    x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
}

/*
 * Forward-transform a whole macroblock: luma blocks, chroma blocks and,
 * unless the mode is SPLITMV, the second-order DC block.
 */
void vp8_transform_mb(MACROBLOCK *x)
{
    int i;

    for (i = 0; i < 16; i += 2)
    {
        x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
    }

    /* build dc block from 16 y dc values */
    if (x->e_mbd.mbmi.mode != SPLITMV)
        vp8_build_dcblock(x);

    for (i = 16; i < 24; i += 2)
    {
        x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
    }

    /* do 2nd order transform on the dc block */
    if (x->e_mbd.mbmi.mode != SPLITMV)
        x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
}

/* Forward-transform the luma blocks only and, unless the mode is SPLITMV,
 * build and transform the DC block. */
void vp8_transform_mby(MACROBLOCK *x)
{
    int i;

    for (i = 0; i < 16; i += 2)
    {
        x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
    }

    /* build dc block from 16 y dc values */
    if (x->e_mbd.mbmi.mode != SPLITMV)
    {
        vp8_build_dcblock(x);
        x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
    }
}

/* Same as vp8_transform_mb but using the x->short_fdct8x4rd transform for
 * the luma and chroma blocks. */
void vp8_transform_mbrd(MACROBLOCK *x)
{
    int i;

    for (i = 0; i < 16; i += 2)
    {
        x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
    }

    /* build dc block from 16 y dc values */
    if (x->e_mbd.mbmi.mode != SPLITMV)
        vp8_build_dcblock(x);

    for (i = 16; i < 24; i += 2)
    {
        x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
    }

    /* do 2nd order transform on the dc block */
    if (x->e_mbd.mbmi.mode != SPLITMV)
        x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
}

/* Build the inter predictors for the macroblock through
 * vp8_build_inter_predictors_mb_s(); nothing else is done here. */
void vp8_stuff_inter16x16(MACROBLOCK *x)
{
    vp8_build_inter_predictors_mb_s(&x->e_mbd);
}

#if !(CONFIG_REALTIME_ONLY)
extern const TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2];
extern const TOKENEXTRA *vp8_dct_value_tokens_ptr;
extern int vp8_dct_value_cost[DCT_MAX_VALUE*2];
extern int *vp8_dct_value_cost_ptr;

/*
 * Return the token cost of one block's quantized coefficients.
 *
 * Coefficients are visited in zig-zag order from position !type (position 0
 * is skipped when type == 0, i.e. Y with Y2) up to b->eob. If the scan stops
 * before position 16 the cost of an end-of-block token is added.
 *
 * a, l : above / left entropy contexts; only read here, to form the initial
 *        context via VP8_COMBINEENTROPYCONTEXTS.
 */
static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
{
    int c = !type;              /* start at coef 0, unless Y with Y2 */
    int eob = b->eob;
    int pt;                     /* surrounding block/prev coef predictor */
    int cost = 0;
    short *qcoeff_ptr = b->qcoeff;

    VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);

    for (; c < eob; c++)
    {
        int v = qcoeff_ptr[vp8_default_zig_zag1d[c]];
        int t = vp8_dct_value_tokens_ptr[v].Token;

        cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [t];
        cost += vp8_dct_value_cost_ptr[v];
        pt = vp8_prev_token_class[t];
    }

    if (c < 16)
        cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN];

    return cost;
}

/*
 * Return the summed coefficient cost of the 16 luma blocks, evaluated against
 * a temporary copy of the Y1 entropy contexts. Block type is 3 for SPLITMV
 * and 0 otherwise.
 */
static int mbycost_coeffs(MACROBLOCK *mb)
{
    int cost = 0;
    int b;
    TEMP_CONTEXT t;
    int type = 0;

    MACROBLOCKD *x = &mb->e_mbd;

    vp8_setup_temp_context(&t, x->above_context[Y1CONTEXT], x->left_context[Y1CONTEXT], 4);

    if (x->mbmi.mode == SPLITMV)
        type = 3;

    for (b = 0; b < 16; b++)
        cost += cost_coeffs(mb, x->block + b, type,
                            t.a + vp8_block2above[b], t.l + vp8_block2left[b]);

    return cost;
}

/* Rate-distortion cost: rounded (R * RM) / 256 plus DM * D. The target_rd
 * argument is accepted but not used. */
#define RDFUNC(RM,DM,R,D,target_rd) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )

/* Return the end-of-block position of bd: one past the last zig-zag index
 * whose quantized coefficient is non-zero (0 when all are zero). */
static int eob_from_qcoeff(const BLOCKD *bd)
{
    int last = -1;
    int m;

    for (m = 0; m < 16; m++)
    {
        if (bd->qcoeff[vp8_default_zig_zag1d[m]])
            last = m;
    }

    return last + 1;
}

/* Move the quantized coefficient at pos one level toward zero (negative
 * values are incremented, all others decremented) and refresh the matching
 * dequantized coefficient. */
static void step_qcoeff_toward_zero(BLOCKD *bd, const short *dequant_ptr, int pos)
{
    if (bd->qcoeff[pos] < 0)
        bd->qcoeff[pos]++;
    else
        bd->qcoeff[pos]--;

    bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
}

/*
 * Rate-distortion optimize the quantized coefficients of block i.
 *
 * Candidate coefficients are the non-zero ones whose dequantized magnitude
 * overshoots the original coefficient by less than one quantizer step. Every
 * non-empty subset of the candidates is tried with each member stepped one
 * level toward zero; the subset with the lowest RD cost (if it beats the
 * unmodified block) is kept.
 *
 * On return *a and *l are both set to (bd->eob != !type).
 */
void vp8_optimize_b(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, const VP8_ENCODER_RTCD *rtcd)
{
    BLOCK *b = &x->block[i];
    BLOCKD *bd = &x->e_mbd.block[i];
    short *dequant_ptr = &bd->dequant[0][0];
    int nzpos[16] = {0};
    short saved_qcoefs[16];
    short saved_dqcoefs[16];
    int baserate, baseerror, baserd;
    int rate, error, thisrd;
    int k;
    int nzcoefcount = 0;
    int nc, bestnc = 0;
    int besteob;

    /* count potential coefficient to be optimized */
    for (k = !type; k < 16; k++)
    {
        int qcoef = abs(bd->qcoeff[k]);
        int coef = abs(b->coeff[k]);
        int dq = dequant_ptr[k];

        if (qcoef && (qcoef * dq > coef) && (qcoef * dq < coef + dq))
        {
            nzpos[nzcoefcount] = k;
            nzcoefcount++;
        }
    }

    /* if nothing here, do nothing for this block. */
    if (!nzcoefcount)
    {
        *a = *l = (bd->eob != !type);
        return;
    }

    /* save a copy of quantized coefficients */
    vpx_memcpy(saved_qcoefs, bd->qcoeff, sizeof(saved_qcoefs));
    vpx_memcpy(saved_dqcoefs, bd->dqcoeff, sizeof(saved_dqcoefs));

    besteob = bd->eob;
    baserate = cost_coeffs(x, bd, type, a, l);
    baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2;
    baserd = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100);

    /* each bit of nc selects one candidate coefficient to step toward zero */
    for (nc = 1; nc < (1 << nzcoefcount); nc++)
    {
        /* reset coefficients */
        vpx_memcpy(bd->qcoeff, saved_qcoefs, sizeof(saved_qcoefs));
        vpx_memcpy(bd->dqcoeff, saved_dqcoefs, sizeof(saved_dqcoefs));

        for (k = 0; k < nzcoefcount; k++)
        {
            if (nc & (1 << k))
                step_qcoeff_toward_zero(bd, dequant_ptr, nzpos[k]);
        }

        bd->eob = eob_from_qcoeff(bd);

        rate = cost_coeffs(x, bd, type, a, l);
        error = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2;
        thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100);

        if (thisrd < baserd)
        {
            baserd = thisrd;
            bestnc = nc;
            besteob = bd->eob;
        }
    }

    /* reset coefficients, then re-apply the winning combination (if any) */
    vpx_memcpy(bd->qcoeff, saved_qcoefs, sizeof(saved_qcoefs));
    vpx_memcpy(bd->dqcoeff, saved_dqcoefs, sizeof(saved_dqcoefs));

    if (bestnc)
    {
        for (k = 0; k < nzcoefcount; k++)
        {
            if (bestnc & (1 << k))
                step_qcoeff_toward_zero(bd, dequant_ptr, nzpos[k]);
        }
    }

    /* besteob is the original eob when no combination won */
    bd->eob = besteob;

    *a = *l = (bd->eob != !type);
    return;
}

/*
 * Greedy rate-distortion optimization of block i's quantized coefficients.
 *
 * Each non-zero coefficient, in raster order, is stepped one level toward
 * zero; the change is kept unless it raises the RD cost, in which case the
 * coefficient and the end-of-block position are put back. bd->eob is
 * recomputed from the final coefficients before returning. Blocks with
 * eob == 0 are left untouched. *a and *l are only read.
 */
void vp8_optimize_y2b(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, const VP8_ENCODER_RTCD *rtcd)
{
    BLOCK *b = &x->block[i];
    BLOCKD *bd = &x->e_mbd.block[i];
    short *dequant_ptr = &bd->dequant[0][0];

    int baserate, baseerror, baserd;
    int rate, error, thisrd;
    int k;

    if (bd->eob == 0)
        return;

    baserate = cost_coeffs(x, bd, type, a, l);
    baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 4;
    baserd = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100);

    for (k = 0; k < 16; k++)
    {
        int cur_qcoef = bd->qcoeff[k];
        int prev_eob = bd->eob;

        if (!cur_qcoef)
            continue;

        step_qcoeff_toward_zero(bd, dequant_ptr, k);

        /* eob can only move when a coefficient has just become zero */
        if (bd->qcoeff[k] == 0)
            bd->eob = eob_from_qcoeff(bd);

        rate = cost_coeffs(x, bd, type, a, l);
        error = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 4;
        thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100);

        if (thisrd > baserd)
        {
            bd->qcoeff[k] = cur_qcoef;
            bd->dqcoeff[k] = cur_qcoef * dequant_ptr[k];
            /* Undo the eob change as well; otherwise later cost_coeffs()
             * calls would stop short of the restored coefficient and
             * under-count the rate. */
            bd->eob = prev_eob;
        }
        else
        {
            baserd = thisrd;
        }
    }

    bd->eob = eob_from_qcoeff(bd);

    return;
}

/*
 * Run vp8_optimize_b over all 24 luma and chroma blocks of a macroblock,
 * working on temporary copies of the Y1, U and V entropy contexts. Luma
 * blocks use type 3 for SPLITMV / B_PRED and type 0 otherwise; chroma blocks
 * take their type from vp8_block2type.
 */
void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
{
    int b;
    TEMP_CONTEXT t, t2;
    int type = 0;

    vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT], x->e_mbd.left_context[Y1CONTEXT], 4);

    if (x->e_mbd.mbmi.mode == SPLITMV || x->e_mbd.mbmi.mode == B_PRED)
        type = 3;

    for (b = 0; b < 16; b++)
    {
        vp8_optimize_b(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
    }

    vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2);
    vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2);

    for (b = 16; b < 20; b++)
    {
        vp8_optimize_b(x, b, vp8_block2type[b], t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
    }

    for (b = 20; b < 24; b++)
    {
        vp8_optimize_b(x, b, vp8_block2type[b], t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd);
    }
}

/*
 * Set mbmi.mb_skip_coeff to 1 when the macroblock has nothing to code and to
 * 0 otherwise.
 *
 * For modes other than B_PRED / SPLITMV, luma blocks 0..15 count as empty
 * when eob < 2 and blocks 16..24 when eob == 0. For B_PRED / SPLITMV, blocks
 * 0..23 must all have eob == 0.
 * NOTE(review): the eob < 2 test presumably reflects that position 0 of these
 * luma blocks is carried by the DC block - confirm.
 */
static void vp8_find_mb_skip_coef(MACROBLOCK *x)
{
    int i;

    x->e_mbd.mbmi.mb_skip_coeff = 1;

    if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV)
    {
        for (i = 0; i < 16; i++)
        {
            x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
        }

        for (i = 16; i < 25; i++)
        {
            x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
        }
    }
    else
    {
        for (i = 0; i < 24; i++)
        {
            x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
        }
    }
}

/*
 * Run vp8_optimize_b over the 16 luma blocks only. Returns without doing
 * anything when either Y1 entropy context pointer is NULL.
 */
void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
{
    int b;
    TEMP_CONTEXT t;
    int type = 0;

    if (!x->e_mbd.above_context[Y1CONTEXT])
        return;

    if (!x->e_mbd.left_context[Y1CONTEXT])
        return;

    vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT], x->e_mbd.left_context[Y1CONTEXT], 4);

    if (x->e_mbd.mbmi.mode == SPLITMV || x->e_mbd.mbmi.mode == B_PRED)
        type = 3;

    for (b = 0; b < 16; b++)
    {
        vp8_optimize_b(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
    }
}

/*
 * Run vp8_optimize_b over the 8 chroma blocks only. Returns without doing
 * anything when any of the U / V entropy context pointers is NULL.
 */
void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
{
    int b;
    TEMP_CONTEXT t, t2;

    if (!x->e_mbd.above_context[UCONTEXT])
        return;

    if (!x->e_mbd.left_context[UCONTEXT])
        return;

    if (!x->e_mbd.above_context[VCONTEXT])
        return;

    if (!x->e_mbd.left_context[VCONTEXT])
        return;

    vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2);
    vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2);

    for (b = 16; b < 20; b++)
    {
        vp8_optimize_b(x, b, vp8_block2type[b], t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
    }

    for (b = 20; b < 24; b++)
    {
        vp8_optimize_b(x, b, vp8_block2type[b], t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd);
    }
}
#endif

/*
 * Encode a full inter macroblock: predict, subtract, transform, quantize,
 * optionally RD-optimize the coefficients (non-realtime builds, when
 * x->optimize is set and x->rddiv > 1), then inverse-transform and
 * reconstruct.
 */
void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
    vp8_build_inter_predictors_mb(&x->e_mbd);

    vp8_subtract_mb(rtcd, x);

    vp8_transform_mb(x);

    vp8_quantize_mb(x);

#if !(CONFIG_REALTIME_ONLY)
    if (x->optimize && x->rddiv > 1)
    {
        vp8_optimize_mb(x, rtcd);
        vp8_find_mb_skip_coef(x);
    }
#endif

    vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd);

    vp8_recon16x16mb(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
}


/* this function is used by first pass only */
void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
    vp8_build_inter_predictors_mby(&x->e_mbd);

    ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);

    vp8_transform_mby(x);

    vp8_quantize_mby(x);

    vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);

    vp8_recon16x16mby(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
}


/* Encode the chroma planes of an inter macroblock: predict, subtract,
 * transform, quantize, inverse-transform and reconstruct. */
void vp8_encode_inter16x16uv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
    vp8_build_inter_predictors_mbuv(&x->e_mbd);

    ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);

    vp8_transform_mbuv(x);

    vp8_quantize_mbuv(x);

    vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd);

    vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
}


/* Chroma predict / subtract / transform (short_fdct8x4rd variant) / quantize
 * only; no inverse transform or reconstruction is performed here. */
void vp8_encode_inter16x16uvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
    vp8_build_inter_predictors_mbuv(&x->e_mbd);

    ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);

    vp8_transform_mbuvrd(x);

    vp8_quantize_mbuv(x);
}