diff options
Diffstat (limited to 'vp8/encoder')
-rw-r--r-- | vp8/encoder/bitstream.c | 303 | ||||
-rw-r--r-- | vp8/encoder/block.h | 8 | ||||
-rw-r--r-- | vp8/encoder/dct.c | 116 | ||||
-rw-r--r-- | vp8/encoder/dct.h | 18 | ||||
-rw-r--r-- | vp8/encoder/encodeframe.c | 456 | ||||
-rw-r--r-- | vp8/encoder/encodeintra.c | 122 | ||||
-rw-r--r-- | vp8/encoder/encodemb.c | 674 | ||||
-rw-r--r-- | vp8/encoder/encodemb.h | 12 | ||||
-rw-r--r-- | vp8/encoder/ethreading.c | 18 | ||||
-rw-r--r-- | vp8/encoder/generic/csystemdependent.c | 10 | ||||
-rw-r--r-- | vp8/encoder/onyx_if.c | 119 | ||||
-rw-r--r-- | vp8/encoder/onyx_int.h | 13 | ||||
-rw-r--r-- | vp8/encoder/quantize.c | 597 | ||||
-rw-r--r-- | vp8/encoder/quantize.h | 32 | ||||
-rw-r--r-- | vp8/encoder/rdopt.c | 5 | ||||
-rw-r--r-- | vp8/encoder/rdopt.h | 1 | ||||
-rw-r--r-- | vp8/encoder/tokenize.c | 489 | ||||
-rw-r--r-- | vp8/encoder/tokenize.h | 4 |
18 files changed, 2896 insertions, 101 deletions
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index b3c24398c..64d1c9304 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -23,7 +23,9 @@ #include "vpx_mem/vpx_mem.h" #include "bitstream.h" #include "vp8/common/defaultcoefcounts.h" - +#if CONFIG_SEGMENTATION +static int segment_cost = 0; +#endif const int vp8cx_base_skip_false_prob[128] = { 255, 255, 255, 255, 255, 255, 255, 255, @@ -51,11 +53,19 @@ unsigned __int64 Sectionbits[500]; #ifdef ENTROPY_STATS int intra_mode_stats[10][10][10]; static unsigned int tree_update_hist [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] [2]; +#if CONFIG_T8X8 +static unsigned int tree_update_hist_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] [2]; +#endif + extern unsigned int active_section; #endif #ifdef MODE_STATS int count_mb_seg[4] = { 0, 0, 0, 0 }; +#if CONFIG_SEGMENTATION +int segment_modes_intra[MAX_MB_SEGMENTS] = { 0, 0, 0, 0 }; +int segment_modes_inter[MAX_MB_SEGMENTS] = { 0, 0, 0, 0 }; +#endif #endif @@ -812,24 +822,39 @@ static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACRO case 0: vp8_write(w, 0, x->mb_segment_tree_probs[0]); vp8_write(w, 0, x->mb_segment_tree_probs[1]); +#if CONFIG_SEGMENTATION + segment_cost += vp8_cost_zero(x->mb_segment_tree_probs[0]) + vp8_cost_zero(x->mb_segment_tree_probs[1]); +#endif break; case 1: vp8_write(w, 0, x->mb_segment_tree_probs[0]); vp8_write(w, 1, x->mb_segment_tree_probs[1]); +#if CONFIG_SEGMENTATION + segment_cost += vp8_cost_zero(x->mb_segment_tree_probs[0]) + vp8_cost_one(x->mb_segment_tree_probs[1]); +#endif break; case 2: vp8_write(w, 1, x->mb_segment_tree_probs[0]); vp8_write(w, 0, x->mb_segment_tree_probs[2]); +#if CONFIG_SEGMENTATION + segment_cost += vp8_cost_one(x->mb_segment_tree_probs[0]) + vp8_cost_zero(x->mb_segment_tree_probs[2]); +#endif break; case 3: vp8_write(w, 1, x->mb_segment_tree_probs[0]); vp8_write(w, 1, x->mb_segment_tree_probs[2]); +#if CONFIG_SEGMENTATION + segment_cost += vp8_cost_one(x->mb_segment_tree_probs[0]) + vp8_cost_one(x->mb_segment_tree_probs[2]); +#endif break; // TRAP.. This should not happen default: vp8_write(w, 0, x->mb_segment_tree_probs[0]); vp8_write(w, 0, x->mb_segment_tree_probs[1]); +#if CONFIG_SEGMENTATION + segment_cost += vp8_cost_zero(x->mb_segment_tree_probs[0]) + vp8_cost_zero(x->mb_segment_tree_probs[1]); +#endif break; } } @@ -841,7 +866,13 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) VP8_COMMON *const pc = & cpi->common; vp8_writer *const w = & cpi->bc; const MV_CONTEXT *mvc = pc->fc.mvc; - + MACROBLOCKD *xd = &cpi->mb.e_mbd; +#if CONFIG_SEGMENTATION + int left_id, above_id; + int i; + int sum; + int index = 0; +#endif const int *const rfct = cpi->count_mb_ref_frame_usage; const int rf_intra = rfct[INTRA_FRAME]; const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]; @@ -898,7 +929,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) update_mbintra_mode_probs(cpi); vp8_write_mvprobs(cpi); - +#if CONFIG_SEGMENTATION + vp8_write_bit(w, (xd->temporal_update) ? 1:0); +#endif while (++mb_row < pc->mb_rows) { int mb_col = -1; @@ -909,7 +942,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) const MV_REFERENCE_FRAME rf = mi->ref_frame; const MB_PREDICTION_MODE mode = mi->mode; - MACROBLOCKD *xd = &cpi->mb.e_mbd; + //MACROBLOCKD *xd = &cpi->mb.e_mbd; // Distance of Mb to the various image edges. // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units @@ -917,13 +950,53 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; xd->mb_to_top_edge = -((mb_row * 16)) << 3; xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; - +#if CONFIG_SEGMENTATION + xd->up_available = (mb_row != 0); + xd->left_available = (mb_col != 0); +#endif #ifdef ENTROPY_STATS active_section = 9; #endif +#ifdef MODE_STATS +#if CONFIG_SEGMENTATION + segment_modes_inter[mi->segment_id]++; +#endif +#endif if (cpi->mb.e_mbd.update_mb_segmentation_map) + { +#if CONFIG_SEGMENTATION + if (xd->temporal_update) + { + sum = 0; + if (mb_col != 0) + sum += (m-1)->mbmi.segment_flag; + if (mb_row != 0) + sum += (m-pc->mb_cols)->mbmi.segment_flag; + + if (m->mbmi.segment_flag == 0) + { + vp8_write(w,0,xd->mb_segment_tree_probs[3+sum]); + segment_cost += vp8_cost_zero(xd->mb_segment_tree_probs[3+sum]); + } + else + { + vp8_write(w,1,xd->mb_segment_tree_probs[3+sum]); + segment_cost += vp8_cost_one(xd->mb_segment_tree_probs[3+sum]); + write_mb_features(w, mi, &cpi->mb.e_mbd); + cpi->segmentation_map[index] = mi->segment_id; + } + } + else + { + write_mb_features(w, mi, &cpi->mb.e_mbd); + cpi->segmentation_map[index] = mi->segment_id; + } + index++; +#else write_mb_features(w, mi, &cpi->mb.e_mbd); +#endif + } if (pc->mb_no_coeff_skip) vp8_encode_bool(w, m->mbmi.mb_skip_coeff, prob_skip_false); @@ -1058,7 +1131,11 @@ static void write_kfmodes(VP8_COMP *cpi) const VP8_COMMON *const c = & cpi->common; /* const */ MODE_INFO *m = c->mi; - +#if CONFIG_SEGMENTATION + int left_id, above_id; + int i; + int index = 0; +#endif int mb_row = -1; int prob_skip_false = 0; @@ -1083,9 +1160,28 @@ static void write_kfmodes(VP8_COMP *cpi) while (++mb_col < c->mb_cols) { const int ym = m->mbmi.mode; +#if CONFIG_SEGMENTATION + MACROBLOCKD *xd = &cpi->mb.e_mbd; + xd->up_available = (mb_row != 0); + xd->left_available = (mb_col != 0); +#endif +#ifdef MODE_STATS +#if CONFIG_SEGMENTATION + segment_modes_intra[m->mbmi.segment_id]++; +#endif +#endif if (cpi->mb.e_mbd.update_mb_segmentation_map) + { +#if CONFIG_SEGMENTATION + write_mb_features(bc, &m->mbmi, &cpi->mb.e_mbd); + cpi->segmentation_map[index] = m->mbmi.segment_id; + index++; +#else + write_mb_features(bc, &m->mbmi, &cpi->mb.e_mbd); +#endif + } if (c->mb_no_coeff_skip) vp8_encode_bool(bc, m->mbmi.mb_skip_coeff, prob_skip_false); @@ -1314,6 +1410,7 @@ static int default_coef_context_savings(VP8_COMP *cpi) int vp8_estimate_entropy_savings(VP8_COMP *cpi) { int savings = 0; + int i=0; const int *const rfct = cpi->count_mb_ref_frame_usage; const int rf_intra = rfct[INTRA_FRAME]; @@ -1378,6 +1475,65 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi) savings += default_coef_context_savings(cpi); +#if CONFIG_T8X8 + i = 0; + do + { + int j = 0; + + do + { + int k = 0; + + do + { + /* at every context */ + + /* calc probs and branch cts for this frame only */ + //vp8_prob new_p [ENTROPY_NODES]; + //unsigned int branch_ct [ENTROPY_NODES] [2]; + + int t = 0; /* token/prob index */ + + vp8_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + cpi->frame_coef_probs_8x8 [i][j][k], cpi->frame_branch_ct_8x8 [i][j][k], cpi->coef_counts_8x8 [i][j][k], + 256, 1 + ); + + do + { + const unsigned int *ct = cpi->frame_branch_ct_8x8 [i][j][k][t]; + const vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t]; + + const vp8_prob old = cpi->common.fc.coef_probs_8x8 [i][j][k][t]; + const vp8_prob upd = vp8_coef_update_probs_8x8 [i][j][k][t]; + + const int old_b = vp8_cost_branch(ct, old); + const int new_b = vp8_cost_branch(ct, newp); + + const int update_b = 8 + + ((vp8_cost_one(upd) - vp8_cost_zero(upd)) >> 8); + + const int s = old_b - new_b - update_b; + + if (s > 0) + savings += s; + + + } + while (++t < MAX_ENTROPY_TOKENS - 1); + + + } + while (++k < PREV_COEF_CONTEXTS); + } + while (++j < COEF_BANDS); + } + while (++i < BLOCK_TYPES); +#endif + + return savings; } @@ -1504,6 +1660,92 @@ static void update_coef_probs(VP8_COMP *cpi) } while (++i < BLOCK_TYPES); +#if CONFIG_T8X8 + i = 0; + do + { + int j = 0; + + do + { + int k = 0; + + do + { + //note: use result from vp8_estimate_entropy_savings, so no need to call vp8_tree_probs_from_distribution here. + /* at every context */ + + /* calc probs and branch cts for this frame only */ + //vp8_prob new_p [ENTROPY_NODES]; + //unsigned int branch_ct [ENTROPY_NODES] [2]; + + int t = 0; /* token/prob index */ + + //vp8_tree_probs_from_distribution( + // MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + // new_p, branch_ct, (unsigned int *)cpi->coef_counts [i][j][k], + // 256, 1 + // ); + + do + { + const unsigned int *ct = cpi->frame_branch_ct_8x8 [i][j][k][t]; + const vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t]; + + vp8_prob *Pold = cpi->common.fc.coef_probs_8x8 [i][j][k] + t; + const vp8_prob old = *Pold; + const vp8_prob upd = vp8_coef_update_probs_8x8 [i][j][k][t]; + + const int old_b = vp8_cost_branch(ct, old); + const int new_b = vp8_cost_branch(ct, newp); + + const int update_b = 8 + + ((vp8_cost_one(upd) - vp8_cost_zero(upd)) >> 8); + + const int s = old_b - new_b - update_b; + const int u = s > 0 ? 1 : 0; + + vp8_write(w, u, upd); + + +#ifdef ENTROPY_STATS + ++ tree_update_hist_8x8 [i][j][k][t] [u]; +#endif + + if (u) + { + /* send/use new probability */ + + *Pold = newp; + vp8_write_literal(w, newp, 8); + + savings += s; + + } + + } + while (++t < MAX_ENTROPY_TOKENS - 1); + + /* Accum token counts for generation of default statistics */ +#ifdef ENTROPY_STATS + t = 0; + + do + { + context_counters_8x8 [i][j][k][t] += cpi->coef_counts_8x8 [i][j][k][t]; + } + while (++t < MAX_ENTROPY_TOKENS); + +#endif + + } + while (++k < PREV_COEF_CONTEXTS); + } + while (++j < COEF_BANDS); + } + while (++i < BLOCK_TYPES); +#endif + } #ifdef PACKET_TESTING FILE *vpxlogc = 0; @@ -1584,8 +1826,9 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) } else vp8_start_encode(bc, cx_data); - - +#if CONFIG_SEGMENTATION + xd->update_mb_segmentation_map = 1; +#endif // Signal whether or not Segmentation is enabled vp8_write_bit(bc, (xd->segmentation_enabled) ? 1 : 0); @@ -1635,8 +1878,12 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) if (xd->update_mb_segmentation_map) { + #if CONFIG_SEGMENTATION // Write the probs used to decode the segment id for each macro block. + for (i = 0; i < MB_FEATURE_TREE_PROBS+3; i++) +#else for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) +#endif { int Data = xd->mb_segment_tree_probs[i]; @@ -1908,6 +2155,46 @@ void print_tree_update_probs() } fprintf(f, "};\n"); + +#if CONFIG_T8X8 + fprintf(f, "const vp8_prob tree_update_probs_8x8[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {\n"); + + for (i = 0; i < BLOCK_TYPES; i++) + { + fprintf(f, " { \n"); + + for (j = 0; j < COEF_BANDS; j++) + { + fprintf(f, " {\n"); + + for (k = 0; k < PREV_COEF_CONTEXTS; k++) + { + fprintf(f, " {"); + + for (l = 0; l < MAX_ENTROPY_TOKENS - 1; l++) + { + Sum = tree_update_hist_8x8[i][j][k][l][0] + tree_update_hist_8x8[i][j][k][l][1]; + + if (Sum > 0) + { + if (((tree_update_hist_8x8[i][j][k][l][0] * 255) / Sum) > 0) + fprintf(f, "%3ld, ", (tree_update_hist_8x8[i][j][k][l][0] * 255) / Sum); + else + fprintf(f, "%3ld, ", 1); + } + else + fprintf(f, "%3ld, ", 128); + } + + fprintf(f, "},\n"); + } + + fprintf(f, " },\n"); + } + + fprintf(f, " },\n"); + } +#endif fclose(f); } #endif diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h index 0d14b545c..8a95db798 100644 --- a/vp8/encoder/block.h +++ b/vp8/encoder/block.h @@ -46,7 +46,7 @@ typedef struct int src; int src_stride; -// MV enc_mv; + // MV enc_mv; int force_empty; } BLOCK; @@ -126,6 +126,12 @@ typedef struct void (*short_walsh4x4)(short *input, short *output, int pitch); void (*quantize_b)(BLOCK *b, BLOCKD *d); void (*quantize_b_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1); + #if CONFIG_T8X8 + void (*vp8_short_fdct8x8)(short *input, short *output, int pitch); + void (*short_fhaar2x2)(short *input, short *output, int pitch); + void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d); + void (*quantize_b_2x2)(BLOCK *b, BLOCKD *d); +#endif } MACROBLOCK; diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c index 69a882c89..fd4c62cd4 100644 --- a/vp8/encoder/dct.c +++ b/vp8/encoder/dct.c @@ -11,6 +11,122 @@ #include <math.h> #include "vpx_ports/config.h" + + + + + +#if CONFIG_T8X8 +void vp8_short_fdct8x8_c(short *block, short *coefs, int pitch) +{ + int j1, i, j, k; + float b[8]; + float b1[8]; + float d[8][8]; + float f0 = (float) .7071068; + float f1 = (float) .4903926; + float f2 = (float) .4619398; + float f3 = (float) .4157348; + float f4 = (float) .3535534; + float f5 = (float) .2777851; + float f6 = (float) .1913417; + float f7 = (float) .0975452; + pitch = pitch / 2; + for (i = 0, k = 0; i < 8; i++, k += pitch) + { + for (j = 0; j < 8; j++) + { + b[j] = (float)( block[k + j]<<1); + } + /* Horizontal transform */ + for (j = 0; j < 4; j++) + { + j1 = 7 - j; + b1[j] = b[j] + b[j1]; + b1[j1] = b[j] - b[j1]; + } + b[0] = b1[0] + b1[3]; + b[1] = b1[1] + b1[2]; + b[2] = b1[1] - b1[2]; + b[3] = b1[0] - b1[3]; + b[4] = b1[4]; + b[5] = (b1[6] - b1[5]) * f0; + b[6] = (b1[6] + b1[5]) * f0; + b[7] = b1[7]; + d[i][0] = (b[0] + b[1]) * f4; + d[i][4] = (b[0] - b[1]) * f4; + d[i][2] = b[2] * f6 + b[3] * f2; + d[i][6] = b[3] * f6 - b[2] * f2; + b1[4] = b[4] + b[5]; + b1[7] = b[7] + b[6]; + b1[5] = b[4] - b[5]; + b1[6] = b[7] - b[6]; + d[i][1] = b1[4] * f7 + b1[7] * f1; + d[i][5] = b1[5] * f3 + b1[6] * f5; + d[i][7] = b1[7] * f7 - b1[4] * f1; + d[i][3] = b1[6] * f3 - b1[5] * f5; + } + /* Vertical transform */ + for (i = 0; i < 8; i++) + { + for (j = 0; j < 4; j++) + { + j1 = 7 - j; + b1[j] = d[j][i] + d[j1][i]; + b1[j1] = d[j][i] - d[j1][i]; + } + b[0] = b1[0] + b1[3]; + b[1] = b1[1] + b1[2]; + b[2] = b1[1] - b1[2]; + b[3] = b1[0] - b1[3]; + b[4] = b1[4]; + b[5] = (b1[6] - b1[5]) * f0; + b[6] = (b1[6] + b1[5]) * f0; + b[7] = b1[7]; + d[0][i] = (b[0] + b[1]) * f4; + d[4][i] = (b[0] - b[1]) * f4; + d[2][i] = b[2] * f6 + b[3] * f2; + d[6][i] = b[3] * f6 - b[2] * f2; + b1[4] = b[4] + b[5]; + b1[7] = b[7] + b[6]; + b1[5] = b[4] - b[5]; + b1[6] = b[7] - b[6]; + d[1][i] = b1[4] * f7 + b1[7] * f1; + d[5][i] = b1[5] * f3 + b1[6] * f5; + d[7][i] = b1[7] * f7 - b1[4] * f1; + d[3][i] = b1[6] * f3 - b1[5] * f5; + } + for (i = 0; i < 8; i++) + { + for (j = 0; j < 8; j++) + { + *(coefs + j + i * 8) = (short) floor(d[i][j] +0.5); + } + } + return; +} + + + +void vp8_short_fhaar2x2_c(short *input, short *output, int pitch) //pitch = 8 +{ + /* [1 1 ; 1 -1] orthogonal transform */ + /* use position: 0,1, 4, 8 */ + int i; + short *ip1 = input; + short *op1 = output; + for (i = 0; i < 16; i++) + { + op1[i] = 0; + } + + op1[0]=ip1[0] + ip1[1] + ip1[4] + ip1[8]; + op1[1]=ip1[0] - ip1[1] + ip1[4] - ip1[8]; + op1[4]=ip1[0] + ip1[1] - ip1[4] - ip1[8]; + op1[8]=ip1[0] - ip1[1] - ip1[4] + ip1[8]; + +} +#endif void vp8_short_fdct4x4_c(short *input, short *output, int pitch) { int i; diff --git a/vp8/encoder/dct.h b/vp8/encoder/dct.h index fec3b4c37..c37d47aca 100644 --- a/vp8/encoder/dct.h +++ b/vp8/encoder/dct.h @@ -22,6 +22,20 @@ #include "arm/dct_arm.h" #endif +#if CONFIG_T8X8 + +#ifndef vp8_fdct_short8x8 +#define vp8_fdct_short8x8 vp8_short_fdct8x8_c +#endif +extern prototype_fdct(vp8_fdct_short8x8); + +#ifndef vp8_fhaar_short2x2 +#define vp8_fhaar_short2x2 vp8_short_fhaar2x2_c +#endif +extern prototype_fdct(vp8_fhaar_short2x2); + +#endif + #ifndef vp8_fdct_short4x4 #define vp8_fdct_short4x4 vp8_short_fdct4x4_c #endif @@ -49,6 +63,10 @@ extern prototype_fdct(vp8_fdct_walsh_short4x4); typedef prototype_fdct(*vp8_fdct_fn_t); typedef struct { +#if CONFIG_T8X8 + vp8_fdct_fn_t short8x8; + vp8_fdct_fn_t haar_short2x2; +#endif vp8_fdct_fn_t short4x4; vp8_fdct_fn_t short8x4; vp8_fdct_fn_t fast4x4; diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index 3cc96c12e..1c4a93636 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -26,10 +26,12 @@ #include "vp8/common/findnearmv.h" #include "vp8/common/reconintra.h" #include <stdio.h> +#include <math.h> #include <limits.h> #include "vp8/common/subpixel.h" #include "vpx_ports/vpx_timer.h" + #if CONFIG_RUNTIME_CPU_DETECT #define RTCD(x) &cpi->common.rtcd.x #define IF_RTCD(x) (x) @@ -37,6 +39,18 @@ #define RTCD(x) NULL #define IF_RTCD(x) NULL #endif + +#if CONFIG_SEGMENTATION +#define SEEK_SEGID 12 +#define SEEK_SAMEID 4 +#define SEEK_DIFFID 7 +#endif + +#ifdef ENC_DEBUG +int enc_debug=0; +int mb_row_debug, mb_col_debug; +#endif + extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; extern void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex); @@ -52,6 +66,8 @@ int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t); static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ); + + #ifdef MODE_STATS unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; unsigned int inter_uv_modes[4] = {0, 0, 0, 0}; @@ -80,6 +96,186 @@ static const unsigned char VP8_VAR_OFFS[16]= }; + +#if CONFIG_T8X8 + +//INTRA mode transform size +//When all three criteria are off the default is 4x4 +//#define INTRA_VARIANCE_ENTROPY_CRITERIA +#define INTRA_WTD_SSE_ENTROPY_CRITERIA +//#define INTRA_TEST_8X8_ONLY +// +//INTER mode transform size +//When all three criteria are off the default is 4x4 +//#define INTER_VARIANCE_ENTROPY_CRITERIA +#define INTER_WTD_SSE_ENTROPY_CRITERIA +//#define INTER_TEST_8X8_ONLY + +double variance_Block(short *b1, int pitch, int dimension) +{ + short ip[8][8]={{0}}; + short *b = b1; + int i, j = 0; + double mean = 0.0, variance = 0.0; + for (i = 0; i < dimension; i++) + { + for (j = 0; j < dimension; j++) + { + ip[i][j] = b[j]; + mean += ip[i][j]; + } + b += pitch; + } + mean /= (dimension*dimension); + + for (i = 0; i < dimension; i++) + { + for (j = 0; j < dimension; j++) + { + variance += (ip[i][j]-mean)*(ip[i][j]-mean); + } + } + variance /= (dimension*dimension); + return variance; +} + +double mean_Block(short *b, int pitch, int dimension) +{ + short ip[8][8]={{0}}; + int i, j = 0; + double mean = 0; + for (i = 0; i < dimension; i++) + { + for (j = 0; j < dimension; j++) + { + ip[i][j] = b[j]; + mean += ip[i][j]; + } + b += pitch; + } + mean /= (dimension*dimension); + + return mean; +} + +int SSE_Block(short *b, int pitch, int dimension) +{ + int i, j, sse_block = 0; + for (i = 0; i < dimension; i++) + { + for (j = 0; j < dimension; j++) + { + sse_block += b[j]*b[j]; + } + b += pitch; + } + return sse_block; +} + +double Compute_Variance_Entropy(MACROBLOCK *x) +{ + double variance_8[4] = {0.0, 0.0, 0.0, 0.0}, sum_var = 0.0, all_entropy = 0.0; + variance_8[0] = variance_Block(x->block[0].src_diff, 16, 8); + variance_8[1] = variance_Block(x->block[2].src_diff, 16, 8); + variance_8[2] = variance_Block(x->block[8].src_diff, 16, 8); + variance_8[3] = variance_Block(x->block[10].src_diff, 16, 8); + sum_var = variance_8[0] + variance_8[1] + variance_8[2] + variance_8[3]; + if(sum_var) + { + int i; + for(i = 0; i <4; i++) + { + if(variance_8[i]) + { + variance_8[i] /= sum_var; + all_entropy -= variance_8[i]*log(variance_8[i]); + } + } + } + return (all_entropy /log(2)); +} + +double Compute_Wtd_SSE_SubEntropy(MACROBLOCK *x) +{ + double variance_8[4] = {0.0, 0.0, 0.0, 0.0}; + double entropy_8[4] = {0.0, 0.0, 0.0, 0.0}; + double sse_1, sse_2, sse_3, sse_4, sse_0; + int i; + for (i=0;i<3;i+=2) + { + sse_0 = SSE_Block(x->block[i].src_diff, 16, 8); + if(sse_0) + { + sse_1 = SSE_Block(x->block[i].src_diff, 16, 4)/sse_0; + sse_2 = SSE_Block(x->block[i+1].src_diff, 16, 4)/sse_0; + sse_3 = SSE_Block(x->block[i+4].src_diff, 16, 4)/sse_0; + sse_4 = SSE_Block(x->block[i+5].src_diff, 16, 4)/sse_0; + variance_8[i]= variance_Block(x->block[i].src_diff, 16, 8); + if(sse_1 && sse_2 && sse_3 && sse_4) + entropy_8[i]= (-sse_1*log(sse_1) + -sse_2*log(sse_2) + -sse_3*log(sse_3) + -sse_4*log(sse_4))/log(2); + } + } + for (i=8;i<11;i+=2) + { + if(sse_0) + { + sse_0 = SSE_Block(x->block[i].src_diff, 16, 8); + sse_1 = SSE_Block(x->block[i].src_diff, 16, 4)/sse_0; + sse_2 = SSE_Block(x->block[i+1].src_diff, 16, 4)/sse_0; + sse_3 = SSE_Block(x->block[i+4].src_diff, 16, 4)/sse_0; + sse_4 = SSE_Block(x->block[i+5].src_diff, 16, 4)/sse_0; + variance_8[i-7]= variance_Block(x->block[i].src_diff, 16, 8); + if(sse_1 && sse_2 && sse_3 && sse_4) + entropy_8[i-7]= (-sse_1*log(sse_1) + -sse_2*log(sse_2) + -sse_3*log(sse_3) + -sse_4*log(sse_4))/log(2); + } + } + if(variance_8[0]+variance_8[1]+variance_8[2]+variance_8[3]) + return (entropy_8[0]*variance_8[0]+ + entropy_8[1]*variance_8[1]+ + entropy_8[2]*variance_8[2]+ + entropy_8[3]*variance_8[3])/ + (variance_8[0]+ + variance_8[1]+ + variance_8[2]+ + variance_8[3]); + else + return 0; +} + +int vp8_8x8_selection_intra(MACROBLOCK *x) +{ +#ifdef INTRA_VARIANCE_ENTROPY_CRITERIA + return (Compute_Variance_Entropy(x) > 1.2); +#elif defined(INTRA_WTD_SSE_ENTROPY_CRITERIA) + return (Compute_Wtd_SSE_SubEntropy(x) > 1.2); +#elif defined(INTRA_TEST_8X8_ONLY) + return 1; +#else + return 0; //when all criteria are off use the default 4x4 only +#endif +} + +int vp8_8x8_selection_inter(MACROBLOCK *x) +{ +#ifdef INTER_VARIANCE_ENTROPY_CRITERIA + return (Compute_Variance_Entropy(x) > 1.5); +#elif defined(INTER_WTD_SSE_ENTROPY_CRITERIA) + return (Compute_Wtd_SSE_SubEntropy(x) > 1.5); +#elif defined(INTER_TEST_8X8_ONLY) + return 1; +#else + return 0; //when all criteria are off use the default 4x4 only +#endif +} + +#endif + // Original activity measure from Tim T's code. static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x ) { @@ -376,7 +572,10 @@ void encode_mb_row(VP8_COMP *cpi, int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; int map_index = (mb_row * cpi->common.mb_cols); - +#if CONFIG_SEGMENTATION + int left_id, above_id; + int sum; +#endif #if CONFIG_MULTITHREAD const int nsync = cpi->mt_sync_range; const int rightmost_col = cm->mb_cols - 1; @@ -415,6 +614,12 @@ void encode_mb_row(VP8_COMP *cpi, // for each macroblock col in image for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { +#ifdef ENC_DEBUG + //enc_debug = (cpi->count==29 && mb_row==5 && mb_col==0); + enc_debug = (cpi->count==4 && mb_row==17 && mb_col==13); + mb_col_debug=mb_col; + mb_row_debug=mb_row; +#endif // Distance of Mb to the left & right edges, specified in // 1/8th pel units as they are always compared to values // that are in 1/8th pel units @@ -461,8 +666,14 @@ void encode_mb_row(VP8_COMP *cpi, if (xd->segmentation_enabled) { // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) +#if CONFIG_T8X8 + // Reset segment_id to 0 or 1 so that the default transform mode is 4x4 + if (cpi->segmentation_map[map_index+mb_col] <= 3) + xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index+mb_col]&1; +#else if (cpi->segmentation_map[map_index+mb_col] <= 3) xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index+mb_col]; +#endif else xd->mode_info_context->mbmi.segment_id = 0; @@ -476,24 +687,27 @@ void encode_mb_row(VP8_COMP *cpi, if (cm->frame_type == KEY_FRAME) { *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp); + //Note the encoder may have changed the segment_id + #ifdef MODE_STATS - y_modes[xd->mbmi.mode] ++; + y_modes[xd->mode_info_context->mbmi.mode] ++; #endif } else { *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset); + //Note the encoder may have changed the segment_id #ifdef MODE_STATS - inter_y_modes[xd->mbmi.mode] ++; + inter_y_modes[xd->mode_info_context->mbmi.mode] ++; - if (xd->mbmi.mode == SPLITMV) + if (xd->mode_info_context->mbmi.mode == SPLITMV) { int b; - for (b = 0; b < xd->mbmi.partition_count; b++) + for (b = 0; b < x->partition_info->count; b++) { - inter_b_modes[x->partition->bmi[b].mode] ++; + inter_b_modes[x->partition_info->bmi[b].mode] ++; } } @@ -534,6 +748,12 @@ void encode_mb_row(VP8_COMP *cpi, // Increment the activity mask pointers. x->mb_activity_ptr++; +#if CONFIG_SEGMENTATION + if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) + xd->mode_info_context->mbmi.segment_id = 0; + else + xd->mode_info_context->mbmi.segment_id = 1; +#endif /* save the block info */ for (i = 0; i < 16; i++) xd->mode_info_context->bmi[i] = xd->block[i].bmi; @@ -546,9 +766,42 @@ void encode_mb_row(VP8_COMP *cpi, recon_yoffset += 16; recon_uvoffset += 8; - // Keep track of segment useage - segment_counts[xd->mode_info_context->mbmi.segment_id] ++; +#if CONFIG_SEGMENTATION + //cpi->segmentation_map[mb_row * cm->mb_cols + mb_col] = xd->mbmi.segment_id; + if (cm->frame_type == KEY_FRAME) + { + segment_counts[xd->mode_info_context->mbmi.segment_id]++; + } + else + { + sum = 0; + if (mb_col != 0) + sum += (xd->mode_info_context-1)->mbmi.segment_flag; + if (mb_row != 0) + sum += (xd->mode_info_context-cm->mb_cols)->mbmi.segment_flag; + + if (xd->mode_info_context->mbmi.segment_id == cpi->segmentation_map[(mb_row*cm->mb_cols) + mb_col]) + xd->mode_info_context->mbmi.segment_flag = 0; + else + xd->mode_info_context->mbmi.segment_flag = 1; + if (xd->mode_info_context->mbmi.segment_flag == 0) + { + segment_counts[SEEK_SAMEID + sum]++; + segment_counts[10]++; + } + else + { + segment_counts[SEEK_DIFFID + sum]++; + segment_counts[11]++; + //calculate individual segment ids + segment_counts[xd->mode_info_context->mbmi.segment_id] ++; + } + } + segment_counts[SEEK_SEGID + xd->mode_info_context->mbmi.segment_id] ++; +#else + segment_counts[xd->mode_info_context->mbmi.segment_id] ++; +#endif // skip to next mb xd->mode_info_context++; x->partition_info++; @@ -675,7 +928,13 @@ void vp8_encode_frame(VP8_COMP *cpi) MACROBLOCKD *const xd = & x->e_mbd; TOKENEXTRA *tp = cpi->tok; +#if CONFIG_SEGMENTATION + int segment_counts[MAX_MB_SEGMENTS + SEEK_SEGID]; + int prob[3]; + int new_cost, original_cost; +#else int segment_counts[MAX_MB_SEGMENTS]; +#endif int totalrate; vpx_memset(segment_counts, 0, sizeof(segment_counts)); @@ -736,7 +995,7 @@ void vp8_encode_frame(VP8_COMP *cpi) vp8cx_frame_init_quantizer(cpi); - vp8_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q); + vp8_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q);
vp8cx_initialize_me_consts(cpi, cm->base_qindex); if(cpi->oxcf.tuning == VP8_TUNE_SSIM) @@ -845,41 +1104,126 @@ void vp8_encode_frame(VP8_COMP *cpi) } - // Work out the segment probabilites if segmentation is enabled if (xd->segmentation_enabled) { int tot_count; int i; + int count1,count2,count3,count4; // Set to defaults vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs)); +#if CONFIG_SEGMENTATION + + tot_count = segment_counts[12] + segment_counts[13] + segment_counts[14] + segment_counts[15]; + count1 = segment_counts[12] + segment_counts[13]; + count2 = segment_counts[14] + segment_counts[15]; + + if (tot_count) + prob[0] = (count1 * 255) / tot_count; + + if (count1 > 0) + prob[1] = (segment_counts[12] * 255) /count1; + + if (count2 > 0) + prob[2] = (segment_counts[14] * 255) /count2; + + if (cm->frame_type != KEY_FRAME) + { + tot_count = segment_counts[4] + segment_counts[7]; + if (tot_count) + xd->mb_segment_tree_probs[3] = (segment_counts[4] * 255)/tot_count; + + tot_count = segment_counts[5] + segment_counts[8]; + if (tot_count) + xd->mb_segment_tree_probs[4] = (segment_counts[5] * 255)/tot_count; + + tot_count = segment_counts[6] + segment_counts[9]; + if (tot_count) + xd->mb_segment_tree_probs[5] = (segment_counts[6] * 255)/tot_count; + } tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3]; + count3 = segment_counts[0] + segment_counts[1]; + count4 = segment_counts[2] + segment_counts[3]; if (tot_count) + xd->mb_segment_tree_probs[0] = (count3 * 255) / tot_count; + + if (count3 > 0) + xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) /count3; + + if (count4 > 0) + xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) /count4; + + for (i = 0; i < MB_FEATURE_TREE_PROBS+3; i++) { - xd->mb_segment_tree_probs[0] = ((segment_counts[0] + segment_counts[1]) * 255) / tot_count; + if (xd->mb_segment_tree_probs[i] == 0) + xd->mb_segment_tree_probs[i] = 1; + } - tot_count = segment_counts[0] + segment_counts[1]; + original_cost = count1 * vp8_cost_zero(prob[0]) + count2 * vp8_cost_one(prob[0]); - if (tot_count > 0) - { - xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) / tot_count; - } + if (count1 > 0) + original_cost += segment_counts[12] * vp8_cost_zero(prob[1]) + segment_counts[13] * vp8_cost_one(prob[1]); + + if (count2 > 0) + original_cost += segment_counts[14] * vp8_cost_zero(prob[2]) + segment_counts[15] * vp8_cost_one(prob[2]) ; + + new_cost = 0; + + if (cm->frame_type != KEY_FRAME) + { + new_cost = segment_counts[4] * vp8_cost_zero(xd->mb_segment_tree_probs[3]) + segment_counts[7] * vp8_cost_one(xd->mb_segment_tree_probs[3]); - tot_count = segment_counts[2] + segment_counts[3]; + new_cost += segment_counts[5] * vp8_cost_zero(xd->mb_segment_tree_probs[4]) + segment_counts[8] * vp8_cost_one(xd->mb_segment_tree_probs[4]); - if (tot_count > 0) - xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count; + new_cost += segment_counts[6] * vp8_cost_zero(xd->mb_segment_tree_probs[5]) + segment_counts[9] * vp8_cost_one (xd->mb_segment_tree_probs[5]); + } + + if (tot_count > 0) + new_cost += count3 * vp8_cost_zero(xd->mb_segment_tree_probs[0]) + count4 * vp8_cost_one(xd->mb_segment_tree_probs[0]); + + if (count3 > 0) + new_cost += segment_counts[0] * vp8_cost_zero(xd->mb_segment_tree_probs[1]) + segment_counts[1] * vp8_cost_one(xd->mb_segment_tree_probs[1]); - // Zero probabilities not allowed - for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++) + if (count4 > 0) + new_cost += segment_counts[2] * vp8_cost_zero(xd->mb_segment_tree_probs[2]) + segment_counts[3] * vp8_cost_one(xd->mb_segment_tree_probs[2]) ; + + if (new_cost < original_cost) + xd->temporal_update = 1; + else + { + xd->temporal_update = 0; + xd->mb_segment_tree_probs[0] = prob[0]; + xd->mb_segment_tree_probs[1] = prob[1]; + xd->mb_segment_tree_probs[2] = prob[2]; + } +#else + tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3]; + count1 = segment_counts[0] + segment_counts[1]; + count2 = segment_counts[2] + segment_counts[3]; + + if (tot_count) + xd->mb_segment_tree_probs[0] = (count1 * 255) / tot_count; + + if (count1 > 0) + xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) /count1; + + if (count2 > 0) + xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) /count2; + +#endif + // Zero probabilities not allowed +#if CONFIG_SEGMENTATION + for (i = 0; i < MB_FEATURE_TREE_PROBS+3; i++) +#else + for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) +#endif { if (xd->mb_segment_tree_probs[i] == 0) xd->mb_segment_tree_probs[i] = 1; } - } } // 256 rate units to the bit @@ -1081,7 +1425,7 @@ static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x) do { - ++ bct[xd->block[b].bmi.mode]; + ++ bct[xd->block[b].bmi.as_mode]; } while (++b < 16); } @@ -1119,6 +1463,10 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) { int rate; +#if CONFIG_T8X8 + if (x->e_mbd.segmentation_enabled) + x->e_mbd.update_mb_segmentation_map = 1; +#endif if (cpi->sf.RD && cpi->compressor_speed != 2) vp8_rd_pick_intra_mode(cpi, x, &rate); else @@ -1133,12 +1481,22 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED) vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x); else + { +#if CONFIG_T8X8 + if (x->e_mbd.segmentation_enabled) + x->e_mbd.mode_info_context->mbmi.segment_id |= (vp8_8x8_selection_intra(x) << 1); +#endif vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); - + } vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); sum_intra_stats(cpi, x); vp8_tokenize_mb(cpi, &x->e_mbd, t); - +#if CONFIG_T8X8 + if( x->e_mbd.mode_info_context->mbmi.segment_id >=2) + cpi->t8x8_count++; + else + cpi->t4x4_count++; +#endif return rate; } #ifdef SPEEDSTATS @@ -1260,16 +1618,25 @@ int vp8cx_encode_inter_macroblock cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++; +#if CONFIG_T8X8 + if (xd->segmentation_enabled) + x->e_mbd.update_mb_segmentation_map = 1; +#endif + if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { - vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); - if (xd->mode_info_context->mbmi.mode == B_PRED) { + vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x); } else { +#if CONFIG_T8X8 + if (xd->segmentation_enabled) + xd->mode_info_context->mbmi.segment_id |= (vp8_8x8_selection_intra(x) << 1); +#endif + vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); } @@ -1278,6 +1645,10 @@ int vp8cx_encode_inter_macroblock else { int ref_fb_idx; +#if CONFIG_T8X8 + if (xd->segmentation_enabled) + xd->mode_info_context->mbmi.segment_id |= (vp8_8x8_selection_inter(x) << 1); +#endif vp8_build_uvmvs(xd, cpi->common.full_pixel); @@ -1307,9 +1678,40 @@ int vp8cx_encode_inter_macroblock xd->dst.y_stride, xd->dst.uv_stride); } +#if CONFIG_T8X8 + if (x->e_mbd.mode_info_context->mbmi.segment_id >=2) + cpi->t8x8_count++; + else + cpi->t4x4_count++; +#endif if (!x->skip) + { +#ifdef ENC_DEBUG + if (enc_debug) + { + int i; + printf("Segment=%d [%d, %d]: %d %d:\n", x->e_mbd.mode_info_context->mbmi.segment_id, mb_col_debug, mb_row_debug, xd->mb_to_left_edge, xd->mb_to_top_edge); + for (i =0; i<400; i++) { + printf("%3d ", xd->qcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("\n"); + printf("eobs = "); + for (i=0;i<25;i++) + printf("%d:%d ", i, xd->block[i].eob); + printf("\n"); + fflush(stdout); + } +#endif vp8_tokenize_mb(cpi, xd, t); +#ifdef ENC_DEBUG + if (enc_debug) { + printf("Tokenized\n"); + fflush(stdout); + } +#endif + } else { if (cpi->common.mb_no_coeff_skip) diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 59db0253b..10afed3ec 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -22,6 +22,10 @@ #include "encodeintra.h" +#ifdef ENC_DEBUG +extern int enc_debug; +#endif + #if CONFIG_RUNTIME_CPU_DETECT #define IF_RTCD(x) (x) #else @@ -96,15 +100,67 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mby)(&x->e_mbd); ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride); - +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_transform_intra_mby_8x8(x); + else +#endif vp8_transform_intra_mby(x); - vp8_quantize_mby(x); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_quantize_mby_8x8(x); + else +#endif + vp8_quantize_mby(x); if (x->optimize) + { +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_optimize_mby_8x8(x, rtcd); + else +#endif vp8_optimize_mby(x, rtcd); + } - vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + else +#endif + vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + +#ifdef ENC_DEBUG + if (enc_debug) { + int i; + printf("Intra qcoeff:\n"); + printf("%d %d:\n", x->e_mbd.mb_to_left_edge, x->e_mbd.mb_to_top_edge); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.qcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("Intra dqcoeff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.dqcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("Intra diff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.diff[i]); + if (i%16 == 15) printf("\n"); + } + printf("Intra predictor:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.predictor[i]); + if (i%16 == 15) printf("\n"); + } + printf("eobs:\n"); + for (i=0;i<25;i++) + printf("%d ", x->e_mbd.block[i].eob); + printf("\n"); + } +#endif RECON_INVOKE(&rtcd->common->recon, recon_mby) (IF_RTCD(&rtcd->common->recon), &x->e_mbd); @@ -116,14 +172,66 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mbuv)(&x->e_mbd); ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_transform_mbuv_8x8(x); + else +#endif + vp8_transform_mbuv(x); - vp8_transform_mbuv(x); - - vp8_quantize_mbuv(x); - +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_quantize_mbuv_8x8(x); + else +#endif + vp8_quantize_mbuv(x); + +#ifdef ENC_DEBUG + if (enc_debug) { + int i; + printf("vp8_encode_intra16x16mbuv\n"); + printf("%d %d:\n", x->e_mbd.mb_to_left_edge, x->e_mbd.mb_to_top_edge); + printf("qcoeff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.qcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("dqcoeff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.dqcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("diff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.diff[i]); + if (i%16 == 15) printf("\n"); + } + printf("predictor:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.predictor[i]); + if (i%16 == 15) printf("\n"); + } + printf("eobs:\n"); + for (i=0;i<25;i++) + printf("%d ", x->e_mbd.block[i].eob); + printf("\n"); + } +#endif if (x->optimize) + { +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_optimize_mbuv_8x8(x, rtcd); + else +#endif vp8_optimize_mbuv(x, rtcd); + } +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_inverse_transform_mbuv_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + else +#endif vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd); vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd); diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c index eb89bba0a..408a5956e 100644 --- a/vp8/encoder/encodemb.c +++ b/vp8/encoder/encodemb.c @@ -26,6 +26,11 @@ #else #define IF_RTCD(x) NULL #endif + +#ifdef ENC_DEBUG +extern int enc_debug; +#endif + void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) { unsigned char *src_ptr = (*(be->base_src) + be->src); @@ -117,7 +122,21 @@ static void build_dcblock(MACROBLOCK *x) src_diff_ptr[i] = x->coeff[i * 16]; } } - +#if CONFIG_T8X8 +void vp8_build_dcblock_8x8(MACROBLOCK *x) +{ + short *src_diff_ptr = &x->src_diff[384]; + int i; + for (i = 0; i < 16; i++) + { + src_diff_ptr[i] = 0; + } + src_diff_ptr[0] = x->coeff[0 * 16]; + src_diff_ptr[1] = x->coeff[4 * 16]; + src_diff_ptr[4] = x->coeff[8 * 16]; + src_diff_ptr[8] = x->coeff[12 * 16]; +} +#endif void vp8_transform_mbuv(MACROBLOCK *x) { int i; @@ -197,10 +216,104 @@ static void transform_mby(MACROBLOCK *x) } } +#if CONFIG_T8X8 +void vp8_transform_mbuv_8x8(MACROBLOCK *x) +{ + int i; -#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) + for (i = 16; i < 24; i += 4) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 16); + } +} + + +void vp8_transform_intra_mby_8x8(MACROBLOCK *x)//changed +{ + int i; + + for (i = 0; i < 9; i += 8) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 32); + } + for (i = 2; i < 11; i += 8) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i+2].coeff[0], 32); + } + // build dc block from 16 y dc values + vp8_build_dcblock_8x8(x); + //vp8_build_dcblock(x); + + // do 2nd order transform on the dc block + x->short_fhaar2x2(&x->block[24].src_diff[0], + &x->block[24].coeff[0], 8); + +} + + +void vp8_transform_mb_8x8(MACROBLOCK *x) +{ + int i; + + for (i = 0; i < 9; i += 8) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 32); + } + for (i = 2; i < 11; i += 8) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i+2].coeff[0], 32); + } + // build dc block from 16 y dc values + if (x->e_mbd.mode_info_context->mbmi.mode != B_PRED &&x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) + vp8_build_dcblock_8x8(x); + //vp8_build_dcblock(x); + + for (i = 16; i < 24; i += 4) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 16); + } + + // do 2nd order transform on the dc block + if (x->e_mbd.mode_info_context->mbmi.mode != B_PRED &&x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) + x->short_fhaar2x2(&x->block[24].src_diff[0], + &x->block[24].coeff[0], 8); +} + +void vp8_transform_mby_8x8(MACROBLOCK *x) +{ + int i; + for (i = 0; i < 9; i += 8) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 32); + } + for (i = 2; i < 11; i += 8) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i+2].coeff[0], 32); + } + // build dc block from 16 y dc values + if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) + { + //vp8_build_dcblock(x); + vp8_build_dcblock_8x8(x); + x->short_fhaar2x2(&x->block[24].src_diff[0], + &x->block[24].coeff[0], 8); + } +} + +#endif + +#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) +#define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) typedef struct vp8_token_state vp8_token_state; struct vp8_token_state{ @@ -581,27 +694,554 @@ void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) } } +#if CONFIG_T8X8 +void optimize_b_8x8(MACROBLOCK *mb, int i, int type, + ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, + ENTROPY_CONTEXT *a1, ENTROPY_CONTEXT *l1, + const VP8_ENCODER_RTCD *rtcd) +{ + BLOCK *b; + BLOCKD *d; + vp8_token_state tokens[65][2]; + unsigned best_mask[2]; + const short *dequant_ptr; + const short *coeff_ptr; + short *qcoeff_ptr; + short *dqcoeff_ptr; + int eob; + int i0; + int rc; + int x; + int sz = 0; + int next; + int rdmult; + int rddiv; + int final_eob; + int rd_cost0; + int rd_cost1; + int rate0; + int rate1; + int error0; + int error1; + int t0; + int t1; + int best; + int band; + int pt; + + b = &mb->block[i]; + d = &mb->e_mbd.block[i]; + + /* Enable this to test the effect of RDO as a replacement for the dynamic + * zero bin instead of an augmentation of it. + */ +#if 0 + vp8_strict_quantize_b(b, d); +#endif + + dequant_ptr = d->dequant; + coeff_ptr = b->coeff; + qcoeff_ptr = d->qcoeff; + dqcoeff_ptr = d->dqcoeff; + i0 = !type; + eob = d->eob; + + /* Now set up a Viterbi trellis to evaluate alternative roundings. */ + /* TODO: These should vary with the block type, since the quantizer does. */ + rdmult = mb->rdmult << 2; + rddiv = mb->rddiv; + best_mask[0] = best_mask[1] = 0; + /* Initialize the sentinel node of the trellis. */ + tokens[eob][0].rate = 0; + tokens[eob][0].error = 0; + tokens[eob][0].next = 64; + tokens[eob][0].token = DCT_EOB_TOKEN; + tokens[eob][0].qc = 0; + *(tokens[eob] + 1) = *(tokens[eob] + 0); + next = eob; + for (i = eob; i-- > i0;) + { + int base_bits; + int d2; + int dx; + + rc = vp8_default_zig_zag1d_8x8[i]; + x = qcoeff_ptr[rc]; + /* Only add a trellis state for non-zero coefficients. */ + if (x) + { + int shortcut=0; + error0 = tokens[next][0].error; + error1 = tokens[next][1].error; + /* Evaluate the first possibility for this state. */ + rate0 = tokens[next][0].rate; + rate1 = tokens[next][1].rate; + t0 = (vp8_dct_value_tokens_ptr + x)->Token; + /* Consider both possible successor states. */ + if (next < 64) + { + band = vp8_coef_bands_8x8[i + 1]; + pt = vp8_prev_token_class[t0]; + rate0 += + mb->token_costs[type][band][pt][tokens[next][0].token]; + rate1 += + mb->token_costs[type][band][pt][tokens[next][1].token]; + } + rd_cost0 = RDCOST_8x8(rdmult, rddiv, rate0, error0); + rd_cost1 = RDCOST_8x8(rdmult, rddiv, rate1, error1); + if (rd_cost0 == rd_cost1) + { + rd_cost0 = RDTRUNC_8x8(rdmult, rddiv, rate0, error0); + rd_cost1 = RDTRUNC_8x8(rdmult, rddiv, rate1, error1); + } + /* And pick the best. */ + best = rd_cost1 < rd_cost0; + base_bits = *(vp8_dct_value_cost_ptr + x); + dx = dqcoeff_ptr[rc] - coeff_ptr[rc]; + d2 = dx*dx; + tokens[i][0].rate = base_bits + (best ? rate1 : rate0); + tokens[i][0].error = d2 + (best ? error1 : error0); + tokens[i][0].next = next; + tokens[i][0].token = t0; + tokens[i][0].qc = x; + best_mask[0] |= best << i; + /* Evaluate the second possibility for this state. */ + rate0 = tokens[next][0].rate; + rate1 = tokens[next][1].rate; + + if((abs(x)*dequant_ptr[rc!=0]>abs(coeff_ptr[rc])) && + (abs(x)*dequant_ptr[rc!=0]<abs(coeff_ptr[rc])+dequant_ptr[rc!=0])) + shortcut = 1; + else + shortcut = 0; + + if(shortcut) + { + sz = -(x < 0); + x -= 2*sz + 1; + } + + /* Consider both possible successor states. */ + if (!x) + { + /* If we reduced this coefficient to zero, check to see if + * we need to move the EOB back here. + */ + t0 = tokens[next][0].token == DCT_EOB_TOKEN ? + DCT_EOB_TOKEN : ZERO_TOKEN; + t1 = tokens[next][1].token == DCT_EOB_TOKEN ? + DCT_EOB_TOKEN : ZERO_TOKEN; + } + else + { + t0=t1 = (vp8_dct_value_tokens_ptr + x)->Token; + } + if (next < 64) + { + band = vp8_coef_bands_8x8[i + 1]; + if(t0!=DCT_EOB_TOKEN) + { + pt = vp8_prev_token_class[t0]; + rate0 += mb->token_costs[type][band][pt][ + tokens[next][0].token]; + } + if(t1!=DCT_EOB_TOKEN) + { + pt = vp8_prev_token_class[t1]; + rate1 += mb->token_costs[type][band][pt][ + tokens[next][1].token]; + } + } + + rd_cost0 = RDCOST_8x8(rdmult, rddiv, rate0, error0); + rd_cost1 = RDCOST_8x8(rdmult, rddiv, rate1, error1); + if (rd_cost0 == rd_cost1) + { + rd_cost0 = RDTRUNC_8x8(rdmult, rddiv, rate0, error0); + rd_cost1 = RDTRUNC_8x8(rdmult, rddiv, rate1, error1); + } + /* And pick the best. */ + best = rd_cost1 < rd_cost0; + base_bits = *(vp8_dct_value_cost_ptr + x); + + if(shortcut) + { + dx -= (dequant_ptr[rc!=0] + sz) ^ sz; + d2 = dx*dx; + } + tokens[i][1].rate = base_bits + (best ? rate1 : rate0); + tokens[i][1].error = d2 + (best ? error1 : error0); + tokens[i][1].next = next; + tokens[i][1].token =best?t1:t0; + tokens[i][1].qc = x; + best_mask[1] |= best << i; + /* Finally, make this the new head of the trellis. */ + next = i; + } + /* There's no choice to make for a zero coefficient, so we don't + * add a new trellis node, but we do need to update the costs. + */ + else + { + band = vp8_coef_bands_8x8[i + 1]; + t0 = tokens[next][0].token; + t1 = tokens[next][1].token; + /* Update the cost of each path if we're past the EOB token. */ + if (t0 != DCT_EOB_TOKEN) + { + tokens[next][0].rate += mb->token_costs[type][band][0][t0]; + tokens[next][0].token = ZERO_TOKEN; + } + if (t1 != DCT_EOB_TOKEN) + { + tokens[next][1].rate += mb->token_costs[type][band][0][t1]; + tokens[next][1].token = ZERO_TOKEN; + } + /* Don't update next, because we didn't add a new node. */ + } + } + + /* Now pick the best path through the whole trellis. */ + band = vp8_coef_bands_8x8[i + 1]; + VP8_COMBINEENTROPYCONTEXTS_8x8(pt, *a, *l, *a1, *l1); + rate0 = tokens[next][0].rate; + rate1 = tokens[next][1].rate; + error0 = tokens[next][0].error; + error1 = tokens[next][1].error; + t0 = tokens[next][0].token; + t1 = tokens[next][1].token; + rate0 += mb->token_costs[type][band][pt][t0]; + rate1 += mb->token_costs[type][band][pt][t1]; + rd_cost0 = RDCOST_8x8(rdmult, rddiv, rate0, error0); + rd_cost1 = RDCOST_8x8(rdmult, rddiv, rate1, error1); + if (rd_cost0 == rd_cost1) + { + rd_cost0 = RDTRUNC_8x8(rdmult, rddiv, rate0, error0); + rd_cost1 = RDTRUNC_8x8(rdmult, rddiv, rate1, error1); + } + best = rd_cost1 < rd_cost0; + final_eob = i0 - 1; + for (i = next; i < eob; i = next) + { + x = tokens[i][best].qc; + if (x) + final_eob = i; + rc = vp8_default_zig_zag1d_8x8[i]; + qcoeff_ptr[rc] = x; + dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; + next = tokens[i][best].next; + best = (best_mask[best] >> i) & 1; + } + final_eob++; + + d->eob = final_eob; + *a = *l = (d->eob != !type); + +} + +void optimize_mb_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) +{ + int b; + int type; + int has_2nd_order; + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; + + vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + + has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED + && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); + type = has_2nd_order ? 0 : 3; + + for (b = 0; b < 16; b+=4) + { + optimize_b_8x8(x, b, type, + ta + vp8_block2above[b], tl + vp8_block2left[b], + ta + vp8_block2above[b+1], tl + vp8_block2left[b+4], + rtcd); + + if(b==0) + { + *(ta + vp8_block2above[1]) = *(ta + vp8_block2above[4]) = *(ta + vp8_block2above[5]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[1]) = *(tl + vp8_block2left[4]) = *(tl + vp8_block2left[5]) = *(tl + vp8_block2left[b]); + } + else if(b==4) + { + *(ta + vp8_block2above[2]) = *(ta + vp8_block2above[3]) = *(ta + vp8_block2above[6]) = *(ta + vp8_block2above[7]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[2]) = *(tl + vp8_block2left[3]) = *(tl + vp8_block2left[6]) = *(tl + vp8_block2left[7]) = *(tl + vp8_block2left[b]); + *(ta + vp8_block2above[4]) = *(ta + vp8_block2above[1]); + *(tl + vp8_block2left[4]) = *(tl + vp8_block2left[1]); + } + else if(b==8) + { + *(ta + vp8_block2above[9]) = *(ta + vp8_block2above[12]) = *(ta + vp8_block2above[13]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[9]) = *(tl + vp8_block2left[12]) = *(tl + vp8_block2left[13]) = *(tl + vp8_block2left[b]); + + } + else if(b==12) + { + *(ta + vp8_block2above[10]) = *(ta + vp8_block2above[11]) = *(ta + vp8_block2above[14]) = *(ta + vp8_block2above[15]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[10]) = *(tl + vp8_block2left[11]) = *(tl + vp8_block2left[14]) = *(tl + vp8_block2left[15]) = *(tl + vp8_block2left[b]); + *(ta + vp8_block2above[12]) = *(ta + vp8_block2above[8]); + *(tl + vp8_block2left[12]) = *(tl + vp8_block2left[8]); + + } + + + + } + + for (b = 16; b < 20; b+=4) + { + optimize_b_8x8(x, b, PLANE_TYPE_UV, //vp8_block2type[b], + ta + vp8_block2above[b], tl + vp8_block2left[b], + ta + vp8_block2above[b+1], tl + vp8_block2left[b+2], + rtcd); + *(ta + vp8_block2above[b+1]) = *(ta + vp8_block2above[b+2]) = *(ta + vp8_block2above[b+3]) = + *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[b+1]) = *(tl + vp8_block2left[b+2]) = *(tl + vp8_block2left[b+3]) = + *(tl + vp8_block2left[b]); + + } + + for (b = 20; b < 24; b+=4) + { + optimize_b_8x8(x, b, PLANE_TYPE_UV, //vp8_block2type[b], + ta + vp8_block2above[b], tl + vp8_block2left[b], + ta + vp8_block2above[b+1], tl + vp8_block2left[b+2], + rtcd); + *(ta + vp8_block2above[b+1]) = *(ta + vp8_block2above[b+2]) = *(ta + vp8_block2above[b+3]) = + *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[b+1]) = *(tl + vp8_block2left[b+2]) = *(tl + vp8_block2left[b+3]) = + *(tl + vp8_block2left[b]); + + } + + + /* + if (has_2nd_order) + { + vp8_setup_temp_context(&t, x->e_mbd.above_context[Y2CONTEXT], + x->e_mbd.left_context[Y2CONTEXT], 1); + optimize_b(x, 24, 1, t.a, t.l, rtcd); + } + */ +} + +void vp8_optimize_mby_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) +{ + int b; + int type; + int has_2nd_order; + + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; + + if (!x->e_mbd.above_context) + return; + + if (!x->e_mbd.left_context) + return; + + vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + + has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED + && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); + type = has_2nd_order ? 0 : 3; + + for (b = 0; b < 16; b+=4) + { + optimize_b_8x8(x, b, type, + ta + vp8_block2above[b], tl + vp8_block2left[b], + ta + vp8_block2above[b+1], tl + vp8_block2left[b+4], + rtcd); + if(b==0) + { + *(ta + vp8_block2above[1]) = *(ta + vp8_block2above[4]) = *(ta + vp8_block2above[5]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[1]) = *(tl + vp8_block2left[4]) = *(tl + vp8_block2left[5]) = *(tl + vp8_block2left[b]); + } + else if(b==4) + { + *(ta + vp8_block2above[2]) = *(ta + vp8_block2above[3]) = *(ta + vp8_block2above[6]) = *(ta + vp8_block2above[7]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[2]) = *(tl + vp8_block2left[3]) = *(tl + vp8_block2left[6]) = *(tl + vp8_block2left[7]) = *(tl + vp8_block2left[b]); + *(ta + vp8_block2above[4]) = *(ta + vp8_block2above[1]); + *(tl + vp8_block2left[4]) = *(tl + vp8_block2left[1]); + } + else if(b==8) + { + *(ta + vp8_block2above[9]) = *(ta + vp8_block2above[12]) = *(ta + vp8_block2above[13]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[9]) = *(tl + vp8_block2left[12]) = *(tl + vp8_block2left[13]) = *(tl + vp8_block2left[b]); + + } + else if(b==12) + { + *(ta + vp8_block2above[10]) = *(ta + vp8_block2above[11]) = *(ta + vp8_block2above[14]) = *(ta + vp8_block2above[15]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[10]) = *(tl + vp8_block2left[11]) = *(tl + vp8_block2left[14]) = *(tl + vp8_block2left[15]) = *(tl + vp8_block2left[b]); + *(ta + vp8_block2above[12]) = *(ta + vp8_block2above[8]); + *(tl + vp8_block2left[12]) = *(tl + vp8_block2left[8]); + + } + + + } + + /* + if (has_2nd_order) + { + vp8_setup_temp_context(&t, x->e_mbd.above_context[Y2CONTEXT], + x->e_mbd.left_context[Y2CONTEXT], 1); + optimize_b(x, 24, 1, t.a, t.l, rtcd); + } + */ +} + +void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) +{ + int b; + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; + + if (!x->e_mbd.above_context) + return; + + if (!x->e_mbd.left_context) + return; + + vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + + for (b = 16; b < 20; b+=4) + { + optimize_b_8x8(x, b, PLANE_TYPE_UV, //vp8_block2type[b], + ta + vp8_block2above[b], tl + vp8_block2left[b], + ta + vp8_block2above[b+1], tl + vp8_block2left[b+2], + rtcd); + *(ta + vp8_block2above[b+1]) = *(ta + vp8_block2above[b+2]) = *(ta + vp8_block2above[b+3]) = + *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[b+1]) = *(tl + vp8_block2left[b+2]) = *(tl + vp8_block2left[b+3]) = + *(tl + vp8_block2left[b]); + + } + + for (b = 20; b < 24; b+=4) + { + optimize_b_8x8(x, b, PLANE_TYPE_UV, //vp8_block2type[b], + ta + vp8_block2above[b], tl + vp8_block2left[b], + ta + vp8_block2above[b+1], tl + vp8_block2left[b+2], + rtcd); + *(ta + vp8_block2above[b+1]) = *(ta + vp8_block2above[b+2]) = *(ta + vp8_block2above[b+3]) = + *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[b+1]) = *(tl + vp8_block2left[b+2]) = *(tl + vp8_block2left[b+3]) = + *(tl + vp8_block2left[b]); + + } + +} +#endif + void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { vp8_build_inter_predictors_mb(&x->e_mbd); vp8_subtract_mb(rtcd, x); - transform_mb(x); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_transform_mb_8x8(x); + else +#endif + transform_mb(x); - vp8_quantize_mb(x); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_quantize_mb_8x8(x); + else +#endif + vp8_quantize_mb(x); if (x->optimize) + { +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + optimize_mb_8x8(x, rtcd); + else +#endif optimize_mb(x, rtcd); + } - vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_inverse_transform_mb_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + else +#endif + vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) { +#ifdef ENC_DEBUG + if (enc_debug) + { + int i; + printf("qcoeff:\n"); + printf("%d %d:\n", x->e_mbd.mb_to_left_edge, x->e_mbd.mb_to_top_edge); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.qcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("dqcoeff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.dqcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("diff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.diff[i]); + if (i%16 == 15) printf("\n"); + } + printf("predictor:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.predictor[i]); + if (i%16 == 15) printf("\n"); + } + printf("\n"); + } +#endif + } RECON_INVOKE(&rtcd->common->recon, recon_mb) (IF_RTCD(&rtcd->common->recon), &x->e_mbd); +#ifdef ENC_DEBUG + if (enc_debug) { + int i, j, k; + printf("Final Reconstruction\n"); + for (i =0; i<16; i+=4) { + BLOCKD *b = &x->e_mbd.block[i]; + unsigned char *d = *(b->base_dst) + b->dst; + for (k=0; k<4; k++) { + for (j=0; j<16; j++) + printf("%3d ", d[j]); + printf("\n"); + d+=b->dst_stride; + } + } + } +#endif } -/* this funciton is used by first pass only */ +/* this function is used by first pass only */ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { BLOCK *b = &x->block[0]; @@ -610,22 +1250,34 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride); - transform_mby(x); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_transform_mby_8x8(x); + else +#endif + transform_mby(x); vp8_quantize_mby(x); - - vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + else +#endif + vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); RECON_INVOKE(&rtcd->common->recon, recon_mby) (IF_RTCD(&rtcd->common->recon), &x->e_mbd); } - void vp8_encode_inter16x16uvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { vp8_build_inter_predictors_mbuv(&x->e_mbd); ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride); - +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_transform_mbuv_8x8(x); + else +#endif vp8_transform_mbuv(x); vp8_quantize_mbuv(x); diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h index 47fc72dad..73f1ad223 100644 --- a/vp8/encoder/encodemb.h +++ b/vp8/encoder/encodemb.h @@ -103,4 +103,16 @@ void vp8_encode_inter16x16uvrd(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK * void vp8_optimize_mby(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd); void vp8_optimize_mbuv(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd); void vp8_encode_inter16x16y(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x); + +#if CONFIG_T8X8 +void vp8_transform_mb_8x8(MACROBLOCK *mb); +void vp8_transform_mbuv_8x8(MACROBLOCK *x); +void vp8_transform_intra_mby_8x8(MACROBLOCK *x); +void vp8_build_dcblock_8x8(MACROBLOCK *b); +void vp8_optimize_mby_8x8(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd); +void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd); +#endif + + + #endif diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index 1a37f03b9..6b9dff8d9 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -24,6 +24,14 @@ extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x); extern void vp8_build_block_offsets(MACROBLOCK *x); extern void vp8_setup_block_ptrs(MACROBLOCK *x); +#ifdef MODE_STATS +extern unsigned int inter_y_modes[10]; +extern unsigned int inter_uv_modes[4]; +extern unsigned int inter_b_modes[15]; +extern unsigned int y_modes[5]; +extern unsigned int uv_modes[4]; +extern unsigned int b_modes[14]; +#endif extern void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm); static THREAD_FUNCTION loopfilter_thread(void *p_data) @@ -175,7 +183,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) { *totalrate += vp8cx_encode_intra_macro_block(cpi, x, &tp); #ifdef MODE_STATS - y_modes[xd->mbmi.mode] ++; + y_modes[xd->mode_info_context->mbmi.mode] ++; #endif } else @@ -183,15 +191,15 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) *totalrate += vp8cx_encode_inter_macroblock(cpi, x, &tp, recon_yoffset, recon_uvoffset); #ifdef MODE_STATS - inter_y_modes[xd->mbmi.mode] ++; + inter_y_modes[xd->mode_info_context->mbmi.mode] ++; - if (xd->mbmi.mode == SPLITMV) + if (xd->mode_info_context->mbmi.mode == SPLITMV) { int b; - for (b = 0; b < xd->mbmi.partition_count; b++) + for (b = 0; b < x->partition_info->count; b++) { - inter_b_modes[x->partition->bmi[b].mode] ++; + inter_b_modes[x->partition_info->bmi[b].mode] ++; } } diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index bd1959dff..e78514e52 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -71,6 +71,10 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c; +#if CONFIG_T8X8 + cpi->rtcd.fdct.short8x8 = vp8_short_fdct8x8_c; + cpi->rtcd.fdct.haar_short2x2 = vp8_short_fhaar2x2_c; +#endif cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c; cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_c; @@ -88,6 +92,12 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) cpi->rtcd.quantize.quantb_pair = vp8_regular_quantize_b_pair; cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c; cpi->rtcd.quantize.fastquantb_pair = vp8_fast_quantize_b_pair_c; +#if CONFIG_T8X8 + cpi->rtcd.quantize.quantb_8x8 = vp8_regular_quantize_b_8x8; + cpi->rtcd.quantize.fastquantb_8x8 = vp8_fast_quantize_b_8x8_c; + cpi->rtcd.quantize.quantb_2x2 = vp8_regular_quantize_b_2x2; + cpi->rtcd.quantize.fastquantb_2x2 = vp8_fast_quantize_b_2x2_c; +#endif cpi->rtcd.search.full_search = vp8_full_search_sad; cpi->rtcd.search.refining_search = vp8_refining_search_sad; cpi->rtcd.search.diamond_search = vp8_diamond_search_sad; diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index be94572d9..b5e6f8704 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -105,6 +105,9 @@ extern double vp8_calc_ssimg #ifdef OUTPUT_YUV_SRC FILE *yuv_file; #endif +#ifdef OUTPUT_YUV_REC +FILE *yuv_rec_file; +#endif #if 0 FILE *framepsnr; @@ -130,15 +133,21 @@ unsigned int tot_ef = 0; unsigned int cnt_ef = 0; #endif +#if defined(SECTIONBITS_OUTPUT) +extern unsigned __int64 Sectionbits[500]; +#endif #ifdef MODE_STATS extern unsigned __int64 Sectionbits[50]; extern int y_modes[5] ; extern int uv_modes[4] ; extern int b_modes[10] ; - extern int inter_y_modes[10] ; extern int inter_uv_modes[4] ; extern unsigned int inter_b_modes[15]; +#if CONFIG_SEGMENTATION +extern int segment_modes_intra[MAX_MB_SEGMENTS]; +extern int segment_modes_inter[MAX_MB_SEGMENTS]; +#endif #endif extern void (*vp8_short_fdct4x4)(short *input, short *output, int pitch); @@ -309,7 +318,11 @@ extern FILE *vpxlogc; static void setup_features(VP8_COMP *cpi) { // Set up default state for MB feature flags +#if CONFIG_SEGMENTATION + cpi->mb.e_mbd.segmentation_enabled = 1; +#else cpi->mb.e_mbd.segmentation_enabled = 0; +#endif cpi->mb.e_mbd.update_mb_segmentation_map = 0; cpi->mb.e_mbd.update_mb_segmentation_data = 0; vpx_memset(cpi->mb.e_mbd.mb_segment_tree_probs, 255, sizeof(cpi->mb.e_mbd.mb_segment_tree_probs)); @@ -408,7 +421,6 @@ static void set_segmentation_map(VP8_PTR ptr, unsigned char *segmentation_map) // Copy in the new segmentation map vpx_memcpy(cpi->segmentation_map, segmentation_map, (cpi->common.mb_rows * cpi->common.mb_cols)); - // Signal that the map should be updated. cpi->mb.e_mbd.update_mb_segmentation_map = 1; cpi->mb.e_mbd.update_mb_segmentation_data = 1; @@ -434,12 +446,10 @@ static void set_segment_data(VP8_PTR ptr, signed char *feature_data, unsigned ch static void segmentation_test_function(VP8_PTR ptr) { VP8_COMP *cpi = (VP8_COMP *)(ptr); - unsigned char *seg_map; signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; - + CHECK_MEM_ERROR(seg_map, vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1)); // Create a temporary map for segmentation data. - CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); // MB loop to set local segmentation map /*for ( i = 0; i < cpi->common.mb_rows; i++ ) @@ -499,7 +509,7 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment) int mbs_in_frame = cpi->common.mb_rows * cpi->common.mb_cols; // Create a temporary map for segmentation data. - CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); + CHECK_MEM_ERROR(seg_map, vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1)); cpi->cyclic_refresh_q = Q; @@ -1238,16 +1248,25 @@ void vp8_set_speed_features(VP8_COMP *cpi) if (cpi->sf.improved_dct) { +#if CONFIG_T8X8 + cpi->mb.vp8_short_fdct8x8 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x8); +#endif cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x4); cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4); } else { +#if CONFIG_T8X8 + cpi->mb.vp8_short_fdct8x8 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x8); +#endif cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, fast8x4); cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, fast4x4); } cpi->mb.short_walsh4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, walsh_short4x4); +#if CONFIG_T8X8 + cpi->mb.short_fhaar2x2 = FDCT_INVOKE(&cpi->rtcd.fdct, haar_short2x2); +#endif if (cpi->sf.improved_quant) { @@ -1255,6 +1274,10 @@ void vp8_set_speed_features(VP8_COMP *cpi) quantb); cpi->mb.quantize_b_pair = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb_pair); +#if CONFIG_T8X8 + cpi->mb.quantize_b_8x8 = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb_8x8); + cpi->mb.quantize_b_2x2 = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb_2x2); +#endif } else { @@ -1262,6 +1285,10 @@ void vp8_set_speed_features(VP8_COMP *cpi) fastquantb); cpi->mb.quantize_b_pair = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb_pair); +#if CONFIG_T8X8 + cpi->mb.quantize_b_8x8 = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb_8x8); + cpi->mb.quantize_b_2x2 = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb_2x2); +#endif } if (cpi->sf.improved_quant != last_improved_quant) vp8cx_init_quantizer(cpi); @@ -1912,7 +1939,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int))); // Create the encoder segmentation map and set all entries to 0 - CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); + CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1)); CHECK_MEM_ERROR(cpi->active_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); vpx_memset(cpi->active_map , 1, (cpi->common.mb_rows * cpi->common.mb_cols)); cpi->active_map_enabled = 0; @@ -1948,13 +1975,12 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) cpi->cyclic_refresh_q = 32; if (cpi->cyclic_refresh_mode_enabled) - { CHECK_MEM_ERROR(cpi->cyclic_refresh_map, vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1)); - } else cpi->cyclic_refresh_map = (signed char *) NULL; // Test function for segmentation + //segmentation_test_function((VP8_PTR) cpi); #ifdef ENTROPY_STATS @@ -2045,6 +2071,9 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) #ifdef OUTPUT_YUV_SRC yuv_file = fopen("bd.yuv", "ab"); #endif +#ifdef OUTPUT_YUV_REC + yuv_rec_file = fopen("rec.yuv", "wb"); +#endif #if 0 framepsnr = fopen("framepsnr.stt", "a"); @@ -2263,8 +2292,8 @@ void vp8_remove_compressor(VP8_PTR *ptr) #ifdef MODE_STATS { extern int count_mb_seg[4]; - FILE *f = fopen("modes.stt", "a"); - double dr = (double)cpi->oxcf.frame_rate * (double)bytes * (double)8 / (double)count / (double)1000 ; + FILE *f = fopen("modes.stt", "w"); + double dr = (double)cpi->oxcf.frame_rate * (double)cpi->bytes * (double)8 / (double)cpi->count / (double)1000 ; fprintf(f, "intra_mode in Intra Frames:\n"); fprintf(f, "Y: %8d, %8d, %8d, %8d, %8d\n", y_modes[0], y_modes[1], y_modes[2], y_modes[3], y_modes[4]); fprintf(f, "UV:%8d, %8d, %8d, %8d\n", uv_modes[0], uv_modes[1], uv_modes[2], uv_modes[3]); @@ -2278,6 +2307,9 @@ void vp8_remove_compressor(VP8_PTR *ptr) fprintf(f, "\n"); } +#if CONFIG_SEGMENTATION + fprintf(f, "Segments:%8d, %8d, %8d, %8d\n", segment_modes_intra[0], segment_modes_intra[1], segment_modes_intra[2], segment_modes_intra[3]); +#endif fprintf(f, "Modes in Inter Frames:\n"); fprintf(f, "Y: %8d, %8d, %8d, %8d, %8d, %8d, %8d, %8d, %8d, %8d\n", @@ -2297,8 +2329,9 @@ void vp8_remove_compressor(VP8_PTR *ptr) fprintf(f, "P:%8d, %8d, %8d, %8d\n", count_mb_seg[0], count_mb_seg[1], count_mb_seg[2], count_mb_seg[3]); fprintf(f, "PB:%8d, %8d, %8d, %8d\n", inter_b_modes[LEFT4X4], inter_b_modes[ABOVE4X4], inter_b_modes[ZERO4X4], inter_b_modes[NEW4X4]); - - +#if CONFIG_SEGMENTATION + fprintf(f, "Segments:%8d, %8d, %8d, %8d\n", segment_modes_inter[0], segment_modes_inter[1], segment_modes_inter[2], segment_modes_inter[3]); +#endif fclose(f); } #endif @@ -2386,6 +2419,9 @@ void vp8_remove_compressor(VP8_PTR *ptr) #ifdef OUTPUT_YUV_SRC fclose(yuv_file); #endif +#ifdef OUTPUT_YUV_REC + fclose(yuv_rec_file); +#endif #if 0 @@ -2596,10 +2632,9 @@ int vp8_update_entropy(VP8_PTR comp, int update) } -#if OUTPUT_YUV_SRC -void vp8_write_yuv_frame(const char *name, YV12_BUFFER_CONFIG *s) +#ifdef OUTPUT_YUV_SRC +void vp8_write_yuv_frame(YV12_BUFFER_CONFIG *s) { - FILE *yuv_file = fopen(name, "ab"); unsigned char *src = s->y_buffer; int h = s->y_height; @@ -2629,8 +2664,42 @@ void vp8_write_yuv_frame(const char *name, YV12_BUFFER_CONFIG *s) src += s->uv_stride; } while (--h); +} +#endif - fclose(yuv_file); +#ifdef OUTPUT_YUV_REC +void vp8_write_yuv_rec_frame(VP8_COMMON *cm) +{ + YV12_BUFFER_CONFIG *s = cm->frame_to_show; + unsigned char *src = s->y_buffer; + int h = cm->Height; + + do + { + fwrite(src, s->y_width, 1, yuv_rec_file); + src += s->y_stride; + } + while (--h); + + src = s->u_buffer; + h = (cm->Height+1)/2; + + do + { + fwrite(src, s->uv_width, 1, yuv_rec_file); + src += s->uv_stride; + } + while (--h); + + src = s->v_buffer; + h = (cm->Height+1)/2; + + do + { + fwrite(src, s->uv_width, 1, yuv_rec_file); + src += s->uv_stride; + } + while (--h); } #endif @@ -3311,6 +3380,10 @@ static void encode_frame_to_data_rate // Test code for segmentation of gf/arf (0,0) //segmentation_test_function((VP8_PTR) cpi); +#if CONFIG_SEGMENTATION + cpi->mb.e_mbd.segmentation_enabled = 1; + cpi->mb.e_mbd.update_mb_segmentation_map = 1; +#endif #if CONFIG_REALTIME_ONLY if(cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME) @@ -4536,14 +4609,8 @@ static void encode_frame_to_data_rate fclose(recon_file); } #endif -#if 0 - // DEBUG - if(cm->current_video_frame>173 && cm->current_video_frame<178) - { - char filename[512]; - sprintf(filename, "enc%04d.yuv", (int) cm->current_video_frame); - vp8_write_yuv_frame(filename, cm->frame_to_show); - } +#ifdef OUTPUT_YUV_REC + vp8_write_yuv_rec_frame(cm); #endif } @@ -4898,7 +4965,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon } else #endif - encode_frame_to_data_rate(cpi, size, dest, frame_flags); + encode_frame_to_data_rate(cpi, size, dest, frame_flags); if (cpi->compressor_speed == 2) { diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index f75f6cb57..3123315c4 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -193,7 +193,11 @@ typedef struct typedef struct { MACROBLOCK mb; +#if CONFIG_SEGMENTATION + int segment_counts[MAX_MB_SEGMENTS + 8]; +#else int segment_counts[MAX_MB_SEGMENTS]; +#endif int totalrate; } MB_ROW_COMP; @@ -403,6 +407,11 @@ typedef struct VP8_COMP //save vp8_tree_probs_from_distribution result for each frame to avoid repeat calculation vp8_prob frame_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; unsigned int frame_branch_ct [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; +#if CONFIG_T8X8 + unsigned int coef_counts_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ + vp8_prob frame_coef_probs_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; + unsigned int frame_branch_ct_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; +#endif int gfu_boost; int kf_boost; @@ -461,6 +470,10 @@ typedef struct VP8_COMP int gf_update_recommended; int skip_true_count; int skip_false_count; +#if CONFIG_T8X8
+ int t4x4_count;
+ int t8x8_count;
+#endif unsigned char *segmentation_map; signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; // Segment data (can be deltas or absolute values) diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c index 503d24123..328eabbf9 100644 --- a/vp8/encoder/quantize.c +++ b/vp8/encoder/quantize.c @@ -16,6 +16,10 @@ #include "quantize.h" #include "vp8/common/quant_common.h" +#ifdef ENC_DEBUG +extern int enc_debug; +#endif + #define EXACT_QUANT #ifdef EXACT_FASTQUANT @@ -77,7 +81,11 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) short *qcoeff_ptr = d->qcoeff; short *dqcoeff_ptr = d->dqcoeff; short *dequant_ptr = d->dequant; +#if CONFIG_T8X8 + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); +#endif eob = -1; for (i = 0; i < 16; i++) { @@ -267,7 +275,8 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d) d->eob = eob + 1; } -#endif +#endif //EXACT_QUANT + void vp8_quantize_mby_c(MACROBLOCK *x) { @@ -301,6 +310,592 @@ void vp8_quantize_mbuv_c(MACROBLOCK *x) x->quantize_b(&x->block[i], &x->e_mbd.block[i]); } +#if CONFIG_T8X8 + +#ifdef EXACT_FASTQUANT +void vp8_fast_quantize_b_2x2_c(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *quant_shift_ptr = b->quant_shift; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + //double q2nd = 4; + + + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); + + eob = -1; + + for (i = 0; i < 4; i++) + { + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + //zbin = zbin_ptr[rc]/q2nd ; + zbin = zbin_ptr[rc] ; + + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //x += (round_ptr[rc]/q2nd); + x += (round_ptr[rc]); + //y = ((int)((int)(x * quant_ptr[rc] * q2nd) >> 16) + x) + // >> quant_shift_ptr[rc]; // quantize (x) + y = ((int)((int)(x * quant_ptr[rc]) >> 16) + x) + >> quant_shift_ptr[rc]; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + } + } + } + d->eob = eob + 1; +} + +void vp8_fast_quantize_b_8x8_c(BLOCK *b, BLOCKD *d)// only ac and dc difference, no difference among ac +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *quant_shift_ptr = b->quant_shift; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + //double q1st = 2; + vpx_memset(qcoeff_ptr, 0, 64*sizeof(short)); + vpx_memset(dqcoeff_ptr, 0, 64*sizeof(short)); + + eob = -1; + + for (i = 0; i < 64; i++) + { + rc = vp8_default_zig_zag1d_8x8[i]; + z = coeff_ptr[rc]; + //zbin = zbin_ptr[rc!=0]/q1st ; + zbin = zbin_ptr[rc!=0] ; + + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //x += round_ptr[rc]/q1st; + //y = ((int)(((int)((x * quant_ptr[rc!=0] * q1st)) >> 16) + x)) + // >> quant_shift_ptr[rc!=0]; // quantize (x) + x += round_ptr[rc]; + y = ((int)(((int)((x * quant_ptr[rc!=0])) >> 16) + x)) + >> quant_shift_ptr[rc!=0]; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0] / q1st; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + } + } + } + d->eob = eob + 1; +} + +#else + +void vp8_fast_quantize_b_2x2_c(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + //double q2nd = 4; + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); + + eob = -1; + + for (i = 0; i < 4; i++) + { + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + //zbin = zbin_ptr[rc]/q2nd; + zbin = zbin_ptr[rc]; + + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //y = ((int)((x + round_ptr[rc]/q2nd) * quant_ptr[rc] * q2nd)) >> 16; // quantize (x) + y = ((int)((x + round_ptr[rc]) * quant_ptr[rc])) >> 16; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc] / q2nd; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + } + } + } + d->eob = eob + 1; + //if (d->eob > 4) printf("Flag Fast 2 (%d)\n", d->eob); +} + +void vp8_fast_quantize_b_8x8_c(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + //double q1st = 2; + vpx_memset(qcoeff_ptr, 0, 64*sizeof(short)); + vpx_memset(dqcoeff_ptr, 0, 64*sizeof(short)); + + eob = -1; + + for (i = 0; i < 64; i++) + { + + rc = vp8_default_zig_zag1d_8x8[i]; + z = coeff_ptr[rc]; + //zbin = zbin_ptr[rc!=0]/q1st ; + zbin = zbin_ptr[rc!=0] ; + + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //y = ((int)((x + round_ptr[rc!=0] / q1st) * quant_ptr[rc!=0] * q1st)) >> 16; + y = ((int)((x + round_ptr[rc!=0]) * quant_ptr[rc!=0])) >> 16; + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0] / q1st; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; // dequantized value + if (y) + { + eob = i; // last nonzero coeffs + } + } + } + d->eob = eob + 1; +} + +#endif //EXACT_FASTQUANT + +#ifdef EXACT_QUANT +void vp8_regular_quantize_b_2x2(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *zbin_boost_ptr = b->zrun_zbin_boost; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + unsigned char *quant_shift_ptr = b->quant_shift; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + short zbin_oq_value = b->zbin_extra; + //double q2nd = 4; + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); + + eob = -1; + + for (i = 0; i < 4; i++) + { + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + + //zbin = (zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value)/q2nd; + zbin = (zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value); + + zbin_boost_ptr ++; + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //x += (round_ptr[rc]/q2nd); + x += (round_ptr[rc]); + y = ((int)((int)(x * quant_ptr[rc]) >> 16) + x) + >> quant_shift_ptr[rc]; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc]/q2nd; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength + } + } + } + + d->eob = eob + 1; +} + +void vp8_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *zbin_boost_ptr = b->zrun_zbin_boost; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + unsigned char *quant_shift_ptr = b->quant_shift; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + short zbin_oq_value = b->zbin_extra; + //double q1st = 2; + + vpx_memset(qcoeff_ptr, 0, 64*sizeof(short)); + vpx_memset(dqcoeff_ptr, 0, 64*sizeof(short)); + + eob = -1; + + for (i = 0; i < 64; i++) + { + + rc = vp8_default_zig_zag1d_8x8[i]; + z = coeff_ptr[rc]; + + //zbin = (zbin_ptr[rc!=0] + *zbin_boost_ptr + zbin_oq_value)/q1st; + zbin = (zbin_ptr[rc!=0] + *zbin_boost_ptr + zbin_oq_value); + + zbin_boost_ptr ++; + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //x += (round_ptr[rc!=0]/q1st); + //y = ((int)(((int)(x * quant_ptr[rc!=0] * q1st) >> 16) + x)) + // >> quant_shift_ptr[rc!=0]; // quantize (x) + x += (round_ptr[rc!=0]); + y = ((int)(((int)(x * quant_ptr[rc!=0]) >> 16) + x)) + >> quant_shift_ptr[rc!=0]; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0] / q1st; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength + } + } + } + + d->eob = eob + 1; +} + +void vp8_strict_quantize_b_2x2(BLOCK *b, BLOCKD *d) +{ + int i; + int rc; + int eob; + int x; + int y; + int z; + int sz; + short *coeff_ptr; + short *quant_ptr; + unsigned char *quant_shift_ptr; + short *qcoeff_ptr; + short *dqcoeff_ptr; + short *dequant_ptr; + //double q2nd = 4; + coeff_ptr = b->coeff; + quant_ptr = b->quant; + quant_shift_ptr = b->quant_shift; + qcoeff_ptr = d->qcoeff; + dqcoeff_ptr = d->dqcoeff; + dequant_ptr = d->dequant; + eob = - 1; + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); + for (i = 0; i < 4; i++) + { + int dq; + int round; + + /*TODO: These arrays should be stored in zig-zag order.*/ + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + //z = z * q2nd; + //dq = dequant_ptr[rc]/q2nd; + dq = dequant_ptr[rc]; + round = dq >> 1; + /* Sign of z. */ + sz = -(z < 0); + x = (z + sz) ^ sz; + x += round; + if (x >= dq) + { + /* Quantize x */ + y = (((x * quant_ptr[rc]) >> 16) + x) >> quant_shift_ptr[rc]; + /* Put the sign back. */ + x = (y + sz) ^ sz; + /* Save * the * coefficient and its dequantized value. */ + qcoeff_ptr[rc] = x; + dqcoeff_ptr[rc] = x * dq; + /* Remember the last non-zero coefficient. */ + if (y) + eob = i; + } + } + + d->eob = eob + 1; +} + +void vp8_strict_quantize_b_8x8(BLOCK *b, BLOCKD *d) +{ + int i; + int rc; + int eob; + int x; + int y; + int z; + int sz; + short *coeff_ptr; + short *quant_ptr; + unsigned char *quant_shift_ptr; + short *qcoeff_ptr; + short *dqcoeff_ptr; + short *dequant_ptr; + //double q1st = 2; + printf("call strict quantizer\n"); + coeff_ptr = b->coeff; + quant_ptr = b->quant; + quant_shift_ptr = b->quant_shift; + qcoeff_ptr = d->qcoeff; + dqcoeff_ptr = d->dqcoeff; + dequant_ptr = d->dequant; + eob = - 1; + vpx_memset(qcoeff_ptr, 0, 64*sizeof(short)); + vpx_memset(dqcoeff_ptr, 0, 64*sizeof(short)); + for (i = 0; i < 64; i++) + { + int dq; + int round; + + /*TODO: These arrays should be stored in zig-zag order.*/ + rc = vp8_default_zig_zag1d_8x8[i]; + z = coeff_ptr[rc]; + //z = z * q1st; + //dq = dequant_ptr[rc!=0]/q1st; + dq = dequant_ptr[rc!=0]; + round = dq >> 1; + /* Sign of z. */ + sz = -(z < 0); + x = (z + sz) ^ sz; + x += round; + if (x >= dq) + { + /* Quantize x. */ + y = ((int)(((int)((x * quant_ptr[rc!=0])) >> 16) + x)) >> quant_shift_ptr[rc!=0]; + /* Put the sign back. */ + x = (y + sz) ^ sz; + /* Save the coefficient and its dequantized value. * */ + qcoeff_ptr[rc] = x; + dqcoeff_ptr[rc] = x * dq; + /* Remember the last non-zero coefficient. */ + if (y) + eob = i; + } + } + d->eob = eob + 1; +} + +#else + +void vp8_regular_quantize_b_2x2(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *zbin_boost_ptr = b->zrun_zbin_boost; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + short zbin_oq_value = b->zbin_extra; + //double q2nd = 4; + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); + + eob = -1; + for (i = 0; i < 4; i++) + { + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + //zbin = (zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value)/q2nd; + zbin = (zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value); + zbin_boost_ptr ++; + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //y = (((x + round_ptr[rc]/q2nd) * quant_ptr[rc]*q2nd)) >> 16; // quantize (x) + y = (((x + round_ptr[rc]) * quant_ptr[rc])) >> 16; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc]/q2nd; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength + } + } + } + + d->eob = eob + 1; +} + +void vp8_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *zbin_boost_ptr = b->zrun_zbin_boost; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + short zbin_oq_value = b->zbin_extra; + //double q1st = 2; + vpx_memset(qcoeff_ptr, 0, 64*sizeof(short)); + vpx_memset(dqcoeff_ptr, 0, 64*sizeof(short)); + + eob = -1; + for (i = 0; i < 64; i++) + { + + rc = vp8_default_zig_zag1d_8x8[i]; + z = coeff_ptr[rc]; + //zbin = (zbin_ptr[rc!=0] + *zbin_boost_ptr + zbin_oq_value)/q1st; + zbin = (zbin_ptr[rc!=0] + *zbin_boost_ptr + zbin_oq_value); + zbin_boost_ptr ++; + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //y = ((x + round_ptr[rc!=0]/q1st) * quant_ptr[rc!=0] * q1st) >> 16; + y = ((x + round_ptr[rc!=0]) * quant_ptr[rc!=0]) >> 16; + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]/q1st; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength + } + } + } + d->eob = eob + 1; +} + +#endif //EXACT_QUANT + +void vp8_quantize_mby_8x8(MACROBLOCK *x) +{ + int i; + int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED + && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); + for(i = 0; i < 16; i ++) + { + x->e_mbd.block[i].eob = 0; + } + x->e_mbd.block[24].eob = 0; + for (i = 0; i < 16; i+=4) + x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]); + + if (has_2nd_order) + x->quantize_b_2x2(&x->block[24], &x->e_mbd.block[24]); + +} + +void vp8_quantize_mb_8x8(MACROBLOCK *x) +{ + int i; + int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED + && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); + for(i = 0; i < 25; i ++) + { + x->e_mbd.block[i].eob = 0; + } + for (i = 0; i < 24; i+=4) + x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]); + + if (has_2nd_order) + x->quantize_b_2x2(&x->block[24], &x->e_mbd.block[24]); +} + +void vp8_quantize_mbuv_8x8(MACROBLOCK *x) +{ + int i; + + for(i = 16; i < 24; i ++) + { + x->e_mbd.block[i].eob = 0; + } + for (i = 16; i < 24; i+=4) + x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]); +} + +#endif //CONFIG_T8X8 + /* quantize_b_pair function pointer in MACROBLOCK structure is set to one of * these two C functions if corresponding optimized routine is not available. * NEON optimized version implements currently the fast quantization for pair diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h index f1f0156d8..1a2bad667 100644 --- a/vp8/encoder/quantize.h +++ b/vp8/encoder/quantize.h @@ -45,6 +45,27 @@ extern prototype_quantize_block_pair(vp8_quantize_quantb_pair); #define vp8_quantize_fastquantb vp8_fast_quantize_b_c #endif extern prototype_quantize_block(vp8_quantize_fastquantb); +#if CONFIG_T8X8 +#ifndef vp8_quantize_quantb_8x8 +#define vp8_quantize_quantb_8x8 vp8_regular_quantize_b_8x8 +#endif +extern prototype_quantize_block(vp8_quantize_quantb_8x8); + +#ifndef vp8_quantize_fastquantb_8x8 +#define vp8_quantize_fastquantb_8x8 vp8_fast_quantize_b_8x8_c +#endif +extern prototype_quantize_block(vp8_quantize_fastquantb_8x8); + +#ifndef vp8_quantize_quantb_2x2 +#define vp8_quantize_quantb_2x2 vp8_regular_quantize_b_2x2 +#endif +extern prototype_quantize_block(vp8_quantize_quantb_2x2); + +#ifndef vp8_quantize_fastquantb_2x2 +#define vp8_quantize_fastquantb_2x2 vp8_fast_quantize_b_2x2_c +#endif +extern prototype_quantize_block(vp8_quantize_fastquantb_2x2); +#endif #ifndef vp8_quantize_fastquantb_pair #define vp8_quantize_fastquantb_pair vp8_fast_quantize_b_pair_c @@ -56,6 +77,12 @@ typedef struct prototype_quantize_block(*quantb); prototype_quantize_block_pair(*quantb_pair); prototype_quantize_block(*fastquantb); +#if CONFIG_T8X8 + prototype_quantize_block(*quantb_8x8); + prototype_quantize_block(*fastquantb_8x8); + prototype_quantize_block(*quantb_2x2); + prototype_quantize_block(*fastquantb_2x2); +#endif prototype_quantize_block_pair(*fastquantb_pair); } vp8_quantize_rtcd_vtable_t; @@ -81,7 +108,10 @@ extern prototype_quantize_mb(vp8_quantize_mby); #endif extern void vp8_strict_quantize_b(BLOCK *b,BLOCKD *d); - +#if CONFIG_T8X8 +extern void vp8_strict_quantize_b_8x8(BLOCK *b,BLOCKD *d); +extern void vp8_strict_quantize_b_2x2(BLOCK *b,BLOCKD *d); +#endif struct VP8_COMP; extern void vp8_set_quantizer(struct VP8_COMP *cpi, int Q); extern void vp8cx_frame_init_quantizer(struct VP8_COMP *cpi); diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index f1a3fb380..952977094 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -197,7 +197,6 @@ static int rdmult_lut[QINDEX_RANGE]= 61347,64827,69312,73947,78732,83667,89787,97200, }; #endif - /* values are now correlated to quantizer */ static int sad_per_bit16lut[QINDEX_RANGE] = { @@ -252,8 +251,6 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int QIndex) { int q; int i; - int *thresh; - int threshmult; vp8_clear_system_state(); //__asm emms; @@ -268,7 +265,6 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int QIndex) if (cpi->zbin_over_quant > 0) { double oq_factor; - double modq; // Experimental code using the same basic equation as used for Q above // The units of cpi->zbin_over_quant are 1/128 of Q bin size @@ -1055,7 +1051,6 @@ static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels } - static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0}; diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h index 95134cb81..ea04cbf25 100644 --- a/vp8/encoder/rdopt.h +++ b/vp8/encoder/rdopt.h @@ -13,6 +13,7 @@ #define __INC_RDOPT_H #define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) +#define RDCOST_8x8(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue); extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra); diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c index 15e7336b1..e1e124844 100644 --- a/vp8/encoder/tokenize.c +++ b/vp8/encoder/tokenize.c @@ -22,18 +22,27 @@ #ifdef ENTROPY_STATS _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#if CONFIG_T8X8 +_int64 context_counters_8x8[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#endif #endif void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; +#if CONFIG_T8X8 +void vp8_stuff_mb_8x8(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; +#endif void vp8_fix_contexts(MACROBLOCKD *x); static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE*2]; const TOKENVALUE *vp8_dct_value_tokens_ptr; static int dct_value_cost[DCT_MAX_VALUE*2]; const int *vp8_dct_value_cost_ptr; -#if 0 -int skip_true_count = 0; -int skip_false_count = 0; + +#ifdef ENC_DEBUG +extern int mb_row_debug; +extern int mb_col_debug; +extern int enc_debug; #endif + static void fill_value_tokens() { @@ -93,6 +102,69 @@ static void fill_value_tokens() vp8_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE; } +#if CONFIG_T8X8 +static void tokenize2nd_order_b_8x8 +( + const BLOCKD *const b, + TOKENEXTRA **tp, + const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ + const FRAME_TYPE frametype, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, + VP8_COMP *cpi +) +{ + int pt; /* near block/prev token context index */ + int c = 0; /* start at DC */ + const int eob = b->eob; /* one beyond last nonzero coeff */ + TOKENEXTRA *t = *tp; /* store tokens starting here */ + int x; + const short *qcoeff_ptr = b->qcoeff; + VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); + + assert(eob<=4); + + do + { + const int band = vp8_coef_bands[c]; + + if (c < eob) + { + int rc = vp8_default_zig_zag1d[c]; + const int v = qcoeff_ptr[rc]; + + assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE)); + + t->Extra = vp8_dct_value_tokens_ptr[v].Extra; + x = vp8_dct_value_tokens_ptr[v].Token; + } + else + x = DCT_EOB_TOKEN; + + t->Token = x; + //printf("Token : %d\n", x); + t->context_tree = cpi->common.fc.coef_probs_8x8 [type] [band] [pt]; + + t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0)); + +#ifdef ENC_DEBUG + if (t->skip_eob_node && vp8_coef_encodings[x].Len==1) + printf("Trouble 2 x=%d Len=%d skip=%d eob=%d c=%d band=%d type=%d: [%d %d %d]\n", + x, vp8_coef_encodings[x].Len, t->skip_eob_node, eob, c, band, type, + cpi->count, mb_row_debug, mb_col_debug); +#endif + + ++cpi->coef_counts_8x8 [type] [band] [pt] [x]; + } + while (pt = vp8_prev_token_class[x], ++t, c < eob && ++c < 4); + + *tp = t; + pt = (c != !type); /* 0 <-> all coeff data is zero */ + *a = *l = pt; + +} +#endif + static void tokenize2nd_order_b ( MACROBLOCKD *x, @@ -153,6 +225,66 @@ static void tokenize2nd_order_b *a = *l = pt; } +#if CONFIG_T8X8 +static void tokenize1st_order_b_8x8 +( + const BLOCKD *const b, + TOKENEXTRA **tp, + const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ + const FRAME_TYPE frametype, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, + ENTROPY_CONTEXT *a1, + ENTROPY_CONTEXT *l1, + VP8_COMP *cpi +) +{ + int pt; /* near block/prev token context index */ + int c = type ? 0 : 1; /* start at DC unless type 0 */ + const int eob = b->eob; /* one beyond last nonzero coeff */ + TOKENEXTRA *t = *tp; /* store tokens starting here */ + int x; + const short *qcoeff_ptr = b->qcoeff; + VP8_COMBINEENTROPYCONTEXTS_8x8(pt, *a, *l, *a1, *l1); + + do + { + const int band = vp8_coef_bands_8x8[c]; + + x = DCT_EOB_TOKEN; + + if (c < eob) + { + int rc = vp8_default_zig_zag1d_8x8[c]; + const int v = qcoeff_ptr[rc]; + + assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE)); + + t->Extra = vp8_dct_value_tokens_ptr[v].Extra; + x = vp8_dct_value_tokens_ptr[v].Token; + } + + t->Token = x; + t->context_tree = cpi->common.fc.coef_probs_8x8 [type] [band] [pt]; + + t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0)); + +#ifdef ENC_DEBUG + if (t->skip_eob_node && vp8_coef_encodings[x].Len==1) + printf("Trouble 1 x=%d Len=%d skip=%d eob=%d c=%d band=%d type=%d: [%d %d %d]\n", x, vp8_coef_encodings[x].Len, t->skip_eob_node, eob, c, band, type, cpi->count, mb_row_debug, mb_col_debug); +#endif + + ++cpi->coef_counts_8x8 [type] [band] [pt] [x]; + } + while (pt = vp8_prev_token_class[x], ++t, c < eob && ++c < 64); + + *tp = t; + pt = (c != !type); /* 0 <-> all coeff data is zero */ + *a = *l = pt; +} + +#endif + static void tokenize1st_order_b ( @@ -293,22 +425,59 @@ static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block) return skip; } +#if CONFIG_T8X8 +static int mb_is_skippable_8x8(MACROBLOCKD *x) +{ + int has_y2_block; + int skip = 1; + int i = 0; + + has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED + && x->mode_info_context->mbmi.mode != SPLITMV); + if (has_y2_block) + { + for (i = 0; i < 16; i+=4) + skip &= (x->block[i].eob < 2); + } + + for (; i < 24 + has_y2_block; i+=4) + skip &= (!x->block[i].eob); + + return skip; +} +#endif void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) { int plane_type; int has_y2_block; + int b; has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV); - x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x, has_y2_block); + x->mode_info_context->mbmi.mb_skip_coeff = +#if CONFIG_T8X8 + (x->mode_info_context->mbmi.segment_id >= 2 ? + mb_is_skippable_8x8(x) : + mb_is_skippable(x, has_y2_block)); +#else + mb_is_skippable(x, has_y2_block); +#endif + if (x->mode_info_context->mbmi.mb_skip_coeff) { cpi->skip_true_count++; if (!cpi->common.mb_no_coeff_skip) - vp8_stuff_mb(cpi, x, t) ; + { +#if CONFIG_T8X8 + if (x->mode_info_context->mbmi.segment_id >= 2) + vp8_stuff_mb_8x8(cpi, x, t) ; + else +#endif + vp8_stuff_mb(cpi, x, t) ; + } else { vp8_fix_contexts(x); @@ -322,13 +491,82 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) plane_type = 3; if(has_y2_block) { - tokenize2nd_order_b(x, t, cpi); - plane_type = 0; +#if CONFIG_T8X8 + if (x->mode_info_context->mbmi.segment_id >= 2) + { + ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context; + ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; + tokenize2nd_order_b_8x8(x->block + 24, t, 1, x->frame_type, + A + vp8_block2above[24], L + vp8_block2left[24], cpi); + } + else +#endif + tokenize2nd_order_b(x, t, cpi); + + plane_type = 0; } +#if CONFIG_T8X8 + if (x->mode_info_context->mbmi.segment_id >= 2) + { + ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context; + ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; + for (b = 0; b < 16; b+=4) + { + tokenize1st_order_b_8x8(x->block + b, t, plane_type, x->frame_type, + A + vp8_block2above[b], + L + vp8_block2left[b], + A + vp8_block2above[b+1], + L + vp8_block2left[b+4], + cpi); + /* *(A + vp8_block2above[b+1]) = *(A + vp8_block2above[b+2]) = *(A + vp8_block2above[b+3]) = + *(A + vp8_block2above[b]); + *(L + vp8_block2left[b+1]) = *(L + vp8_block2left[b+2]) = *(L + vp8_block2left[b+3]) = + *(L + vp8_block2left[b]);*/ + // build coeff context for 8x8 transform + if(b==0) + { + *(A + vp8_block2above[1]) = *(A + vp8_block2above[4]) = *(A + vp8_block2above[5]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[1]) = *(L + vp8_block2left[4]) = *(L + vp8_block2left[5]) = *(L + vp8_block2left[b]); + } + else if(b==4) + { + *(A + vp8_block2above[2]) = *(A + vp8_block2above[3]) = *(A + vp8_block2above[6]) = *(A + vp8_block2above[7]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[2]) = *(L + vp8_block2left[3]) = *(L + vp8_block2left[6]) = *(L + vp8_block2left[7]) = *(L + vp8_block2left[b]); + *(A + vp8_block2above[4]) = *(A + vp8_block2above[1]); + *(L + vp8_block2left[4]) = *(L + vp8_block2left[1]); + } + else if(b==8) + { + *(A + vp8_block2above[9]) = *(A + vp8_block2above[12]) = *(A + vp8_block2above[13]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[9]) = *(L + vp8_block2left[12]) = *(L + vp8_block2left[13]) = *(L + vp8_block2left[b]); + } + else if(b==12) + { + *(A + vp8_block2above[10]) = *(A + vp8_block2above[11]) = *(A + vp8_block2above[14]) = *(A + vp8_block2above[15]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[10]) = *(L + vp8_block2left[11]) = *(L + vp8_block2left[14]) = *(L + vp8_block2left[15]) = *(L + vp8_block2left[b]); + *(A + vp8_block2above[12]) = *(A + vp8_block2above[8]); + *(L + vp8_block2left[12]) = *(L + vp8_block2left[8]); + } - tokenize1st_order_b(x, t, plane_type, cpi); + } + for (b = 16; b < 24; b+=4) { + tokenize1st_order_b_8x8(x->block + b, t, 2, x->frame_type, + A + vp8_block2above[b], + L + vp8_block2left[b], + A + vp8_block2above[b+1], + L + vp8_block2left[b+2], + cpi); + *(A + vp8_block2above[b+1]) = *(A + vp8_block2above[b+2]) = *(A + vp8_block2above[b+3]) = + *(A + vp8_block2above[b]); + *(L + vp8_block2left[b+1]) = *(L + vp8_block2left[b+2]) = *(L + vp8_block2left[b+3]) = + *(L + vp8_block2left[b]); + } + } + else +#endif + tokenize1st_order_b(x, t, plane_type, cpi); } @@ -337,6 +575,9 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) void init_context_counters(void) { vpx_memset(context_counters, 0, sizeof(context_counters)); +#if CONFIG_T8X8 + vpx_memset(context_counters_8x8, 0, sizeof(context_counters_8x8)); +#endif } void print_context_counters() @@ -381,6 +622,55 @@ void print_context_counters() const _int64 x = context_counters [type] [band] [pt] [t]; const int y = (int) x; + assert(x == (INT64) y); /* no overflow handling yet */ + fprintf(f, "%s %d", Comma(t), y); + + } + while (++t < MAX_ENTROPY_TOKENS); + + fprintf(f, "}"); + } + while (++pt < PREV_COEF_CONTEXTS); + + fprintf(f, "\n }"); + + } + while (++band < COEF_BANDS); + + fprintf(f, "\n }"); + } + while (++type < BLOCK_TYPES); + +#if CONFIG_T8X8 + fprintf(f, "int Contexts_8x8[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];\n\n"); + + fprintf(f, "const int default_contexts_8x8[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {"); + + type = 0; + + do + { + fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); + + band = 0; + + do + { + fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); + + pt = 0; + + do + { + fprintf(f, "%s\n {", Comma(pt)); + + t = 0; + + do + { + const _int64 x = context_counters [type] [band] [pt] [t]; + const int y = (int) x; + assert(x == (_int64) y); /* no overflow handling yet */ fprintf(f, "%s %d", Comma(t), y); @@ -399,6 +689,7 @@ void print_context_counters() fprintf(f, "\n }"); } while (++type < BLOCK_TYPES); +#endif fprintf(f, "\n};\n"); fclose(f); @@ -411,6 +702,188 @@ void vp8_tokenize_initialize() fill_value_tokens(); } +#if CONFIG_T8X8 +static __inline void stuff2nd_order_b_8x8 +( + const BLOCKD *const b, + TOKENEXTRA **tp, + const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ + const FRAME_TYPE frametype, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, + VP8_COMP *cpi +) +{ + int pt; /* near block/prev token context index */ + TOKENEXTRA *t = *tp; /* store tokens starting here */ + VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); + (void) frametype; + (void) type; + (void) b; + + t->Token = DCT_EOB_TOKEN; + t->context_tree = cpi->common.fc.coef_probs_8x8 [1] [0] [pt]; + //t->section = 11; + t->skip_eob_node = 0; + ++cpi->coef_counts_8x8 [1] [0] [pt] [DCT_EOB_TOKEN]; + ++t; + + *tp = t; + pt = 0; + *a = *l = pt; + +} + +static __inline void stuff1st_order_b_8x8 +( + const BLOCKD *const b, + TOKENEXTRA **tp, + const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ + const FRAME_TYPE frametype, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, + ENTROPY_CONTEXT *a1, + ENTROPY_CONTEXT *l1, + VP8_COMP *cpi +) +{ + int pt; /* near block/prev token context index */ + TOKENEXTRA *t = *tp; /* store tokens starting here */ + VP8_COMBINEENTROPYCONTEXTS_8x8(pt, *a, *l, *a1, *l1); + (void) frametype; + (void) type; + (void) b; + + t->Token = DCT_EOB_TOKEN; + t->context_tree = cpi->common.fc.coef_probs_8x8 [0] [1] [pt]; + //t->section = 8; + t->skip_eob_node = 0; + ++cpi->coef_counts_8x8 [0] [1] [pt] [DCT_EOB_TOKEN]; + ++t; + *tp = t; + pt = 0; /* 0 <-> all coeff data is zero */ + *a = *l = pt; + + +} + +static __inline +void stuff1st_order_buv_8x8 +( + const BLOCKD *const b, + TOKENEXTRA **tp, + const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ + const FRAME_TYPE frametype, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, + ENTROPY_CONTEXT *a1, + ENTROPY_CONTEXT *l1, + VP8_COMP *cpi +) +{ + int pt; /* near block/prev token context index */ + TOKENEXTRA *t = *tp; /* store tokens starting here */ + VP8_COMBINEENTROPYCONTEXTS_8x8(pt, *a, *l, *a1, *l1); + (void) frametype; + (void) type; + (void) b; + + t->Token = DCT_EOB_TOKEN; + t->context_tree = cpi->common.fc.coef_probs_8x8 [2] [0] [pt]; + //t->section = 13; + t->skip_eob_node = 0; + ++cpi->coef_counts_8x8[2] [0] [pt] [DCT_EOB_TOKEN]; + ++t; + *tp = t; + pt = 0; /* 0 <-> all coeff data is zero */ + *a = *l = pt; + +} + +void vp8_stuff_mb_8x8(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) +{ + ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context; + ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; + int plane_type; + int b; + + stuff2nd_order_b_8x8(x->block + 24, t, 1, x->frame_type, + A + vp8_block2above[24], L + vp8_block2left[24], cpi); + plane_type = 0; + + for (b = 0; b < 16; b+=4) { + stuff1st_order_b_8x8(x->block + b, t, plane_type, x->frame_type, + A + vp8_block2above[b], + L + vp8_block2left[b], + A + vp8_block2above[b+1], + L + vp8_block2left[b+4], + cpi); + // build coeff context for 8x8 transform + if(b==0) + { + *(A + vp8_block2above[1]) = *(A + vp8_block2above[4]) = *(A + vp8_block2above[5]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[1]) = *(L + vp8_block2left[4]) = *(L + vp8_block2left[5]) = *(L + vp8_block2left[b]); + } + else if(b==4) + { + *(A + vp8_block2above[2]) = *(A + vp8_block2above[3]) = *(A + vp8_block2above[6]) = *(A + vp8_block2above[7]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[2]) = *(L + vp8_block2left[3]) = *(L + vp8_block2left[6]) = *(L + vp8_block2left[7]) = *(L + vp8_block2left[b]); + *(A + vp8_block2above[4]) = *(A + vp8_block2above[1]); + *(L + vp8_block2left[4]) = *(L + vp8_block2left[1]); + } + else if(b==8) + { + *(A + vp8_block2above[9]) = *(A + vp8_block2above[12]) = *(A + vp8_block2above[13]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[9]) = *(L + vp8_block2left[12]) = *(L + vp8_block2left[13]) = *(L + vp8_block2left[b]); + + } + else if(b==12) + { + *(A + vp8_block2above[10]) = *(A + vp8_block2above[11]) = *(A + vp8_block2above[14]) = *(A + vp8_block2above[15]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[10]) = *(L + vp8_block2left[11]) = *(L + vp8_block2left[14]) = *(L + vp8_block2left[15]) = *(L + vp8_block2left[b]); + *(A + vp8_block2above[12]) = *(A + vp8_block2above[8]); + *(L + vp8_block2left[12]) = *(L + vp8_block2left[8]); + + } + + } + /* + for (b = 0; b < 16; b+=4) { + stuff1st_order_b_8x8(x->block + b, t, plane_type, x->frame_type, + A + vp8_block2above[b], + L + vp8_block2left[b], cpi); + *(A + vp8_block2above[b+1]) = *(A + vp8_block2above[b+2]) = *(A + vp8_block2above[b+3]) = + *(A + vp8_block2above[b]); + *(L + vp8_block2left[b+1]) = *(L + vp8_block2left[b+2]) = *(L + vp8_block2left[b+3]) = + *(L + vp8_block2left[b]); + } + */ + + for (b = 16; b < 24; b+=4) { + stuff1st_order_buv_8x8(x->block + b, t, 2, x->frame_type, + A + vp8_block2above[b], + L + vp8_block2left[b], + A + vp8_block2above[b+1], + L + vp8_block2left[b+2], + cpi); + *(A + vp8_block2above[b+1]) = *(A + vp8_block2above[b+2]) = *(A + vp8_block2above[b+3]) = + *(A + vp8_block2above[b]); + *(L + vp8_block2left[b+1]) = *(L + vp8_block2left[b+2]) = *(L + vp8_block2left[b+3]) = + *(L + vp8_block2left[b]); + } + /* + for (b = 16; b < 24; b+=4) { + stuff1st_order_buv_8x8(x->block + b, t, 2, x->frame_type, + A + vp8_block2above[b], + L + vp8_block2left[b], cpi); + *(A + vp8_block2above[b+1]) = *(A + vp8_block2above[b+2]) = *(A + vp8_block2above[b+3]) = + *(A + vp8_block2above[b]); + *(L + vp8_block2left[b+1]) = *(L + vp8_block2left[b+2]) = *(L + vp8_block2left[b+3]) = + *(L + vp8_block2left[b]); + } + */ +} +#endif static __inline void stuff2nd_order_b ( diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h index 04a8879cf..cd122f19c 100644 --- a/vp8/encoder/tokenize.h +++ b/vp8/encoder/tokenize.h @@ -38,8 +38,10 @@ void init_context_counters(); void print_context_counters(); extern _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#if CONFIG_T8X8 +extern _int64 context_counters_8x8[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#endif #endif - extern const int *vp8_dct_value_cost_ptr; /* TODO: The Token field should be broken out into a separate char array to * improve cache locality, since it's needed for costing when the rest of the |