diff options
38 files changed, 4029 insertions, 1271 deletions
@@ -249,6 +249,8 @@ EXPERIMENT_LIST=" useselectrefmv modelcoefprob loop_dering + implicit_compoundinter_weight + scatterscan " CONFIG_LIST=" external_build diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index e666b6c7e..383196904 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -50,7 +50,7 @@ const int vp8cx_base_skip_false_prob[128] = unsigned __int64 Sectionbits[500]; #endif -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS int intra_mode_stats[10][10][10]; static unsigned int tree_update_hist [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] [2]; extern unsigned int active_section; @@ -531,7 +531,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) vp8_convert_rfct_to_prob(cpi); -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 1; #endif @@ -580,7 +580,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) xd->mb_to_top_edge = -((mb_row * 16)) << 3; xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 9; #endif @@ -593,7 +593,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) if (rf == INTRA_FRAME) { vp8_write(w, 0, cpi->prob_intra_coded); -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 6; #endif write_ymode(w, mode, pc->fc.ymode_prob); @@ -633,13 +633,13 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) vp8_mv_ref_probs(mv_ref_p, ct); -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS accum_mv_refs(mode, ct); #endif } -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 3; #endif @@ -649,7 +649,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { case NEWMV: -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 5; #endif @@ -692,7 +692,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) if (blockmode == NEW4X4) { -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 11; #endif write_mv(w, &blockmv.as_mv, &best_mv, (const MV_CONTEXT *) mvc); @@ 
-769,7 +769,7 @@ static void write_kfmodes(VP8_COMP *cpi) const B_PREDICTION_MODE L = left_block_mode(m, i); const int bm = m->bmi[i].as_mode; -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS ++intra_mode_stats [A] [L] [bm]; #endif @@ -1154,7 +1154,7 @@ void vp8_update_coef_probs(VP8_COMP *cpi) #endif -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS ++ tree_update_hist [i][j][k][t] [u]; #endif @@ -1175,7 +1175,7 @@ void vp8_update_coef_probs(VP8_COMP *cpi) while (++t < ENTROPY_NODES); /* Accum token counts for generation of default statistics */ -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS t = 0; do @@ -1521,7 +1521,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest if (pc->frame_type != KEY_FRAME) vp8_write_bit(bc, pc->refresh_last_frame); -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS if (pc->frame_type == INTER_FRAME) active_section = 0; @@ -1544,7 +1544,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest vp8_update_coef_probs(cpi); #endif -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 2; #endif @@ -1555,7 +1555,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest { write_kfmodes(cpi); -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 8; #endif } @@ -1563,7 +1563,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest { pack_inter_mode_mvs(cpi); -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 1; #endif } @@ -1681,7 +1681,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest #endif } -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS void print_tree_update_probs() { int i, j, k, l; diff --git a/vp8/encoder/boolhuff.c b/vp8/encoder/boolhuff.c index 74770a276..3b0c03a14 100644 --- a/vp8/encoder/boolhuff.c +++ b/vp8/encoder/boolhuff.c @@ -16,7 +16,7 @@ unsigned __int64 Sectionbits[500]; #endif -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS unsigned int 
active_section = 0; #endif diff --git a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h index 830906306..39ab586b5 100644 --- a/vp8/encoder/boolhuff.h +++ b/vp8/encoder/boolhuff.h @@ -67,7 +67,7 @@ static void vp8_encode_bool(BOOL_CODER *br, int bit, int probability) unsigned int lowvalue = br->lowvalue; register unsigned int shift; -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS #if defined(SECTIONBITS_OUTPUT) if (bit) diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c index 0c43d0692..2a74ff4ae 100644 --- a/vp8/encoder/encodemv.c +++ b/vp8/encoder/encodemv.c @@ -16,7 +16,7 @@ #include <math.h> -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS extern unsigned int active_section; #endif @@ -359,7 +359,7 @@ void vp8_write_mvprobs(VP8_COMP *cpi) vp8_writer *const w = cpi->bc; MV_CONTEXT *mvc = cpi->common.fc.mvc; int flags[2] = {0, 0}; -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 4; #endif write_component_probs( @@ -374,7 +374,7 @@ void vp8_write_mvprobs(VP8_COMP *cpi) if (flags[0] || flags[1]) vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flags); -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 5; #endif } diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index a34af6428..2c59872f7 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -18,7 +18,7 @@ #include <math.h> #include "vp8/common/findnearmv.h" -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS static int mv_ref_ct [31] [4] [2]; static int mv_mode_cts [4] [2]; #endif @@ -1912,7 +1912,7 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS void print_mode_context(void) { FILE *f = fopen("modecont.c", "w"); @@ -1965,8 +1965,8 @@ void print_mode_context(void) fclose(f); } -/* MV ref count ENTROPY_STATS stats code */ -#ifdef ENTROPY_STATS +/* MV ref count VP8_ENTROPY_STATS stats code */ +#ifdef 
VP8_ENTROPY_STATS void init_mv_ref_counts() { vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct)); @@ -2020,6 +2020,6 @@ void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) } } -#endif/* END MV ref count ENTROPY_STATS stats code */ +#endif/* END MV ref count VP8_ENTROPY_STATS stats code */ #endif diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h index 890113f9a..e36c51543 100644 --- a/vp8/encoder/mcomp.h +++ b/vp8/encoder/mcomp.h @@ -15,7 +15,7 @@ #include "block.h" #include "vp8/common/variance.h" -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS extern void init_mv_ref_counts(); extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]); #endif diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 92f981857..124b1cb35 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -111,7 +111,7 @@ extern int skip_false_count; #endif -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS extern int intra_mode_stats[10][10][10]; #endif @@ -1806,7 +1806,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) else cpi->cyclic_refresh_map = (signed char *) NULL; -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS init_context_counters(); #endif @@ -1924,7 +1924,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) cpi->mb.rd_thresh_mult[i] = 128; } -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS init_mv_ref_counts(); #endif @@ -2061,7 +2061,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) #endif -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS print_context_counters(); print_tree_update_probs(); print_mode_context(); @@ -2243,7 +2243,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) } #endif -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS { int i, j, k; FILE *fmode = fopen("modecontext.c", "w"); diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c index 3b5268b61..11559a720 100644 --- a/vp8/encoder/tokenize.c +++ b/vp8/encoder/tokenize.c @@ -20,7 +20,7 @@ /* Global event counters used for accumulating statistics across several 
compressions, then generating context.c = initial stats. */ -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; #endif void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) ; @@ -413,7 +413,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) } -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS void init_context_counters(void) { diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h index c2d1438f9..1e6cea114 100644 --- a/vp8/encoder/tokenize.h +++ b/vp8/encoder/tokenize.h @@ -33,7 +33,7 @@ typedef struct int rd_cost_mby(MACROBLOCKD *); -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS void init_context_counters(); void print_context_counters(); diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 3dfef41b2..23d0bfd59 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -288,7 +288,11 @@ struct scale_factors { int y_den; int y_offset_q4; int y_step_q4; +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT + convolve_fn_t predict[2][2][8]; // horiz, vert, weight (0 - 7) +#else convolve_fn_t predict[2][2][2]; // horiz, vert, avg +#endif }; typedef struct macroblockd { diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c index b062e7dc7..3ab8bec7a 100644 --- a/vp9/common/vp9_convolve.c +++ b/vp9/common/vp9_convolve.c @@ -122,6 +122,78 @@ static void convolve_avg_horiz_c(const uint8_t *src, int src_stride, } } +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT + +static inline uint8_t combine_qtr(uint8_t a, uint8_t b) { + return (((a) + (b) * 3 + 2) >> 2); +} + +static inline uint8_t combine_3qtr(uint8_t a, uint8_t b) { + return (((a) * 3 + (b) + 2) >> 2); +} + +static inline uint8_t combine_1by8(uint8_t a, uint8_t b) { + return (((a) * 1 + (b) * 7 + 4) >> 3); +} + +static inline uint8_t combine_3by8(uint8_t a, uint8_t b) { + return (((a) * 3 + (b) * 5 + 4) >> 3); +} + +static inline uint8_t combine_5by8(uint8_t a, uint8_t b) 
{ + return (((a) * 5 + (b) * 3 + 4) >> 3); +} + +static inline uint8_t combine_7by8(uint8_t a, uint8_t b) { + return (((a) * 7 + (b) * 1 + 4) >> 3); +} + +// TODO(debargha): Implment with a separate weight parameter +static void convolve_wtd_horiz_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x0, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h, int taps, + uint8_t (*combine)(uint8_t a, uint8_t b)) { + int x, y, k, sum; + const int16_t *filter_x_base = filter_x0; + +#if ALIGN_FILTERS_256 + filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff); +#endif + + /* Adjust base pointer address for this source line */ + src -= taps / 2 - 1; + + for (y = 0; y < h; ++y) { + /* Pointer to filter to use */ + const int16_t *filter_x = filter_x0; + + /* Initial phase offset */ + int x0_q4 = (filter_x - filter_x_base) / taps; + int x_q4 = x0_q4; + + for (x = 0; x < w; ++x) { + /* Per-pixel src offset */ + int src_x = (x_q4 - x0_q4) >> 4; + + for (sum = 0, k = 0; k < taps; ++k) { + sum += src[src_x + k] * filter_x[k]; + } + sum += (VP9_FILTER_WEIGHT >> 1); + dst[x] = combine(dst[x], clip_pixel(sum >> VP9_FILTER_SHIFT)); + + /* Adjust source and filter to use for the next pixel */ + x_q4 += x_step_q4; + filter_x = filter_x_base + (x_q4 & 0xf) * taps; + } + src += src_stride; + dst += dst_stride; + } +} + +#endif + static void convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, @@ -207,6 +279,52 @@ static void convolve_avg_vert_c(const uint8_t *src, int src_stride, } } +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +static void convolve_wtd_vert_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y0, int y_step_q4, + int w, int h, int taps, + uint8_t (*combine)(uint8_t a, uint8_t b)) { + int x, y, k, sum; + + const int16_t *filter_y_base = 
filter_y0; + +#if ALIGN_FILTERS_256 + filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff); +#endif + + /* Adjust base pointer address for this source column */ + src -= src_stride * (taps / 2 - 1); + for (x = 0; x < w; ++x) { + /* Pointer to filter to use */ + const int16_t *filter_y = filter_y0; + + /* Initial phase offset */ + int y0_q4 = (filter_y - filter_y_base) / taps; + int y_q4 = y0_q4; + + for (y = 0; y < h; ++y) { + /* Per-pixel src offset */ + int src_y = (y_q4 - y0_q4) >> 4; + + for (sum = 0, k = 0; k < taps; ++k) { + sum += src[(src_y + k) * src_stride] * filter_y[k]; + } + sum += (VP9_FILTER_WEIGHT >> 1); + dst[y * dst_stride] = combine(dst[y * dst_stride], + clip_pixel(sum >> VP9_FILTER_SHIFT)); + + /* Adjust source and filter to use for the next pixel */ + y_q4 += y_step_q4; + filter_y = filter_y_base + (y_q4 & 0xf) * taps; + } + ++src; + ++dst; + } +} +#endif + static void convolve_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, @@ -285,6 +403,68 @@ void vp9_convolve8_avg_horiz_c(const uint8_t *src, int src_stride, w, h, 8); } +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +void vp9_convolve8_1by8_horiz_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_horiz_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_1by8); +} + +void vp9_convolve8_qtr_horiz_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_horiz_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_qtr); +} + +void vp9_convolve8_3by8_horiz_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + 
const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_horiz_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_3by8); +} + +void vp9_convolve8_5by8_horiz_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_horiz_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_5by8); +} + +void vp9_convolve8_3qtr_horiz_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_horiz_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_3qtr); +} + +void vp9_convolve8_7by8_horiz_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_horiz_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_7by8); +} +#endif + void vp9_convolve8_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, @@ -305,6 +485,68 @@ void vp9_convolve8_avg_vert_c(const uint8_t *src, int src_stride, w, h, 8); } +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +void vp9_convolve8_1by8_vert_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_vert_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_1by8); +} + +void vp9_convolve8_qtr_vert_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int 
w, int h) { + convolve_wtd_vert_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_qtr); +} + +void vp9_convolve8_3by8_vert_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_vert_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_3by8); +} + +void vp9_convolve8_5by8_vert_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_vert_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_5by8); +} + +void vp9_convolve8_3qtr_vert_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_vert_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_3qtr); +} + +void vp9_convolve8_7by8_vert_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_vert_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_7by8); +} +#endif + void vp9_convolve8_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, @@ -337,6 +579,140 @@ void vp9_convolve8_avg_c(const uint8_t *src, int src_stride, w, h); } +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +void vp9_convolve8_1by8_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + /* Fixed size intermediate buffer places limits on 
parameters. */ + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); + assert(w <= 16); + assert(h <= 16); + + vp9_convolve8(src, src_stride, + temp, 16, + filter_x, x_step_q4, + filter_y, y_step_q4, + w, h); + vp9_convolve_1by8(temp, 16, + dst, dst_stride, + NULL, 0, /* These unused parameter should be removed! */ + NULL, 0, /* These unused parameter should be removed! */ + w, h); +} + +void vp9_convolve8_qtr_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + /* Fixed size intermediate buffer places limits on parameters. */ + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); + assert(w <= 16); + assert(h <= 16); + + vp9_convolve8(src, src_stride, + temp, 16, + filter_x, x_step_q4, + filter_y, y_step_q4, + w, h); + vp9_convolve_qtr(temp, 16, + dst, dst_stride, + NULL, 0, /* These unused parameter should be removed! */ + NULL, 0, /* These unused parameter should be removed! */ + w, h); +} + +void vp9_convolve8_3by8_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + /* Fixed size intermediate buffer places limits on parameters. */ + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); + assert(w <= 16); + assert(h <= 16); + + vp9_convolve8(src, src_stride, + temp, 16, + filter_x, x_step_q4, + filter_y, y_step_q4, + w, h); + vp9_convolve_3by8(temp, 16, + dst, dst_stride, + NULL, 0, /* These unused parameter should be removed! */ + NULL, 0, /* These unused parameter should be removed! */ + w, h); +} + +void vp9_convolve8_5by8_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + /* Fixed size intermediate buffer places limits on parameters. 
*/ + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); + assert(w <= 16); + assert(h <= 16); + + vp9_convolve8(src, src_stride, + temp, 16, + filter_x, x_step_q4, + filter_y, y_step_q4, + w, h); + vp9_convolve_5by8(temp, 16, + dst, dst_stride, + NULL, 0, /* These unused parameter should be removed! */ + NULL, 0, /* These unused parameter should be removed! */ + w, h); +} + +void vp9_convolve8_3qtr_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + /* Fixed size intermediate buffer places limits on parameters. */ + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); + assert(w <= 16); + assert(h <= 16); + + vp9_convolve8(src, src_stride, + temp, 16, + filter_x, x_step_q4, + filter_y, y_step_q4, + w, h); + vp9_convolve_3qtr(temp, 16, + dst, dst_stride, + NULL, 0, /* These unused parameter should be removed! */ + NULL, 0, /* These unused parameter should be removed! */ + w, h); +} + +void vp9_convolve8_7by8_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + /* Fixed size intermediate buffer places limits on parameters. */ + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); + assert(w <= 16); + assert(h <= 16); + + vp9_convolve8(src, src_stride, + temp, 16, + filter_x, x_step_q4, + filter_y, y_step_q4, + w, h); + vp9_convolve_7by8(temp, 16, + dst, dst_stride, + NULL, 0, /* These unused parameter should be removed! */ + NULL, 0, /* These unused parameter should be removed! 
*/ + w, h); +} +#endif + void vp9_convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int filter_x_stride, @@ -374,3 +750,101 @@ void vp9_convolve_avg(const uint8_t *src, int src_stride, dst += dst_stride; } } + +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +void vp9_convolve_1by8(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int filter_x_stride, + const int16_t *filter_y, int filter_y_stride, + int w, int h) { + int x, y; + + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) { + dst[x] = combine_1by8(dst[x], src[x]); + } + src += src_stride; + dst += dst_stride; + } +} + +void vp9_convolve_qtr(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int filter_x_stride, + const int16_t *filter_y, int filter_y_stride, + int w, int h) { + int x, y; + + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) { + dst[x] = combine_qtr(dst[x], src[x]); + } + src += src_stride; + dst += dst_stride; + } +} + +void vp9_convolve_3by8(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int filter_x_stride, + const int16_t *filter_y, int filter_y_stride, + int w, int h) { + int x, y; + + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) { + dst[x] = combine_3by8(dst[x], src[x]); + } + src += src_stride; + dst += dst_stride; + } +} + +void vp9_convolve_5by8(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int filter_x_stride, + const int16_t *filter_y, int filter_y_stride, + int w, int h) { + int x, y; + + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) { + dst[x] = combine_5by8(dst[x], src[x]); + } + src += src_stride; + dst += dst_stride; + } +} + +void vp9_convolve_3qtr(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int filter_x_stride, + const int16_t *filter_y, int filter_y_stride, + int w, int h) { 
+ int x, y; + + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) { + dst[x] = combine_3qtr(dst[x], src[x]); + } + src += src_stride; + dst += dst_stride; + } +} + +void vp9_convolve_7by8(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int filter_x_stride, + const int16_t *filter_y, int filter_y_stride, + int w, int h) { + int x, y; + + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) { + dst[x] = combine_7by8(dst[x], src[x]); + } + src += src_stride; + dst += dst_stride; + } +} +#endif diff --git a/vp9/common/vp9_convolve.h b/vp9/common/vp9_convolve.h index 8c4856187..bef2d8564 100644 --- a/vp9/common/vp9_convolve.h +++ b/vp9/common/vp9_convolve.h @@ -10,6 +10,7 @@ #ifndef VP9_COMMON_CONVOLVE_H_ #define VP9_COMMON_CONVOLVE_H_ +#include "./vpx_config.h" #include "vpx/vpx_integer.h" typedef void (*convolve_fn_t)(const uint8_t *src, int src_stride, @@ -32,6 +33,50 @@ void vp9_convolve_avg(const uint8_t *src, int src_stride, const int16_t *filter_y, int y_step_q4, int w, int h); +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +// Not a convolution, a block wtd (1/8, 7/8) average for (dst, src) +void vp9_convolve_1by8(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h); + +// Not a convolution, a block wtd (1/4, 3/4) average for (dst, src) +void vp9_convolve_qtr(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h); + +// Not a convolution, a block wtd (3/8, 5/8) average for (dst, src) +void vp9_convolve_3by8(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h); + +// Not a convolution, a block wtd (5/8, 3/8) average for (dst, src) +void vp9_convolve_5by8(const uint8_t *src, int 
src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h); + +// Not a convolution, a block wtd (3/4, 1/4) average for (dst, src) +void vp9_convolve_3qtr(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h); + +// Not a convolution, a block wtd (7/8, 1/8) average for (dst, src) +void vp9_convolve_7by8(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h); +#endif + struct subpix_fn_table { const int16_t (*filter_x)[8]; const int16_t (*filter_y)[8]; diff --git a/vp9/common/vp9_default_coef_probs.h b/vp9/common/vp9_default_coef_probs.h index 455fb8ccc..c9be8b229 100644 --- a/vp9/common/vp9_default_coef_probs.h +++ b/vp9/common/vp9_default_coef_probs.h @@ -19,169 +19,169 @@ static const vp9_coeff_probs default_coef_probs_4x4[BLOCK_TYPES] = { { /* block Type 0 */ { /* Intra */ { /* Coeff Band 0 */ - { 208, 26, 124, 168, 135, 159, 164, 134, 213, 172, 169 }, - { 112, 36, 114, 171, 139, 161, 165, 138, 208, 167, 168 }, - { 21, 27, 55, 109, 115, 147, 126, 121, 190, 151, 167 } + { 208, 32, 178, 198, 161, 167, 196, 147, 244, 194, 210 }, + { 102, 43, 132, 185, 148, 162, 185, 141, 237, 181, 215 }, + { 15, 36, 68, 143, 119, 151, 169, 133, 230, 173, 214 } }, { /* Coeff Band 1 */ - { 1, 94, 156, 203, 156, 169, 200, 154, 230, 184, 206 }, - { 85, 91, 161, 202, 155, 170, 198, 151, 233, 184, 214 }, - { 57, 78, 131, 203, 157, 169, 198, 152, 231, 184, 214 }, - { 36, 68, 104, 191, 135, 164, 199, 153, 231, 183, 208 }, - { 16, 51, 68, 154, 115, 150, 192, 140, 231, 184, 210 }, - { 5, 31, 32, 83, 100, 140, 121, 115, 192, 153, 182 } + { 71, 91, 178, 226, 169, 176, 232, 170, 252, 219, 231 }, + { 72, 88, 174, 226, 168, 176, 232, 170, 252, 219, 234 }, + { 40, 79, 154, 222, 161, 174, 
231, 169, 251, 219, 238 }, + { 21, 68, 126, 211, 144, 167, 230, 167, 252, 219, 236 }, + { 7, 49, 84, 175, 121, 152, 223, 151, 251, 218, 237 }, + { 1, 20, 32, 100, 97, 140, 163, 116, 237, 186, 222 } }, { /* Coeff Band 2 */ - { 1, 72, 146, 177, 149, 168, 157, 135, 200, 159, 184 }, - { 68, 72, 148, 180, 153, 166, 157, 139, 198, 156, 187 }, - { 29, 68, 112, 178, 150, 162, 159, 144, 191, 153, 183 }, - { 12, 57, 83, 164, 125, 157, 162, 141, 186, 156, 178 }, - { 4, 42, 52, 125, 108, 145, 161, 130, 190, 166, 170 }, - { 1, 28, 25, 67, 98, 138, 99, 110, 175, 143, 167 } + { 108, 110, 206, 237, 182, 183, 239, 181, 252, 221, 245 }, + { 72, 98, 191, 236, 180, 182, 240, 183, 252, 223, 239 }, + { 26, 77, 152, 230, 166, 179, 239, 181, 252, 222, 241 }, + { 7, 57, 106, 212, 141, 167, 236, 173, 252, 223, 243 }, + { 1, 35, 60, 171, 110, 149, 225, 155, 251, 218, 240 }, + { 1, 14, 22, 90, 86, 134, 163, 116, 238, 181, 233 } }, { /* Coeff Band 3 */ - { 1, 113, 176, 199, 161, 171, 167, 146, 188, 154, 194 }, - { 75, 97, 166, 206, 161, 172, 188, 156, 203, 164, 208 }, - { 31, 83, 131, 200, 152, 168, 191, 157, 200, 169, 206 }, - { 18, 70, 99, 185, 131, 162, 194, 153, 202, 177, 201 }, - { 8, 55, 70, 146, 115, 150, 187, 136, 215, 188, 191 }, - { 2, 46, 42, 87, 109, 144, 111, 117, 185, 148, 182 } + { 105, 139, 222, 245, 196, 192, 245, 195, 253, 229, 255 }, + { 76, 118, 205, 245, 192, 192, 247, 198, 254, 230, 255 }, + { 21, 88, 164, 240, 175, 186, 246, 197, 255, 232, 255 }, + { 5, 63, 118, 222, 149, 172, 242, 185, 255, 230, 254 }, + { 1, 42, 74, 186, 120, 157, 227, 161, 253, 220, 250 }, + { 1, 18, 30, 97, 92, 136, 163, 118, 244, 184, 244 } }, { /* Coeff Band 4 */ - { 1, 128, 191, 217, 169, 174, 203, 163, 201, 178, 196 }, - { 73, 105, 177, 220, 168, 175, 212, 167, 222, 185, 212 }, - { 22, 82, 135, 212, 157, 172, 212, 165, 220, 187, 213 }, - { 10, 65, 95, 194, 133, 162, 210, 160, 223, 194, 208 }, - { 5, 45, 59, 145, 108, 147, 196, 142, 230, 196, 197 }, - { 2, 30, 29, 76, 98, 140, 119, 112, 205, 158, 
185 } + { 143, 117, 233, 251, 207, 201, 250, 210, 255, 239, 128 }, + { 99, 104, 214, 249, 200, 199, 251, 211, 255, 238, 255 }, + { 26, 81, 170, 245, 183, 192, 250, 206, 255, 242, 255 }, + { 6, 60, 116, 226, 151, 176, 242, 187, 255, 235, 255 }, + { 1, 38, 65, 178, 114, 153, 224, 157, 254, 224, 255 }, + { 1, 15, 26, 86, 88, 133, 163, 110, 251, 197, 252 } }, { /* Coeff Band 5 */ - { 1, 101, 208, 232, 179, 179, 236, 181, 243, 216, 210 }, - { 110, 84, 194, 231, 177, 180, 233, 177, 246, 213, 224 }, - { 50, 68, 148, 224, 166, 177, 229, 173, 245, 209, 215 }, - { 29, 55, 105, 207, 139, 168, 224, 167, 244, 207, 225 }, - { 17, 38, 65, 157, 111, 148, 206, 148, 242, 202, 215 }, - { 7, 18, 28, 76, 96, 138, 125, 111, 219, 162, 206 } + { 155, 74, 238, 252, 215, 206, 252, 223, 255, 255, 128 }, + { 152, 64, 223, 250, 205, 201, 254, 219, 255, 255, 128 }, + { 67, 55, 182, 246, 187, 192, 251, 210, 255, 240, 128 }, + { 27, 44, 127, 227, 155, 176, 244, 186, 255, 240, 255 }, + { 9, 27, 69, 176, 115, 152, 227, 154, 255, 229, 255 }, + { 2, 11, 28, 91, 84, 133, 177, 115, 254, 210, 255 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 221, 105, 211, 220, 170, 171, 233, 173, 241, 200, 201 }, - { 144, 102, 184, 206, 160, 167, 209, 163, 227, 186, 193 }, - { 51, 84, 132, 174, 146, 161, 165, 144, 190, 163, 175 } + { 207, 112, 234, 244, 192, 193, 246, 194, 255, 237, 255 }, + { 145, 120, 212, 233, 178, 183, 232, 177, 252, 216, 228 }, + { 77, 114, 177, 214, 164, 174, 210, 159, 245, 199, 230 } }, { /* Coeff Band 1 */ - { 1, 167, 216, 217, 170, 171, 217, 178, 213, 176, 216 }, - { 89, 146, 210, 227, 185, 182, 198, 165, 203, 166, 216 }, - { 37, 117, 171, 232, 185, 185, 197, 175, 191, 159, 212 }, - { 30, 99, 128, 224, 150, 177, 210, 179, 183, 162, 211 }, - { 25, 84, 92, 173, 121, 150, 222, 161, 201, 199, 201 }, - { 8, 56, 65, 121, 119, 148, 150, 124, 198, 170, 191 } + { 93, 174, 243, 248, 205, 200, 245, 195, 255, 232, 255 }, + { 100, 144, 231, 248, 204, 200, 244, 193, 255, 232, 255 }, + { 28, 101, 186, 247, 
194, 199, 244, 194, 255, 232, 255 }, + { 9, 73, 132, 238, 155, 186, 245, 197, 255, 232, 250 }, + { 2, 44, 76, 187, 112, 151, 240, 172, 255, 235, 249 }, + { 1, 19, 33, 98, 92, 138, 176, 113, 252, 208, 249 } }, { /* Coeff Band 2 */ - { 1, 133, 198, 206, 166, 172, 188, 157, 211, 167, 206 }, - { 62, 122, 178, 203, 170, 174, 160, 151, 193, 148, 203 }, - { 19, 94, 126, 195, 156, 172, 156, 159, 172, 135, 199 }, - { 15, 78, 89, 173, 122, 158, 163, 155, 153, 138, 191 }, - { 13, 63, 61, 120, 109, 141, 167, 138, 159, 174, 174 }, - { 1, 39, 44, 77, 113, 145, 92, 116, 162, 144, 166 } + { 116, 175, 246, 250, 212, 202, 248, 198, 255, 238, 255 }, + { 78, 142, 231, 250, 208, 203, 249, 200, 255, 241, 255 }, + { 14, 93, 177, 245, 186, 196, 248, 198, 255, 241, 255 }, + { 4, 65, 122, 227, 148, 177, 244, 186, 255, 241, 243 }, + { 1, 38, 69, 180, 111, 152, 235, 162, 255, 237, 247 }, + { 1, 18, 30, 101, 89, 133, 190, 116, 255, 219, 246 } }, { /* Coeff Band 3 */ - { 1, 157, 214, 222, 176, 176, 208, 168, 213, 174, 219 }, - { 80, 134, 199, 223, 180, 181, 191, 162, 200, 161, 218 }, - { 27, 104, 155, 219, 168, 177, 192, 168, 186, 154, 214 }, - { 23, 87, 114, 203, 138, 166, 199, 165, 178, 163, 210 }, - { 16, 74, 84, 153, 118, 150, 198, 144, 194, 189, 198 }, - { 1, 50, 64, 110, 121, 149, 118, 125, 177, 149, 194 } + { 138, 183, 249, 253, 220, 209, 252, 210, 255, 251, 128 }, + { 93, 147, 237, 252, 213, 209, 253, 213, 255, 251, 128 }, + { 21, 104, 187, 247, 185, 196, 252, 210, 255, 249, 128 }, + { 6, 73, 131, 225, 147, 174, 248, 190, 255, 248, 128 }, + { 1, 47, 83, 189, 119, 155, 239, 167, 255, 246, 128 }, + { 1, 26, 44, 130, 96, 139, 209, 129, 255, 235, 255 } }, { /* Coeff Band 4 */ - { 1, 164, 216, 229, 181, 178, 223, 172, 228, 197, 221 }, - { 86, 140, 196, 225, 176, 179, 215, 169, 220, 189, 222 }, - { 30, 107, 149, 217, 160, 175, 216, 169, 212, 187, 219 }, - { 24, 85, 109, 197, 133, 161, 215, 162, 211, 195, 214 }, - { 17, 67, 76, 150, 114, 148, 202, 144, 222, 203, 204 }, - { 3, 46, 55, 100, 111, 
144, 140, 117, 215, 173, 197 } + { 188, 143, 252, 255, 228, 218, 253, 218, 255, 209, 128 }, + { 137, 124, 241, 253, 215, 211, 254, 221, 255, 255, 128 }, + { 32, 89, 188, 248, 186, 198, 254, 216, 255, 253, 128 }, + { 7, 61, 122, 231, 146, 176, 252, 201, 255, 250, 128 }, + { 1, 34, 66, 186, 103, 149, 246, 176, 255, 249, 128 }, + { 1, 18, 34, 115, 91, 134, 217, 124, 255, 233, 255 } }, { /* Coeff Band 5 */ - { 1, 120, 224, 237, 184, 181, 241, 188, 249, 228, 231 }, - { 139, 95, 209, 236, 184, 184, 237, 182, 247, 224, 230 }, - { 67, 79, 160, 232, 172, 181, 236, 182, 246, 219, 233 }, - { 48, 65, 120, 216, 141, 168, 234, 177, 245, 219, 229 }, - { 32, 52, 85, 171, 119, 151, 222, 156, 246, 216, 224 }, - { 13, 39, 58, 112, 111, 144, 157, 121, 229, 182, 211 } + { 198, 92, 253, 255, 231, 222, 255, 230, 128, 128, 128 }, + { 189, 79, 244, 254, 220, 217, 255, 237, 255, 255, 128 }, + { 78, 61, 200, 252, 196, 207, 255, 231, 255, 255, 128 }, + { 34, 50, 146, 242, 161, 187, 255, 222, 255, 255, 128 }, + { 11, 38, 93, 215, 122, 159, 253, 202, 255, 255, 128 }, + { 1, 31, 55, 143, 102, 143, 227, 148, 255, 238, 128 } } } }, { /* block Type 1 */ { /* Intra */ { /* Coeff Band 0 */ - { 220, 21, 181, 217, 157, 178, 234, 145, 248, 236, 173 }, - { 125, 26, 143, 206, 148, 172, 213, 151, 240, 207, 199 }, - { 44, 28, 84, 150, 125, 154, 171, 133, 225, 179, 192 } + { 207, 35, 219, 243, 195, 192, 243, 188, 251, 232, 238 }, + { 126, 46, 182, 230, 177, 182, 228, 171, 248, 214, 232 }, + { 51, 47, 125, 196, 147, 166, 206, 151, 245, 199, 229 } }, { /* Coeff Band 1 */ - { 1, 137, 209, 231, 181, 181, 223, 173, 245, 202, 236 }, - { 147, 130, 214, 232, 181, 183, 224, 172, 245, 204, 220 }, - { 111, 112, 183, 234, 188, 186, 223, 175, 246, 202, 237 }, - { 89, 100, 159, 227, 163, 178, 222, 173, 246, 203, 220 }, - { 55, 80, 124, 201, 142, 166, 219, 163, 246, 205, 223 }, - { 23, 45, 70, 130, 119, 151, 157, 128, 224, 170, 207 } + { 114, 124, 220, 244, 197, 192, 242, 189, 253, 226, 255 }, + { 142, 116, 213, 243, 194, 
191, 241, 188, 252, 226, 255 }, + { 81, 101, 190, 242, 188, 190, 242, 190, 253, 229, 255 }, + { 42, 83, 155, 235, 166, 183, 241, 190, 253, 227, 246 }, + { 16, 62, 104, 205, 133, 161, 238, 176, 254, 227, 250 }, + { 6, 40, 60, 132, 109, 145, 190, 128, 248, 202, 239 } }, { /* Coeff Band 2 */ - { 1, 62, 195, 228, 177, 179, 220, 170, 244, 201, 226 }, - { 87, 84, 172, 218, 165, 176, 212, 163, 242, 199, 228 }, - { 28, 87, 124, 206, 154, 168, 209, 159, 241, 195, 227 }, - { 10, 72, 94, 181, 127, 159, 200, 150, 240, 193, 226 }, - { 4, 47, 58, 129, 109, 145, 176, 132, 237, 183, 222 }, - { 1, 24, 26, 65, 95, 137, 109, 104, 210, 151, 197 } + { 139, 149, 228, 248, 205, 198, 244, 196, 255, 223, 255 }, + { 115, 127, 221, 248, 202, 198, 245, 198, 255, 228, 255 }, + { 43, 100, 189, 246, 195, 195, 244, 196, 254, 234, 228 }, + { 13, 77, 141, 238, 168, 187, 243, 191, 255, 232, 255 }, + { 3, 49, 88, 203, 125, 160, 237, 178, 253, 227, 251 }, + { 1, 23, 41, 118, 97, 136, 191, 127, 250, 207, 247 } }, { /* Coeff Band 3 */ - { 1, 127, 206, 236, 183, 183, 230, 180, 247, 211, 234 }, - { 113, 118, 195, 228, 174, 180, 225, 172, 248, 208, 231 }, - { 43, 109, 162, 221, 166, 175, 220, 168, 248, 207, 232 }, - { 17, 88, 126, 208, 152, 171, 214, 161, 247, 203, 236 }, - { 5, 60, 84, 172, 125, 154, 199, 149, 244, 194, 237 }, - { 1, 29, 41, 99, 104, 147, 146, 116, 227, 170, 223 } + { 119, 185, 236, 251, 216, 205, 249, 202, 253, 237, 255 }, + { 89, 140, 224, 251, 211, 205, 250, 208, 255, 241, 255 }, + { 34, 105, 189, 248, 195, 197, 250, 208, 255, 245, 255 }, + { 14, 78, 142, 235, 166, 182, 246, 194, 255, 242, 255 }, + { 5, 49, 90, 196, 128, 160, 235, 165, 255, 237, 255 }, + { 1, 22, 41, 114, 97, 139, 180, 124, 252, 201, 249 } }, { /* Coeff Band 4 */ - { 1, 151, 222, 239, 193, 188, 231, 177, 250, 218, 241 }, - { 114, 126, 203, 230, 180, 181, 226, 171, 249, 212, 246 }, - { 51, 97, 175, 218, 166, 176, 220, 165, 250, 211, 231 }, - { 23, 77, 136, 204, 155, 169, 213, 157, 248, 205, 241 }, - { 6, 50, 85, 169, 
126, 158, 197, 146, 245, 197, 243 }, - { 1, 21, 37, 97, 101, 146, 146, 119, 232, 169, 232 } + { 162, 142, 244, 254, 228, 215, 255, 230, 128, 128, 128 }, + { 129, 120, 231, 253, 216, 210, 255, 228, 255, 255, 128 }, + { 44, 90, 189, 249, 195, 199, 253, 217, 255, 240, 128 }, + { 14, 65, 132, 234, 158, 181, 249, 203, 255, 248, 128 }, + { 3, 38, 72, 188, 112, 154, 239, 171, 255, 243, 128 }, + { 1, 17, 39, 110, 86, 141, 201, 123, 255, 240, 128 } }, { /* Coeff Band 5 */ - { 1, 117, 230, 239, 194, 187, 233, 179, 252, 222, 248 }, - { 148, 109, 210, 232, 184, 182, 227, 173, 252, 211, 244 }, - { 80, 84, 162, 222, 168, 178, 225, 167, 252, 207, 244 }, - { 43, 64, 122, 201, 142, 169, 218, 162, 251, 208, 254 }, - { 17, 41, 76, 155, 120, 154, 200, 141, 249, 204, 248 }, - { 5, 19, 35, 89, 99, 151, 140, 115, 241, 174, 244 } + { 167, 96, 247, 255, 230, 218, 249, 231, 255, 255, 128 }, + { 163, 84, 234, 253, 214, 209, 255, 231, 255, 255, 128 }, + { 70, 63, 185, 249, 189, 197, 255, 230, 255, 255, 128 }, + { 30, 44, 132, 238, 157, 180, 251, 210, 255, 220, 128 }, + { 13, 30, 80, 195, 121, 153, 243, 179, 255, 224, 128 }, + { 5, 13, 38, 103, 109, 128, 196, 147, 255, 255, 128 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 240, 71, 232, 234, 178, 179, 246, 180, 251, 225, 232 }, - { 168, 71, 198, 225, 167, 173, 229, 173, 247, 211, 218 }, - { 75, 63, 144, 195, 150, 164, 192, 147, 245, 202, 213 } + { 242, 90, 246, 244, 200, 192, 242, 189, 255, 234, 255 }, + { 186, 102, 228, 233, 187, 182, 231, 172, 254, 225, 252 }, + { 102, 108, 203, 228, 181, 180, 218, 167, 243, 201, 223 } }, { /* Coeff Band 1 */ - { 1, 165, 237, 243, 186, 184, 247, 206, 255, 238, 255 }, - { 159, 149, 229, 241, 192, 183, 244, 194, 255, 237, 239 }, - { 110, 126, 195, 243, 196, 196, 239, 187, 255, 237, 242 }, - { 89, 114, 170, 237, 168, 181, 239, 192, 254, 232, 241 }, - { 54, 96, 145, 210, 151, 166, 237, 173, 253, 234, 249 }, - { 17, 65, 108, 187, 140, 165, 194, 148, 244, 199, 227 } + { 152, 169, 250, 253, 223, 209, 251, 208, 255, 
250, 128 }, + { 164, 149, 242, 253, 222, 209, 249, 207, 253, 238, 255 }, + { 63, 108, 204, 252, 215, 211, 251, 211, 255, 242, 128 }, + { 39, 83, 153, 248, 175, 199, 250, 214, 255, 245, 128 }, + { 31, 66, 108, 214, 130, 161, 251, 196, 255, 237, 128 }, + { 27, 65, 71, 150, 112, 149, 213, 133, 255, 230, 255 } }, { /* Coeff Band 2 */ - { 1, 124, 227, 239, 183, 184, 240, 195, 249, 224, 240 }, - { 112, 132, 206, 235, 183, 184, 232, 180, 246, 220, 234 }, - { 36, 116, 161, 228, 170, 180, 229, 176, 244, 218, 239 }, - { 22, 107, 126, 210, 139, 167, 225, 169, 244, 218, 229 }, - { 9, 82, 90, 163, 122, 151, 210, 149, 246, 212, 227 }, - { 1, 43, 51, 102, 105, 144, 152, 117, 234, 182, 213 } + { 161, 174, 250, 254, 226, 215, 254, 226, 255, 230, 128 }, + { 133, 150, 239, 254, 222, 213, 254, 225, 255, 255, 128 }, + { 32, 105, 197, 252, 206, 207, 253, 220, 255, 255, 128 }, + { 10, 78, 147, 245, 173, 193, 253, 212, 255, 255, 128 }, + { 2, 49, 99, 221, 133, 164, 250, 198, 255, 252, 128 }, + { 1, 26, 53, 154, 96, 135, 234, 142, 255, 240, 128 } }, { /* Coeff Band 3 */ - { 1, 160, 234, 244, 195, 188, 244, 197, 251, 231, 250 }, - { 119, 142, 220, 241, 192, 189, 241, 188, 251, 229, 243 }, - { 38, 110, 180, 238, 183, 185, 238, 185, 251, 227, 246 }, - { 27, 95, 130, 229, 164, 181, 234, 178, 251, 223, 233 }, - { 13, 79, 97, 185, 125, 153, 223, 164, 250, 217, 238 }, - { 1, 45, 57, 110, 111, 143, 164, 119, 235, 183, 220 } + { 160, 187, 251, 255, 234, 223, 255, 233, 128, 128, 128 }, + { 131, 155, 241, 255, 228, 222, 255, 232, 255, 255, 128 }, + { 42, 108, 198, 253, 207, 212, 255, 234, 255, 255, 128 }, + { 18, 81, 151, 246, 176, 194, 254, 222, 255, 255, 128 }, + { 9, 60, 112, 225, 144, 167, 252, 199, 255, 255, 128 }, + { 5, 35, 49, 163, 113, 150, 237, 118, 255, 255, 128 } }, { /* Coeff Band 4 */ - { 1, 166, 239, 247, 207, 196, 244, 198, 251, 225, 245 }, - { 119, 146, 224, 244, 199, 192, 240, 192, 251, 223, 240 }, - { 46, 108, 189, 237, 180, 191, 236, 186, 249, 218, 248 }, - { 29, 89, 154, 223, 
165, 177, 228, 173, 250, 213, 224 }, - { 8, 63, 104, 189, 139, 163, 207, 154, 246, 200, 241 }, - { 1, 27, 40, 103, 102, 144, 146, 118, 230, 165, 223 } + { 195, 141, 253, 255, 242, 232, 255, 255, 128, 128, 128 }, + { 169, 128, 245, 255, 235, 227, 255, 248, 128, 128, 128 }, + { 62, 91, 204, 255, 216, 220, 255, 233, 128, 128, 128 }, + { 23, 70, 150, 248, 178, 202, 255, 223, 128, 128, 128 }, + { 2, 44, 78, 220, 110, 164, 255, 209, 128, 128, 128 }, + { 1, 1, 128, 255, 255, 128, 128, 128, 128, 128, 128 } }, { /* Coeff Band 5 */ - { 1, 131, 242, 247, 207, 193, 244, 199, 251, 225, 248 }, - { 150, 118, 231, 244, 202, 191, 239, 191, 252, 214, 241 }, - { 79, 98, 188, 236, 185, 186, 232, 182, 251, 212, 249 }, - { 55, 80, 145, 217, 154, 174, 222, 172, 250, 204, 253 }, - { 27, 56, 94, 162, 128, 153, 198, 143, 248, 199, 240 }, - { 4, 19, 33, 77, 98, 144, 129, 110, 237, 167, 241 } + { 195, 104, 253, 255, 246, 246, 255, 171, 128, 128, 128 }, + { 197, 92, 248, 255, 239, 228, 255, 239, 128, 128, 128 }, + { 88, 71, 214, 255, 219, 220, 255, 244, 128, 128, 128 }, + { 39, 56, 160, 250, 187, 204, 255, 255, 128, 128, 128 }, + { 18, 28, 90, 217, 81, 137, 255, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } } } } @@ -190,169 +190,169 @@ static const vp9_coeff_probs default_coef_probs_8x8[BLOCK_TYPES] = { { /* block Type 0 */ { /* Intra */ { /* Coeff Band 0 */ - { 158, 29, 127, 187, 147, 164, 183, 146, 227, 188, 162 }, - { 74, 36, 101, 162, 138, 162, 154, 134, 206, 165, 167 }, - { 15, 28, 56, 109, 119, 151, 122, 120, 190, 151, 164 } + { 196, 40, 199, 180, 158, 161, 172, 135, 226, 183, 140 }, + { 83, 38, 128, 153, 142, 157, 155, 128, 222, 164, 202 }, + { 10, 29, 55, 116, 113, 146, 150, 122, 223, 169, 200 } }, { /* Coeff Band 1 */ - { 1, 129, 178, 205, 163, 170, 200, 152, 236, 185, 215 }, - { 61, 129, 178, 205, 162, 170, 201, 152, 237, 189, 212 }, - { 42, 113, 161, 203, 159, 168, 200, 153, 237, 188, 212 }, - { 30, 91, 129, 196, 149, 166, 201, 152, 236, 186, 213 
}, - { 12, 63, 86, 169, 126, 155, 195, 145, 236, 187, 213 }, - { 6, 34, 33, 89, 100, 139, 132, 115, 206, 157, 183 } + { 33, 114, 160, 211, 155, 169, 223, 162, 248, 212, 215 }, + { 69, 107, 155, 210, 154, 169, 224, 163, 248, 212, 216 }, + { 30, 91, 138, 207, 150, 168, 223, 162, 248, 212, 216 }, + { 12, 74, 115, 200, 140, 164, 222, 160, 249, 212, 219 }, + { 4, 52, 80, 172, 121, 153, 216, 149, 249, 212, 226 }, + { 1, 27, 40, 105, 101, 141, 157, 120, 231, 177, 210 } }, { /* Coeff Band 2 */ - { 1, 75, 147, 182, 152, 162, 189, 141, 223, 179, 198 }, - { 36, 71, 125, 184, 141, 161, 204, 147, 241, 200, 202 }, - { 10, 56, 83, 163, 129, 153, 194, 140, 241, 194, 215 }, - { 6, 44, 59, 139, 110, 146, 178, 131, 237, 186, 219 }, - { 5, 35, 35, 96, 101, 140, 152, 117, 227, 170, 210 }, - { 2, 25, 14, 46, 88, 129, 90, 99, 186, 138, 173 } + { 38, 159, 190, 227, 171, 177, 229, 172, 250, 214, 237 }, + { 34, 130, 182, 229, 173, 180, 231, 174, 249, 215, 234 }, + { 10, 97, 153, 226, 164, 178, 232, 175, 250, 215, 241 }, + { 3, 71, 115, 213, 145, 170, 230, 171, 251, 217, 235 }, + { 1, 41, 68, 172, 114, 152, 219, 154, 250, 212, 235 }, + { 1, 16, 27, 88, 90, 135, 155, 113, 235, 180, 216 } }, { /* Coeff Band 3 */ - { 1, 135, 179, 191, 161, 166, 198, 136, 234, 184, 215 }, - { 55, 116, 171, 216, 163, 174, 214, 163, 232, 196, 201 }, - { 17, 89, 134, 205, 153, 166, 214, 159, 241, 200, 209 }, - { 9, 69, 98, 187, 132, 159, 206, 149, 243, 198, 215 }, - { 9, 53, 58, 142, 113, 151, 189, 135, 240, 187, 219 }, - { 3, 36, 23, 69, 90, 133, 121, 109, 206, 155, 183 } + { 41, 184, 214, 238, 187, 186, 235, 180, 252, 217, 236 }, + { 24, 142, 199, 241, 188, 189, 237, 184, 252, 220, 235 }, + { 6, 97, 159, 235, 172, 184, 239, 185, 252, 221, 243 }, + { 1, 63, 110, 214, 144, 170, 234, 174, 253, 223, 243 }, + { 1, 32, 58, 166, 109, 149, 218, 152, 251, 215, 238 }, + { 1, 12, 21, 78, 85, 131, 152, 109, 236, 180, 224 } }, { /* Coeff Band 4 */ - { 1, 163, 194, 208, 171, 171, 214, 140, 240, 191, 227 }, - { 45, 129, 180, 
226, 172, 180, 216, 169, 229, 186, 224 }, - { 13, 94, 138, 216, 160, 171, 219, 167, 238, 198, 217 }, - { 13, 72, 99, 196, 131, 160, 213, 156, 243, 201, 213 }, - { 18, 62, 54, 136, 109, 149, 197, 132, 242, 193, 212 }, - { 5, 40, 25, 60, 92, 133, 111, 105, 200, 150, 179 } + { 54, 207, 231, 245, 201, 193, 238, 186, 252, 221, 220 }, + { 32, 156, 213, 246, 198, 195, 242, 192, 252, 224, 245 }, + { 7, 98, 164, 240, 177, 187, 243, 193, 252, 227, 244 }, + { 2, 62, 108, 216, 143, 170, 237, 177, 254, 227, 248 }, + { 1, 32, 57, 165, 108, 148, 219, 152, 252, 217, 243 }, + { 1, 13, 22, 79, 87, 132, 153, 109, 240, 182, 232 } }, { /* Coeff Band 5 */ - { 1, 187, 223, 230, 197, 185, 216, 139, 241, 174, 241 }, - { 58, 144, 205, 236, 189, 188, 209, 168, 231, 172, 234 }, - { 18, 104, 160, 226, 171, 180, 211, 170, 234, 180, 230 }, - { 11, 76, 115, 205, 143, 166, 205, 161, 234, 182, 218 }, - { 14, 66, 66, 138, 116, 150, 192, 128, 231, 180, 204 }, - { 1, 35, 32, 61, 104, 140, 89, 105, 187, 138, 171 } + { 89, 208, 239, 250, 216, 200, 240, 190, 255, 222, 219 }, + { 53, 155, 223, 250, 209, 202, 245, 199, 253, 225, 246 }, + { 12, 102, 170, 243, 183, 192, 246, 198, 254, 230, 255 }, + { 3, 67, 111, 218, 144, 171, 239, 180, 254, 231, 248 }, + { 1, 38, 60, 164, 108, 148, 221, 152, 253, 220, 246 }, + { 1, 18, 26, 81, 88, 132, 157, 108, 245, 188, 241 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 177, 107, 211, 210, 163, 167, 237, 156, 238, 209, 204 }, - { 119, 101, 183, 200, 161, 168, 215, 148, 234, 191, 204 }, - { 39, 81, 127, 173, 144, 162, 182, 137, 226, 176, 202 } + { 205, 121, 244, 237, 187, 188, 229, 174, 248, 215, 228 }, + { 140, 120, 211, 219, 174, 177, 207, 158, 241, 195, 214 }, + { 51, 100, 152, 198, 155, 168, 199, 148, 240, 193, 207 } }, { /* Coeff Band 1 */ - { 1, 175, 199, 199, 161, 158, 242, 141, 254, 226, 249 }, - { 89, 154, 205, 213, 178, 172, 235, 138, 254, 221, 245 }, - { 32, 123, 180, 231, 183, 185, 229, 158, 253, 216, 237 }, - { 23, 102, 134, 226, 155, 177, 231, 175, 253, 215, 
244 }, - { 20, 88, 95, 176, 119, 151, 228, 154, 253, 218, 240 }, - { 7, 54, 64, 120, 115, 146, 168, 119, 238, 186, 212 } + { 66, 196, 236, 247, 202, 197, 243, 193, 254, 228, 246 }, + { 99, 164, 223, 246, 199, 196, 243, 193, 254, 226, 255 }, + { 29, 122, 187, 244, 187, 194, 244, 193, 255, 227, 239 }, + { 14, 95, 145, 234, 156, 181, 244, 194, 254, 229, 246 }, + { 6, 68, 97, 190, 123, 155, 240, 168, 254, 232, 245 }, + { 3, 43, 50, 112, 105, 143, 170, 118, 245, 195, 230 } }, { /* Coeff Band 2 */ - { 1, 151, 196, 204, 163, 163, 238, 144, 252, 219, 235 }, - { 43, 128, 179, 218, 171, 175, 227, 152, 251, 214, 231 }, - { 15, 94, 126, 216, 158, 174, 223, 165, 250, 211, 231 }, - { 17, 82, 90, 190, 120, 157, 219, 160, 249, 209, 228 }, - { 15, 87, 66, 123, 104, 139, 201, 130, 247, 202, 228 }, - { 1, 43, 35, 70, 98, 134, 134, 105, 226, 168, 203 } + { 66, 202, 238, 248, 206, 199, 245, 196, 254, 233, 244 }, + { 45, 155, 218, 248, 200, 199, 245, 197, 254, 229, 208 }, + { 6, 96, 163, 242, 178, 191, 245, 196, 254, 233, 228 }, + { 2, 64, 110, 224, 142, 175, 242, 185, 254, 232, 247 }, + { 1, 34, 61, 172, 103, 147, 232, 164, 254, 226, 244 }, + { 1, 13, 24, 82, 85, 133, 165, 105, 248, 199, 242 } }, { /* Coeff Band 3 */ - { 1, 172, 203, 207, 167, 163, 242, 146, 254, 225, 243 }, - { 52, 139, 194, 224, 179, 179, 232, 153, 253, 219, 237 }, - { 19, 102, 148, 225, 166, 180, 227, 170, 252, 217, 236 }, - { 24, 87, 105, 205, 132, 161, 225, 167, 252, 215, 235 }, - { 23, 90, 76, 140, 108, 144, 213, 138, 251, 211, 235 }, - { 2, 42, 39, 80, 97, 134, 151, 109, 236, 180, 216 } + { 66, 204, 242, 251, 213, 204, 248, 204, 255, 236, 255 }, + { 38, 158, 222, 251, 206, 205, 249, 206, 255, 238, 255 }, + { 6, 95, 166, 244, 178, 194, 249, 205, 255, 236, 255 }, + { 2, 61, 111, 223, 141, 173, 244, 187, 255, 237, 255 }, + { 1, 31, 59, 171, 104, 149, 230, 158, 255, 230, 252 }, + { 1, 12, 22, 82, 79, 128, 171, 111, 251, 203, 249 } }, { /* Coeff Band 4 */ - { 1, 183, 216, 216, 178, 168, 245, 145, 255, 226, 245 }, - { 
48, 149, 203, 231, 186, 185, 233, 155, 254, 220, 243 }, - { 20, 108, 154, 227, 170, 181, 227, 169, 253, 219, 240 }, - { 32, 87, 109, 205, 136, 163, 223, 166, 253, 217, 241 }, - { 33, 91, 76, 139, 110, 144, 212, 135, 252, 212, 241 }, - { 2, 39, 39, 83, 99, 136, 150, 108, 239, 181, 226 } + { 63, 214, 245, 252, 219, 208, 249, 206, 255, 241, 128 }, + { 38, 164, 228, 252, 210, 208, 251, 212, 255, 245, 255 }, + { 5, 101, 174, 246, 182, 196, 251, 207, 255, 244, 255 }, + { 1, 64, 116, 224, 142, 174, 246, 190, 255, 241, 228 }, + { 1, 34, 63, 172, 105, 148, 233, 160, 255, 235, 237 }, + { 1, 14, 26, 88, 85, 130, 177, 110, 252, 210, 250 } }, { /* Coeff Band 5 */ - { 1, 196, 231, 239, 202, 187, 244, 160, 254, 222, 242 }, - { 60, 151, 213, 240, 193, 191, 236, 175, 254, 220, 242 }, - { 13, 107, 164, 231, 173, 181, 232, 177, 253, 219, 240 }, - { 9, 78, 118, 210, 145, 169, 227, 169, 253, 218, 242 }, - { 18, 65, 76, 160, 117, 151, 210, 144, 251, 210, 239 }, - { 1, 28, 38, 92, 101, 140, 148, 113, 237, 177, 227 } + { 91, 214, 246, 254, 226, 213, 251, 210, 255, 239, 255 }, + { 55, 162, 233, 253, 215, 210, 253, 216, 255, 244, 128 }, + { 10, 104, 179, 247, 184, 196, 252, 212, 255, 247, 255 }, + { 2, 67, 119, 226, 143, 173, 249, 195, 255, 245, 255 }, + { 1, 37, 66, 175, 106, 149, 237, 164, 255, 240, 255 }, + { 1, 16, 30, 96, 87, 132, 188, 113, 255, 222, 255 } } } }, { /* block Type 1 */ { /* Intra */ { /* Coeff Band 0 */ - { 210, 33, 210, 232, 185, 185, 210, 166, 207, 192, 146 }, - { 118, 47, 169, 220, 170, 179, 201, 160, 231, 183, 211 }, - { 40, 52, 119, 203, 146, 169, 207, 160, 242, 194, 222 } + { 211, 32, 212, 235, 185, 184, 223, 167, 239, 210, 182 }, + { 121, 47, 171, 224, 171, 180, 211, 162, 238, 195, 221 }, + { 40, 51, 118, 203, 145, 168, 211, 160, 246, 200, 236 } }, { /* Coeff Band 1 */ - { 1, 158, 215, 239, 192, 188, 234, 174, 253, 219, 230 }, - { 130, 149, 210, 238, 191, 188, 233, 174, 253, 221, 240 }, - { 59, 123, 193, 237, 188, 187, 232, 174, 252, 220, 246 }, - { 22, 89, 154, 
232, 172, 183, 233, 173, 253, 219, 237 }, - { 4, 49, 83, 193, 128, 160, 227, 161, 253, 219, 233 }, - { 1, 18, 27, 87, 90, 133, 160, 112, 242, 185, 231 } + { 71, 129, 209, 244, 192, 194, 242, 188, 255, 230, 255 }, + { 118, 122, 206, 244, 192, 192, 241, 187, 254, 227, 255 }, + { 53, 104, 184, 241, 186, 190, 241, 184, 254, 232, 255 }, + { 20, 81, 148, 234, 168, 183, 240, 183, 254, 231, 240 }, + { 3, 47, 82, 197, 127, 160, 234, 166, 254, 228, 251 }, + { 1, 18, 28, 96, 88, 134, 174, 116, 247, 194, 247 } }, { /* Coeff Band 2 */ - { 1, 87, 205, 244, 192, 193, 239, 188, 252, 220, 217 }, - { 64, 93, 169, 237, 175, 186, 237, 184, 253, 222, 235 }, - { 19, 77, 130, 222, 154, 175, 231, 173, 253, 221, 223 }, - { 6, 59, 95, 196, 132, 162, 223, 160, 251, 215, 240 }, - { 1, 37, 57, 144, 109, 146, 201, 135, 250, 205, 238 }, - { 1, 17, 26, 81, 94, 138, 135, 107, 232, 168, 223 } + { 86, 162, 220, 247, 203, 198, 245, 193, 255, 237, 255 }, + { 84, 134, 216, 247, 201, 197, 244, 192, 255, 233, 255 }, + { 26, 102, 186, 243, 190, 192, 244, 192, 255, 232, 255 }, + { 7, 75, 135, 231, 163, 181, 240, 183, 255, 234, 255 }, + { 1, 46, 79, 193, 121, 157, 233, 168, 255, 225, 242 }, + { 1, 20, 35, 113, 94, 136, 191, 123, 252, 209, 250 } }, { /* Coeff Band 3 */ - { 1, 150, 219, 243, 198, 192, 237, 182, 253, 227, 245 }, - { 88, 130, 202, 239, 190, 188, 236, 180, 253, 224, 255 }, - { 25, 103, 172, 231, 175, 182, 234, 174, 253, 227, 248 }, - { 7, 78, 128, 215, 156, 172, 228, 166, 252, 222, 248 }, - { 1, 48, 76, 175, 121, 155, 212, 149, 251, 213, 237 }, - { 1, 22, 35, 101, 97, 141, 161, 120, 236, 181, 213 } + { 89, 191, 232, 250, 211, 203, 248, 202, 255, 230, 128 }, + { 67, 148, 223, 250, 207, 201, 250, 207, 255, 247, 255 }, + { 19, 105, 183, 245, 189, 193, 249, 202, 255, 244, 255 }, + { 5, 72, 127, 228, 156, 177, 245, 186, 255, 238, 255 }, + { 1, 44, 76, 190, 119, 156, 234, 167, 255, 231, 255 }, + { 1, 21, 36, 116, 92, 138, 195, 128, 250, 208, 241 } }, { /* Coeff Band 4 */ - { 1, 177, 228, 247, 206, 
197, 243, 191, 255, 232, 255 }, - { 76, 143, 205, 243, 192, 192, 241, 189, 253, 223, 255 }, - { 17, 107, 163, 233, 170, 183, 239, 183, 253, 227, 218 }, - { 3, 75, 118, 216, 147, 171, 234, 174, 253, 220, 249 }, - { 1, 43, 71, 174, 118, 154, 217, 153, 250, 211, 240 }, - { 1, 19, 31, 93, 93, 136, 154, 116, 235, 178, 228 } + { 94, 210, 236, 252, 215, 206, 253, 209, 255, 247, 128 }, + { 68, 153, 224, 251, 209, 204, 251, 213, 255, 240, 128 }, + { 14, 103, 178, 246, 188, 195, 251, 209, 255, 239, 128 }, + { 2, 70, 122, 230, 154, 177, 247, 194, 255, 239, 128 }, + { 1, 42, 72, 189, 115, 153, 234, 166, 255, 229, 255 }, + { 1, 19, 34, 104, 98, 143, 180, 124, 252, 200, 255 } }, { /* Coeff Band 5 */ - { 1, 192, 230, 251, 215, 205, 245, 201, 254, 229, 255 }, - { 66, 142, 206, 248, 200, 202, 244, 197, 255, 224, 255 }, - { 21, 107, 166, 241, 176, 191, 241, 192, 253, 230, 255 }, - { 5, 79, 129, 221, 150, 173, 237, 178, 254, 226, 255 }, - { 1, 43, 72, 173, 117, 151, 217, 150, 253, 216, 245 }, - { 1, 17, 28, 93, 95, 139, 162, 114, 245, 187, 235 } + { 87, 200, 238, 254, 226, 214, 250, 212, 255, 226, 128 }, + { 55, 151, 225, 253, 217, 212, 253, 217, 255, 233, 128 }, + { 11, 106, 179, 249, 193, 200, 252, 213, 255, 247, 128 }, + { 2, 72, 124, 232, 155, 180, 246, 195, 255, 230, 128 }, + { 1, 42, 70, 182, 114, 153, 232, 163, 255, 236, 255 }, + { 1, 17, 28, 95, 92, 137, 170, 115, 252, 208, 228 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 235, 68, 250, 244, 206, 192, 229, 177, 248, 215, 237 }, - { 169, 88, 225, 235, 191, 184, 222, 170, 246, 205, 237 }, - { 65, 100, 171, 214, 166, 173, 216, 157, 249, 213, 215 } + { 238, 66, 250, 245, 205, 193, 232, 180, 254, 228, 255 }, + { 178, 84, 226, 237, 192, 185, 230, 176, 253, 217, 251 }, + { 76, 83, 168, 218, 166, 173, 225, 162, 252, 220, 243 } }, { /* Coeff Band 1 */ - { 1, 191, 246, 250, 217, 202, 244, 195, 255, 226, 128 }, - { 177, 169, 236, 250, 216, 201, 244, 194, 251, 228, 255 }, - { 70, 132, 205, 250, 209, 205, 246, 193, 254, 246, 255 }, - { 
41, 108, 165, 244, 172, 194, 246, 202, 255, 229, 255 }, - { 23, 84, 126, 207, 140, 162, 244, 179, 254, 237, 255 }, - { 11, 57, 83, 149, 127, 156, 180, 126, 247, 202, 220 } + { 137, 176, 246, 252, 218, 207, 251, 208, 255, 238, 128 }, + { 176, 160, 237, 252, 217, 206, 249, 209, 255, 247, 128 }, + { 68, 128, 205, 251, 209, 207, 251, 207, 255, 248, 128 }, + { 40, 105, 167, 246, 172, 192, 252, 215, 255, 247, 128 }, + { 22, 84, 131, 214, 144, 164, 249, 185, 255, 250, 255 }, + { 11, 60, 91, 161, 130, 155, 194, 133, 253, 214, 255 } }, { /* Coeff Band 2 */ - { 1, 169, 240, 250, 212, 202, 242, 192, 252, 222, 255 }, - { 105, 151, 215, 246, 200, 197, 240, 190, 253, 221, 255 }, - { 24, 111, 166, 237, 177, 188, 236, 183, 252, 213, 255 }, - { 9, 83, 122, 218, 148, 170, 233, 174, 250, 215, 242 }, - { 1, 55, 77, 168, 118, 152, 215, 150, 248, 213, 226 }, - { 1, 26, 36, 104, 98, 146, 149, 116, 235, 182, 225 } + { 124, 192, 247, 253, 223, 210, 254, 215, 255, 255, 128 }, + { 103, 161, 234, 253, 218, 209, 253, 214, 255, 255, 128 }, + { 19, 108, 190, 250, 202, 202, 251, 213, 255, 241, 128 }, + { 6, 74, 131, 242, 165, 191, 251, 207, 255, 244, 128 }, + { 1, 41, 72, 198, 111, 151, 249, 185, 255, 248, 128 }, + { 1, 14, 24, 82, 90, 140, 185, 96, 254, 224, 255 } }, { /* Coeff Band 3 */ - { 1, 191, 243, 251, 219, 204, 246, 196, 255, 230, 128 }, - { 97, 168, 225, 248, 207, 198, 244, 193, 254, 225, 192 }, - { 15, 122, 182, 241, 187, 188, 241, 190, 251, 231, 228 }, - { 3, 83, 131, 226, 160, 178, 237, 180, 251, 222, 205 }, - { 1, 49, 77, 184, 121, 155, 222, 159, 249, 216, 249 }, - { 1, 21, 32, 98, 98, 140, 152, 113, 233, 173, 243 } + { 118, 200, 248, 254, 228, 216, 254, 222, 255, 213, 128 }, + { 91, 166, 235, 254, 220, 212, 254, 223, 255, 233, 128 }, + { 16, 110, 186, 251, 197, 201, 255, 225, 255, 255, 128 }, + { 3, 72, 124, 239, 160, 186, 253, 209, 255, 239, 128 }, + { 1, 39, 66, 198, 106, 151, 248, 191, 255, 247, 128 }, + { 1, 14, 19, 94, 74, 124, 209, 109, 255, 245, 128 } }, { /* Coeff Band 4 */ 
- { 1, 202, 242, 253, 226, 212, 245, 205, 254, 226, 255 }, - { 83, 168, 219, 252, 212, 211, 244, 200, 250, 215, 255 }, - { 9, 143, 174, 245, 183, 197, 241, 194, 254, 217, 255 }, - { 1, 105, 129, 228, 154, 179, 233, 179, 253, 211, 255 }, - { 1, 47, 72, 177, 116, 152, 214, 157, 251, 209, 255 }, - { 1, 18, 26, 79, 94, 137, 150, 109, 246, 175, 248 } + { 112, 213, 248, 255, 231, 218, 255, 234, 255, 255, 128 }, + { 80, 172, 234, 254, 220, 216, 255, 233, 255, 255, 128 }, + { 11, 112, 182, 251, 195, 204, 255, 231, 255, 224, 128 }, + { 2, 73, 126, 241, 159, 186, 254, 219, 255, 255, 128 }, + { 1, 40, 69, 207, 111, 159, 249, 191, 255, 255, 128 }, + { 1, 16, 24, 83, 78, 138, 230, 134, 255, 239, 128 } }, { /* Coeff Band 5 */ - { 1, 205, 236, 254, 233, 221, 247, 201, 255, 220, 128 }, - { 87, 149, 205, 254, 211, 219, 245, 207, 255, 239, 128 }, - { 56, 122, 162, 248, 164, 195, 246, 211, 255, 231, 128 }, - { 26, 108, 163, 224, 149, 169, 240, 187, 255, 238, 255 }, - { 1, 54, 89, 171, 123, 152, 219, 148, 254, 226, 255 }, - { 1, 21, 34, 99, 90, 140, 174, 112, 252, 210, 255 } + { 100, 209, 245, 255, 236, 225, 248, 231, 255, 192, 128 }, + { 65, 164, 232, 255, 226, 221, 255, 240, 255, 255, 128 }, + { 11, 117, 186, 253, 203, 209, 255, 240, 255, 255, 128 }, + { 2, 83, 136, 245, 167, 191, 253, 222, 255, 255, 128 }, + { 1, 55, 88, 213, 122, 157, 248, 182, 255, 255, 128 }, + { 1, 10, 38, 58, 85, 43, 198, 107, 255, 255, 128 } } } } @@ -361,169 +361,169 @@ static const vp9_coeff_probs default_coef_probs_16x16[BLOCK_TYPES] = { { /* block Type 0 */ { /* Intra */ { /* Coeff Band 0 */ - { 22, 27, 75, 145, 125, 152, 158, 133, 203, 164, 150 }, - { 6, 27, 63, 124, 120, 150, 135, 127, 190, 154, 152 }, - { 1, 19, 36, 82, 107, 143, 101, 114, 176, 140, 152 } + { 8, 26, 101, 170, 141, 159, 166, 138, 205, 164, 158 }, + { 2, 25, 67, 119, 124, 152, 121, 123, 189, 145, 175 }, + { 1, 15, 28, 67, 102, 139, 95, 107, 191, 136, 187 } }, { /* Coeff Band 1 */ - { 1, 104, 143, 189, 150, 164, 194, 146, 239, 191, 205 }, 
- { 49, 105, 143, 188, 149, 164, 194, 146, 238, 191, 204 }, - { 29, 96, 133, 186, 147, 163, 194, 146, 238, 192, 202 }, - { 14, 79, 112, 178, 139, 160, 193, 144, 237, 191, 205 }, - { 5, 50, 74, 151, 119, 150, 187, 137, 237, 190, 205 }, - { 1, 20, 29, 76, 98, 138, 116, 111, 197, 153, 168 } + { 22, 73, 118, 160, 137, 157, 175, 132, 242, 184, 229 }, + { 43, 73, 116, 160, 137, 157, 177, 132, 242, 185, 231 }, + { 24, 66, 105, 158, 134, 156, 175, 133, 242, 185, 232 }, + { 9, 54, 85, 150, 126, 153, 175, 132, 242, 185, 231 }, + { 2, 34, 54, 123, 109, 145, 168, 124, 242, 183, 231 }, + { 1, 14, 22, 63, 93, 134, 108, 103, 214, 149, 206 } }, { /* Coeff Band 2 */ - { 1, 61, 124, 173, 145, 162, 176, 137, 234, 179, 218 }, - { 22, 56, 98, 158, 134, 157, 171, 133, 234, 178, 216 }, - { 7, 44, 70, 137, 122, 151, 162, 128, 232, 175, 214 }, - { 2, 33, 50, 114, 110, 146, 149, 121, 229, 169, 213 }, - { 1, 21, 32, 84, 100, 139, 127, 112, 220, 158, 207 }, - { 1, 11, 16, 46, 91, 133, 79, 100, 175, 133, 163 } + { 34, 123, 149, 186, 148, 163, 195, 143, 245, 195, 233 }, + { 34, 106, 147, 189, 149, 164, 198, 146, 246, 197, 234 }, + { 10, 81, 123, 186, 143, 162, 200, 147, 246, 198, 235 }, + { 2, 56, 87, 170, 127, 156, 201, 143, 248, 202, 234 }, + { 1, 35, 56, 138, 109, 146, 187, 133, 246, 196, 233 }, + { 1, 17, 27, 80, 93, 135, 136, 109, 229, 168, 215 } }, { /* Coeff Band 3 */ - { 1, 121, 166, 205, 160, 170, 204, 153, 240, 195, 210 }, - { 34, 101, 146, 198, 153, 167, 202, 152, 239, 193, 213 }, - { 9, 78, 118, 187, 142, 163, 198, 148, 238, 192, 211 }, - { 3, 60, 90, 170, 130, 157, 192, 143, 237, 190, 210 }, - { 1, 39, 59, 138, 112, 148, 177, 132, 233, 183, 207 }, - { 1, 18, 28, 75, 96, 137, 117, 110, 199, 153, 173 } + { 27, 159, 171, 208, 161, 171, 211, 155, 249, 205, 239 }, + { 17, 119, 162, 213, 160, 172, 218, 160, 250, 210, 238 }, + { 3, 81, 128, 207, 149, 168, 220, 161, 250, 213, 238 }, + { 1, 53, 87, 183, 128, 158, 217, 153, 251, 214, 239 }, + { 1, 31, 52, 143, 106, 145, 199, 137, 249, 205, 
235 }, + { 1, 14, 24, 77, 89, 133, 142, 109, 234, 174, 215 } }, { /* Coeff Band 4 */ - { 1, 148, 183, 220, 169, 175, 217, 164, 244, 203, 216 }, - { 24, 115, 157, 211, 159, 171, 214, 160, 243, 201, 217 }, - { 3, 81, 120, 197, 145, 166, 209, 155, 243, 200, 216 }, - { 1, 56, 88, 176, 129, 158, 200, 147, 241, 196, 216 }, - { 1, 33, 53, 134, 108, 147, 178, 132, 236, 184, 213 }, - { 1, 13, 20, 62, 91, 135, 107, 106, 197, 148, 179 } + { 24, 189, 200, 224, 177, 178, 221, 164, 250, 212, 234 }, + { 14, 136, 184, 230, 176, 181, 228, 172, 252, 215, 231 }, + { 2, 87, 140, 222, 159, 176, 230, 172, 252, 218, 238 }, + { 1, 54, 90, 193, 130, 161, 223, 160, 252, 217, 241 }, + { 1, 28, 49, 142, 103, 144, 202, 139, 250, 208, 233 }, + { 1, 12, 21, 73, 87, 132, 141, 106, 234, 176, 209 } }, { /* Coeff Band 5 */ - { 1, 195, 212, 238, 191, 187, 229, 176, 247, 210, 222 }, - { 22, 136, 185, 230, 176, 182, 226, 173, 247, 208, 219 }, - { 3, 88, 137, 215, 156, 173, 222, 167, 246, 207, 220 }, - { 1, 57, 94, 190, 133, 162, 213, 157, 245, 204, 217 }, - { 1, 30, 52, 138, 107, 147, 188, 135, 241, 193, 215 }, - { 1, 11, 19, 61, 89, 136, 110, 104, 203, 153, 175 } + { 32, 220, 227, 242, 199, 190, 234, 180, 251, 220, 232 }, + { 12, 155, 200, 242, 190, 191, 240, 187, 252, 225, 230 }, + { 1, 90, 144, 231, 164, 180, 240, 184, 253, 229, 239 }, + { 1, 53, 90, 198, 130, 162, 230, 165, 253, 226, 238 }, + { 1, 28, 50, 145, 103, 144, 207, 140, 251, 213, 236 }, + { 1, 13, 22, 74, 88, 132, 142, 107, 233, 176, 216 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 27, 72, 214, 225, 182, 183, 188, 159, 196, 172, 138 }, - { 13, 69, 175, 209, 167, 173, 188, 152, 216, 174, 171 }, - { 5, 52, 103, 162, 138, 160, 159, 137, 202, 164, 167 } + { 5, 61, 234, 230, 183, 183, 212, 164, 241, 199, 205 }, + { 3, 65, 184, 199, 164, 170, 182, 145, 232, 175, 223 }, + { 1, 56, 104, 154, 137, 158, 156, 131, 221, 165, 210 } }, { /* Coeff Band 1 */ - { 1, 174, 218, 237, 187, 186, 229, 176, 247, 212, 225 }, - { 122, 158, 210, 236, 185, 185, 
228, 174, 247, 210, 217 }, - { 48, 133, 188, 234, 182, 184, 228, 173, 247, 210, 229 }, - { 24, 109, 157, 227, 165, 179, 227, 172, 248, 211, 226 }, - { 12, 82, 114, 198, 137, 162, 223, 162, 247, 209, 220 }, - { 6, 49, 66, 127, 116, 148, 159, 125, 221, 175, 178 } + { 46, 183, 210, 229, 181, 182, 222, 165, 252, 214, 251 }, + { 122, 166, 202, 228, 179, 181, 223, 164, 252, 217, 250 }, + { 49, 125, 177, 225, 172, 179, 223, 163, 252, 215, 253 }, + { 22, 99, 142, 216, 155, 173, 222, 164, 252, 215, 250 }, + { 8, 69, 95, 180, 127, 156, 220, 153, 252, 214, 250 }, + { 2, 38, 51, 112, 109, 144, 159, 118, 243, 184, 232 } }, { /* Coeff Band 2 */ - { 1, 151, 207, 229, 180, 181, 223, 168, 247, 210, 211 }, - { 40, 122, 175, 222, 170, 177, 220, 164, 245, 207, 209 }, - { 4, 84, 125, 207, 151, 169, 215, 159, 244, 205, 209 }, - { 1, 58, 89, 180, 129, 159, 206, 150, 243, 202, 204 }, - { 1, 35, 53, 131, 108, 145, 181, 130, 238, 192, 198 }, - { 1, 16, 24, 67, 95, 137, 109, 106, 192, 153, 155 } + { 56, 196, 218, 236, 187, 185, 231, 172, 254, 223, 239 }, + { 38, 141, 195, 235, 182, 185, 233, 174, 254, 225, 232 }, + { 7, 93, 147, 225, 164, 178, 233, 173, 255, 226, 248 }, + { 2, 63, 101, 201, 137, 165, 227, 162, 254, 225, 248 }, + { 1, 39, 61, 159, 110, 148, 213, 146, 254, 218, 247 }, + { 1, 20, 33, 98, 95, 136, 166, 115, 247, 192, 231 } }, { /* Coeff Band 3 */ - { 1, 172, 215, 238, 189, 187, 231, 178, 248, 213, 210 }, - { 44, 136, 190, 233, 179, 183, 229, 175, 248, 212, 212 }, - { 4, 94, 144, 222, 161, 176, 226, 170, 247, 211, 212 }, - { 1, 66, 103, 201, 139, 165, 219, 161, 247, 208, 212 }, - { 1, 39, 61, 153, 111, 149, 199, 142, 243, 200, 210 }, - { 1, 16, 25, 74, 93, 136, 124, 109, 204, 160, 171 } + { 44, 206, 223, 240, 193, 189, 235, 177, 255, 231, 224 }, + { 27, 147, 200, 240, 188, 189, 238, 181, 255, 229, 239 }, + { 4, 93, 147, 230, 165, 180, 238, 180, 255, 231, 237 }, + { 1, 58, 95, 201, 134, 164, 229, 164, 255, 228, 254 }, + { 1, 32, 52, 152, 105, 146, 212, 142, 254, 221, 255 }, + { 1, 
14, 23, 81, 87, 133, 156, 109, 248, 191, 236 } }, { /* Coeff Band 4 */ - { 1, 185, 218, 241, 192, 190, 231, 180, 248, 213, 213 }, - { 32, 143, 191, 235, 181, 185, 229, 176, 248, 211, 215 }, - { 2, 97, 144, 223, 162, 177, 226, 171, 248, 210, 222 }, - { 1, 65, 102, 199, 138, 165, 218, 160, 247, 208, 214 }, - { 1, 35, 56, 145, 109, 147, 193, 137, 243, 198, 213 }, - { 1, 13, 21, 65, 91, 135, 115, 105, 205, 157, 179 } + { 39, 216, 227, 244, 200, 194, 237, 179, 255, 231, 255 }, + { 22, 152, 204, 243, 192, 193, 240, 186, 255, 231, 240 }, + { 2, 92, 148, 232, 167, 183, 239, 182, 255, 232, 255 }, + { 1, 55, 91, 200, 132, 164, 229, 164, 255, 230, 255 }, + { 1, 28, 47, 144, 99, 142, 211, 141, 255, 222, 251 }, + { 1, 13, 21, 75, 86, 131, 152, 103, 249, 193, 242 } }, { /* Coeff Band 5 */ - { 1, 209, 222, 243, 199, 192, 233, 181, 249, 215, 228 }, - { 23, 147, 197, 237, 185, 187, 231, 177, 249, 214, 227 }, - { 1, 94, 146, 224, 163, 178, 228, 172, 249, 213, 229 }, - { 1, 60, 99, 197, 136, 164, 220, 161, 248, 210, 227 }, - { 1, 31, 54, 141, 107, 147, 194, 136, 246, 201, 226 }, - { 1, 12, 21, 65, 90, 135, 119, 104, 217, 159, 201 } + { 34, 228, 234, 249, 213, 201, 246, 194, 255, 239, 255 }, + { 13, 161, 208, 247, 198, 197, 248, 197, 255, 243, 255 }, + { 1, 95, 148, 234, 166, 183, 246, 190, 255, 243, 236 }, + { 1, 55, 90, 199, 128, 161, 237, 168, 255, 239, 255 }, + { 1, 30, 51, 147, 102, 144, 218, 142, 255, 232, 254 }, + { 1, 16, 25, 86, 88, 131, 168, 109, 252, 207, 245 } } } }, { /* block Type 1 */ { /* Intra */ { /* Coeff Band 0 */ - { 203, 35, 218, 235, 189, 187, 194, 174, 175, 150, 127 }, - { 95, 50, 155, 211, 161, 173, 190, 163, 198, 161, 187 }, - { 21, 46, 93, 178, 130, 157, 200, 151, 224, 186, 191 } + { 204, 33, 217, 233, 185, 184, 199, 165, 204, 163, 162 }, + { 93, 48, 151, 209, 157, 171, 193, 161, 203, 167, 189 }, + { 18, 43, 86, 173, 126, 156, 203, 149, 231, 193, 200 } }, { /* Coeff Band 1 */ - { 1, 155, 198, 236, 183, 187, 223, 175, 250, 209, 255 }, - { 115, 147, 192, 235, 
182, 186, 222, 173, 244, 199, 222 }, - { 43, 124, 174, 234, 178, 186, 222, 176, 249, 201, 255 }, - { 13, 96, 143, 227, 164, 181, 223, 174, 248, 197, 237 }, - { 2, 59, 91, 197, 131, 163, 213, 162, 246, 198, 241 }, - { 1, 19, 29, 85, 96, 139, 128, 116, 215, 153, 204 } + { 43, 121, 184, 233, 173, 182, 235, 187, 248, 211, 237 }, + { 93, 117, 177, 232, 170, 180, 235, 182, 246, 204, 224 }, + { 33, 101, 158, 229, 165, 179, 235, 182, 245, 207, 236 }, + { 11, 81, 129, 221, 153, 173, 233, 179, 246, 203, 229 }, + { 2, 51, 82, 188, 124, 158, 224, 162, 248, 206, 228 }, + { 1, 18, 29, 88, 93, 137, 141, 116, 222, 161, 217 } }, { /* Coeff Band 2 */ - { 1, 91, 180, 231, 170, 180, 237, 181, 248, 213, 230 }, - { 39, 83, 139, 220, 153, 173, 233, 179, 243, 200, 228 }, - { 12, 63, 106, 203, 136, 163, 227, 170, 244, 200, 234 }, - { 5, 48, 79, 178, 123, 154, 215, 155, 244, 197, 232 }, - { 1, 32, 50, 125, 104, 144, 171, 130, 238, 181, 229 }, - { 1, 12, 18, 54, 88, 131, 92, 99, 201, 142, 193 } + { 63, 154, 199, 239, 184, 187, 236, 187, 248, 209, 221 }, + { 53, 128, 191, 239, 182, 188, 236, 188, 251, 209, 255 }, + { 14, 99, 160, 235, 172, 184, 235, 187, 249, 207, 240 }, + { 4, 75, 122, 219, 150, 173, 226, 177, 250, 204, 240 }, + { 1, 47, 77, 176, 121, 154, 207, 153, 245, 197, 237 }, + { 1, 18, 30, 84, 95, 136, 138, 112, 229, 167, 228 } }, { /* Coeff Band 3 */ - { 1, 152, 202, 238, 186, 188, 227, 178, 248, 205, 229 }, - { 63, 125, 183, 234, 178, 184, 225, 179, 248, 205, 228 }, - { 15, 100, 153, 227, 166, 180, 223, 173, 244, 198, 229 }, - { 4, 76, 119, 210, 149, 170, 215, 165, 245, 200, 221 }, - { 1, 46, 73, 165, 120, 154, 192, 144, 241, 189, 225 }, - { 1, 18, 27, 78, 95, 136, 124, 110, 219, 158, 207 } + { 48, 193, 210, 245, 194, 194, 241, 196, 252, 213, 255 }, + { 26, 145, 201, 245, 194, 196, 240, 195, 251, 215, 240 }, + { 6, 104, 165, 241, 179, 190, 239, 191, 253, 222, 255 }, + { 1, 73, 120, 218, 151, 174, 227, 172, 251, 219, 248 }, + { 1, 42, 69, 167, 118, 153, 205, 146, 251, 206, 245 }, + 
{ 1, 16, 27, 84, 89, 133, 148, 112, 240, 179, 238 } }, { /* Coeff Band 4 */ - { 1, 181, 211, 243, 197, 195, 228, 180, 249, 211, 252 }, - { 40, 138, 189, 237, 184, 189, 226, 178, 249, 208, 247 }, - { 7, 103, 153, 226, 166, 179, 223, 171, 249, 209, 224 }, - { 1, 71, 110, 200, 143, 166, 213, 159, 249, 206, 241 }, - { 1, 37, 60, 144, 111, 150, 189, 135, 245, 196, 232 }, - { 1, 15, 25, 75, 91, 134, 128, 108, 224, 163, 213 } + { 47, 213, 225, 248, 203, 199, 240, 194, 254, 211, 255 }, + { 32, 153, 212, 248, 201, 199, 241, 196, 251, 226, 255 }, + { 6, 102, 168, 240, 181, 190, 240, 187, 251, 225, 238 }, + { 1, 66, 111, 211, 146, 169, 229, 167, 255, 224, 244 }, + { 1, 36, 60, 157, 110, 148, 209, 143, 252, 215, 255 }, + { 1, 16, 27, 83, 90, 133, 152, 111, 244, 184, 250 } }, { /* Coeff Band 5 */ - { 1, 215, 219, 246, 205, 197, 236, 183, 252, 221, 235 }, - { 32, 146, 197, 239, 187, 188, 234, 180, 252, 223, 247 }, - { 6, 100, 150, 227, 167, 178, 233, 178, 252, 219, 233 }, - { 1, 63, 102, 203, 138, 167, 225, 162, 252, 216, 240 }, - { 1, 33, 56, 148, 109, 146, 202, 138, 250, 208, 237 }, - { 1, 15, 25, 75, 90, 131, 138, 108, 236, 171, 235 } + { 46, 225, 232, 252, 219, 208, 247, 204, 254, 233, 255 }, + { 24, 162, 214, 250, 208, 204, 247, 201, 254, 236, 255 }, + { 3, 106, 165, 242, 182, 191, 245, 196, 255, 231, 255 }, + { 1, 66, 108, 213, 142, 169, 235, 175, 255, 226, 247 }, + { 1, 35, 59, 158, 108, 147, 216, 146, 254, 220, 255 }, + { 1, 16, 27, 85, 90, 131, 159, 110, 248, 191, 252 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 228, 37, 245, 229, 199, 183, 200, 146, 240, 188, 223 }, - { 138, 62, 209, 217, 184, 177, 195, 148, 246, 186, 236 }, - { 42, 79, 146, 185, 156, 167, 183, 137, 247, 189, 251 } + { 229, 28, 245, 227, 195, 182, 200, 145, 253, 186, 255 }, + { 151, 44, 210, 214, 180, 175, 193, 146, 247, 185, 254 }, + { 55, 48, 131, 183, 148, 163, 194, 138, 249, 201, 246 } }, { /* Coeff Band 1 */ - { 1, 205, 242, 248, 210, 202, 245, 193, 233, 230, 255 }, - { 191, 185, 234, 249, 210, 
201, 245, 194, 255, 197, 128 }, - { 112, 148, 214, 247, 208, 201, 246, 192, 255, 238, 128 }, - { 76, 120, 182, 246, 190, 198, 246, 202, 255, 244, 128 }, - { 51, 95, 145, 232, 156, 177, 246, 199, 255, 233, 128 }, - { 47, 71, 104, 195, 129, 158, 230, 167, 253, 224, 255 } + { 126, 165, 239, 250, 206, 204, 248, 193, 255, 255, 128 }, + { 199, 158, 231, 248, 206, 198, 247, 200, 243, 255, 255 }, + { 102, 136, 209, 248, 203, 197, 247, 201, 255, 244, 128 }, + { 64, 116, 181, 245, 185, 196, 248, 201, 255, 233, 128 }, + { 44, 98, 151, 233, 162, 179, 248, 195, 255, 242, 128 }, + { 44, 81, 119, 204, 140, 165, 222, 163, 252, 217, 255 } }, { /* Coeff Band 2 */ - { 1, 182, 235, 247, 204, 195, 246, 202, 255, 227, 128 }, - { 104, 145, 204, 243, 189, 191, 242, 199, 255, 229, 128 }, - { 35, 107, 159, 234, 167, 181, 244, 188, 255, 221, 128 }, - { 17, 87, 126, 216, 151, 168, 242, 179, 255, 242, 128 }, - { 4, 68, 91, 182, 131, 154, 222, 153, 255, 228, 128 }, - { 1, 55, 64, 126, 105, 137, 193, 121, 247, 194, 255 } + { 108, 185, 239, 252, 216, 209, 248, 205, 255, 230, 128 }, + { 91, 155, 224, 252, 211, 205, 251, 211, 255, 230, 128 }, + { 20, 116, 185, 248, 194, 196, 252, 206, 255, 255, 128 }, + { 8, 86, 141, 239, 168, 185, 248, 196, 255, 247, 128 }, + { 3, 50, 92, 206, 125, 164, 242, 176, 255, 246, 128 }, + { 1, 21, 40, 131, 85, 141, 200, 131, 247, 236, 255 } }, { /* Coeff Band 3 */ - { 1, 210, 239, 249, 209, 201, 249, 205, 255, 255, 128 }, - { 91, 162, 218, 247, 200, 195, 250, 199, 255, 255, 128 }, - { 16, 116, 173, 242, 184, 190, 251, 193, 255, 205, 128 }, - { 5, 85, 133, 228, 156, 178, 244, 184, 255, 251, 128 }, - { 1, 55, 83, 196, 125, 164, 236, 168, 249, 249, 255 }, - { 1, 24, 39, 127, 92, 154, 183, 133, 255, 192, 128 } + { 94, 198, 243, 254, 226, 215, 254, 220, 255, 255, 128 }, + { 67, 164, 228, 253, 217, 208, 250, 216, 255, 213, 128 }, + { 14, 120, 185, 250, 196, 205, 248, 205, 255, 255, 128 }, + { 4, 83, 134, 238, 161, 181, 250, 202, 255, 233, 128 }, + { 1, 48, 82, 196, 119, 157, 
248, 178, 255, 255, 128 }, + { 1, 26, 38, 96, 84, 132, 221, 110, 255, 209, 128 } }, { /* Coeff Band 4 */ - { 1, 225, 242, 252, 218, 205, 251, 207, 255, 255, 128 }, - { 67, 174, 223, 249, 205, 199, 250, 210, 255, 234, 128 }, - { 10, 119, 177, 243, 186, 187, 253, 199, 255, 255, 128 }, - { 2, 81, 129, 228, 154, 177, 244, 193, 255, 251, 128 }, - { 1, 48, 78, 193, 122, 152, 240, 171, 255, 240, 128 }, - { 1, 19, 43, 116, 96, 128, 195, 135, 255, 234, 128 } + { 82, 210, 245, 255, 230, 215, 246, 221, 255, 255, 128 }, + { 55, 170, 231, 254, 222, 213, 255, 220, 255, 255, 128 }, + { 8, 118, 184, 251, 200, 207, 255, 219, 255, 255, 128 }, + { 2, 78, 126, 239, 156, 185, 251, 216, 255, 255, 128 }, + { 1, 43, 68, 189, 108, 151, 247, 187, 255, 228, 128 }, + { 1, 34, 40, 121, 114, 102, 205, 96, 255, 255, 128 } }, { /* Coeff Band 5 */ - { 1, 237, 210, 255, 213, 219, 255, 235, 255, 219, 128 }, - { 49, 163, 203, 252, 182, 198, 255, 235, 255, 255, 128 }, - { 23, 114, 156, 247, 196, 187, 255, 238, 255, 255, 128 }, - { 6, 71, 124, 248, 163, 202, 253, 203, 255, 255, 128 }, - { 1, 35, 74, 226, 160, 162, 246, 189, 255, 244, 128 }, - { 1, 16, 19, 136, 92, 164, 237, 108, 255, 255, 128 } + { 65, 228, 241, 255, 231, 214, 253, 222, 255, 255, 128 }, + { 33, 173, 226, 254, 222, 216, 255, 231, 255, 255, 128 }, + { 5, 120, 180, 251, 197, 205, 251, 226, 255, 233, 128 }, + { 1, 81, 130, 240, 159, 187, 251, 206, 255, 205, 128 }, + { 1, 51, 78, 198, 119, 168, 238, 181, 255, 171, 128 }, + { 1, 18, 49, 183, 119, 160, 255, 171, 128, 128, 128 } } } } @@ -532,169 +532,169 @@ static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES] = { { /* block Type 0 */ { /* Intra */ { /* Coeff Band 0 */ - { 38, 32, 115, 163, 140, 164, 143, 139, 167, 157, 105 }, - { 11, 27, 73, 131, 126, 154, 131, 129, 178, 151, 138 }, - { 2, 19, 36, 83, 107, 144, 102, 116, 169, 140, 149 } + { 37, 34, 137, 205, 154, 170, 151, 159, 109, 172, 44 }, + { 3, 26, 60, 113, 123, 154, 100, 124, 152, 131, 144 }, + { 1, 13, 23, 54, 102, 139, 
71, 106, 146, 123, 148 } }, { /* Coeff Band 1 */ - { 1, 116, 150, 184, 149, 164, 180, 140, 230, 178, 199 }, - { 71, 114, 149, 183, 150, 164, 181, 141, 229, 179, 203 }, - { 39, 102, 139, 182, 148, 164, 181, 142, 229, 179, 197 }, - { 16, 82, 117, 176, 143, 161, 180, 141, 230, 180, 200 }, - { 3, 49, 72, 148, 120, 152, 175, 134, 230, 178, 200 }, - { 1, 14, 21, 56, 94, 135, 92, 103, 179, 141, 158 } + { 26, 77, 122, 152, 144, 160, 143, 129, 216, 158, 201 }, + { 43, 76, 123, 152, 142, 159, 145, 129, 218, 160, 204 }, + { 25, 67, 112, 150, 141, 159, 144, 128, 218, 159, 204 }, + { 9, 54, 90, 143, 134, 156, 144, 127, 218, 159, 204 }, + { 2, 32, 52, 116, 114, 148, 138, 123, 217, 158, 207 }, + { 1, 10, 15, 44, 91, 133, 75, 99, 172, 128, 169 } }, { /* Coeff Band 2 */ - { 1, 56, 140, 180, 151, 164, 175, 140, 224, 175, 194 }, - { 28, 51, 101, 162, 135, 158, 170, 136, 222, 175, 193 }, - { 9, 38, 68, 137, 120, 151, 160, 129, 221, 172, 193 }, - { 3, 28, 47, 111, 108, 145, 145, 121, 216, 165, 192 }, - { 1, 17, 28, 76, 97, 137, 117, 110, 206, 152, 189 }, - { 1, 7, 11, 34, 89, 131, 62, 96, 154, 123, 148 } + { 32, 122, 143, 163, 145, 161, 162, 131, 226, 171, 206 }, + { 46, 105, 143, 168, 148, 161, 165, 133, 228, 174, 204 }, + { 17, 79, 116, 164, 142, 161, 166, 134, 229, 174, 206 }, + { 4, 53, 78, 143, 125, 153, 163, 129, 232, 175, 213 }, + { 1, 29, 44, 105, 105, 142, 147, 120, 228, 168, 211 }, + { 1, 12, 18, 52, 91, 133, 92, 100, 193, 140, 183 } }, { /* Coeff Band 3 */ - { 1, 129, 170, 198, 160, 169, 186, 147, 231, 181, 201 }, - { 45, 106, 147, 191, 152, 166, 186, 145, 228, 182, 197 }, - { 14, 81, 117, 178, 141, 161, 183, 143, 227, 184, 187 }, - { 4, 61, 89, 159, 129, 156, 178, 137, 226, 182, 174 }, - { 1, 39, 59, 126, 113, 146, 161, 126, 227, 176, 186 }, - { 1, 18, 26, 67, 98, 137, 103, 107, 190, 146, 166 } + { 33, 157, 160, 182, 149, 163, 185, 141, 236, 185, 218 }, + { 20, 116, 152, 188, 152, 165, 191, 144, 238, 188, 217 }, + { 4, 74, 114, 180, 141, 162, 192, 143, 240, 191, 219 }, + { 
1, 44, 69, 148, 119, 151, 183, 134, 243, 192, 227 }, + { 1, 25, 40, 110, 101, 141, 162, 121, 238, 181, 223 }, + { 1, 12, 18, 56, 89, 132, 103, 101, 206, 148, 196 } }, { /* Coeff Band 4 */ - { 1, 152, 180, 211, 166, 173, 206, 154, 243, 197, 216 }, - { 24, 112, 150, 202, 155, 169, 204, 152, 242, 196, 212 }, - { 3, 76, 112, 186, 141, 163, 199, 148, 241, 195, 212 }, - { 1, 51, 80, 164, 124, 155, 191, 141, 240, 192, 212 }, - { 1, 30, 48, 123, 106, 144, 170, 127, 235, 182, 210 }, - { 1, 13, 20, 60, 92, 134, 102, 105, 189, 146, 160 } + { 25, 183, 174, 207, 159, 171, 205, 156, 243, 194, 228 }, + { 13, 124, 159, 209, 157, 171, 213, 160, 243, 200, 228 }, + { 2, 75, 117, 199, 143, 166, 215, 158, 246, 205, 230 }, + { 1, 45, 73, 165, 119, 153, 204, 144, 248, 205, 231 }, + { 1, 26, 43, 120, 101, 141, 178, 127, 242, 192, 226 }, + { 1, 12, 19, 59, 89, 132, 112, 102, 215, 154, 201 } }, { /* Coeff Band 5 */ - { 1, 212, 207, 235, 190, 187, 220, 170, 240, 200, 207 }, - { 11, 134, 179, 226, 175, 181, 214, 166, 236, 195, 201 }, - { 1, 86, 133, 210, 155, 172, 210, 161, 236, 194, 201 }, - { 1, 54, 88, 180, 129, 159, 200, 150, 235, 191, 200 }, - { 1, 27, 46, 122, 104, 143, 170, 128, 230, 181, 198 }, - { 1, 8, 15, 45, 88, 132, 81, 99, 171, 135, 154 } + { 13, 232, 223, 239, 196, 188, 225, 172, 248, 209, 226 }, + { 4, 155, 187, 237, 184, 187, 233, 180, 250, 216, 232 }, + { 1, 86, 131, 222, 156, 175, 233, 176, 251, 218, 237 }, + { 1, 49, 79, 181, 123, 157, 218, 155, 251, 214, 237 }, + { 1, 26, 43, 125, 100, 141, 188, 130, 246, 199, 231 }, + { 1, 12, 20, 62, 88, 131, 119, 102, 222, 161, 209 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 50, 51, 216, 230, 193, 186, 193, 156, 219, 181, 168 }, - { 36, 44, 174, 210, 175, 174, 186, 149, 218, 179, 172 }, - { 12, 32, 100, 161, 140, 159, 162, 135, 209, 168, 172 } + { 51, 37, 227, 237, 205, 184, 200, 162, 231, 187, 207 }, + { 9, 36, 172, 204, 176, 173, 171, 145, 217, 167, 197 }, + { 21, 26, 112, 162, 145, 162, 155, 133, 215, 165, 191 } }, { /* Coeff Band 
1 */ - { 1, 179, 230, 238, 191, 185, 229, 171, 250, 213, 200 }, - { 167, 173, 225, 237, 190, 186, 231, 171, 245, 209, 223 }, - { 115, 153, 208, 237, 187, 186, 229, 174, 247, 215, 216 }, - { 71, 131, 182, 233, 176, 184, 228, 172, 247, 210, 238 }, - { 41, 108, 145, 214, 151, 169, 228, 169, 246, 208, 210 }, - { 19, 78, 95, 151, 128, 155, 168, 134, 218, 173, 175 } + { 79, 169, 219, 223, 176, 177, 222, 161, 248, 213, 244 }, + { 177, 166, 216, 222, 175, 178, 222, 161, 246, 212, 226 }, + { 119, 141, 196, 222, 174, 176, 220, 163, 250, 212, 236 }, + { 63, 117, 165, 217, 163, 175, 218, 161, 248, 209, 231 }, + { 30, 87, 117, 192, 138, 162, 216, 157, 247, 211, 224 }, + { 14, 56, 60, 119, 111, 146, 156, 123, 227, 171, 220 } }, { /* Coeff Band 2 */ - { 1, 147, 215, 231, 181, 181, 227, 171, 249, 212, 218 }, - { 65, 122, 179, 222, 168, 175, 223, 166, 248, 213, 216 }, - { 11, 85, 126, 204, 148, 167, 218, 159, 247, 208, 222 }, - { 4, 61, 89, 177, 128, 158, 206, 147, 246, 204, 208 }, - { 1, 38, 54, 130, 109, 145, 179, 128, 241, 191, 203 }, - { 1, 18, 24, 68, 96, 137, 110, 107, 196, 153, 145 } + { 88, 195, 225, 229, 181, 181, 229, 171, 252, 212, 221 }, + { 66, 145, 202, 229, 177, 180, 230, 172, 253, 220, 255 }, + { 12, 97, 152, 221, 162, 174, 230, 169, 253, 218, 249 }, + { 3, 66, 103, 198, 138, 165, 223, 159, 253, 219, 251 }, + { 1, 38, 61, 158, 110, 148, 209, 146, 252, 212, 238 }, + { 1, 19, 30, 94, 94, 136, 160, 114, 244, 185, 236 } }, { /* Coeff Band 3 */ - { 1, 182, 227, 239, 193, 187, 231, 177, 250, 214, 189 }, - { 73, 147, 202, 234, 182, 183, 230, 174, 248, 213, 219 }, - { 12, 104, 154, 223, 164, 176, 228, 171, 248, 210, 225 }, - { 3, 74, 113, 205, 143, 167, 222, 163, 246, 211, 214 }, - { 1, 45, 69, 163, 116, 151, 205, 144, 244, 202, 205 }, - { 1, 19, 30, 87, 96, 138, 134, 115, 199, 165, 133 } + { 79, 211, 228, 235, 186, 184, 233, 176, 255, 225, 255 }, + { 50, 151, 205, 235, 182, 185, 237, 177, 254, 223, 255 }, + { 7, 95, 149, 225, 162, 176, 236, 177, 254, 229, 219 }, + { 1, 62, 
98, 198, 134, 164, 228, 162, 254, 224, 238 }, + { 1, 35, 57, 156, 108, 148, 211, 143, 253, 215, 238 }, + { 1, 17, 26, 87, 89, 135, 161, 113, 246, 189, 237 } }, { /* Coeff Band 4 */ - { 1, 198, 229, 242, 196, 190, 235, 182, 248, 216, 224 }, - { 55, 154, 201, 236, 183, 185, 233, 179, 247, 214, 190 }, - { 5, 101, 150, 225, 163, 177, 229, 172, 245, 210, 205 }, - { 1, 68, 106, 203, 140, 165, 223, 165, 246, 209, 194 }, - { 1, 38, 62, 154, 112, 149, 199, 143, 241, 198, 191 }, - { 1, 14, 22, 66, 94, 133, 109, 107, 178, 154, 122 } + { 68, 225, 230, 239, 190, 187, 238, 180, 252, 234, 255 }, + { 39, 156, 206, 239, 185, 187, 241, 187, 254, 231, 255 }, + { 4, 94, 147, 229, 163, 178, 242, 183, 255, 236, 224 }, + { 1, 58, 94, 200, 132, 163, 232, 166, 254, 230, 255 }, + { 1, 32, 52, 153, 104, 146, 214, 144, 253, 222, 236 }, + { 1, 15, 24, 84, 89, 131, 159, 109, 247, 192, 240 } }, { /* Coeff Band 5 */ - { 1, 237, 226, 244, 205, 196, 225, 177, 243, 203, 210 }, - { 24, 154, 200, 238, 189, 189, 221, 173, 240, 199, 210 }, - { 2, 98, 150, 224, 167, 179, 217, 168, 240, 199, 207 }, - { 1, 61, 99, 193, 137, 164, 207, 155, 239, 197, 208 }, - { 1, 28, 49, 128, 105, 145, 177, 130, 234, 185, 206 }, - { 1, 9, 16, 48, 89, 134, 89, 99, 183, 140, 169 } + { 45, 248, 234, 248, 208, 198, 244, 193, 255, 233, 255 }, + { 19, 169, 204, 246, 195, 195, 246, 199, 255, 233, 255 }, + { 2, 98, 145, 235, 166, 183, 245, 192, 255, 235, 255 }, + { 1, 59, 92, 205, 131, 164, 236, 172, 254, 231, 250 }, + { 1, 33, 52, 152, 103, 145, 216, 144, 253, 221, 240 }, + { 1, 15, 24, 83, 87, 133, 156, 110, 246, 191, 242 } } } }, { /* block Type 1 */ { /* Intra */ { /* Coeff Band 0 */ - { 176, 22, 201, 227, 185, 189, 160, 172, 115, 141, 105 }, - { 64, 33, 120, 195, 149, 171, 170, 150, 182, 175, 139 }, - { 12, 33, 68, 151, 118, 153, 172, 138, 202, 175, 153 } + { 179, 23, 200, 222, 180, 182, 150, 152, 148, 135, 125 }, + { 60, 33, 113, 185, 143, 166, 168, 144, 189, 168, 152 }, + { 8, 31, 59, 137, 114, 150, 163, 132, 206, 171, 169 } 
}, { /* Coeff Band 1 */ - { 1, 125, 175, 228, 163, 176, 215, 171, 226, 193, 165 }, - { 127, 126, 174, 224, 163, 177, 212, 167, 225, 175, 235 }, - { 57, 114, 159, 223, 166, 175, 216, 167, 234, 182, 211 }, - { 23, 93, 133, 215, 150, 174, 216, 171, 233, 174, 176 }, - { 4, 56, 84, 178, 127, 157, 209, 149, 233, 197, 194 }, - { 1, 19, 26, 70, 93, 136, 114, 108, 193, 150, 167 } + { 27, 103, 158, 215, 157, 174, 209, 165, 239, 191, 233 }, + { 90, 101, 159, 213, 156, 173, 212, 164, 230, 185, 237 }, + { 39, 91, 146, 212, 155, 169, 212, 165, 232, 186, 207 }, + { 16, 75, 120, 203, 144, 169, 210, 161, 233, 189, 227 }, + { 3, 48, 76, 167, 120, 154, 199, 146, 236, 190, 218 }, + { 1, 18, 26, 72, 95, 137, 113, 109, 197, 146, 186 } }, { /* Coeff Band 2 */ - { 1, 76, 172, 217, 161, 172, 216, 165, 240, 188, 226 }, - { 41, 73, 136, 208, 152, 168, 214, 163, 233, 189, 248 }, - { 14, 59, 102, 195, 137, 163, 209, 158, 227, 184, 204 }, - { 4, 45, 75, 168, 122, 153, 197, 148, 231, 193, 178 }, - { 1, 33, 48, 118, 106, 148, 154, 126, 221, 168, 211 }, - { 1, 12, 16, 42, 90, 143, 61, 94, 159, 122, 167 } + { 45, 137, 177, 218, 166, 174, 206, 163, 234, 184, 214 }, + { 47, 117, 167, 218, 166, 176, 206, 164, 234, 182, 229 }, + { 16, 90, 136, 211, 153, 172, 205, 162, 236, 192, 231 }, + { 6, 65, 100, 188, 136, 162, 193, 155, 237, 177, 228 }, + { 1, 37, 58, 137, 113, 150, 166, 134, 229, 167, 234 }, + { 1, 13, 19, 55, 90, 132, 93, 103, 196, 137, 202 } }, { /* Coeff Band 3 */ - { 1, 134, 186, 226, 173, 180, 208, 172, 220, 179, 205 }, - { 60, 114, 164, 219, 166, 177, 207, 166, 231, 176, 208 }, - { 18, 90, 134, 208, 152, 175, 200, 164, 225, 181, 199 }, - { 7, 67, 102, 189, 139, 164, 192, 155, 225, 172, 209 }, - { 1, 39, 59, 137, 116, 151, 160, 132, 222, 166, 212 }, - { 1, 12, 17, 50, 93, 134, 82, 102, 181, 131, 190 } + { 36, 171, 194, 227, 177, 179, 208, 165, 244, 196, 245 }, + { 19, 129, 178, 227, 175, 184, 214, 165, 246, 188, 255 }, + { 5, 90, 139, 217, 158, 174, 213, 166, 246, 198, 255 }, + { 1, 59, 93, 
182, 134, 162, 193, 150, 242, 188, 241 }, + { 1, 31, 49, 122, 108, 145, 160, 127, 235, 172, 229 }, + { 1, 10, 18, 54, 89, 132, 101, 99, 213, 144, 217 } }, { /* Coeff Band 4 */ - { 1, 160, 195, 229, 180, 185, 204, 163, 243, 185, 223 }, - { 31, 124, 170, 221, 170, 179, 201, 164, 240, 183, 223 }, - { 5, 91, 134, 204, 154, 170, 191, 155, 236, 178, 232 }, - { 1, 62, 95, 173, 135, 159, 180, 145, 234, 179, 225 }, - { 1, 30, 48, 116, 109, 147, 152, 123, 231, 170, 224 }, - { 1, 11, 17, 53, 90, 133, 93, 102, 201, 139, 202 } + { 37, 197, 210, 233, 187, 186, 216, 172, 250, 202, 255 }, + { 20, 142, 191, 234, 183, 186, 219, 170, 249, 207, 246 }, + { 3, 93, 144, 222, 163, 176, 219, 170, 249, 204, 224 }, + { 1, 56, 88, 179, 130, 159, 199, 148, 246, 197, 243 }, + { 1, 29, 47, 123, 104, 144, 172, 127, 244, 185, 234 }, + { 1, 14, 22, 66, 91, 130, 120, 103, 225, 158, 221 } }, { /* Coeff Band 5 */ - { 1, 215, 203, 233, 186, 183, 226, 170, 249, 213, 225 }, - { 13, 133, 175, 224, 170, 178, 224, 167, 250, 212, 235 }, - { 1, 83, 127, 209, 151, 169, 221, 162, 251, 212, 243 }, - { 1, 53, 85, 182, 127, 157, 213, 153, 250, 210, 234 }, - { 1, 30, 47, 131, 103, 143, 190, 132, 248, 200, 240 }, - { 1, 14, 21, 67, 89, 129, 126, 104, 232, 167, 223 } + { 19, 227, 223, 245, 203, 194, 238, 187, 251, 225, 217 }, + { 6, 152, 192, 242, 189, 190, 241, 190, 253, 225, 255 }, + { 1, 89, 138, 228, 161, 177, 239, 181, 254, 224, 248 }, + { 1, 52, 84, 188, 127, 157, 224, 159, 253, 222, 247 }, + { 1, 29, 47, 132, 102, 140, 196, 132, 251, 208, 244 }, + { 1, 14, 23, 71, 90, 133, 134, 103, 239, 174, 233 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 193, 35, 239, 239, 206, 194, 195, 152, 244, 200, 255 }, - { 77, 57, 198, 224, 192, 187, 181, 145, 242, 190, 248 }, - { 21, 54, 149, 197, 172, 171, 169, 138, 238, 178, 241 } + { 205, 14, 245, 235, 216, 189, 190, 146, 249, 201, 255 }, + { 97, 19, 213, 210, 194, 174, 176, 139, 241, 183, 250 }, + { 31, 20, 144, 183, 160, 167, 171, 132, 240, 184, 253 } }, { /* Coeff Band 1 */ - 
{ 1, 227, 241, 247, 195, 195, 245, 199, 255, 255, 128 }, - { 189, 223, 237, 249, 199, 200, 238, 198, 255, 255, 128 }, - { 125, 204, 226, 247, 198, 199, 251, 213, 255, 255, 128 }, - { 101, 167, 207, 246, 193, 201, 245, 168, 255, 255, 128 }, - { 89, 121, 174, 237, 169, 184, 246, 204, 255, 255, 128 }, - { 71, 79, 135, 216, 149, 170, 234, 168, 255, 226, 128 } + { 137, 182, 245, 254, 221, 216, 255, 160, 128, 128, 128 }, + { 231, 185, 242, 251, 218, 205, 255, 233, 128, 128, 128 }, + { 170, 175, 229, 252, 205, 209, 255, 211, 128, 128, 128 }, + { 107, 157, 213, 250, 199, 205, 251, 207, 255, 255, 128 }, + { 77, 126, 183, 243, 182, 183, 252, 206, 255, 255, 128 }, + { 69, 96, 149, 229, 157, 170, 247, 169, 255, 255, 128 } }, { /* Coeff Band 2 */ - { 1, 207, 235, 250, 220, 204, 250, 201, 255, 255, 128 }, - { 103, 160, 210, 245, 195, 188, 249, 195, 255, 255, 128 }, - { 33, 130, 165, 234, 168, 183, 253, 199, 255, 255, 128 }, - { 10, 113, 138, 223, 146, 180, 248, 199, 255, 255, 128 }, - { 1, 88, 104, 172, 112, 174, 221, 126, 255, 217, 128 }, - { 1, 87, 70, 160, 68, 140, 171, 85, 255, 85, 128 } + { 107, 196, 241, 252, 211, 208, 255, 210, 128, 128, 128 }, + { 92, 162, 221, 249, 203, 195, 255, 199, 128, 128, 128 }, + { 20, 108, 181, 244, 190, 191, 250, 200, 255, 255, 128 }, + { 7, 80, 132, 241, 172, 197, 253, 191, 255, 255, 128 }, + { 2, 43, 75, 219, 122, 150, 255, 203, 128, 128, 128 }, + { 1, 15, 48, 98, 51, 192, 255, 160, 128, 128, 128 } }, { /* Coeff Band 3 */ - { 1, 230, 240, 249, 209, 200, 243, 199, 255, 228, 128 }, - { 60, 178, 218, 247, 203, 200, 247, 198, 255, 255, 128 }, - { 8, 119, 162, 241, 188, 185, 252, 202, 255, 255, 128 }, - { 2, 78, 119, 218, 149, 162, 247, 184, 255, 255, 128 }, - { 1, 48, 81, 172, 142, 148, 239, 140, 255, 239, 128 }, - { 1, 29, 23, 82, 96, 102, 181, 149, 255, 255, 128 } + { 107, 202, 244, 254, 226, 215, 255, 192, 128, 128, 128 }, + { 77, 167, 224, 252, 215, 212, 255, 235, 128, 128, 128 }, + { 14, 117, 179, 249, 191, 196, 255, 212, 128, 128, 128 }, + 
{ 3, 84, 134, 237, 160, 194, 248, 216, 255, 255, 128 }, + { 1, 57, 84, 216, 145, 136, 255, 161, 128, 128, 128 }, + { 1, 1, 1, 255, 128, 255, 128, 128, 128, 128, 128 } }, { /* Coeff Band 4 */ - { 1, 240, 241, 250, 216, 203, 248, 188, 255, 255, 128 }, - { 60, 180, 222, 247, 202, 195, 247, 191, 255, 255, 128 }, - { 9, 120, 169, 240, 190, 189, 249, 181, 255, 255, 128 }, - { 2, 85, 126, 223, 154, 178, 240, 184, 255, 255, 128 }, - { 1, 47, 90, 198, 132, 158, 233, 162, 255, 224, 128 }, - { 1, 33, 34, 143, 116, 156, 217, 128, 255, 255, 128 } + { 88, 219, 248, 255, 239, 225, 255, 255, 128, 128, 128 }, + { 61, 178, 234, 255, 227, 227, 255, 217, 128, 128, 128 }, + { 6, 127, 188, 252, 201, 211, 255, 244, 128, 128, 128 }, + { 1, 83, 130, 248, 173, 197, 255, 175, 128, 128, 128 }, + { 1, 61, 66, 211, 121, 188, 255, 213, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } }, { /* Coeff Band 5 */ - { 1, 250, 193, 249, 188, 193, 255, 236, 255, 255, 128 }, - { 35, 187, 185, 247, 154, 184, 255, 247, 255, 171, 128 }, - { 20, 132, 114, 223, 172, 165, 255, 229, 255, 255, 128 }, - { 4, 97, 96, 218, 96, 162, 255, 164, 255, 253, 128 }, - { 1, 57, 35, 197, 154, 173, 254, 215, 255, 255, 128 }, - { 1, 8, 2, 161, 10, 57, 230, 228, 255, 171, 128 } + { 73, 243, 250, 255, 244, 220, 255, 205, 128, 128, 128 }, + { 42, 197, 242, 255, 237, 227, 242, 166, 255, 255, 128 }, + { 10, 137, 197, 252, 214, 199, 255, 238, 128, 128, 128 }, + { 2, 85, 134, 242, 163, 185, 224, 238, 255, 255, 128 }, + { 1, 70, 69, 199, 110, 64, 255, 213, 128, 128, 128 }, + { 1, 1, 1, 1, 128, 128, 255, 1, 128, 128, 128 } } } } diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index 8d9d4a525..a1dbd3e85 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -42,21 +42,174 @@ DECLARE_ALIGNED(16, const uint8_t, vp9_norm[256]) = { }; // Unified coefficient band structure used by all block sizes -DECLARE_ALIGNED(16, const int, vp9_coef_bands[32]) = { - 0, 1, 2, 3, 3, 3, 4, 4, - 4, 
4, 4, 4, 4, 4, 4, 5, +DECLARE_ALIGNED(16, const int, vp9_coef_bands8x8[64]) = { + 0, 1, 2, 3, 4, 4, 5, 5, + 1, 2, 3, 4, 4, 5, 5, 5, + 2, 3, 4, 4, 5, 5, 5, 5, + 3, 4, 4, 5, 5, 5, 5, 5, + 4, 4, 5, 5, 5, 5, 5, 5, + 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }; DECLARE_ALIGNED(16, const int, vp9_coef_bands4x4[16]) = { - 0, 1, 2, 3, 3, 3, 4, 4, - 4, 4, 5, 5, 5, 5, 5, 5 + 0, 1, 2, 3, + 1, 2, 3, 4, + 2, 3, 4, 5, + 3, 4, 5, 5 }; DECLARE_ALIGNED(16, const uint8_t, vp9_pt_energy_class[MAX_ENTROPY_TOKENS]) = { 0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5 }; +#if CONFIG_SCATTERSCAN +DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_4x4[16]) = { + 0, 4, 1, 5, + 8, 2, 12, 9, + 3, 6, 13, 10, + 7, 14, 11, 15, +}; + +DECLARE_ALIGNED(16, const int, vp9_col_scan_4x4[16]) = { + 0, 4, 8, 1, + 12, 5, 9, 2, + 13, 6, 10, 3, + 7, 14, 11, 15, +}; + +DECLARE_ALIGNED(16, const int, vp9_row_scan_4x4[16]) = { + 0, 1, 4, 2, + 5, 3, 6, 8, + 9, 7, 12, 10, + 13, 11, 14, 15, +}; + +DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]) = { + 0, 8, 1, 16, 9, 2, 17, 24, + 10, 3, 18, 25, 32, 11, 4, 26, + 33, 19, 40, 12, 34, 27, 5, 41, + 20, 48, 13, 35, 42, 28, 21, 6, + 49, 56, 36, 43, 29, 7, 14, 50, + 57, 44, 22, 37, 15, 51, 58, 30, + 45, 23, 52, 59, 38, 31, 60, 53, + 46, 39, 61, 54, 47, 62, 55, 63, +}; + +DECLARE_ALIGNED(16, const int, vp9_col_scan_8x8[64]) = { + 0, 8, 16, 1, 24, 9, 32, 17, + 2, 40, 25, 10, 33, 18, 48, 3, + 26, 41, 11, 56, 19, 34, 4, 49, + 27, 42, 12, 35, 20, 57, 50, 28, + 5, 43, 13, 36, 58, 51, 21, 44, + 6, 29, 59, 37, 14, 52, 22, 7, + 45, 60, 30, 15, 38, 53, 23, 46, + 31, 61, 39, 54, 47, 62, 55, 63, +}; + +DECLARE_ALIGNED(16, const int, vp9_row_scan_8x8[64]) = { + 0, 1, 2, 8, 9, 3, 16, 10, + 4, 17, 11, 24, 5, 18, 25, 12, + 19, 26, 32, 6, 13, 20, 33, 27, + 7, 34, 40, 21, 28, 41, 14, 35, + 48, 42, 29, 36, 49, 22, 43, 15, + 56, 37, 50, 44, 30, 57, 23, 51, + 58, 45, 38, 52, 31, 59, 53, 46, + 60, 39, 61, 47, 54, 55, 62, 63, +}; + +DECLARE_ALIGNED(16, const 
int, vp9_default_zig_zag1d_16x16[256]) = { + 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, 80, + 50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, 21, 52, + 98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, 129, 38, 69, + 100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116, 101, 131, 160, 146, + 55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147, 176, 162, 87, 56, 25, + 133, 118, 177, 148, 72, 103, 41, 163, 10, 192, 178, 88, 57, 134, 149, 119, + 26, 164, 73, 104, 193, 42, 179, 208, 11, 135, 89, 165, 120, 150, 58, 194, + 180, 27, 74, 209, 105, 151, 136, 43, 90, 224, 166, 195, 181, 121, 210, 59, + 12, 152, 106, 167, 196, 75, 137, 225, 211, 240, 182, 122, 91, 28, 197, 13, + 226, 168, 183, 153, 44, 212, 138, 107, 241, 60, 29, 123, 198, 184, 227, 169, + 242, 76, 213, 154, 45, 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108, + 77, 155, 30, 15, 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140, + 230, 62, 216, 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141, + 63, 232, 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142, + 219, 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159, 251, + 190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239, 255, +}; + +DECLARE_ALIGNED(16, const int, vp9_col_scan_16x16[256]) = { + 0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18, 81, + 34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51, 129, 4, + 67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161, 68, 115, 21, + 146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6, 116, 193, 147, 85, + 22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70, 7, 148, 194, 86, 179, + 225, 23, 133, 39, 164, 8, 102, 210, 241, 55, 195, 118, 149, 71, 180, 24, + 87, 226, 134, 165, 211, 40, 103, 56, 72, 150, 196, 242, 119, 9, 181, 227, + 88, 166, 25, 135, 41, 104, 212, 57, 151, 197, 120, 73, 243, 182, 136, 167, + 213, 89, 10, 228, 105, 152, 198, 26, 42, 121, 183, 244, 168, 58, 137, 229, + 74, 214, 90, 153, 199, 
184, 11, 106, 245, 27, 122, 230, 169, 43, 215, 59, + 200, 138, 185, 246, 75, 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170, + 60, 247, 232, 76, 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202, + 233, 171, 61, 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125, + 62, 172, 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79, + 126, 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205, 236, + 159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239, 255, +}; + +DECLARE_ALIGNED(16, const int, vp9_row_scan_16x16[256]) = { + 0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48, 20, + 49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9, 66, 52, + 23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54, 83, 97, 69, + 25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26, 41, 56, 114, 100, + 13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72, 116, 14, 87, 130, 102, + 144, 73, 131, 117, 28, 58, 15, 88, 43, 145, 103, 132, 146, 118, 74, 160, + 89, 133, 104, 29, 59, 147, 119, 44, 161, 148, 90, 105, 134, 162, 120, 176, + 75, 135, 149, 30, 60, 163, 177, 45, 121, 91, 106, 164, 178, 150, 192, 136, + 165, 179, 31, 151, 193, 76, 122, 61, 137, 194, 107, 152, 180, 208, 46, 166, + 167, 195, 92, 181, 138, 209, 123, 153, 224, 196, 77, 168, 210, 182, 240, 108, + 197, 62, 154, 225, 183, 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170, + 124, 155, 199, 78, 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186, + 156, 229, 243, 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110, + 157, 245, 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111, 158, + 188, 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220, 175, + 190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254, 255, +}; + +DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { + 0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, 160, 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, 162, 193, 68, 131, 37, 100, + 
225, 194, 256, 163, 69, 132, 6, 226, 257, 288, 195, 101, 164, 38, 258, 7, 227, 289, 133, 320, 70, 196, 165, 290, 259, 228, 39, 321, 102, 352, 8, 197, + 71, 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292, 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, 293, 41, 417, 199, 136, + 262, 387, 448, 325, 356, 10, 73, 418, 231, 168, 449, 294, 388, 105, 419, 263, 42, 200, 357, 450, 137, 480, 74, 326, 232, 11, 389, 169, 295, 420, 106, 451, + 481, 358, 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421, 75, 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391, 453, 139, 44, 234, + 484, 297, 360, 171, 76, 515, 545, 266, 329, 454, 13, 423, 392, 203, 108, 546, 485, 576, 298, 235, 140, 361, 516, 330, 172, 547, 45, 424, 455, 267, 393, 577, + 486, 77, 204, 517, 362, 548, 608, 14, 456, 299, 578, 109, 236, 425, 394, 487, 609, 331, 141, 579, 518, 46, 268, 15, 173, 549, 610, 640, 363, 78, 519, 488, + 300, 205, 16, 457, 580, 426, 550, 395, 110, 237, 611, 641, 332, 672, 142, 642, 269, 458, 47, 581, 427, 489, 174, 364, 520, 612, 551, 673, 79, 206, 301, 643, + 704, 17, 111, 490, 674, 238, 582, 48, 521, 613, 333, 396, 459, 143, 270, 552, 644, 705, 736, 365, 80, 675, 583, 175, 428, 706, 112, 302, 207, 614, 553, 49, + 645, 522, 737, 397, 768, 144, 334, 18, 676, 491, 239, 615, 707, 584, 81, 460, 176, 271, 738, 429, 113, 800, 366, 208, 523, 708, 646, 554, 677, 769, 19, 145, + 585, 739, 240, 303, 50, 461, 616, 398, 647, 335, 492, 177, 82, 770, 832, 555, 272, 430, 678, 209, 709, 114, 740, 801, 617, 51, 304, 679, 524, 367, 586, 241, + 20, 146, 771, 864, 83, 802, 648, 493, 399, 273, 336, 710, 178, 462, 833, 587, 741, 115, 305, 711, 368, 525, 618, 803, 210, 896, 680, 834, 772, 52, 649, 147, + 431, 494, 556, 242, 400, 865, 337, 21, 928, 179, 742, 84, 463, 274, 369, 804, 650, 557, 743, 960, 835, 619, 773, 306, 211, 526, 432, 992, 588, 712, 116, 243, + 866, 495, 681, 558, 805, 589, 401, 897, 53, 338, 148, 682, 867, 464, 275, 22, 370, 433, 307, 620, 527, 
836, 774, 651, 713, 744, 85, 180, 621, 465, 929, 775, + 496, 898, 212, 339, 244, 402, 590, 117, 559, 714, 434, 23, 868, 930, 806, 683, 528, 652, 371, 961, 149, 837, 54, 899, 745, 276, 993, 497, 403, 622, 181, 776, + 746, 529, 560, 435, 86, 684, 466, 308, 591, 653, 715, 807, 340, 869, 213, 962, 245, 838, 561, 931, 808, 592, 118, 498, 372, 623, 685, 994, 467, 654, 747, 900, + 716, 277, 150, 55, 24, 404, 530, 839, 777, 655, 182, 963, 840, 686, 778, 309, 870, 341, 87, 499, 809, 624, 593, 436, 717, 932, 214, 246, 995, 718, 625, 373, + 562, 25, 119, 901, 531, 468, 964, 748, 810, 278, 779, 500, 563, 656, 405, 687, 871, 872, 594, 151, 933, 749, 841, 310, 657, 626, 595, 437, 688, 183, 996, 965, + 902, 811, 342, 750, 689, 719, 532, 56, 215, 469, 934, 374, 247, 720, 780, 564, 781, 842, 406, 26, 751, 903, 873, 57, 279, 627, 501, 658, 843, 997, 812, 904, + 88, 813, 438, 752, 935, 936, 311, 596, 533, 690, 343, 966, 874, 89, 120, 470, 721, 875, 659, 782, 565, 998, 375, 844, 845, 27, 628, 967, 121, 905, 968, 152, + 937, 814, 753, 502, 691, 783, 184, 153, 722, 407, 58, 815, 999, 660, 597, 723, 534, 906, 216, 439, 907, 248, 185, 876, 846, 692, 784, 629, 90, 969, 280, 754, + 938, 939, 217, 847, 566, 471, 785, 816, 877, 1000, 249, 878, 661, 503, 312, 970, 755, 122, 817, 281, 344, 786, 598, 724, 28, 59, 29, 154, 535, 630, 376, 1001, + 313, 908, 186, 91, 848, 849, 345, 909, 940, 879, 408, 818, 693, 1002, 971, 941, 567, 377, 218, 756, 910, 787, 440, 123, 880, 725, 662, 250, 819, 1003, 282, 972, + 850, 599, 472, 409, 155, 441, 942, 757, 788, 694, 911, 881, 314, 631, 973, 504, 187, 1004, 346, 473, 851, 943, 820, 726, 60, 505, 219, 378, 912, 974, 30, 31, + 536, 882, 1005, 92, 251, 663, 944, 913, 283, 695, 883, 568, 1006, 975, 410, 442, 945, 789, 852, 537, 1007, 124, 315, 61, 758, 821, 600, 914, 976, 569, 474, 347, + 156, 1008, 915, 93, 977, 506, 946, 727, 379, 884, 188, 632, 601, 1009, 790, 853, 978, 947, 220, 411, 125, 633, 664, 759, 252, 443, 916, 538, 157, 822, 62, 570, + 979, 284, 1010, 
885, 948, 189, 475, 94, 316, 665, 696, 1011, 854, 791, 980, 221, 348, 63, 917, 602, 380, 507, 253, 126, 697, 823, 634, 285, 728, 949, 886, 95, + 158, 539, 1012, 317, 412, 444, 760, 571, 190, 981, 729, 918, 127, 666, 349, 381, 476, 855, 761, 1013, 603, 222, 159, 698, 950, 508, 254, 792, 286, 635, 887, 793, + 413, 191, 982, 445, 540, 318, 730, 667, 223, 824, 919, 1014, 350, 477, 572, 255, 825, 951, 762, 509, 604, 856, 382, 699, 287, 319, 636, 983, 794, 414, 541, 731, + 857, 888, 351, 446, 573, 1015, 668, 889, 478, 826, 383, 763, 605, 920, 510, 637, 415, 700, 921, 858, 447, 952, 542, 795, 479, 953, 732, 890, 669, 574, 511, 984, + 827, 985, 922, 1016, 764, 606, 543, 701, 859, 638, 1017, 575, 796, 954, 733, 891, 670, 607, 828, 986, 765, 923, 639, 1018, 702, 860, 955, 671, 892, 734, 797, 703, + 987, 829, 1019, 766, 924, 735, 861, 956, 988, 893, 767, 798, 830, 1020, 925, 957, 799, 862, 831, 989, 894, 1021, 863, 926, 895, 958, 990, 1022, 927, 959, 991, 1023, +}; +#else // CONFIG_SCATTERSCAN DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_4x4[16]) = { 0, 1, 4, 8, 5, 2, 3, 6, @@ -85,6 +238,28 @@ DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]) = { 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, }; +DECLARE_ALIGNED(16, const int, vp9_col_scan_8x8[64]) = { + 0, 8, 16, 24, 32, 40, 48, 56, + 1, 9, 17, 25, 33, 41, 49, 57, + 2, 10, 18, 26, 34, 42, 50, 58, + 3, 11, 19, 27, 35, 43, 51, 59, + 4, 12, 20, 28, 36, 44, 52, 60, + 5, 13, 21, 29, 37, 45, 53, 61, + 6, 14, 22, 30, 38, 46, 54, 62, + 7, 15, 23, 31, 39, 47, 55, 63, +}; + +DECLARE_ALIGNED(16, const int, vp9_row_scan_8x8[64]) = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, +}; + DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = { 0, 1, 16, 32, 17, 2, 3, 18, 33, 48, 
64, 49, 34, 19, 4, 5, @@ -120,6 +295,44 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = { 237, 252, 253, 238, 223, 239, 254, 255, }; +DECLARE_ALIGNED(16, const int, vp9_col_scan_16x16[256]) = { + 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, + 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193, 209, 225, 241, + 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162, 178, 194, 210, 226, 242, + 3, 19, 35, 51, 67, 83, 99, 115, 131, 147, 163, 179, 195, 211, 227, 243, + 4, 20, 36, 52, 68, 84, 100, 116, 132, 148, 164, 180, 196, 212, 228, 244, + 5, 21, 37, 53, 69, 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245, + 6, 22, 38, 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246, + 7, 23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247, + 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, 232, 248, + 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185, 201, 217, 233, 249, + 10, 26, 42, 58, 74, 90, 106, 122, 138, 154, 170, 186, 202, 218, 234, 250, + 11, 27, 43, 59, 75, 91, 107, 123, 139, 155, 171, 187, 203, 219, 235, 251, + 12, 28, 44, 60, 76, 92, 108, 124, 140, 156, 172, 188, 204, 220, 236, 252, + 13, 29, 45, 61, 77, 93, 109, 125, 141, 157, 173, 189, 205, 221, 237, 253, + 14, 30, 46, 62, 78, 94, 110, 126, 142, 158, 174, 190, 206, 222, 238, 254, + 15, 31, 47, 63, 79, 95, 111, 127, 143, 159, 175, 191, 207, 223, 239, 255, +}; + +DECLARE_ALIGNED(16, const int, vp9_row_scan_16x16[256]) = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, + 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, 120, 
121, 122, 123, 124, 125, 126, 127, + 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, + 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, + 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, + 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, + 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, + 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, + 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, +}; + DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { 0, 1, 32, 64, 33, 2, 3, 34, 65, 96, 128, 97, 66, 35, 4, 5, 36, 67, 98, 129, 160, 192, 161, 130, 99, 68, 37, 6, 7, 38, 69, 100, 131, 162, 193, 224, 256, 225, 194, 163, 132, 101, 70, 39, 8, 9, 40, 71, 102, 133, 164, 195, 226, 257, 288, 320, 289, 258, 227, 196, 165, 134, 103, 72, @@ -154,6 +367,7 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { 951, 920, 889, 858, 827, 796, 765, 734, 703, 735, 766, 797, 828, 859, 890, 921, 952, 983, 1014, 1015, 984, 953, 922, 891, 860, 829, 798, 767, 799, 830, 861, 892, 923, 954, 985, 1016, 1017, 986, 955, 924, 893, 862, 831, 863, 894, 925, 956, 987, 1018, 1019, 988, 957, 926, 895, 927, 958, 989, 1020, 1021, 990, 959, 991, 1022, 1023, }; +#endif // CONFIG_SCATTERSCAN /* Array indices are identical to previously-existing CONTEXT_NODE indices */ @@ -1886,25 +2100,25 @@ vp9_extra_bit_struct vp9_extra_bits[12] = { // within the current block. // // For now it just returns the previously used context. -int vp9_get_coef_context(int * recent_energy, int token) { - // int token_energy; - // int av_energy; - - /*token_energy = ((token != DCT_EOB_TOKEN) ? 
token : 0); - if (!token_energy) { - if (!(*recent_energy)) { - av_energy = 0; +#define MAX_NEIGHBORS 2 +int vp9_get_coef_context(const int *scan, const int *neighbors, + int nb_pad, uint8_t *token_cache, int c, int l) { + int eob = l; + assert(nb_pad == MAX_NEIGHBORS); + if (c == eob - 1) { + return 0; + } else { + int ctx; + c++; + assert(neighbors[MAX_NEIGHBORS * c + 0] >= 0); + if (neighbors[MAX_NEIGHBORS * c + 1] >= 0) { + ctx = (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] + + token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1; } else { - av_energy = 1; + ctx = token_cache[neighbors[MAX_NEIGHBORS * c + 0]]; } - } else { - av_energy = ((token_energy + *recent_energy + 1) >> 1) + 1; - if (av_energy > DCT_VAL_CATEGORY6) - av_energy = DCT_VAL_CATEGORY6; + return vp9_pt_energy_class[ctx]; } - *recent_energy = token_energy;*/ - - return vp9_pt_energy_class[token]; }; void vp9_default_coef_probs(VP9_COMMON *pc) { @@ -2063,7 +2277,152 @@ void vp9_adjust_default_coef_probs(VP9_COMMON *cm) { } #endif +// Neighborhood 5-tuples for various scans and blocksizes, +// in {top, left, topleft, topright, bottomleft} order +// for each position in raster scan order. +// -1 indicates the neighbor does not exist. 
+DECLARE_ALIGNED(16, int, + vp9_default_zig_zag1d_4x4_neighbors[16 * MAX_NEIGHBORS]); +DECLARE_ALIGNED(16, int, + vp9_col_scan_4x4_neighbors[16 * MAX_NEIGHBORS]); +DECLARE_ALIGNED(16, int, + vp9_row_scan_4x4_neighbors[16 * MAX_NEIGHBORS]); +DECLARE_ALIGNED(16, int, + vp9_col_scan_8x8_neighbors[64 * MAX_NEIGHBORS]); +DECLARE_ALIGNED(16, int, + vp9_row_scan_8x8_neighbors[64 * MAX_NEIGHBORS]); +DECLARE_ALIGNED(16, int, + vp9_default_zig_zag1d_8x8_neighbors[64 * MAX_NEIGHBORS]); +DECLARE_ALIGNED(16, int, + vp9_col_scan_16x16_neighbors[256 * MAX_NEIGHBORS]); +DECLARE_ALIGNED(16, int, + vp9_row_scan_16x16_neighbors[256 * MAX_NEIGHBORS]); +DECLARE_ALIGNED(16, int, + vp9_default_zig_zag1d_16x16_neighbors[256 * MAX_NEIGHBORS]); +DECLARE_ALIGNED(16, int, + vp9_default_zig_zag1d_32x32_neighbors[1024 * MAX_NEIGHBORS]); + +static int find_in_scan(const int *scan, int l, int idx) { + int n, l2 = l * l; + for (n = 0; n < l2; n++) { + int rc = scan[n]; + if (rc == idx) + return n; + } + assert(0); + return -1; +} +static void init_scan_neighbors(const int *scan, int l, int *neighbors, + int max_neighbors) { + int l2 = l * l; + int n, i, j; + + for (n = 0; n < l2; n++) { + int rc = scan[n]; + assert(max_neighbors == MAX_NEIGHBORS); + i = rc / l; + j = rc % l; + if (i > 0 && j > 0) { + // col/row scan is used for adst/dct, and generally means that + // energy decreases to zero much faster in the dimension in + // which ADST is used compared to the direction in which DCT + // is used. Likewise, we find much higher correlation between + // coefficients within the direction in which DCT is used. + // Therefore, if we use ADST/DCT, prefer the DCT neighbor coeff + // as a context. If ADST or DCT is used in both directions, we + // use the combination of the two as a context. 
+ int a = find_in_scan(scan, l, (i - 1) * l + j); + int b = find_in_scan(scan, l, i * l + j - 1); + if (scan == vp9_col_scan_4x4 || scan == vp9_col_scan_8x8 || + scan == vp9_col_scan_16x16) { + neighbors[max_neighbors * n + 0] = a; + neighbors[max_neighbors * n + 1] = -1; + } else if (scan == vp9_row_scan_4x4 || scan == vp9_row_scan_8x8 || + scan == vp9_row_scan_16x16) { + neighbors[max_neighbors * n + 0] = b; + neighbors[max_neighbors * n + 1] = -1; + } else { + neighbors[max_neighbors * n + 0] = a; + neighbors[max_neighbors * n + 1] = b; + } + } else if (i > 0) { + neighbors[max_neighbors * n + 0] = find_in_scan(scan, l, (i - 1) * l + j); + neighbors[max_neighbors * n + 1] = -1; + } else if (j > 0) { + neighbors[max_neighbors * n + 0] = + find_in_scan(scan, l, i * l + j - 1); + neighbors[max_neighbors * n + 1] = -1; + } else { + assert(n == 0); + // dc predictor doesn't use previous tokens + neighbors[max_neighbors * n + 0] = -1; + } + assert(neighbors[max_neighbors * n + 0] < n); + } +} + +void vp9_init_neighbors() { + init_scan_neighbors(vp9_default_zig_zag1d_4x4, 4, + vp9_default_zig_zag1d_4x4_neighbors, MAX_NEIGHBORS); + init_scan_neighbors(vp9_row_scan_4x4, 4, + vp9_row_scan_4x4_neighbors, MAX_NEIGHBORS); + init_scan_neighbors(vp9_col_scan_4x4, 4, + vp9_col_scan_4x4_neighbors, MAX_NEIGHBORS); + init_scan_neighbors(vp9_default_zig_zag1d_8x8, 8, + vp9_default_zig_zag1d_8x8_neighbors, MAX_NEIGHBORS); + init_scan_neighbors(vp9_row_scan_8x8, 8, + vp9_row_scan_8x8_neighbors, MAX_NEIGHBORS); + init_scan_neighbors(vp9_col_scan_8x8, 8, + vp9_col_scan_8x8_neighbors, MAX_NEIGHBORS); + init_scan_neighbors(vp9_default_zig_zag1d_16x16, 16, + vp9_default_zig_zag1d_16x16_neighbors, MAX_NEIGHBORS); + init_scan_neighbors(vp9_row_scan_16x16, 16, + vp9_row_scan_16x16_neighbors, MAX_NEIGHBORS); + init_scan_neighbors(vp9_col_scan_16x16, 16, + vp9_col_scan_16x16_neighbors, MAX_NEIGHBORS); + init_scan_neighbors(vp9_default_zig_zag1d_32x32, 32, + 
vp9_default_zig_zag1d_32x32_neighbors, MAX_NEIGHBORS); +} + +const int *vp9_get_coef_neighbors_handle(const int *scan, int *pad) { + if (scan == vp9_default_zig_zag1d_4x4) { + *pad = MAX_NEIGHBORS; + return vp9_default_zig_zag1d_4x4_neighbors; + } else if (scan == vp9_row_scan_4x4) { + *pad = MAX_NEIGHBORS; + return vp9_row_scan_4x4_neighbors; + } else if (scan == vp9_col_scan_4x4) { + *pad = MAX_NEIGHBORS; + return vp9_col_scan_4x4_neighbors; + } else if (scan == vp9_default_zig_zag1d_8x8) { + *pad = MAX_NEIGHBORS; + return vp9_default_zig_zag1d_8x8_neighbors; + } else if (scan == vp9_row_scan_8x8) { + *pad = 2; + return vp9_row_scan_8x8_neighbors; + } else if (scan == vp9_col_scan_8x8) { + *pad = 2; + return vp9_col_scan_8x8_neighbors; + } else if (scan == vp9_default_zig_zag1d_16x16) { + *pad = MAX_NEIGHBORS; + return vp9_default_zig_zag1d_16x16_neighbors; + } else if (scan == vp9_row_scan_16x16) { + *pad = 2; + return vp9_row_scan_16x16_neighbors; + } else if (scan == vp9_col_scan_16x16) { + *pad = 2; + return vp9_col_scan_16x16_neighbors; + } else if (scan == vp9_default_zig_zag1d_32x32) { + *pad = MAX_NEIGHBORS; + return vp9_default_zig_zag1d_32x32_neighbors; + } else { + assert(0); + return NULL; + } +} + void vp9_coef_tree_initialize() { + vp9_init_neighbors(); init_bit_trees(); vp9_tokens_from_tree(vp9_coef_encodings, vp9_coef_tree); #if CONFIG_CODE_NONZEROCOUNT @@ -3170,6 +3529,9 @@ void vp9_update_nzc_counts(VP9_COMMON *cm, static void adapt_coef_probs(vp9_coeff_probs *dst_coef_probs, vp9_coeff_probs *pre_coef_probs, int block_types, vp9_coeff_count *coef_counts, + unsigned int (*eob_branch_count)[REF_TYPES] + [COEF_BANDS] + [PREV_COEF_CONTEXTS], int count_sat, int update_factor) { int t, i, j, k, l, count; unsigned int branch_ct[ENTROPY_NODES][2]; @@ -3190,6 +3552,8 @@ static void adapt_coef_probs(vp9_coeff_probs *dst_coef_probs, vp9_tree_probs_from_distribution(vp9_coef_tree, coef_probs, branch_ct, coef_counts[i][j][k][l], 0); + branch_ct[0][1] = 
eob_branch_count[i][j][k][l] - branch_ct[0][0]; + coef_probs[0] = get_binary_prob(branch_ct[0][0], branch_ct[0][1]); for (t = 0; t < entropy_nodes_adapt; ++t) { count = branch_ct[t][0] + branch_ct[t][1]; count = count > count_sat ? count_sat : count; @@ -3224,15 +3588,19 @@ void vp9_adapt_coef_probs(VP9_COMMON *cm) { adapt_coef_probs(cm->fc.coef_probs_4x4, cm->fc.pre_coef_probs_4x4, BLOCK_TYPES, cm->fc.coef_counts_4x4, + cm->fc.eob_branch_counts[TX_4X4], count_sat, update_factor); adapt_coef_probs(cm->fc.coef_probs_8x8, cm->fc.pre_coef_probs_8x8, BLOCK_TYPES, cm->fc.coef_counts_8x8, + cm->fc.eob_branch_counts[TX_8X8], count_sat, update_factor); adapt_coef_probs(cm->fc.coef_probs_16x16, cm->fc.pre_coef_probs_16x16, BLOCK_TYPES, cm->fc.coef_counts_16x16, + cm->fc.eob_branch_counts[TX_16X16], count_sat, update_factor); adapt_coef_probs(cm->fc.coef_probs_32x32, cm->fc.pre_coef_probs_32x32, BLOCK_TYPES, cm->fc.coef_counts_32x32, + cm->fc.eob_branch_counts[TX_32X32], count_sat, update_factor); } diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index e7fb5019a..64f595047 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -102,7 +102,15 @@ extern DECLARE_ALIGNED(16, const int, vp9_col_scan_4x4[16]); extern DECLARE_ALIGNED(16, const int, vp9_row_scan_4x4[16]); extern DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]); + +extern DECLARE_ALIGNED(16, const int, vp9_col_scan_8x8[64]); +extern DECLARE_ALIGNED(16, const int, vp9_row_scan_8x8[64]); + extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]); + +extern DECLARE_ALIGNED(16, const int, vp9_col_scan_16x16[256]); +extern DECLARE_ALIGNED(16, const int, vp9_row_scan_16x16[256]); + extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]); void vp9_coef_tree_initialize(void); @@ -126,20 +134,25 @@ static INLINE void vp9_reset_sb64_tokens_context(MACROBLOCKD* const xd) { vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 4); } -extern 
const int vp9_coef_bands[32]; +extern const int vp9_coef_bands8x8[64]; extern const int vp9_coef_bands4x4[16]; -static int get_coef_band(TX_SIZE tx_size, int coef_index) { +static int get_coef_band(const int *scan, TX_SIZE tx_size, int coef_index) { if (tx_size == TX_4X4) { - return vp9_coef_bands4x4[coef_index]; + return vp9_coef_bands4x4[scan[coef_index]]; } else { - if (coef_index < 32) - return vp9_coef_bands[coef_index]; - else + const int pos = scan[coef_index]; + const int sz = 1 << (2 + tx_size); + const int x = pos & (sz - 1), y = pos >> (2 + tx_size); + if (x >= 8 || y >= 8) return 5; + else + return vp9_coef_bands8x8[y * 8 + x]; } } -extern int vp9_get_coef_context(int * recent_energy, int token); +extern int vp9_get_coef_context(const int *scan, const int *neighbors, + int nb_pad, uint8_t *token_cache, int c, int l); +const int *vp9_get_coef_neighbors_handle(const int *scan, int *pad); #if CONFIG_MODELCOEFPROB #define COEFPROB_BITS 8 diff --git a/vp9/common/vp9_loopfilter_filters.c b/vp9/common/vp9_loopfilter_filters.c index 6f434dafe..a473cf742 100644 --- a/vp9/common/vp9_loopfilter_filters.c +++ b/vp9/common/vp9_loopfilter_filters.c @@ -49,47 +49,36 @@ static INLINE int8_t hevmask(uint8_t thresh, uint8_t p1, uint8_t p0, static INLINE void filter(int8_t mask, uint8_t hev, uint8_t *op1, uint8_t *op0, uint8_t *oq0, uint8_t *oq1) { - int8_t ps0, qs0; - int8_t ps1, qs1; - int8_t filter, Filter1, Filter2; - int8_t u; - - ps1 = (int8_t) *op1 ^ 0x80; - ps0 = (int8_t) *op0 ^ 0x80; - qs0 = (int8_t) *oq0 ^ 0x80; - qs1 = (int8_t) *oq1 ^ 0x80; - - /* add outer taps if we have high edge variance */ - filter = signed_char_clamp(ps1 - qs1); - filter &= hev; - - /* inner taps */ - filter = signed_char_clamp(filter + 3 * (qs0 - ps0)); - filter &= mask; + int8_t filter1, filter2; - /* save bottom 3 bits so that we round one side +4 and the other +3 - * if it equals 4 we'll set to adjust by -1 to account for the fact - * we'd round 3 the other way - */ - Filter1 = 
signed_char_clamp(filter + 4); - Filter2 = signed_char_clamp(filter + 3); - Filter1 >>= 3; - Filter2 >>= 3; - u = signed_char_clamp(qs0 - Filter1); - *oq0 = u ^ 0x80; - u = signed_char_clamp(ps0 + Filter2); - *op0 = u ^ 0x80; - filter = Filter1; - - /* outer tap adjustments */ + const int8_t ps1 = (int8_t) *op1 ^ 0x80; + const int8_t ps0 = (int8_t) *op0 ^ 0x80; + const int8_t qs0 = (int8_t) *oq0 ^ 0x80; + const int8_t qs1 = (int8_t) *oq1 ^ 0x80; + + // add outer taps if we have high edge variance + int8_t filter = signed_char_clamp(ps1 - qs1) & hev; + + // inner taps + filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask; + + // save bottom 3 bits so that we round one side +4 and the other +3 + // if it equals 4 we'll set to adjust by -1 to account for the fact + // we'd round 3 the other way + filter1 = signed_char_clamp(filter + 4) >> 3; + filter2 = signed_char_clamp(filter + 3) >> 3; + + *oq0 = signed_char_clamp(qs0 - filter1) ^ 0x80; + *op0 = signed_char_clamp(ps0 + filter2) ^ 0x80; + filter = filter1; + + // outer tap adjustments filter += 1; filter >>= 1; filter &= ~hev; - u = signed_char_clamp(qs1 - filter); - *oq1 = u ^ 0x80; - u = signed_char_clamp(ps1 + filter); - *op1 = u ^ 0x80; + *oq1 = signed_char_clamp(qs1 - filter) ^ 0x80; + *op1 = signed_char_clamp(ps1 + filter) ^ 0x80; } void vp9_loop_filter_horizontal_edge_c(uint8_t *s, @@ -176,21 +165,16 @@ static INLINE void mbfilter(int8_t mask, uint8_t hev, uint8_t flat, uint8_t *op1, uint8_t *op0, uint8_t *oq0, uint8_t *oq1, uint8_t *oq2, uint8_t *oq3) { - /* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */ + // use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line if (flat && mask) { - uint8_t p0, q0; - uint8_t p1, q1; - uint8_t p2, q2; - uint8_t p3, q3; - - p3 = *op3; - p2 = *op2; - p1 = *op1; - p0 = *op0; - q0 = *oq0; - q1 = *oq1; - q2 = *oq2; - q3 = *oq3; + const uint8_t p3 = *op3; + const uint8_t p2 = *op2; + const uint8_t p1 = *op1; + const uint8_t p0 = *op0; + const uint8_t q0 = *oq0; 
+ const uint8_t q1 = *oq1; + const uint8_t q2 = *oq2; + const uint8_t q3 = *oq3; *op2 = (p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0 + 4) >> 3; *op1 = (p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1 + 4) >> 3; @@ -199,44 +183,33 @@ static INLINE void mbfilter(int8_t mask, uint8_t hev, uint8_t flat, *oq1 = (p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3 + 4) >> 3; *oq2 = (p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3 + 4) >> 3; } else { - int8_t ps0, qs0; - int8_t ps1, qs1; - int8_t filter, Filter1, Filter2; - int8_t u; - - ps1 = (int8_t) *op1 ^ 0x80; - ps0 = (int8_t) *op0 ^ 0x80; - qs0 = (int8_t) *oq0 ^ 0x80; - qs1 = (int8_t) *oq1 ^ 0x80; - - /* add outer taps if we have high edge variance */ - filter = signed_char_clamp(ps1 - qs1); - filter &= hev; - - /* inner taps */ - filter = signed_char_clamp(filter + 3 * (qs0 - ps0)); - filter &= mask; - - Filter1 = signed_char_clamp(filter + 4); - Filter2 = signed_char_clamp(filter + 3); - Filter1 >>= 3; - Filter2 >>= 3; - - u = signed_char_clamp(qs0 - Filter1); - *oq0 = u ^ 0x80; - u = signed_char_clamp(ps0 + Filter2); - *op0 = u ^ 0x80; - filter = Filter1; - - /* outer tap adjustments */ + int8_t filter1, filter2; + + const int8_t ps1 = (int8_t) *op1 ^ 0x80; + const int8_t ps0 = (int8_t) *op0 ^ 0x80; + const int8_t qs0 = (int8_t) *oq0 ^ 0x80; + const int8_t qs1 = (int8_t) *oq1 ^ 0x80; + + // add outer taps if we have high edge variance + int8_t filter = signed_char_clamp(ps1 - qs1) & hev; + + // inner taps + filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask; + + filter1 = signed_char_clamp(filter + 4) >> 3; + filter2 = signed_char_clamp(filter + 3) >> 3; + + *oq0 = signed_char_clamp(qs0 - filter1) ^ 0x80; + *op0 = signed_char_clamp(ps0 + filter2) ^ 0x80; + filter = filter1; + + // outer tap adjustments filter += 1; filter >>= 1; filter &= ~hev; - u = signed_char_clamp(qs1 - filter); - *oq1 = u ^ 0x80; - u = signed_char_clamp(ps1 + filter); - *op1 = u ^ 0x80; + *oq1 = signed_char_clamp(qs1 - filter) ^ 0x80; + *op1 = signed_char_clamp(ps1 + 
filter) ^ 0x80; } } @@ -304,38 +277,28 @@ void vp9_mbloop_filter_vertical_edge_c(uint8_t *s, static INLINE int8_t simple_filter_mask(uint8_t blimit, uint8_t p1, uint8_t p0, uint8_t q0, uint8_t q1) { - /* Why does this cause problems for win32? - * error C2143: syntax error : missing ';' before 'type' - * (void) limit; - */ - int8_t mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1; - return mask; + return (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1; } static INLINE void simple_filter(int8_t mask, uint8_t *op1, uint8_t *op0, uint8_t *oq0, uint8_t *oq1) { - int8_t filter, Filter1, Filter2; - int8_t p1 = (int8_t) *op1 ^ 0x80; - int8_t p0 = (int8_t) *op0 ^ 0x80; - int8_t q0 = (int8_t) *oq0 ^ 0x80; - int8_t q1 = (int8_t) *oq1 ^ 0x80; - int8_t u; - - filter = signed_char_clamp(p1 - q1); + int8_t filter1, filter2; + const int8_t p1 = (int8_t) *op1 ^ 0x80; + const int8_t p0 = (int8_t) *op0 ^ 0x80; + const int8_t q0 = (int8_t) *oq0 ^ 0x80; + const int8_t q1 = (int8_t) *oq1 ^ 0x80; + + int8_t filter = signed_char_clamp(p1 - q1); filter = signed_char_clamp(filter + 3 * (q0 - p0)); filter &= mask; - /* save bottom 3 bits so that we round one side +4 and the other +3 */ - Filter1 = signed_char_clamp(filter + 4); - Filter1 >>= 3; - u = signed_char_clamp(q0 - Filter1); - *oq0 = u ^ 0x80; + // save bottom 3 bits so that we round one side +4 and the other +3 + filter1 = signed_char_clamp(filter + 4) >> 3; + *oq0 = signed_char_clamp(q0 - filter1) ^ 0x80; - Filter2 = signed_char_clamp(filter + 3); - Filter2 >>= 3; - u = signed_char_clamp(p0 + Filter2); - *op0 = u ^ 0x80; + filter2 = signed_char_clamp(filter + 3) >> 3; + *op0 = signed_char_clamp(p0 + filter2) ^ 0x80; } void vp9_loop_filter_simple_horizontal_edge_c(uint8_t *s, @@ -495,33 +458,24 @@ static INLINE void wide_mbfilter(int8_t mask, uint8_t hev, uint8_t *oq1, uint8_t *oq2, uint8_t *oq3, uint8_t *oq4, uint8_t *oq5, uint8_t *oq6, uint8_t *oq7) { - /* use a 15 tap filter [1,1,1,1,1,1,1,2,1,1,1,1,1,1,1] for 
flat line */ + // use a 15 tap filter [1,1,1,1,1,1,1,2,1,1,1,1,1,1,1] for flat line if (flat2 && flat && mask) { - uint8_t p0, q0; - uint8_t p1, q1; - uint8_t p2, q2; - uint8_t p3, q3; - uint8_t p4, q4; - uint8_t p5, q5; - uint8_t p6, q6; - uint8_t p7, q7; - - p7 = *op7; - p6 = *op6; - p5 = *op5; - p4 = *op4; - p3 = *op3; - p2 = *op2; - p1 = *op1; - p0 = *op0; - q0 = *oq0; - q1 = *oq1; - q2 = *oq2; - q3 = *oq3; - q4 = *oq4; - q5 = *oq5; - q6 = *oq6; - q7 = *oq7; + const uint8_t p7 = *op7; + const uint8_t p6 = *op6; + const uint8_t p5 = *op5; + const uint8_t p4 = *op4; + const uint8_t p3 = *op3; + const uint8_t p2 = *op2; + const uint8_t p1 = *op1; + const uint8_t p0 = *op0; + const uint8_t q0 = *oq0; + const uint8_t q1 = *oq1; + const uint8_t q2 = *oq2; + const uint8_t q3 = *oq3; + const uint8_t q4 = *oq4; + const uint8_t q5 = *oq5; + const uint8_t q6 = *oq6; + const uint8_t q7 = *oq7; *op6 = (p7 * 7 + p6 * 2 + p5 + p4 + p3 + p2 + p1 + p0 + q0 + 8) >> 4; @@ -552,19 +506,14 @@ static INLINE void wide_mbfilter(int8_t mask, uint8_t hev, *oq6 = (p0 + q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 + q7 * 7 + 8) >> 4; } else if (flat && mask) { - unsigned char p0, q0; - unsigned char p1, q1; - unsigned char p2, q2; - unsigned char p3, q3; - - p3 = *op3; - p2 = *op2; - p1 = *op1; - p0 = *op0; - q0 = *oq0; - q1 = *oq1; - q2 = *oq2; - q3 = *oq3; + const uint8_t p3 = *op3; + const uint8_t p2 = *op2; + const uint8_t p1 = *op1; + const uint8_t p0 = *op0; + const uint8_t q0 = *oq0; + const uint8_t q1 = *oq1; + const uint8_t q2 = *oq2; + const uint8_t q3 = *oq3; *op2 = (p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0 + 4) >> 3; *op1 = (p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1 + 4) >> 3; @@ -573,44 +522,32 @@ static INLINE void wide_mbfilter(int8_t mask, uint8_t hev, *oq1 = (p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3 + 4) >> 3; *oq2 = (p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3 + 4) >> 3; } else { - signed char ps0, qs0; - signed char ps1, qs1; - signed char filter, Filter1, Filter2; - signed char u; - - ps1 = 
(signed char) * op1 ^ 0x80; - ps0 = (signed char) * op0 ^ 0x80; - qs0 = (signed char) * oq0 ^ 0x80; - qs1 = (signed char) * oq1 ^ 0x80; - - /* add outer taps if we have high edge variance */ - filter = signed_char_clamp(ps1 - qs1); - filter &= hev; - - /* inner taps */ - filter = signed_char_clamp(filter + 3 * (qs0 - ps0)); - filter &= mask; - - Filter1 = signed_char_clamp(filter + 4); - Filter2 = signed_char_clamp(filter + 3); - Filter1 >>= 3; - Filter2 >>= 3; - - u = signed_char_clamp(qs0 - Filter1); - *oq0 = u ^ 0x80; - u = signed_char_clamp(ps0 + Filter2); - *op0 = u ^ 0x80; - filter = Filter1; - - /* outer tap adjustments */ + int8_t filter1, filter2; + + const int8_t ps1 = (int8_t) * op1 ^ 0x80; + const int8_t ps0 = (int8_t) * op0 ^ 0x80; + const int8_t qs0 = (int8_t) * oq0 ^ 0x80; + const int8_t qs1 = (int8_t) * oq1 ^ 0x80; + + // add outer taps if we have high edge variance + int8_t filter = signed_char_clamp(ps1 - qs1) & hev; + + // inner taps + filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask; + filter1 = signed_char_clamp(filter + 4) >> 3; + filter2 = signed_char_clamp(filter + 3) >> 3; + + *oq0 = signed_char_clamp(qs0 - filter1) ^ 0x80; + *op0 = signed_char_clamp(ps0 + filter2) ^ 0x80; + filter = filter1; + + // outer tap adjustments filter += 1; filter >>= 1; filter &= ~hev; - u = signed_char_clamp(qs1 - filter); - *oq1 = u ^ 0x80; - u = signed_char_clamp(ps1 + filter); - *op1 = u ^ 0x80; + *oq1 = signed_char_clamp(qs1 - filter) ^ 0x80; + *op1 = signed_char_clamp(ps1 + filter) ^ 0x80; } } diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index a76f4c557..fdbabc537 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -114,6 +114,9 @@ typedef struct frame_contexts { vp9_coeff_count coef_counts_8x8[BLOCK_TYPES]; vp9_coeff_count coef_counts_16x16[BLOCK_TYPES]; vp9_coeff_count coef_counts_32x32[BLOCK_TYPES]; + unsigned int eob_branch_counts[TX_SIZE_MAX_SB][BLOCK_TYPES][REF_TYPES] + 
[COEF_BANDS][PREV_COEF_CONTEXTS]; + #if CONFIG_CODE_NONZEROCOUNT unsigned int nzc_counts_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] [NZC4X4_TOKENS]; diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index bd4c83f44..110af5e32 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -39,6 +39,127 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale, // applied in one direction only, and not at all for 0,0, seems to give the // best quality, but it may be worth trying an additional mode that does // do the filtering on full-pel. +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT + if (scale->x_step_q4 == 16) { + if (scale->y_step_q4 == 16) { + // No scaling in either direction. + scale->predict[0][0][0] = vp9_convolve_copy; + scale->predict[0][0][1] = vp9_convolve_1by8; + scale->predict[0][0][2] = vp9_convolve_qtr; + scale->predict[0][0][3] = vp9_convolve_3by8; + scale->predict[0][0][4] = vp9_convolve_avg; + scale->predict[0][0][5] = vp9_convolve_5by8; + scale->predict[0][0][6] = vp9_convolve_3qtr; + scale->predict[0][0][7] = vp9_convolve_7by8; + scale->predict[0][1][0] = vp9_convolve8_vert; + scale->predict[0][1][1] = vp9_convolve8_1by8_vert; + scale->predict[0][1][2] = vp9_convolve8_qtr_vert; + scale->predict[0][1][3] = vp9_convolve8_3by8_vert; + scale->predict[0][1][4] = vp9_convolve8_avg_vert; + scale->predict[0][1][5] = vp9_convolve8_5by8_vert; + scale->predict[0][1][6] = vp9_convolve8_3qtr_vert; + scale->predict[0][1][7] = vp9_convolve8_7by8_vert; + scale->predict[1][0][0] = vp9_convolve8_horiz; + scale->predict[1][0][1] = vp9_convolve8_1by8_horiz; + scale->predict[1][0][2] = vp9_convolve8_qtr_horiz; + scale->predict[1][0][3] = vp9_convolve8_3by8_horiz; + scale->predict[1][0][4] = vp9_convolve8_avg_horiz; + scale->predict[1][0][5] = vp9_convolve8_5by8_horiz; + scale->predict[1][0][6] = vp9_convolve8_3qtr_horiz; + scale->predict[1][0][7] = vp9_convolve8_7by8_horiz; + } else { + // No scaling in x direction. 
Must always scale in the y direction. + scale->predict[0][0][0] = vp9_convolve8_vert; + scale->predict[0][0][1] = vp9_convolve8_1by8_vert; + scale->predict[0][0][2] = vp9_convolve8_qtr_vert; + scale->predict[0][0][3] = vp9_convolve8_3by8_vert; + scale->predict[0][0][4] = vp9_convolve8_avg_vert; + scale->predict[0][0][5] = vp9_convolve8_5by8_vert; + scale->predict[0][0][6] = vp9_convolve8_3qtr_vert; + scale->predict[0][0][7] = vp9_convolve8_7by8_vert; + scale->predict[0][1][0] = vp9_convolve8_vert; + scale->predict[0][1][1] = vp9_convolve8_1by8_vert; + scale->predict[0][1][2] = vp9_convolve8_qtr_vert; + scale->predict[0][1][3] = vp9_convolve8_3by8_vert; + scale->predict[0][1][4] = vp9_convolve8_avg_vert; + scale->predict[0][1][5] = vp9_convolve8_5by8_vert; + scale->predict[0][1][6] = vp9_convolve8_3qtr_vert; + scale->predict[0][1][7] = vp9_convolve8_7by8_vert; + scale->predict[1][0][0] = vp9_convolve8; + scale->predict[1][0][1] = vp9_convolve8_1by8; + scale->predict[1][0][2] = vp9_convolve8_qtr; + scale->predict[1][0][3] = vp9_convolve8_3by8; + scale->predict[1][0][4] = vp9_convolve8_avg; + scale->predict[1][0][5] = vp9_convolve8_5by8; + scale->predict[1][0][6] = vp9_convolve8_3qtr; + scale->predict[1][0][7] = vp9_convolve8_7by8; + } + } else { + if (scale->y_step_q4 == 16) { + // No scaling in the y direction. Must always scale in the x direction. 
+ scale->predict[0][0][0] = vp9_convolve8_horiz; + scale->predict[0][0][1] = vp9_convolve8_1by8_horiz; + scale->predict[0][0][2] = vp9_convolve8_qtr_horiz; + scale->predict[0][0][3] = vp9_convolve8_3by8_horiz; + scale->predict[0][0][4] = vp9_convolve8_avg_horiz; + scale->predict[0][0][5] = vp9_convolve8_5by8_horiz; + scale->predict[0][0][6] = vp9_convolve8_3qtr_horiz; + scale->predict[0][0][7] = vp9_convolve8_7by8_horiz; + scale->predict[0][1][0] = vp9_convolve8; + scale->predict[0][1][1] = vp9_convolve8_1by8; + scale->predict[0][1][2] = vp9_convolve8_qtr; + scale->predict[0][1][3] = vp9_convolve8_3by8; + scale->predict[0][1][4] = vp9_convolve8_avg; + scale->predict[0][1][5] = vp9_convolve8_5by8; + scale->predict[0][1][6] = vp9_convolve8_3qtr; + scale->predict[0][1][7] = vp9_convolve8_7by8; + scale->predict[1][0][0] = vp9_convolve8_horiz; + scale->predict[1][0][1] = vp9_convolve8_1by8_horiz; + scale->predict[1][0][2] = vp9_convolve8_qtr_horiz; + scale->predict[1][0][3] = vp9_convolve8_3by8_horiz; + scale->predict[1][0][4] = vp9_convolve8_avg_horiz; + scale->predict[1][0][5] = vp9_convolve8_5by8_horiz; + scale->predict[1][0][6] = vp9_convolve8_3qtr_horiz; + scale->predict[1][0][7] = vp9_convolve8_7by8_horiz; + } else { + // Must always scale in both directions. 
+ scale->predict[0][0][0] = vp9_convolve8; + scale->predict[0][0][1] = vp9_convolve8_1by8; + scale->predict[0][0][2] = vp9_convolve8_qtr; + scale->predict[0][0][3] = vp9_convolve8_3by8; + scale->predict[0][0][4] = vp9_convolve8_avg; + scale->predict[0][0][5] = vp9_convolve8_5by8; + scale->predict[0][0][6] = vp9_convolve8_3qtr; + scale->predict[0][0][7] = vp9_convolve8_7by8; + scale->predict[0][1][0] = vp9_convolve8; + scale->predict[0][1][1] = vp9_convolve8_1by8; + scale->predict[0][1][2] = vp9_convolve8_qtr; + scale->predict[0][1][3] = vp9_convolve8_3by8; + scale->predict[0][1][4] = vp9_convolve8_avg; + scale->predict[0][1][5] = vp9_convolve8_5by8; + scale->predict[0][1][6] = vp9_convolve8_3qtr; + scale->predict[0][1][7] = vp9_convolve8_7by8; + scale->predict[1][0][0] = vp9_convolve8; + scale->predict[1][0][1] = vp9_convolve8_1by8; + scale->predict[1][0][2] = vp9_convolve8_qtr; + scale->predict[1][0][3] = vp9_convolve8_3by8; + scale->predict[1][0][4] = vp9_convolve8_avg; + scale->predict[1][0][5] = vp9_convolve8_5by8; + scale->predict[1][0][6] = vp9_convolve8_3qtr; + scale->predict[1][0][7] = vp9_convolve8_7by8; + } + } + // 2D subpel motion always gets filtered in both directions + scale->predict[1][1][0] = vp9_convolve8; + scale->predict[1][1][1] = vp9_convolve8_1by8; + scale->predict[1][1][2] = vp9_convolve8_qtr; + scale->predict[1][1][3] = vp9_convolve8_3by8; + scale->predict[1][1][4] = vp9_convolve8_avg; + scale->predict[1][1][5] = vp9_convolve8_5by8; + scale->predict[1][1][6] = vp9_convolve8_3qtr; + scale->predict[1][1][7] = vp9_convolve8_7by8; +} +#else if (scale->x_step_q4 == 16) { if (scale->y_step_q4 == 16) { // No scaling in either direction. 
@@ -80,6 +201,7 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale, scale->predict[1][1][0] = vp9_convolve8; scale->predict[1][1][1] = vp9_convolve8_avg; } +#endif void vp9_setup_interp_filters(MACROBLOCKD *xd, INTERPOLATIONFILTERTYPE mcomp_filter_type, @@ -269,12 +391,11 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int_mv *mv_q3, const struct scale_factors *scale, - int w, int h, int do_avg, + int w, int h, int weight, const struct subpix_fn_table *subpix) { int_mv32 mv = scale_motion_vector_q3_to_q4(mv_q3, scale); src += (mv.as_mv.row >> 4) * src_stride + (mv.as_mv.col >> 4); - - scale->predict[!!(mv.as_mv.col & 15)][!!(mv.as_mv.row & 15)][do_avg]( + scale->predict[!!(mv.as_mv.col & 15)][!!(mv.as_mv.row & 15)][weight]( src, src_stride, dst, dst_stride, subpix->filter_x[mv.as_mv.col & 15], scale->x_step_q4, subpix->filter_y[mv.as_mv.row & 15], scale->y_step_q4, @@ -289,7 +410,7 @@ void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride, const int_mv *fullpel_mv_q3, const int_mv *frac_mv_q4, const struct scale_factors *scale, - int w, int h, int do_avg, + int w, int h, int weight, const struct subpix_fn_table *subpix) { const int mv_row_q4 = ((fullpel_mv_q3->as_mv.row >> 3) << 4) + (frac_mv_q4->as_mv.row & 0xf); @@ -305,18 +426,21 @@ void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride, const int subpel_y = scaled_mv_row_q4 & 15; src += (scaled_mv_row_q4 >> 4) * src_stride + (scaled_mv_col_q4 >> 4); - scale->predict[!!subpel_x][!!subpel_y][do_avg]( + scale->predict[!!subpel_x][!!subpel_y][weight]( src, src_stride, dst, dst_stride, subpix->filter_x[subpel_x], scale->x_step_q4, subpix->filter_y[subpel_y], scale->y_step_q4, w, h); } -static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1, - struct scale_factors *scale, - int block_size, int stride, int which_mv, - const struct subpix_fn_table *subpix, - int row, int col) { +static void 
build_2x1_inter_predictor_wh(const BLOCKD *d0, const BLOCKD *d1, + struct scale_factors *scale, + uint8_t *predictor, + int block_size, int stride, + int which_mv, int weight, + int width, int height, + const struct subpix_fn_table *subpix, + int row, int col) { assert(d1->predictor - d0->predictor == block_size); assert(d1->pre == d0->pre + block_size); @@ -327,11 +451,11 @@ static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1, vp9_build_inter_predictor(*base_pre + d0->pre, d0->pre_stride, - d0->predictor, stride, + predictor, stride, &d0->bmi.as_mv[which_mv], &scale[which_mv], - 2 * block_size, block_size, which_mv, - subpix); + width, height, + weight, subpix); } else { uint8_t **base_pre0 = which_mv ? d0->base_second_pre : d0->base_pre; @@ -339,116 +463,69 @@ static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1, vp9_build_inter_predictor(*base_pre0 + d0->pre, d0->pre_stride, - d0->predictor, stride, + predictor, stride, &d0->bmi.as_mv[which_mv], &scale[which_mv], - block_size, block_size, which_mv, - subpix); + width > block_size ? 
block_size : width, height, + weight, subpix); + + if (width <= block_size) return; set_scaled_offsets(&scale[which_mv], row, col + block_size); vp9_build_inter_predictor(*base_pre1 + d1->pre, d1->pre_stride, - d1->predictor, stride, + predictor + block_size, stride, &d1->bmi.as_mv[which_mv], &scale[which_mv], - block_size, block_size, which_mv, - subpix); + width - block_size, height, + weight, subpix); } } -/*encoder only*/ -void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd, - int mb_row, - int mb_col) { - int i, j; - BLOCKD *blockd = xd->block; - - /* build uv mvs */ - for (i = 0; i < 2; i++) { - for (j = 0; j < 2; j++) { - int yoffset = i * 8 + j * 2; - int uoffset = 16 + i * 2 + j; - int voffset = 20 + i * 2 + j; - int temp; - - temp = blockd[yoffset ].bmi.as_mv[0].as_mv.row - + blockd[yoffset + 1].bmi.as_mv[0].as_mv.row - + blockd[yoffset + 4].bmi.as_mv[0].as_mv.row - + blockd[yoffset + 5].bmi.as_mv[0].as_mv.row; - - if (temp < 0) temp -= 4; - else temp += 4; - - xd->block[uoffset].bmi.as_mv[0].as_mv.row = (temp / 8) & - xd->fullpixel_mask; - - temp = blockd[yoffset ].bmi.as_mv[0].as_mv.col - + blockd[yoffset + 1].bmi.as_mv[0].as_mv.col - + blockd[yoffset + 4].bmi.as_mv[0].as_mv.col - + blockd[yoffset + 5].bmi.as_mv[0].as_mv.col; - - if (temp < 0) temp -= 4; - else temp += 4; - - blockd[uoffset].bmi.as_mv[0].as_mv.col = (temp / 8) & - xd->fullpixel_mask; - - blockd[voffset].bmi.as_mv[0].as_mv.row = - blockd[uoffset].bmi.as_mv[0].as_mv.row; - blockd[voffset].bmi.as_mv[0].as_mv.col = - blockd[uoffset].bmi.as_mv[0].as_mv.col; - - if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - temp = blockd[yoffset ].bmi.as_mv[1].as_mv.row - + blockd[yoffset + 1].bmi.as_mv[1].as_mv.row - + blockd[yoffset + 4].bmi.as_mv[1].as_mv.row - + blockd[yoffset + 5].bmi.as_mv[1].as_mv.row; - - if (temp < 0) { - temp -= 4; - } else { - temp += 4; - } - - blockd[uoffset].bmi.as_mv[1].as_mv.row = (temp / 8) & - xd->fullpixel_mask; +static void build_2x1_inter_predictor(const 
BLOCKD *d0, const BLOCKD *d1, + struct scale_factors *scale, + int block_size, int stride, + int which_mv, int weight, + const struct subpix_fn_table *subpix, + int row, int col) { + assert(d1->predictor - d0->predictor == block_size); + assert(d1->pre == d0->pre + block_size); - temp = blockd[yoffset ].bmi.as_mv[1].as_mv.col - + blockd[yoffset + 1].bmi.as_mv[1].as_mv.col - + blockd[yoffset + 4].bmi.as_mv[1].as_mv.col - + blockd[yoffset + 5].bmi.as_mv[1].as_mv.col; + set_scaled_offsets(&scale[which_mv], row, col); - if (temp < 0) { - temp -= 4; - } else { - temp += 4; - } + if (d0->bmi.as_mv[which_mv].as_int == d1->bmi.as_mv[which_mv].as_int) { + uint8_t **base_pre = which_mv ? d0->base_second_pre : d0->base_pre; - blockd[uoffset].bmi.as_mv[1].as_mv.col = (temp / 8) & - xd->fullpixel_mask; + vp9_build_inter_predictor(*base_pre + d0->pre, + d0->pre_stride, + d0->predictor, stride, + &d0->bmi.as_mv[which_mv], + &scale[which_mv], + 2 * block_size, block_size, + weight, subpix); - blockd[voffset].bmi.as_mv[1].as_mv.row = - blockd[uoffset].bmi.as_mv[1].as_mv.row; - blockd[voffset].bmi.as_mv[1].as_mv.col = - blockd[uoffset].bmi.as_mv[1].as_mv.col; - } - } - } + } else { + uint8_t **base_pre0 = which_mv ? d0->base_second_pre : d0->base_pre; + uint8_t **base_pre1 = which_mv ? 
d1->base_second_pre : d1->base_pre; - for (i = 16; i < 24; i += 2) { - const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; - const int x = 4 * (i & 1); - const int y = ((i - 16) >> 1) * 4; + vp9_build_inter_predictor(*base_pre0 + d0->pre, + d0->pre_stride, + d0->predictor, stride, + &d0->bmi.as_mv[which_mv], + &scale[which_mv], + block_size, block_size, + weight, subpix); - int which_mv; - BLOCKD *d0 = &blockd[i]; - BLOCKD *d1 = &blockd[i + 1]; + set_scaled_offsets(&scale[which_mv], row, col + block_size); - for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv, - &xd->subpix, mb_row * 8 + y, mb_col * 8 + x); - } + vp9_build_inter_predictor(*base_pre1 + d1->pre, + d1->pre_stride, + d1->predictor, stride, + &d1->bmi.as_mv[which_mv], + &scale[which_mv], + block_size, block_size, + weight, subpix); } } @@ -488,7 +565,358 @@ static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) { (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row; } -/*encoder only*/ +#define AVERAGE_WEIGHT (1 << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT)) + +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT + +// Whether to use implicit weighting for UV +#define USE_IMPLICIT_WEIGHT_UV + +// Whether to use implicit weighting for SplitMV +// #define USE_IMPLICIT_WEIGHT_SPLITMV + +// #define SEARCH_MIN3 +static int64_t get_consistency_metric(MACROBLOCKD *xd, + uint8_t *tmp_y, int tmp_ystride) { + int block_size = 16 << xd->mode_info_context->mbmi.sb_type; + uint8_t *rec_y = xd->dst.y_buffer; + int rec_ystride = xd->dst.y_stride; + int64_t metric = 0; + int i; + if (xd->up_available) { + for (i = 0; i < block_size; ++i) { + int diff = abs(*(rec_y - rec_ystride + i) - + *(tmp_y + i)); +#ifdef SEARCH_MIN3 + // Searches for the min abs diff among 3 pixel neighbors in the border + int diff1 = xd->left_available ? 
+ abs(*(rec_y - rec_ystride + i - 1) - *(tmp_y + i)) : diff; + int diff2 = i < block_size - 1 ? + abs(*(rec_y - rec_ystride + i + 1) - *(tmp_y + i)) : diff; + diff = diff <= diff1 ? diff : diff1; + diff = diff <= diff2 ? diff : diff2; +#endif + metric += diff; + } + } + if (xd->left_available) { + for (i = 0; i < block_size; ++i) { + int diff = abs(*(rec_y - 1 + i * rec_ystride) - + *(tmp_y + i * tmp_ystride)); +#ifdef SEARCH_MIN3 + // Searches for the min abs diff among 3 pixel neighbors in the border + int diff1 = xd->up_available ? + abs(*(rec_y - 1 + (i - 1) * rec_ystride) - + *(tmp_y + i * tmp_ystride)) : diff; + int diff2 = i < block_size - 1 ? + abs(*(rec_y - 1 + (i + 1) * rec_ystride) - + *(tmp_y + i * tmp_ystride)) : diff; + diff = diff <= diff1 ? diff : diff1; + diff = diff <= diff2 ? diff : diff2; +#endif + metric += diff; + } + } + return metric; +} + +static int get_weight(MACROBLOCKD *xd, int64_t metric_1, int64_t metric_2) { + int weight = AVERAGE_WEIGHT; + if (2 * metric_1 < metric_2) + weight = 6; + else if (4 * metric_1 < 3 * metric_2) + weight = 5; + else if (2 * metric_2 < metric_1) + weight = 2; + else if (4 * metric_2 < 3 * metric_1) + weight = 3; + return weight; +} + +#ifdef USE_IMPLICIT_WEIGHT_SPLITMV +static int get_implicit_compoundinter_weight_splitmv( + MACROBLOCKD *xd, int mb_row, int mb_col) { + MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + BLOCKD *blockd = xd->block; + const int use_second_ref = mbmi->second_ref_frame > 0; + int64_t metric_2 = 0, metric_1 = 0; + int i, which_mv, weight; + uint8_t tmp_y[256]; + const int tmp_ystride = 16; + + if (!use_second_ref) return 0; + if (!(xd->up_available || xd->left_available)) + return AVERAGE_WEIGHT; + + assert(xd->mode_info_context->mbmi.mode == SPLITMV); + + which_mv = 1; // second predictor + if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) { + for (i = 0; i < 16; i += 8) { + BLOCKD *d0 = &blockd[i]; + BLOCKD *d1 = &blockd[i + 2]; + const int y = i & 8; + + 
blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0]; + blockd[i + 2].bmi = xd->mode_info_context->bmi[i + 2]; + + if (mbmi->need_to_clamp_mvs) { + clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[which_mv].as_mv, xd); + clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd); + } + if (i == 0) { + build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16, + which_mv, 0, 16, 1, + &xd->subpix, mb_row * 16 + y, mb_col * 16); + build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16, + which_mv, 0, 1, 8, + &xd->subpix, mb_row * 16 + y, mb_col * 16); + } else { + build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + 8 * 16, + 8, 16, which_mv, 0, 1, 8, + &xd->subpix, mb_row * 16 + y, mb_col * 16); + } + } + } else { + for (i = 0; i < 16; i += 2) { + BLOCKD *d0 = &blockd[i]; + BLOCKD *d1 = &blockd[i + 1]; + const int x = (i & 3) * 4; + const int y = (i >> 2) * 4; + + blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0]; + blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1]; + + if (i >= 4 && (i & 3) != 0) continue; + + if (i == 0) { + build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16, + which_mv, 0, 8, 1, &xd->subpix, + mb_row * 16 + y, mb_col * 16 + x); + build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16, + which_mv, 0, 1, 4, &xd->subpix, + mb_row * 16 + y, mb_col * 16 + x); + } else if (i < 4) { + build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + x, 4, 16, + which_mv, 0, 8, 1, &xd->subpix, + mb_row * 16 + y, mb_col * 16 + x); + } else { + build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + y * 16, + 4, 16, which_mv, 0, 1, 4, &xd->subpix, + mb_row * 16 + y, mb_col * 16 + x); + } + } + } + metric_2 = get_consistency_metric(xd, tmp_y, tmp_ystride); + + which_mv = 0; // first predictor + if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) { + for (i = 0; i < 16; i += 8) { + BLOCKD *d0 = &blockd[i]; + BLOCKD *d1 = &blockd[i + 2]; + const int y = i & 8; + 
+ blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0]; + blockd[i + 2].bmi = xd->mode_info_context->bmi[i + 2]; + + if (mbmi->need_to_clamp_mvs) { + clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[which_mv].as_mv, xd); + clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd); + } + if (i == 0) { + build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16, + which_mv, 0, 16, 1, + &xd->subpix, mb_row * 16 + y, mb_col * 16); + build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16, + which_mv, 0, 1, 8, + &xd->subpix, mb_row * 16 + y, mb_col * 16); + } else { + build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + 8 * 16, + 8, 16, which_mv, 0, 1, 8, + &xd->subpix, mb_row * 16 + y, mb_col * 16); + } + } + } else { + for (i = 0; i < 16; i += 2) { + BLOCKD *d0 = &blockd[i]; + BLOCKD *d1 = &blockd[i + 1]; + const int x = (i & 3) * 4; + const int y = (i >> 2) * 4; + + blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0]; + blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1]; + + if (i >= 4 && (i & 3) != 0) continue; + + if (i == 0) { + build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16, + which_mv, 0, 8, 1, &xd->subpix, + mb_row * 16 + y, mb_col * 16 + x); + build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16, + which_mv, 0, 1, 4, &xd->subpix, + mb_row * 16 + y, mb_col * 16 + x); + } else if (i < 4) { + build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + x, 4, 16, + which_mv, 0, 8, 1, &xd->subpix, + mb_row * 16 + y, mb_col * 16 + x); + } else { + build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + y * 16, + 4, 16, which_mv, 0, 1, 4, &xd->subpix, + mb_row * 16 + y, mb_col * 16 + x); + } + } + } + metric_1 = get_consistency_metric(xd, tmp_y, tmp_ystride); + + // Choose final weight for averaging + weight = get_weight(xd, metric_1, metric_2); + return weight; +} +#endif + +static int get_implicit_compoundinter_weight(MACROBLOCKD *xd, + int mb_row, + int mb_col) { + const 
int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; + int64_t metric_2 = 0, metric_1 = 0; + int n, clamp_mvs, pre_stride; + uint8_t *base_pre; + int_mv ymv; + uint8_t tmp_y[4096]; + const int tmp_ystride = 64; + int weight; + int edge[4]; + int block_size = 16 << xd->mode_info_context->mbmi.sb_type; + + if (!use_second_ref) return 0; + if (!(xd->up_available || xd->left_available)) + return AVERAGE_WEIGHT; + + edge[0] = xd->mb_to_top_edge; + edge[1] = xd->mb_to_bottom_edge; + edge[2] = xd->mb_to_left_edge; + edge[3] = xd->mb_to_right_edge; + + clamp_mvs = xd->mode_info_context->mbmi.need_to_clamp_secondmv; + base_pre = xd->second_pre.y_buffer; + pre_stride = xd->second_pre.y_stride; + ymv.as_int = xd->mode_info_context->mbmi.mv[1].as_int; + // First generate the second predictor + for (n = 0; n < block_size; n += 16) { + xd->mb_to_left_edge = edge[2] - (n << 3); + xd->mb_to_right_edge = edge[3] + ((16 - n) << 3); + if (clamp_mvs) + clamp_mv_to_umv_border(&ymv.as_mv, xd); + set_scaled_offsets(&xd->scale_factor[1], mb_row * 16, mb_col * 16 + n); + // predict a single row of pixels + vp9_build_inter_predictor( + base_pre + scaled_buffer_offset(n, 0, pre_stride, &xd->scale_factor[1]), + pre_stride, tmp_y + n, tmp_ystride, &ymv, &xd->scale_factor[1], + 16, 1, 0, &xd->subpix); + } + xd->mb_to_left_edge = edge[2]; + xd->mb_to_right_edge = edge[3]; + for (n = 0; n < block_size; n += 16) { + xd->mb_to_top_edge = edge[0] - (n << 3); + xd->mb_to_bottom_edge = edge[1] + ((16 - n) << 3); + if (clamp_mvs) + clamp_mv_to_umv_border(&ymv.as_mv, xd); + set_scaled_offsets(&xd->scale_factor[1], mb_row * 16 + n, mb_col * 16); + // predict a single col of pixels + vp9_build_inter_predictor( + base_pre + scaled_buffer_offset(0, n, pre_stride, &xd->scale_factor[1]), + pre_stride, tmp_y + n * tmp_ystride, tmp_ystride, &ymv, + &xd->scale_factor[1], 1, 16, 0, &xd->subpix); + } + xd->mb_to_top_edge = edge[0]; + xd->mb_to_bottom_edge = edge[1]; + // Compute consistency 
metric + metric_2 = get_consistency_metric(xd, tmp_y, tmp_ystride); + + clamp_mvs = xd->mode_info_context->mbmi.need_to_clamp_mvs; + base_pre = xd->pre.y_buffer; + pre_stride = xd->pre.y_stride; + ymv.as_int = xd->mode_info_context->mbmi.mv[0].as_int; + // Now generate the first predictor + for (n = 0; n < block_size; n += 16) { + xd->mb_to_left_edge = edge[2] - (n << 3); + xd->mb_to_right_edge = edge[3] + ((16 - n) << 3); + if (clamp_mvs) + clamp_mv_to_umv_border(&ymv.as_mv, xd); + set_scaled_offsets(&xd->scale_factor[0], mb_row * 16, mb_col * 16 + n); + // predict a single row of pixels + vp9_build_inter_predictor( + base_pre + scaled_buffer_offset(n, 0, pre_stride, &xd->scale_factor[0]), + pre_stride, tmp_y + n, tmp_ystride, &ymv, &xd->scale_factor[0], + 16, 1, 0, &xd->subpix); + } + xd->mb_to_left_edge = edge[2]; + xd->mb_to_right_edge = edge[3]; + for (n = 0; n < block_size; n += 16) { + xd->mb_to_top_edge = edge[0] - (n << 3); + xd->mb_to_bottom_edge = edge[1] + ((16 - n) << 3); + if (clamp_mvs) + clamp_mv_to_umv_border(&ymv.as_mv, xd); + set_scaled_offsets(&xd->scale_factor[0], mb_row * 16 + n, mb_col * 16); + // predict a single col of pixels + vp9_build_inter_predictor( + base_pre + scaled_buffer_offset(0, n, pre_stride, &xd->scale_factor[0]), + pre_stride, tmp_y + n * tmp_ystride, tmp_ystride, &ymv, + &xd->scale_factor[0], 1, 16, 0, &xd->subpix); + } + xd->mb_to_top_edge = edge[0]; + xd->mb_to_bottom_edge = edge[1]; + metric_1 = get_consistency_metric(xd, tmp_y, tmp_ystride); + + // Choose final weight for averaging + weight = get_weight(xd, metric_1, metric_2); + return weight; +} + +static void build_inter16x16_predictors_mby_w(MACROBLOCKD *xd, + uint8_t *dst_y, + int dst_ystride, + int weight, + int mb_row, + int mb_col) { + const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; + int which_mv; + + for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { + const int clamp_mvs = which_mv ? 
+ xd->mode_info_context->mbmi.need_to_clamp_secondmv : + xd->mode_info_context->mbmi.need_to_clamp_mvs; + + uint8_t *base_pre = which_mv ? xd->second_pre.y_buffer : xd->pre.y_buffer; + int pre_stride = which_mv ? xd->second_pre.y_stride : xd->pre.y_stride; + int_mv ymv; + ymv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int; + + if (clamp_mvs) + clamp_mv_to_umv_border(&ymv.as_mv, xd); + + set_scaled_offsets(&xd->scale_factor[which_mv], mb_row * 16, mb_col * 16); + + vp9_build_inter_predictor(base_pre, pre_stride, + dst_y, dst_ystride, + &ymv, &xd->scale_factor[which_mv], + 16, 16, which_mv ? weight : 0, &xd->subpix); + } +} + +void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd, + uint8_t *dst_y, + int dst_ystride, + int mb_row, + int mb_col) { + int weight = get_implicit_compoundinter_weight(xd, mb_row, mb_col); + + build_inter16x16_predictors_mby_w(xd, dst_y, dst_ystride, weight, + mb_row, mb_col); +} + +#else + void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd, uint8_t *dst_y, int dst_ystride, @@ -518,6 +946,86 @@ void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd, 16, 16, which_mv, &xd->subpix); } } +#endif + +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +static void build_inter16x16_predictors_mbuv_w(MACROBLOCKD *xd, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_uvstride, + int weight, + int mb_row, + int mb_col) { + const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; + int which_mv; + + for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { + const int clamp_mvs = + which_mv ? xd->mode_info_context->mbmi.need_to_clamp_secondmv + : xd->mode_info_context->mbmi.need_to_clamp_mvs; + uint8_t *uptr, *vptr; + int pre_stride = which_mv ? 
xd->second_pre.uv_stride + : xd->pre.uv_stride; + int_mv _o16x16mv; + int_mv _16x16mv; + + _16x16mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int; + + if (clamp_mvs) + clamp_mv_to_umv_border(&_16x16mv.as_mv, xd); + + _o16x16mv = _16x16mv; + /* calc uv motion vectors */ + if (_16x16mv.as_mv.row < 0) + _16x16mv.as_mv.row -= 1; + else + _16x16mv.as_mv.row += 1; + + if (_16x16mv.as_mv.col < 0) + _16x16mv.as_mv.col -= 1; + else + _16x16mv.as_mv.col += 1; + + _16x16mv.as_mv.row /= 2; + _16x16mv.as_mv.col /= 2; + + _16x16mv.as_mv.row &= xd->fullpixel_mask; + _16x16mv.as_mv.col &= xd->fullpixel_mask; + + uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer); + vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer); + + set_scaled_offsets(&xd->scale_factor_uv[which_mv], + mb_row * 16, mb_col * 16); + + vp9_build_inter_predictor_q4( + uptr, pre_stride, dst_u, dst_uvstride, &_16x16mv, &_o16x16mv, + &xd->scale_factor_uv[which_mv], 8, 8, + which_mv ? weight : 0, &xd->subpix); + + vp9_build_inter_predictor_q4( + vptr, pre_stride, dst_v, dst_uvstride, &_16x16mv, &_o16x16mv, + &xd->scale_factor_uv[which_mv], 8, 8, + which_mv ? 
weight : 0, &xd->subpix); + } +} + +void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_uvstride, + int mb_row, + int mb_col) { +#ifdef USE_IMPLICIT_WEIGHT_UV + int weight = get_implicit_compoundinter_weight(xd, mb_row, mb_col); +#else + int weight = AVERAGE_WEIGHT; +#endif + build_inter16x16_predictors_mbuv_w(xd, dst_u, dst_v, dst_uvstride, + weight, mb_row, mb_col); +} + +#else void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, uint8_t *dst_u, @@ -567,31 +1075,28 @@ void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, set_scaled_offsets(&xd->scale_factor_uv[which_mv], mb_row * 16, mb_col * 16); - vp9_build_inter_predictor_q4(uptr, pre_stride, - dst_u, dst_uvstride, - &_16x16mv, &_o16x16mv, - &xd->scale_factor_uv[which_mv], - 8, 8, which_mv, &xd->subpix); + vp9_build_inter_predictor_q4( + uptr, pre_stride, dst_u, dst_uvstride, &_16x16mv, &_o16x16mv, + &xd->scale_factor_uv[which_mv], 8, 8, + which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix); - vp9_build_inter_predictor_q4(vptr, pre_stride, - dst_v, dst_uvstride, - &_16x16mv, &_o16x16mv, - &xd->scale_factor_uv[which_mv], - 8, 8, which_mv, &xd->subpix); + vp9_build_inter_predictor_q4( + vptr, pre_stride, dst_v, dst_uvstride, &_16x16mv, &_o16x16mv, + &xd->scale_factor_uv[which_mv], 8, 8, + which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix); } } +#endif -void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, - uint8_t *dst_y, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_ystride, - int dst_uvstride, - int mb_row, - int mb_col) { - uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer; - uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer, - *v2 = x->second_pre.v_buffer; +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +static void build_inter32x32_predictors_sby_w(MACROBLOCKD *x, + uint8_t *dst_y, + int dst_ystride, + int weight, + int mb_row, + int mb_col) { + uint8_t *y1 = 
x->pre.y_buffer; + uint8_t *y2 = x->second_pre.y_buffer; int edge[4], n; edge[0] = x->mb_to_top_edge; @@ -601,7 +1106,6 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, for (n = 0; n < 4; n++) { const int x_idx = n & 1, y_idx = n >> 1; - int scaled_uv_offset; x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3); x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3); @@ -612,6 +1116,118 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, y_idx * 16, x->pre.y_stride, &x->scale_factor[0]); + if (x->mode_info_context->mbmi.second_ref_frame > 0) { + x->second_pre.y_buffer = y2 + + scaled_buffer_offset(x_idx * 16, + y_idx * 16, + x->second_pre.y_stride, + &x->scale_factor[1]); + } + build_inter16x16_predictors_mby_w(x, + dst_y + y_idx * 16 * dst_ystride + x_idx * 16, + dst_ystride, weight, mb_row + y_idx, mb_col + x_idx); + } + x->mb_to_top_edge = edge[0]; + x->mb_to_bottom_edge = edge[1]; + x->mb_to_left_edge = edge[2]; + x->mb_to_right_edge = edge[3]; + + x->pre.y_buffer = y1; + if (x->mode_info_context->mbmi.second_ref_frame > 0) { + x->second_pre.y_buffer = y2; + } +} + +void vp9_build_inter32x32_predictors_sby(MACROBLOCKD *x, + uint8_t *dst_y, + int dst_ystride, + int mb_row, + int mb_col) { + int weight = get_implicit_compoundinter_weight(x, mb_row, mb_col); + build_inter32x32_predictors_sby_w(x, dst_y, dst_ystride, weight, + mb_row, mb_col); +} + +#else + +// TODO(all): Can we use 32x32 specific implementations of this rather than +// using 16x16 implementations ? 
+void vp9_build_inter32x32_predictors_sby(MACROBLOCKD *x, + uint8_t *dst_y, + int dst_ystride, + int mb_row, + int mb_col) { + uint8_t *y1 = x->pre.y_buffer; + uint8_t *y2 = x->second_pre.y_buffer; + int edge[4], n; + + edge[0] = x->mb_to_top_edge; + edge[1] = x->mb_to_bottom_edge; + edge[2] = x->mb_to_left_edge; + edge[3] = x->mb_to_right_edge; + + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3); + x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3); + x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3); + x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 16) << 3); + + x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 16, + y_idx * 16, + x->pre.y_stride, + &x->scale_factor[0]); + if (x->mode_info_context->mbmi.second_ref_frame > 0) { + x->second_pre.y_buffer = y2 + + scaled_buffer_offset(x_idx * 16, + y_idx * 16, + x->second_pre.y_stride, + &x->scale_factor[1]); + } + vp9_build_inter16x16_predictors_mby(x, + dst_y + y_idx * 16 * dst_ystride + x_idx * 16, + dst_ystride, mb_row + y_idx, mb_col + x_idx); + } + x->mb_to_top_edge = edge[0]; + x->mb_to_bottom_edge = edge[1]; + x->mb_to_left_edge = edge[2]; + x->mb_to_right_edge = edge[3]; + + x->pre.y_buffer = y1; + if (x->mode_info_context->mbmi.second_ref_frame > 0) { + x->second_pre.y_buffer = y2; + } +} + +#endif + +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +static void build_inter32x32_predictors_sbuv_w(MACROBLOCKD *x, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_uvstride, + int weight, + int mb_row, + int mb_col) { + uint8_t *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer; + uint8_t *u2 = x->second_pre.u_buffer, *v2 = x->second_pre.v_buffer; + int edge[4], n; + + edge[0] = x->mb_to_top_edge; + edge[1] = x->mb_to_bottom_edge; + edge[2] = x->mb_to_left_edge; + edge[3] = x->mb_to_right_edge; + + for (n = 0; n < 4; n++) { + int scaled_uv_offset; + const int x_idx = n & 1, y_idx = n >> 1; + + x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 
3); + x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3); + x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3); + x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 16) << 3); + scaled_uv_offset = scaled_buffer_offset(x_idx * 8, y_idx * 8, x->pre.uv_stride, @@ -620,11 +1236,6 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, x->pre.v_buffer = v1 + scaled_uv_offset; if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.y_buffer = y2 + - scaled_buffer_offset(x_idx * 16, - y_idx * 16, - x->second_pre.y_stride, - &x->scale_factor[1]); scaled_uv_offset = scaled_buffer_offset(x_idx * 8, y_idx * 8, x->second_pre.uv_stride, @@ -633,37 +1244,103 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, x->second_pre.v_buffer = v2 + scaled_uv_offset; } - vp9_build_inter16x16_predictors_mb(x, - dst_y + y_idx * 16 * dst_ystride + x_idx * 16, + build_inter16x16_predictors_mbuv_w(x, dst_u + y_idx * 8 * dst_uvstride + x_idx * 8, dst_v + y_idx * 8 * dst_uvstride + x_idx * 8, - dst_ystride, dst_uvstride, mb_row + y_idx, mb_col + x_idx); + dst_uvstride, weight, mb_row + y_idx, mb_col + x_idx); } - x->mb_to_top_edge = edge[0]; x->mb_to_bottom_edge = edge[1]; x->mb_to_left_edge = edge[2]; x->mb_to_right_edge = edge[3]; - x->pre.y_buffer = y1; x->pre.u_buffer = u1; x->pre.v_buffer = v1; if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.y_buffer = y2; x->second_pre.u_buffer = u2; x->second_pre.v_buffer = v2; } +} -#if CONFIG_COMP_INTERINTRA_PRED - if (x->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { - vp9_build_interintra_32x32_predictors_sb( - x, dst_y, dst_u, dst_v, dst_ystride, dst_uvstride); - } +void vp9_build_inter32x32_predictors_sbuv(MACROBLOCKD *xd, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_uvstride, + int mb_row, + int mb_col) { +#ifdef USE_IMPLICIT_WEIGHT_UV + int weight = get_implicit_compoundinter_weight(xd, mb_row, mb_col); +#else + int weight = AVERAGE_WEIGHT; #endif + 
build_inter32x32_predictors_sbuv_w(xd, dst_u, dst_v, dst_uvstride, + weight, mb_row, mb_col); } -void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, +#else + +void vp9_build_inter32x32_predictors_sbuv(MACROBLOCKD *x, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_uvstride, + int mb_row, + int mb_col) { + uint8_t *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer; + uint8_t *u2 = x->second_pre.u_buffer, *v2 = x->second_pre.v_buffer; + int edge[4], n; + + edge[0] = x->mb_to_top_edge; + edge[1] = x->mb_to_bottom_edge; + edge[2] = x->mb_to_left_edge; + edge[3] = x->mb_to_right_edge; + + for (n = 0; n < 4; n++) { + int scaled_uv_offset; + const int x_idx = n & 1, y_idx = n >> 1; + + x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3); + x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3); + x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3); + x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 16) << 3); + + scaled_uv_offset = scaled_buffer_offset(x_idx * 8, + y_idx * 8, + x->pre.uv_stride, + &x->scale_factor_uv[0]); + x->pre.u_buffer = u1 + scaled_uv_offset; + x->pre.v_buffer = v1 + scaled_uv_offset; + + if (x->mode_info_context->mbmi.second_ref_frame > 0) { + scaled_uv_offset = scaled_buffer_offset(x_idx * 8, + y_idx * 8, + x->second_pre.uv_stride, + &x->scale_factor_uv[1]); + x->second_pre.u_buffer = u2 + scaled_uv_offset; + x->second_pre.v_buffer = v2 + scaled_uv_offset; + } + + vp9_build_inter16x16_predictors_mbuv(x, + dst_u + y_idx * 8 * dst_uvstride + x_idx * 8, + dst_v + y_idx * 8 * dst_uvstride + x_idx * 8, + dst_uvstride, mb_row + y_idx, mb_col + x_idx); + } + x->mb_to_top_edge = edge[0]; + x->mb_to_bottom_edge = edge[1]; + x->mb_to_left_edge = edge[2]; + x->mb_to_right_edge = edge[3]; + + x->pre.u_buffer = u1; + x->pre.v_buffer = v1; + + if (x->mode_info_context->mbmi.second_ref_frame > 0) { + x->second_pre.u_buffer = u2; + x->second_pre.v_buffer = v2; + } +} +#endif + +void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, uint8_t *dst_y, uint8_t *dst_u, 
uint8_t *dst_v, @@ -671,9 +1348,27 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, int dst_uvstride, int mb_row, int mb_col) { - uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer; - uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer, - *v2 = x->second_pre.v_buffer; + vp9_build_inter32x32_predictors_sby(x, dst_y, dst_ystride, + mb_row, mb_col); + vp9_build_inter32x32_predictors_sbuv(x, dst_u, dst_v, dst_uvstride, + mb_row, mb_col); +#if CONFIG_COMP_INTERINTRA_PRED + if (x->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { + vp9_build_interintra_32x32_predictors_sb( + x, dst_y, dst_u, dst_v, dst_ystride, dst_uvstride); + } +#endif +} + +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +static void build_inter64x64_predictors_sby_w(MACROBLOCKD *x, + uint8_t *dst_y, + int dst_ystride, + int weight, + int mb_row, + int mb_col) { + uint8_t *y1 = x->pre.y_buffer; + uint8_t *y2 = x->second_pre.y_buffer; int edge[4], n; edge[0] = x->mb_to_top_edge; @@ -683,7 +1378,6 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, for (n = 0; n < 4; n++) { const int x_idx = n & 1, y_idx = n >> 1; - int scaled_uv_offset; x->mb_to_top_edge = edge[0] - ((y_idx * 32) << 3); x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 32) << 3); @@ -694,6 +1388,121 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, y_idx * 32, x->pre.y_stride, &x->scale_factor[0]); + + if (x->mode_info_context->mbmi.second_ref_frame > 0) { + x->second_pre.y_buffer = y2 + + scaled_buffer_offset(x_idx * 32, + y_idx * 32, + x->second_pre.y_stride, + &x->scale_factor[1]); + } + + build_inter32x32_predictors_sby_w(x, + dst_y + y_idx * 32 * dst_ystride + x_idx * 32, + dst_ystride, weight, mb_row + y_idx * 2, mb_col + x_idx * 2); + } + + x->mb_to_top_edge = edge[0]; + x->mb_to_bottom_edge = edge[1]; + x->mb_to_left_edge = edge[2]; + x->mb_to_right_edge = edge[3]; + + x->pre.y_buffer = y1; + + if (x->mode_info_context->mbmi.second_ref_frame > 0) { + 
x->second_pre.y_buffer = y2; + } +} + +void vp9_build_inter64x64_predictors_sby(MACROBLOCKD *x, + uint8_t *dst_y, + int dst_ystride, + int mb_row, + int mb_col) { + int weight = get_implicit_compoundinter_weight(x, mb_row, mb_col); + build_inter64x64_predictors_sby_w(x, dst_y, dst_ystride, weight, + mb_row, mb_col); +} + +#else + +void vp9_build_inter64x64_predictors_sby(MACROBLOCKD *x, + uint8_t *dst_y, + int dst_ystride, + int mb_row, + int mb_col) { + uint8_t *y1 = x->pre.y_buffer; + uint8_t *y2 = x->second_pre.y_buffer; + int edge[4], n; + + edge[0] = x->mb_to_top_edge; + edge[1] = x->mb_to_bottom_edge; + edge[2] = x->mb_to_left_edge; + edge[3] = x->mb_to_right_edge; + + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + x->mb_to_top_edge = edge[0] - ((y_idx * 32) << 3); + x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 32) << 3); + x->mb_to_left_edge = edge[2] - ((x_idx * 32) << 3); + x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 32) << 3); + + x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 32, + y_idx * 32, + x->pre.y_stride, + &x->scale_factor[0]); + + if (x->mode_info_context->mbmi.second_ref_frame > 0) { + x->second_pre.y_buffer = y2 + + scaled_buffer_offset(x_idx * 32, + y_idx * 32, + x->second_pre.y_stride, + &x->scale_factor[1]); + } + + vp9_build_inter32x32_predictors_sby(x, + dst_y + y_idx * 32 * dst_ystride + x_idx * 32, + dst_ystride, mb_row + y_idx * 2, mb_col + x_idx * 2); + } + + x->mb_to_top_edge = edge[0]; + x->mb_to_bottom_edge = edge[1]; + x->mb_to_left_edge = edge[2]; + x->mb_to_right_edge = edge[3]; + + x->pre.y_buffer = y1; + + if (x->mode_info_context->mbmi.second_ref_frame > 0) { + x->second_pre.y_buffer = y2; + } +} +#endif + +void vp9_build_inter64x64_predictors_sbuv(MACROBLOCKD *x, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_uvstride, + int mb_row, + int mb_col) { + uint8_t *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer; + uint8_t *u2 = x->second_pre.u_buffer, *v2 = x->second_pre.v_buffer; + int edge[4], 
n; + + edge[0] = x->mb_to_top_edge; + edge[1] = x->mb_to_bottom_edge; + edge[2] = x->mb_to_left_edge; + edge[3] = x->mb_to_right_edge; + + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + int scaled_uv_offset; + + x->mb_to_top_edge = edge[0] - ((y_idx * 32) << 3); + x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 32) << 3); + x->mb_to_left_edge = edge[2] - ((x_idx * 32) << 3); + x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 32) << 3); + scaled_uv_offset = scaled_buffer_offset(x_idx * 16, y_idx * 16, x->pre.uv_stride, @@ -702,11 +1511,6 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, x->pre.v_buffer = v1 + scaled_uv_offset; if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.y_buffer = y2 + - scaled_buffer_offset(x_idx * 32, - y_idx * 32, - x->second_pre.y_stride, - &x->scale_factor[1]); scaled_uv_offset = scaled_buffer_offset(x_idx * 16, y_idx * 16, x->second_pre.uv_stride, @@ -715,11 +1519,10 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, x->second_pre.v_buffer = v2 + scaled_uv_offset; } - vp9_build_inter32x32_predictors_sb(x, - dst_y + y_idx * 32 * dst_ystride + x_idx * 32, + vp9_build_inter32x32_predictors_sbuv(x, dst_u + y_idx * 16 * dst_uvstride + x_idx * 16, dst_v + y_idx * 16 * dst_uvstride + x_idx * 16, - dst_ystride, dst_uvstride, mb_row + y_idx * 2, mb_col + x_idx * 2); + dst_uvstride, mb_row + y_idx * 2, mb_col + x_idx * 2); } x->mb_to_top_edge = edge[0]; @@ -727,16 +1530,27 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, x->mb_to_left_edge = edge[2]; x->mb_to_right_edge = edge[3]; - x->pre.y_buffer = y1; x->pre.u_buffer = u1; x->pre.v_buffer = v1; if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.y_buffer = y2; x->second_pre.u_buffer = u2; x->second_pre.v_buffer = v2; } +} +void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, + uint8_t *dst_y, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_ystride, + int dst_uvstride, + int mb_row, + int mb_col) { + 
vp9_build_inter64x64_predictors_sby(x, dst_y, dst_ystride, + mb_row, mb_col); + vp9_build_inter64x64_predictors_sbuv(x, dst_u, dst_v, dst_uvstride, + mb_row, mb_col); #if CONFIG_COMP_INTERINTRA_PRED if (x->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { vp9_build_interintra_64x64_predictors_sb(x, dst_y, dst_u, dst_v, @@ -752,6 +1566,11 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd, BLOCKD *blockd = xd->block; int which_mv = 0; const int use_second_ref = mbmi->second_ref_frame > 0; +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT && defined(USE_IMPLICIT_WEIGHT_SPLITMV) + int weight = get_implicit_compoundinter_weight_splitmv(xd, mb_row, mb_col); +#else + int weight = AVERAGE_WEIGHT; +#endif if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) { for (i = 0; i < 16; i += 8) { @@ -768,9 +1587,9 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd, clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd); } - build_2x1_inter_predictor(d0, d1, xd->scale_factor, 8, 16, - which_mv, &xd->subpix, - mb_row * 16 + y, mb_col * 16); + build_2x1_inter_predictor(d0, d1, xd->scale_factor, 8, 16, which_mv, + which_mv ? weight : 0, + &xd->subpix, mb_row * 16 + y, mb_col * 16); } } } else { @@ -784,13 +1603,18 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd, blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1]; for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - build_2x1_inter_predictor(d0, d1, xd->scale_factor, 4, 16, - which_mv, &xd->subpix, + build_2x1_inter_predictor(d0, d1, xd->scale_factor, 4, 16, which_mv, + which_mv ? 
weight : 0, + &xd->subpix, mb_row * 16 + y, mb_col * 16 + x); } } } - +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +#if !defined(USE_IMPLICIT_WEIGHT_UV) + weight = AVERAGE_WEIGHT; +#endif +#endif for (i = 16; i < 24; i += 2) { BLOCKD *d0 = &blockd[i]; BLOCKD *d1 = &blockd[i + 1]; @@ -798,8 +1622,8 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd, const int y = ((i - 16) >> 1) * 4; for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, - which_mv, &xd->subpix, + build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv, + which_mv ? weight : 0, &xd->subpix, mb_row * 8 + y, mb_col * 8 + x); } } @@ -876,9 +1700,14 @@ void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd, vp9_build_inter16x16_predictors_mby(xd, dst_y, dst_ystride, mb_row, mb_col); vp9_build_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride, mb_row, mb_col); +#if CONFIG_COMP_INTERINTRA_PRED + if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { + vp9_build_interintra_16x16_predictors_mb(xd, dst_y, dst_u, dst_v, + dst_ystride, dst_uvstride); + } +#endif } - void vp9_build_inter_predictors_mb(MACROBLOCKD *xd, int mb_row, int mb_col) { @@ -888,15 +1717,116 @@ void vp9_build_inter_predictors_mb(MACROBLOCKD *xd, &xd->predictor[320], 16, 8, mb_row, mb_col); -#if CONFIG_COMP_INTERINTRA_PRED - if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { - vp9_build_interintra_16x16_predictors_mb(xd, xd->predictor, - &xd->predictor[256], - &xd->predictor[320], 16, 8); - } -#endif } else { build_4x4uvmvs(xd); build_inter4x4_predictors_mb(xd, mb_row, mb_col); } } + +/*encoder only*/ +void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd, + int mb_row, + int mb_col) { + int i, j; + int weight; + BLOCKD *blockd = xd->block; + + /* build uv mvs */ + for (i = 0; i < 2; i++) { + for (j = 0; j < 2; j++) { + int yoffset = i * 8 + j * 2; + int uoffset = 16 + i * 2 + j; + int voffset = 20 + i * 2 + j; 
+ int temp; + + temp = blockd[yoffset ].bmi.as_mv[0].as_mv.row + + blockd[yoffset + 1].bmi.as_mv[0].as_mv.row + + blockd[yoffset + 4].bmi.as_mv[0].as_mv.row + + blockd[yoffset + 5].bmi.as_mv[0].as_mv.row; + + if (temp < 0) + temp -= 4; + else + temp += 4; + + xd->block[uoffset].bmi.as_mv[0].as_mv.row = (temp / 8) & + xd->fullpixel_mask; + + temp = blockd[yoffset ].bmi.as_mv[0].as_mv.col + + blockd[yoffset + 1].bmi.as_mv[0].as_mv.col + + blockd[yoffset + 4].bmi.as_mv[0].as_mv.col + + blockd[yoffset + 5].bmi.as_mv[0].as_mv.col; + + if (temp < 0) + temp -= 4; + else + temp += 4; + + blockd[uoffset].bmi.as_mv[0].as_mv.col = (temp / 8) & + xd->fullpixel_mask; + + blockd[voffset].bmi.as_mv[0].as_mv.row = + blockd[uoffset].bmi.as_mv[0].as_mv.row; + blockd[voffset].bmi.as_mv[0].as_mv.col = + blockd[uoffset].bmi.as_mv[0].as_mv.col; + + if (xd->mode_info_context->mbmi.second_ref_frame > 0) { + temp = blockd[yoffset ].bmi.as_mv[1].as_mv.row + + blockd[yoffset + 1].bmi.as_mv[1].as_mv.row + + blockd[yoffset + 4].bmi.as_mv[1].as_mv.row + + blockd[yoffset + 5].bmi.as_mv[1].as_mv.row; + + if (temp < 0) { + temp -= 4; + } else { + temp += 4; + } + + blockd[uoffset].bmi.as_mv[1].as_mv.row = (temp / 8) & + xd->fullpixel_mask; + + temp = blockd[yoffset ].bmi.as_mv[1].as_mv.col + + blockd[yoffset + 1].bmi.as_mv[1].as_mv.col + + blockd[yoffset + 4].bmi.as_mv[1].as_mv.col + + blockd[yoffset + 5].bmi.as_mv[1].as_mv.col; + + if (temp < 0) { + temp -= 4; + } else { + temp += 4; + } + + blockd[uoffset].bmi.as_mv[1].as_mv.col = (temp / 8) & + xd->fullpixel_mask; + + blockd[voffset].bmi.as_mv[1].as_mv.row = + blockd[uoffset].bmi.as_mv[1].as_mv.row; + blockd[voffset].bmi.as_mv[1].as_mv.col = + blockd[uoffset].bmi.as_mv[1].as_mv.col; + } + } + } + +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT && \ + defined(USE_IMPLICIT_WEIGHT_SPLITMV) && \ + defined(USE_IMPLICIT_WEIGHT_UV) + weight = get_implicit_compoundinter_weight_splitmv(xd, mb_row, mb_col); +#else + weight = AVERAGE_WEIGHT; +#endif + for (i = 
16; i < 24; i += 2) { + const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; + const int x = 4 * (i & 1); + const int y = ((i - 16) >> 1) * 4; + + int which_mv; + BLOCKD *d0 = &blockd[i]; + BLOCKD *d1 = &blockd[i + 1]; + + for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { + build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv, + which_mv ? weight : 0, + &xd->subpix, mb_row * 8 + y, mb_col * 8 + x); + } + } +} diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index c1153ed9a..8b6efc384 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -276,6 +276,62 @@ specialize vp9_convolve8_avg_horiz ssse3 prototype void vp9_convolve8_avg_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" specialize vp9_convolve8_avg_vert ssse3 +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +prototype void vp9_convolve8_1by8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_1by8 + +prototype void vp9_convolve8_qtr "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_qtr + +prototype void vp9_convolve8_3by8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_3by8 + +prototype void vp9_convolve8_5by8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_5by8 + +prototype void vp9_convolve8_3qtr "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, 
const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_3qtr + +prototype void vp9_convolve8_7by8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_7by8 + +prototype void vp9_convolve8_1by8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_1by8_horiz + +prototype void vp9_convolve8_qtr_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_qtr_horiz + +prototype void vp9_convolve8_3by8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_3by8_horiz + +prototype void vp9_convolve8_5by8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_5by8_horiz + +prototype void vp9_convolve8_3qtr_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_3qtr_horiz + +prototype void vp9_convolve8_7by8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_7by8_horiz + +prototype void vp9_convolve8_1by8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int 
h" +specialize vp9_convolve8_1by8_vert + +prototype void vp9_convolve8_qtr_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_qtr_vert + +prototype void vp9_convolve8_3by8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_3by8_vert + +prototype void vp9_convolve8_5by8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_5by8_vert + +prototype void vp9_convolve8_3qtr_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_3qtr_vert + +prototype void vp9_convolve8_7by8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_7by8_vert +#endif + # # dct # @@ -305,7 +361,7 @@ specialize vp9_short_idct1_16x16 prototype void vp9_short_idct32x32 "int16_t *input, int16_t *output, int pitch" -specialize vp9_short_idct32x32 +specialize vp9_short_idct32x32 sse2 prototype void vp9_short_idct1_32x32 "int16_t *input, int16_t *output" specialize vp9_short_idct1_32x32 diff --git a/vp9/common/x86/vp9_idct_x86.c b/vp9/common/x86/vp9_idct_x86.c index 1a2c84a40..c8a3873f7 100644 --- a/vp9/common/x86/vp9_idct_x86.c +++ b/vp9/common/x86/vp9_idct_x86.c @@ -1506,4 +1506,694 @@ void vp9_short_idct10_16x16_sse2(int16_t *input, int16_t *output, int pitch) { output += 8; } } + +// Define Macro for multiplying elements by constants and adding them together. 
+#define MULTIPLICATION_AND_ADD(lo_0, hi_0, lo_1, hi_1, \ + cst0, cst1, cst2, cst3, res0, res1, res2, res3) \ + { \ + tmp0 = _mm_madd_epi16(lo_0, cst0); \ + tmp1 = _mm_madd_epi16(hi_0, cst0); \ + tmp2 = _mm_madd_epi16(lo_0, cst1); \ + tmp3 = _mm_madd_epi16(hi_0, cst1); \ + tmp4 = _mm_madd_epi16(lo_1, cst2); \ + tmp5 = _mm_madd_epi16(hi_1, cst2); \ + tmp6 = _mm_madd_epi16(lo_1, cst3); \ + tmp7 = _mm_madd_epi16(hi_1, cst3); \ + \ + tmp0 = _mm_add_epi32(tmp0, rounding); \ + tmp1 = _mm_add_epi32(tmp1, rounding); \ + tmp2 = _mm_add_epi32(tmp2, rounding); \ + tmp3 = _mm_add_epi32(tmp3, rounding); \ + tmp4 = _mm_add_epi32(tmp4, rounding); \ + tmp5 = _mm_add_epi32(tmp5, rounding); \ + tmp6 = _mm_add_epi32(tmp6, rounding); \ + tmp7 = _mm_add_epi32(tmp7, rounding); \ + \ + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ + tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); \ + tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS); \ + tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); \ + tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS); \ + \ + res0 = _mm_packs_epi32(tmp0, tmp1); \ + res1 = _mm_packs_epi32(tmp2, tmp3); \ + res2 = _mm_packs_epi32(tmp4, tmp5); \ + res3 = _mm_packs_epi32(tmp6, tmp7); \ + } + +void vp9_short_idct32x32_sse2(int16_t *input, int16_t *output, int pitch) { + const int half_pitch = pitch >> 1; + const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); + const __m128i final_rounding = _mm_set1_epi16(1<<5); + + // idct constants for each stage + const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64); + const __m128i stg1_1 = pair_set_epi16(cospi_1_64, cospi_31_64); + const __m128i stg1_2 = pair_set_epi16(cospi_15_64, -cospi_17_64); + const __m128i stg1_3 = pair_set_epi16(cospi_17_64, cospi_15_64); + const __m128i stg1_4 = pair_set_epi16(cospi_23_64, -cospi_9_64); + const __m128i stg1_5 = pair_set_epi16(cospi_9_64, 
cospi_23_64); + const __m128i stg1_6 = pair_set_epi16(cospi_7_64, -cospi_25_64); + const __m128i stg1_7 = pair_set_epi16(cospi_25_64, cospi_7_64); + const __m128i stg1_8 = pair_set_epi16(cospi_27_64, -cospi_5_64); + const __m128i stg1_9 = pair_set_epi16(cospi_5_64, cospi_27_64); + const __m128i stg1_10 = pair_set_epi16(cospi_11_64, -cospi_21_64); + const __m128i stg1_11 = pair_set_epi16(cospi_21_64, cospi_11_64); + const __m128i stg1_12 = pair_set_epi16(cospi_19_64, -cospi_13_64); + const __m128i stg1_13 = pair_set_epi16(cospi_13_64, cospi_19_64); + const __m128i stg1_14 = pair_set_epi16(cospi_3_64, -cospi_29_64); + const __m128i stg1_15 = pair_set_epi16(cospi_29_64, cospi_3_64); + + const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64); + const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64); + const __m128i stg2_2 = pair_set_epi16(cospi_14_64, -cospi_18_64); + const __m128i stg2_3 = pair_set_epi16(cospi_18_64, cospi_14_64); + const __m128i stg2_4 = pair_set_epi16(cospi_22_64, -cospi_10_64); + const __m128i stg2_5 = pair_set_epi16(cospi_10_64, cospi_22_64); + const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64); + const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64); + + const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); + const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64); + const __m128i stg3_2 = pair_set_epi16(cospi_12_64, -cospi_20_64); + const __m128i stg3_3 = pair_set_epi16(cospi_20_64, cospi_12_64); + const __m128i stg3_4 = pair_set_epi16(-cospi_4_64, cospi_28_64); + const __m128i stg3_5 = pair_set_epi16(cospi_28_64, cospi_4_64); + const __m128i stg3_6 = pair_set_epi16(-cospi_28_64, -cospi_4_64); + const __m128i stg3_8 = pair_set_epi16(-cospi_20_64, cospi_12_64); + const __m128i stg3_9 = pair_set_epi16(cospi_12_64, cospi_20_64); + const __m128i stg3_10 = pair_set_epi16(-cospi_12_64, -cospi_20_64); + + const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64); + const __m128i stg4_1 = 
pair_set_epi16(cospi_16_64, -cospi_16_64); + const __m128i stg4_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); + const __m128i stg4_3 = pair_set_epi16(cospi_8_64, cospi_24_64); + const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); + const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); + const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); + + const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); + + __m128i in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, in12, + in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, + in24, in25, in26, in27, in28, in29, in30, in31; + __m128i col[128]; + __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7, + stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, + stp1_16, stp1_17, stp1_18, stp1_19, stp1_20, stp1_21, stp1_22, + stp1_23, stp1_24, stp1_25, stp1_26, stp1_27, stp1_28, stp1_29, + stp1_30, stp1_31; + __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, + stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15, + stp2_16, stp2_17, stp2_18, stp2_19, stp2_20, stp2_21, stp2_22, + stp2_23, stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29, + stp2_30, stp2_31; + __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int i, j; + + // We work on a 8x32 block each time, and loop 8 times for 2-D 32x32 idct. + for (i = 0; i < 8; i++) { + if (i < 4) { + // First 1-D idct + // Load input data. 
+ in0 = _mm_load_si128((__m128i *)input); + in8 = _mm_load_si128((__m128i *)(input + 8 * 1)); + in16 = _mm_load_si128((__m128i *)(input + 8 * 2)); + in24 = _mm_load_si128((__m128i *)(input + 8 * 3)); + in1 = _mm_load_si128((__m128i *)(input + 8 * 4)); + in9 = _mm_load_si128((__m128i *)(input + 8 * 5)); + in17 = _mm_load_si128((__m128i *)(input + 8 * 6)); + in25 = _mm_load_si128((__m128i *)(input + 8 * 7)); + in2 = _mm_load_si128((__m128i *)(input + 8 * 8)); + in10 = _mm_load_si128((__m128i *)(input + 8 * 9)); + in18 = _mm_load_si128((__m128i *)(input + 8 * 10)); + in26 = _mm_load_si128((__m128i *)(input + 8 * 11)); + in3 = _mm_load_si128((__m128i *)(input + 8 * 12)); + in11 = _mm_load_si128((__m128i *)(input + 8 * 13)); + in19 = _mm_load_si128((__m128i *)(input + 8 * 14)); + in27 = _mm_load_si128((__m128i *)(input + 8 * 15)); + + in4 = _mm_load_si128((__m128i *)(input + 8 * 16)); + in12 = _mm_load_si128((__m128i *)(input + 8 * 17)); + in20 = _mm_load_si128((__m128i *)(input + 8 * 18)); + in28 = _mm_load_si128((__m128i *)(input + 8 * 19)); + in5 = _mm_load_si128((__m128i *)(input + 8 * 20)); + in13 = _mm_load_si128((__m128i *)(input + 8 * 21)); + in21 = _mm_load_si128((__m128i *)(input + 8 * 22)); + in29 = _mm_load_si128((__m128i *)(input + 8 * 23)); + in6 = _mm_load_si128((__m128i *)(input + 8 * 24)); + in14 = _mm_load_si128((__m128i *)(input + 8 * 25)); + in22 = _mm_load_si128((__m128i *)(input + 8 * 26)); + in30 = _mm_load_si128((__m128i *)(input + 8 * 27)); + in7 = _mm_load_si128((__m128i *)(input + 8 * 28)); + in15 = _mm_load_si128((__m128i *)(input + 8 * 29)); + in23 = _mm_load_si128((__m128i *)(input + 8 * 30)); + in31 = _mm_load_si128((__m128i *)(input + 8 * 31)); + + input += 256; + + // Transpose 32x8 block to 8x32 block + TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, + in4, in5, in6, in7); + TRANSPOSE_8X8(in8, in9, in10, in11, in12, in13, in14, in15, in8, in9, + in10, in11, in12, in13, in14, in15); + TRANSPOSE_8X8(in16, in17, 
in18, in19, in20, in21, in22, in23, in16, in17, + in18, in19, in20, in21, in22, in23); + TRANSPOSE_8X8(in24, in25, in26, in27, in28, in29, in30, in31, in24, in25, + in26, in27, in28, in29, in30, in31); + } else { + // Second 1-D idct + j = i - 4; + + // Transpose 32x8 block to 8x32 block + TRANSPOSE_8X8(col[j * 8 + 0], col[j * 8 + 1], col[j * 8 + 2], + col[j * 8 + 3], col[j * 8 + 4], col[j * 8 + 5], + col[j * 8 + 6], col[j * 8 + 7], in0, in1, in2, in3, in4, + in5, in6, in7); + j += 4; + TRANSPOSE_8X8(col[j * 8 + 0], col[j * 8 + 1], col[j * 8 + 2], + col[j * 8 + 3], col[j * 8 + 4], col[j * 8 + 5], + col[j * 8 + 6], col[j * 8 + 7], in8, in9, in10, + in11, in12, in13, in14, in15); + j += 4; + TRANSPOSE_8X8(col[j * 8 + 0], col[j * 8 + 1], col[j * 8 + 2], + col[j * 8 + 3], col[j * 8 + 4], col[j * 8 + 5], + col[j * 8 + 6], col[j * 8 + 7], in16, in17, in18, + in19, in20, in21, in22, in23); + j += 4; + TRANSPOSE_8X8(col[j * 8 + 0], col[j * 8 + 1], col[j * 8 + 2], + col[j * 8 + 3], col[j * 8 + 4], col[j * 8 + 5], + col[j * 8 + 6], col[j * 8 + 7], in24, in25, in26, in27, + in28, in29, in30, in31); + } + + // Stage1 + { + const __m128i lo_1_31 = _mm_unpacklo_epi16(in1, in31); + const __m128i hi_1_31 = _mm_unpackhi_epi16(in1, in31); + const __m128i lo_17_15 = _mm_unpacklo_epi16(in17, in15); + const __m128i hi_17_15 = _mm_unpackhi_epi16(in17, in15); + + const __m128i lo_9_23 = _mm_unpacklo_epi16(in9, in23); + const __m128i hi_9_23 = _mm_unpackhi_epi16(in9, in23); + const __m128i lo_25_7= _mm_unpacklo_epi16(in25, in7); + const __m128i hi_25_7 = _mm_unpackhi_epi16(in25, in7); + + const __m128i lo_5_27 = _mm_unpacklo_epi16(in5, in27); + const __m128i hi_5_27 = _mm_unpackhi_epi16(in5, in27); + const __m128i lo_21_11 = _mm_unpacklo_epi16(in21, in11); + const __m128i hi_21_11 = _mm_unpackhi_epi16(in21, in11); + + const __m128i lo_13_19 = _mm_unpacklo_epi16(in13, in19); + const __m128i hi_13_19 = _mm_unpackhi_epi16(in13, in19); + const __m128i lo_29_3 = _mm_unpacklo_epi16(in29, in3); 
+ const __m128i hi_29_3 = _mm_unpackhi_epi16(in29, in3); + + MULTIPLICATION_AND_ADD(lo_1_31, hi_1_31, lo_17_15, hi_17_15, stg1_0, + stg1_1, stg1_2, stg1_3, stp1_16, stp1_31, + stp1_17, stp1_30) + MULTIPLICATION_AND_ADD(lo_9_23, hi_9_23, lo_25_7, hi_25_7, stg1_4, + stg1_5, stg1_6, stg1_7, stp1_18, stp1_29, + stp1_19, stp1_28) + MULTIPLICATION_AND_ADD(lo_5_27, hi_5_27, lo_21_11, hi_21_11, stg1_8, + stg1_9, stg1_10, stg1_11, stp1_20, stp1_27, + stp1_21, stp1_26) + MULTIPLICATION_AND_ADD(lo_13_19, hi_13_19, lo_29_3, hi_29_3, stg1_12, + stg1_13, stg1_14, stg1_15, stp1_22, stp1_25, + stp1_23, stp1_24) + } + + // Stage2 + { + const __m128i lo_2_30 = _mm_unpacklo_epi16(in2, in30); + const __m128i hi_2_30 = _mm_unpackhi_epi16(in2, in30); + const __m128i lo_18_14 = _mm_unpacklo_epi16(in18, in14); + const __m128i hi_18_14 = _mm_unpackhi_epi16(in18, in14); + + const __m128i lo_10_22 = _mm_unpacklo_epi16(in10, in22); + const __m128i hi_10_22 = _mm_unpackhi_epi16(in10, in22); + const __m128i lo_26_6 = _mm_unpacklo_epi16(in26, in6); + const __m128i hi_26_6 = _mm_unpackhi_epi16(in26, in6); + + MULTIPLICATION_AND_ADD(lo_2_30, hi_2_30, lo_18_14, hi_18_14, stg2_0, + stg2_1, stg2_2, stg2_3, stp2_8, stp2_15, stp2_9, + stp2_14) + MULTIPLICATION_AND_ADD(lo_10_22, hi_10_22, lo_26_6, hi_26_6, stg2_4, + stg2_5, stg2_6, stg2_7, stp2_10, stp2_13, + stp2_11, stp2_12) + + stp2_16 = _mm_add_epi16(stp1_16, stp1_17); + stp2_17 = _mm_sub_epi16(stp1_16, stp1_17); + stp2_18 = _mm_sub_epi16(stp1_19, stp1_18); + stp2_19 = _mm_add_epi16(stp1_19, stp1_18); + + stp2_20 = _mm_add_epi16(stp1_20, stp1_21); + stp2_21 = _mm_sub_epi16(stp1_20, stp1_21); + stp2_22 = _mm_sub_epi16(stp1_23, stp1_22); + stp2_23 = _mm_add_epi16(stp1_23, stp1_22); + + stp2_24 = _mm_add_epi16(stp1_24, stp1_25); + stp2_25 = _mm_sub_epi16(stp1_24, stp1_25); + stp2_26 = _mm_sub_epi16(stp1_27, stp1_26); + stp2_27 = _mm_add_epi16(stp1_27, stp1_26); + + stp2_28 = _mm_add_epi16(stp1_28, stp1_29); + stp2_29 = _mm_sub_epi16(stp1_28, stp1_29); 
+ stp2_30 = _mm_sub_epi16(stp1_31, stp1_30); + stp2_31 = _mm_add_epi16(stp1_31, stp1_30); + } + + // Stage3 + { + const __m128i lo_4_28 = _mm_unpacklo_epi16(in4, in28); + const __m128i hi_4_28 = _mm_unpackhi_epi16(in4, in28); + const __m128i lo_20_12 = _mm_unpacklo_epi16(in20, in12); + const __m128i hi_20_12 = _mm_unpackhi_epi16(in20, in12); + + const __m128i lo_17_30 = _mm_unpacklo_epi16(stp2_17, stp2_30); + const __m128i hi_17_30 = _mm_unpackhi_epi16(stp2_17, stp2_30); + const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); + const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); + + const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); + const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); + const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); + const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); + + MULTIPLICATION_AND_ADD(lo_4_28, hi_4_28, lo_20_12, hi_20_12, stg3_0, + stg3_1, stg3_2, stg3_3, stp1_4, stp1_7, stp1_5, + stp1_6) + + stp1_8 = _mm_add_epi16(stp2_8, stp2_9); + stp1_9 = _mm_sub_epi16(stp2_8, stp2_9); + stp1_10 = _mm_sub_epi16(stp2_11, stp2_10); + stp1_11 = _mm_add_epi16(stp2_11, stp2_10); + stp1_12 = _mm_add_epi16(stp2_12, stp2_13); + stp1_13 = _mm_sub_epi16(stp2_12, stp2_13); + stp1_14 = _mm_sub_epi16(stp2_15, stp2_14); + stp1_15 = _mm_add_epi16(stp2_15, stp2_14); + + MULTIPLICATION_AND_ADD(lo_17_30, hi_17_30, lo_18_29, hi_18_29, stg3_4, + stg3_5, stg3_6, stg3_4, stp1_17, stp1_30, + stp1_18, stp1_29) + MULTIPLICATION_AND_ADD(lo_21_26, hi_21_26, lo_22_25, hi_22_25, stg3_8, + stg3_9, stg3_10, stg3_8, stp1_21, stp1_26, + stp1_22, stp1_25) + + stp1_16 = stp2_16; + stp1_31 = stp2_31; + stp1_19 = stp2_19; + stp1_20 = stp2_20; + stp1_23 = stp2_23; + stp1_24 = stp2_24; + stp1_27 = stp2_27; + stp1_28 = stp2_28; + } + + // Stage4 + { + const __m128i lo_0_16 = _mm_unpacklo_epi16(in0, in16); + const __m128i hi_0_16 = _mm_unpackhi_epi16(in0, in16); + const __m128i lo_8_24 = _mm_unpacklo_epi16(in8, in24); + 
const __m128i hi_8_24 = _mm_unpackhi_epi16(in8, in24); + + const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); + const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); + + MULTIPLICATION_AND_ADD(lo_0_16, hi_0_16, lo_8_24, hi_8_24, stg4_0, + stg4_1, stg4_2, stg4_3, stp2_0, stp2_1, + stp2_2, stp2_3) + + stp2_4 = _mm_add_epi16(stp1_4, stp1_5); + stp2_5 = _mm_sub_epi16(stp1_4, stp1_5); + stp2_6 = _mm_sub_epi16(stp1_7, stp1_6); + stp2_7 = _mm_add_epi16(stp1_7, stp1_6); + + MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4, + stg4_5, stg4_6, stg4_4, stp2_9, stp2_14, + stp2_10, stp2_13) + + stp2_8 = stp1_8; + stp2_15 = stp1_15; + stp2_11 = stp1_11; + stp2_12 = stp1_12; + + stp2_16 = _mm_add_epi16(stp1_16, stp1_19); + stp2_17 = _mm_add_epi16(stp1_17, stp1_18); + stp2_18 = _mm_sub_epi16(stp1_17, stp1_18); + stp2_19 = _mm_sub_epi16(stp1_16, stp1_19); + stp2_20 = _mm_sub_epi16(stp1_23, stp1_20); + stp2_21 = _mm_sub_epi16(stp1_22, stp1_21); + stp2_22 = _mm_add_epi16(stp1_22, stp1_21); + stp2_23 = _mm_add_epi16(stp1_23, stp1_20); + + stp2_24 = _mm_add_epi16(stp1_24, stp1_27); + stp2_25 = _mm_add_epi16(stp1_25, stp1_26); + stp2_26 = _mm_sub_epi16(stp1_25, stp1_26); + stp2_27 = _mm_sub_epi16(stp1_24, stp1_27); + stp2_28 = _mm_sub_epi16(stp1_31, stp1_28); + stp2_29 = _mm_sub_epi16(stp1_30, stp1_29); + stp2_30 = _mm_add_epi16(stp1_29, stp1_30); + stp2_31 = _mm_add_epi16(stp1_28, stp1_31); + } + + // Stage5 + { + const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); + const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); + const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); + const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); + + const __m128i lo_19_28 = _mm_unpacklo_epi16(stp2_19, stp2_28); + const __m128i hi_19_28 = _mm_unpackhi_epi16(stp2_19, stp2_28); + const __m128i lo_20_27 = 
_mm_unpacklo_epi16(stp2_20, stp2_27); + const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); + + const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); + const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); + + stp1_0 = _mm_add_epi16(stp2_0, stp2_3); + stp1_1 = _mm_add_epi16(stp2_1, stp2_2); + stp1_2 = _mm_sub_epi16(stp2_1, stp2_2); + stp1_3 = _mm_sub_epi16(stp2_0, stp2_3); + + tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); + tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); + tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); + tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); + + tmp0 = _mm_add_epi32(tmp0, rounding); + tmp1 = _mm_add_epi32(tmp1, rounding); + tmp2 = _mm_add_epi32(tmp2, rounding); + tmp3 = _mm_add_epi32(tmp3, rounding); + + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); + + stp1_5 = _mm_packs_epi32(tmp0, tmp1); + stp1_6 = _mm_packs_epi32(tmp2, tmp3); + + stp1_4 = stp2_4; + stp1_7 = stp2_7; + + stp1_8 = _mm_add_epi16(stp2_8, stp2_11); + stp1_9 = _mm_add_epi16(stp2_9, stp2_10); + stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); + stp1_11 = _mm_sub_epi16(stp2_8, stp2_11); + stp1_12 = _mm_sub_epi16(stp2_15, stp2_12); + stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); + stp1_14 = _mm_add_epi16(stp2_14, stp2_13); + stp1_15 = _mm_add_epi16(stp2_15, stp2_12); + + stp1_16 = stp2_16; + stp1_17 = stp2_17; + + MULTIPLICATION_AND_ADD(lo_18_29, hi_18_29, lo_19_28, hi_19_28, stg4_4, + stg4_5, stg4_4, stg4_5, stp1_18, stp1_29, + stp1_19, stp1_28) + MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg4_6, + stg4_4, stg4_6, stg4_4, stp1_20, stp1_27, + stp1_21, stp1_26) + + stp1_22 = stp2_22; + stp1_23 = stp2_23; + stp1_24 = stp2_24; + stp1_25 = stp2_25; + stp1_30 = stp2_30; + stp1_31 = stp2_31; + } + + // Stage6 + { + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, 
stp1_13); + const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); + const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); + + stp2_0 = _mm_add_epi16(stp1_0, stp1_7); + stp2_1 = _mm_add_epi16(stp1_1, stp1_6); + stp2_2 = _mm_add_epi16(stp1_2, stp1_5); + stp2_3 = _mm_add_epi16(stp1_3, stp1_4); + stp2_4 = _mm_sub_epi16(stp1_3, stp1_4); + stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); + stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); + stp2_7 = _mm_sub_epi16(stp1_0, stp1_7); + + stp2_8 = stp1_8; + stp2_9 = stp1_9; + stp2_14 = stp1_14; + stp2_15 = stp1_15; + + MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, + stg6_0, stg4_0, stg6_0, stg4_0, stp2_10, + stp2_13, stp2_11, stp2_12) + + stp2_16 = _mm_add_epi16(stp1_16, stp1_23); + stp2_17 = _mm_add_epi16(stp1_17, stp1_22); + stp2_18 = _mm_add_epi16(stp1_18, stp1_21); + stp2_19 = _mm_add_epi16(stp1_19, stp1_20); + stp2_20 = _mm_sub_epi16(stp1_19, stp1_20); + stp2_21 = _mm_sub_epi16(stp1_18, stp1_21); + stp2_22 = _mm_sub_epi16(stp1_17, stp1_22); + stp2_23 = _mm_sub_epi16(stp1_16, stp1_23); + + stp2_24 = _mm_sub_epi16(stp1_31, stp1_24); + stp2_25 = _mm_sub_epi16(stp1_30, stp1_25); + stp2_26 = _mm_sub_epi16(stp1_29, stp1_26); + stp2_27 = _mm_sub_epi16(stp1_28, stp1_27); + stp2_28 = _mm_add_epi16(stp1_27, stp1_28); + stp2_29 = _mm_add_epi16(stp1_26, stp1_29); + stp2_30 = _mm_add_epi16(stp1_25, stp1_30); + stp2_31 = _mm_add_epi16(stp1_24, stp1_31); + } + + // Stage7 + { + const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); + const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); + const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); + const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); + + const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); + const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); + const __m128i lo_23_24 = _mm_unpacklo_epi16(stp2_23, stp2_24); + const __m128i hi_23_24 = _mm_unpackhi_epi16(stp2_23, stp2_24); + + stp1_0 = 
_mm_add_epi16(stp2_0, stp2_15); + stp1_1 = _mm_add_epi16(stp2_1, stp2_14); + stp1_2 = _mm_add_epi16(stp2_2, stp2_13); + stp1_3 = _mm_add_epi16(stp2_3, stp2_12); + stp1_4 = _mm_add_epi16(stp2_4, stp2_11); + stp1_5 = _mm_add_epi16(stp2_5, stp2_10); + stp1_6 = _mm_add_epi16(stp2_6, stp2_9); + stp1_7 = _mm_add_epi16(stp2_7, stp2_8); + stp1_8 = _mm_sub_epi16(stp2_7, stp2_8); + stp1_9 = _mm_sub_epi16(stp2_6, stp2_9); + stp1_10 = _mm_sub_epi16(stp2_5, stp2_10); + stp1_11 = _mm_sub_epi16(stp2_4, stp2_11); + stp1_12 = _mm_sub_epi16(stp2_3, stp2_12); + stp1_13 = _mm_sub_epi16(stp2_2, stp2_13); + stp1_14 = _mm_sub_epi16(stp2_1, stp2_14); + stp1_15 = _mm_sub_epi16(stp2_0, stp2_15); + + stp1_16 = stp2_16; + stp1_17 = stp2_17; + stp1_18 = stp2_18; + stp1_19 = stp2_19; + + MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg6_0, + stg4_0, stg6_0, stg4_0, stp1_20, stp1_27, + stp1_21, stp1_26) + MULTIPLICATION_AND_ADD(lo_22_25, hi_22_25, lo_23_24, hi_23_24, stg6_0, + stg4_0, stg6_0, stg4_0, stp1_22, stp1_25, + stp1_23, stp1_24) + + stp1_28 = stp2_28; + stp1_29 = stp2_29; + stp1_30 = stp2_30; + stp1_31 = stp2_31; + } + + // final stage + if (i < 4) { + // 1_D: Store 32 intermediate results for each 8x32 block. 
+ col[i * 32 + 0] = _mm_add_epi16(stp1_0, stp1_31); + col[i * 32 + 1] = _mm_add_epi16(stp1_1, stp1_30); + col[i * 32 + 2] = _mm_add_epi16(stp1_2, stp1_29); + col[i * 32 + 3] = _mm_add_epi16(stp1_3, stp1_28); + col[i * 32 + 4] = _mm_add_epi16(stp1_4, stp1_27); + col[i * 32 + 5] = _mm_add_epi16(stp1_5, stp1_26); + col[i * 32 + 6] = _mm_add_epi16(stp1_6, stp1_25); + col[i * 32 + 7] = _mm_add_epi16(stp1_7, stp1_24); + col[i * 32 + 8] = _mm_add_epi16(stp1_8, stp1_23); + col[i * 32 + 9] = _mm_add_epi16(stp1_9, stp1_22); + col[i * 32 + 10] = _mm_add_epi16(stp1_10, stp1_21); + col[i * 32 + 11] = _mm_add_epi16(stp1_11, stp1_20); + col[i * 32 + 12] = _mm_add_epi16(stp1_12, stp1_19); + col[i * 32 + 13] = _mm_add_epi16(stp1_13, stp1_18); + col[i * 32 + 14] = _mm_add_epi16(stp1_14, stp1_17); + col[i * 32 + 15] = _mm_add_epi16(stp1_15, stp1_16); + col[i * 32 + 16] = _mm_sub_epi16(stp1_15, stp1_16); + col[i * 32 + 17] = _mm_sub_epi16(stp1_14, stp1_17); + col[i * 32 + 18] = _mm_sub_epi16(stp1_13, stp1_18); + col[i * 32 + 19] = _mm_sub_epi16(stp1_12, stp1_19); + col[i * 32 + 20] = _mm_sub_epi16(stp1_11, stp1_20); + col[i * 32 + 21] = _mm_sub_epi16(stp1_10, stp1_21); + col[i * 32 + 22] = _mm_sub_epi16(stp1_9, stp1_22); + col[i * 32 + 23] = _mm_sub_epi16(stp1_8, stp1_23); + col[i * 32 + 24] = _mm_sub_epi16(stp1_7, stp1_24); + col[i * 32 + 25] = _mm_sub_epi16(stp1_6, stp1_25); + col[i * 32 + 26] = _mm_sub_epi16(stp1_5, stp1_26); + col[i * 32 + 27] = _mm_sub_epi16(stp1_4, stp1_27); + col[i * 32 + 28] = _mm_sub_epi16(stp1_3, stp1_28); + col[i * 32 + 29] = _mm_sub_epi16(stp1_2, stp1_29); + col[i * 32 + 30] = _mm_sub_epi16(stp1_1, stp1_30); + col[i * 32 + 31] = _mm_sub_epi16(stp1_0, stp1_31); + } else { + // 2_D: Calculate the results and store them to destination. 
+ in0 = _mm_add_epi16(stp1_0, stp1_31); + in1 = _mm_add_epi16(stp1_1, stp1_30); + in2 = _mm_add_epi16(stp1_2, stp1_29); + in3 = _mm_add_epi16(stp1_3, stp1_28); + in4 = _mm_add_epi16(stp1_4, stp1_27); + in5 = _mm_add_epi16(stp1_5, stp1_26); + in6 = _mm_add_epi16(stp1_6, stp1_25); + in7 = _mm_add_epi16(stp1_7, stp1_24); + in8 = _mm_add_epi16(stp1_8, stp1_23); + in9 = _mm_add_epi16(stp1_9, stp1_22); + in10 = _mm_add_epi16(stp1_10, stp1_21); + in11 = _mm_add_epi16(stp1_11, stp1_20); + in12 = _mm_add_epi16(stp1_12, stp1_19); + in13 = _mm_add_epi16(stp1_13, stp1_18); + in14 = _mm_add_epi16(stp1_14, stp1_17); + in15 = _mm_add_epi16(stp1_15, stp1_16); + in16 = _mm_sub_epi16(stp1_15, stp1_16); + in17 = _mm_sub_epi16(stp1_14, stp1_17); + in18 = _mm_sub_epi16(stp1_13, stp1_18); + in19 = _mm_sub_epi16(stp1_12, stp1_19); + in20 = _mm_sub_epi16(stp1_11, stp1_20); + in21 = _mm_sub_epi16(stp1_10, stp1_21); + in22 = _mm_sub_epi16(stp1_9, stp1_22); + in23 = _mm_sub_epi16(stp1_8, stp1_23); + in24 = _mm_sub_epi16(stp1_7, stp1_24); + in25 = _mm_sub_epi16(stp1_6, stp1_25); + in26 = _mm_sub_epi16(stp1_5, stp1_26); + in27 = _mm_sub_epi16(stp1_4, stp1_27); + in28 = _mm_sub_epi16(stp1_3, stp1_28); + in29 = _mm_sub_epi16(stp1_2, stp1_29); + in30 = _mm_sub_epi16(stp1_1, stp1_30); + in31 = _mm_sub_epi16(stp1_0, stp1_31); + + // Final rounding and shift + in0 = _mm_adds_epi16(in0, final_rounding); + in1 = _mm_adds_epi16(in1, final_rounding); + in2 = _mm_adds_epi16(in2, final_rounding); + in3 = _mm_adds_epi16(in3, final_rounding); + in4 = _mm_adds_epi16(in4, final_rounding); + in5 = _mm_adds_epi16(in5, final_rounding); + in6 = _mm_adds_epi16(in6, final_rounding); + in7 = _mm_adds_epi16(in7, final_rounding); + in8 = _mm_adds_epi16(in8, final_rounding); + in9 = _mm_adds_epi16(in9, final_rounding); + in10 = _mm_adds_epi16(in10, final_rounding); + in11 = _mm_adds_epi16(in11, final_rounding); + in12 = _mm_adds_epi16(in12, final_rounding); + in13 = _mm_adds_epi16(in13, final_rounding); + in14 = 
_mm_adds_epi16(in14, final_rounding); + in15 = _mm_adds_epi16(in15, final_rounding); + in16 = _mm_adds_epi16(in16, final_rounding); + in17 = _mm_adds_epi16(in17, final_rounding); + in18 = _mm_adds_epi16(in18, final_rounding); + in19 = _mm_adds_epi16(in19, final_rounding); + in20 = _mm_adds_epi16(in20, final_rounding); + in21 = _mm_adds_epi16(in21, final_rounding); + in22 = _mm_adds_epi16(in22, final_rounding); + in23 = _mm_adds_epi16(in23, final_rounding); + in24 = _mm_adds_epi16(in24, final_rounding); + in25 = _mm_adds_epi16(in25, final_rounding); + in26 = _mm_adds_epi16(in26, final_rounding); + in27 = _mm_adds_epi16(in27, final_rounding); + in28 = _mm_adds_epi16(in28, final_rounding); + in29 = _mm_adds_epi16(in29, final_rounding); + in30 = _mm_adds_epi16(in30, final_rounding); + in31 = _mm_adds_epi16(in31, final_rounding); + + in0 = _mm_srai_epi16(in0, 6); + in1 = _mm_srai_epi16(in1, 6); + in2 = _mm_srai_epi16(in2, 6); + in3 = _mm_srai_epi16(in3, 6); + in4 = _mm_srai_epi16(in4, 6); + in5 = _mm_srai_epi16(in5, 6); + in6 = _mm_srai_epi16(in6, 6); + in7 = _mm_srai_epi16(in7, 6); + in8 = _mm_srai_epi16(in8, 6); + in9 = _mm_srai_epi16(in9, 6); + in10 = _mm_srai_epi16(in10, 6); + in11 = _mm_srai_epi16(in11, 6); + in12 = _mm_srai_epi16(in12, 6); + in13 = _mm_srai_epi16(in13, 6); + in14 = _mm_srai_epi16(in14, 6); + in15 = _mm_srai_epi16(in15, 6); + in16 = _mm_srai_epi16(in16, 6); + in17 = _mm_srai_epi16(in17, 6); + in18 = _mm_srai_epi16(in18, 6); + in19 = _mm_srai_epi16(in19, 6); + in20 = _mm_srai_epi16(in20, 6); + in21 = _mm_srai_epi16(in21, 6); + in22 = _mm_srai_epi16(in22, 6); + in23 = _mm_srai_epi16(in23, 6); + in24 = _mm_srai_epi16(in24, 6); + in25 = _mm_srai_epi16(in25, 6); + in26 = _mm_srai_epi16(in26, 6); + in27 = _mm_srai_epi16(in27, 6); + in28 = _mm_srai_epi16(in28, 6); + in29 = _mm_srai_epi16(in29, 6); + in30 = _mm_srai_epi16(in30, 6); + in31 = _mm_srai_epi16(in31, 6); + + // Store results + _mm_store_si128((__m128i *)output, in0); + _mm_store_si128((__m128i 
*)(output + half_pitch * 1), in1); + _mm_store_si128((__m128i *)(output + half_pitch * 2), in2); + _mm_store_si128((__m128i *)(output + half_pitch * 3), in3); + _mm_store_si128((__m128i *)(output + half_pitch * 4), in4); + _mm_store_si128((__m128i *)(output + half_pitch * 5), in5); + _mm_store_si128((__m128i *)(output + half_pitch * 6), in6); + _mm_store_si128((__m128i *)(output + half_pitch * 7), in7); + _mm_store_si128((__m128i *)(output + half_pitch * 8), in8); + _mm_store_si128((__m128i *)(output + half_pitch * 9), in9); + _mm_store_si128((__m128i *)(output + half_pitch * 10), in10); + _mm_store_si128((__m128i *)(output + half_pitch * 11), in11); + _mm_store_si128((__m128i *)(output + half_pitch * 12), in12); + _mm_store_si128((__m128i *)(output + half_pitch * 13), in13); + _mm_store_si128((__m128i *)(output + half_pitch * 14), in14); + _mm_store_si128((__m128i *)(output + half_pitch * 15), in15); + _mm_store_si128((__m128i *)(output + half_pitch * 16), in16); + _mm_store_si128((__m128i *)(output + half_pitch * 17), in17); + _mm_store_si128((__m128i *)(output + half_pitch * 18), in18); + _mm_store_si128((__m128i *)(output + half_pitch * 19), in19); + _mm_store_si128((__m128i *)(output + half_pitch * 20), in20); + _mm_store_si128((__m128i *)(output + half_pitch * 21), in21); + _mm_store_si128((__m128i *)(output + half_pitch * 22), in22); + _mm_store_si128((__m128i *)(output + half_pitch * 23), in23); + _mm_store_si128((__m128i *)(output + half_pitch * 24), in24); + _mm_store_si128((__m128i *)(output + half_pitch * 25), in25); + _mm_store_si128((__m128i *)(output + half_pitch * 26), in26); + _mm_store_si128((__m128i *)(output + half_pitch * 27), in27); + _mm_store_si128((__m128i *)(output + half_pitch * 28), in28); + _mm_store_si128((__m128i *)(output + half_pitch * 29), in29); + _mm_store_si128((__m128i *)(output + half_pitch * 30), in30); + _mm_store_si128((__m128i *)(output + half_pitch * 31), in31); + + output += 8; + } + } +} #endif diff --git 
a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index ddfdaba4f..9cb18143f 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -815,7 +815,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, second_ref_fb_idx = cm->active_ref_idx[mbmi->second_ref_frame - 1]; setup_pred_block(&xd->second_pre, &cm->yv12_fb[second_ref_fb_idx], - mb_row, mb_col, &xd->scale_factor[1], &xd->scale_factor_uv[1]); + mb_row, mb_col, &xd->scale_factor[1], &xd->scale_factor_uv[1]); vp9_find_mv_refs(cm, xd, mi, use_prev_in_find_mv_refs ? prev_mi : NULL, mbmi->second_ref_frame, diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 0e409f727..ba7570347 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -195,16 +195,6 @@ static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->dst.y_stride, xd->dst.uv_stride, mb_row, mb_col); -#if CONFIG_COMP_INTERINTRA_PRED - if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { - vp9_build_interintra_16x16_predictors_mb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride); - } -#endif } } } @@ -212,7 +202,7 @@ static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd, static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd, BOOL_DECODER* const bc) { TX_TYPE tx_type = get_tx_type_16x16(xd, 0); -#ifdef DEC_DEBUG +#if 0 // def DEC_DEBUG if (dec_debug) { int i; printf("\n"); @@ -250,7 +240,7 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, // First do Y // if the first one is DCT_DCT assume all the rest are as well TX_TYPE tx_type = get_tx_type_8x8(xd, 0); -#ifdef DEC_DEBUG +#if 0 // def DEC_DEBUG if (dec_debug) { int i; printf("\n"); @@ -322,7 +312,7 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd); } -#ifdef DEC_DEBUG +#if 0 // def DEC_DEBUG if (dec_debug) { int i; 
printf("\n"); @@ -340,6 +330,17 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, TX_TYPE tx_type; int i, eobtotal = 0; MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode; +#if 0 // def DEC_DEBUG + if (dec_debug) { + int i; + printf("\n"); + printf("predictor\n"); + for (i = 0; i < 384; i++) { + printf("%3d ", xd->predictor[i]); + if (i % 16 == 15) printf("\n"); + } + } +#endif if (mode == I8X8_PRED) { for (i = 0; i < 4; i++) { int ib = vp9_i8x8_block[i]; @@ -420,7 +421,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->dst.uv_stride, xd); } else { -#ifdef DEC_DEBUG +#if 0 // def DEC_DEBUG if (dec_debug) { int i; printf("\n"); @@ -834,14 +835,14 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, skip_recon_mb(pbi, xd, mb_row, mb_col); return; } -#ifdef DEC_DEBUG +#if 0 // def DEC_DEBUG if (dec_debug) printf("Decoding mb: %d %d\n", xd->mode_info_context->mbmi.mode, tx_size); #endif // moved to be performed before detokenization -// if (xd->segmentation_enabled) -// mb_init_dequantizer(pbi, xd); + // if (xd->segmentation_enabled) + // mb_init_dequantizer(pbi, xd); /* do prediction */ if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { @@ -852,7 +853,7 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, } } } else { -#ifdef DEC_DEBUG +#if 0 // def DEC_DEBUG if (dec_debug) printf("Decoding mb: %d %d interp %d\n", xd->mode_info_context->mbmi.mode, tx_size, @@ -872,6 +873,13 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, if (dec_debug) { int i, j; printf("\n"); + printf("predictor y\n"); + for (i = 0; i < 16; i++) { + for (j = 0; j < 16; j++) + printf("%3d ", xd->predictor[i * 16 + j]); + printf("\n"); + } + printf("\n"); printf("final y\n"); for (i = 0; i < 16; i++) { for (j = 0; j < 16; j++) @@ -994,9 +1002,10 @@ static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, mb_col < pc->cur_tile_mb_col_end; mb_col += 4) { if (vp9_read(bc, pc->sb64_coded)) { #ifdef DEC_DEBUG - dec_debug = 
(pc->current_video_frame == 1 && mb_row == 0 && mb_col == 0); + dec_debug = (pc->current_video_frame == 11 && pc->show_frame && + mb_row == 8 && mb_col == 0); if (dec_debug) - printf("Debug\n"); + printf("Debug Decode SB64\n"); #endif set_offsets(pbi, 64, mb_row, mb_col); vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, bc); @@ -1019,8 +1028,10 @@ static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, if (vp9_read(bc, pc->sb32_coded)) { #ifdef DEC_DEBUG - dec_debug = (pc->current_video_frame == 1 && - mb_row + y_idx_sb == 0 && mb_col + x_idx_sb == 0); + dec_debug = (pc->current_video_frame == 11 && pc->show_frame && + mb_row + y_idx_sb == 8 && mb_col + x_idx_sb == 0); + if (dec_debug) + printf("Debug Decode SB32\n"); #endif set_offsets(pbi, 32, mb_row + y_idx_sb, mb_col + x_idx_sb); vp9_decode_mb_mode_mv(pbi, @@ -1043,8 +1054,10 @@ static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, continue; } #ifdef DEC_DEBUG - dec_debug = (pc->current_video_frame == 1 && - mb_row + y_idx == 0 && mb_col + x_idx == 0); + dec_debug = (pc->current_video_frame == 11 && pc->show_frame && + mb_row + y_idx == 8 && mb_col + x_idx == 0); + if (dec_debug) + printf("Debug Decode MB\n"); #endif set_offsets(pbi, 16, mb_row + y_idx, mb_col + x_idx); @@ -1265,6 +1278,143 @@ static void update_frame_size(VP9D_COMP *pbi) { vp9_update_mode_info_in_image(cm, cm->mi); } +static void setup_segmentation(VP9_COMMON *pc, MACROBLOCKD *xd, + BOOL_DECODER *header_bc) { + int i, j; + + // Is segmentation enabled + xd->segmentation_enabled = vp9_read_bit(header_bc); + + if (xd->segmentation_enabled) { + // Read whether or not the segmentation map is being explicitly updated + // this frame. + xd->update_mb_segmentation_map = vp9_read_bit(header_bc); + + // If so what method will be used. + if (xd->update_mb_segmentation_map) { + // Which macro block level features are enabled. Read the probs used to + // decode the segment id for each macro block. 
+ for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) { + xd->mb_segment_tree_probs[i] = vp9_read_bit(header_bc) ? + (vp9_prob)vp9_read_literal(header_bc, 8) : 255; + } + + // Read the prediction probs needed to decode the segment id + pc->temporal_update = vp9_read_bit(header_bc); + for (i = 0; i < PREDICTION_PROBS; i++) { + if (pc->temporal_update) { + pc->segment_pred_probs[i] = vp9_read_bit(header_bc) ? + (vp9_prob)vp9_read_literal(header_bc, 8) : 255; + } else { + pc->segment_pred_probs[i] = 255; + } + } + + if (pc->temporal_update) { + int count[4]; + const vp9_prob *p = xd->mb_segment_tree_probs; + vp9_prob *p_mod = xd->mb_segment_mispred_tree_probs; + + count[0] = p[0] * p[1]; + count[1] = p[0] * (256 - p[1]); + count[2] = (256 - p[0]) * p[2]; + count[3] = (256 - p[0]) * (256 - p[2]); + + p_mod[0] = get_binary_prob(count[1], count[2] + count[3]); + p_mod[1] = get_binary_prob(count[0], count[2] + count[3]); + p_mod[2] = get_binary_prob(count[0] + count[1], count[3]); + p_mod[3] = get_binary_prob(count[0] + count[1], count[2]); + } + } + // Is the segment data being updated + xd->update_mb_segmentation_data = vp9_read_bit(header_bc); + + if (xd->update_mb_segmentation_data) { + int data; + + xd->mb_segment_abs_delta = vp9_read_bit(header_bc); + + vp9_clearall_segfeatures(xd); + + // For each segmentation... + for (i = 0; i < MAX_MB_SEGMENTS; i++) { + // For each of the segments features... + for (j = 0; j < SEG_LVL_MAX; j++) { + // Is the feature enabled + if (vp9_read_bit(header_bc)) { + // Update the feature data and mask + vp9_enable_segfeature(xd, i, j); + + data = vp9_decode_unsigned_max(header_bc, + vp9_seg_feature_data_max(j)); + + // Is the segment data signed.. 
+ if (vp9_is_segfeature_signed(j)) { + if (vp9_read_bit(header_bc)) + data = -data; + } + } else { + data = 0; + } + + vp9_set_segdata(xd, i, j, data); + } + } + } + } +} + +static void setup_loopfilter(VP9_COMMON *pc, MACROBLOCKD *xd, + BOOL_DECODER *header_bc) { + int i; + + pc->filter_type = (LOOPFILTERTYPE) vp9_read_bit(header_bc); + pc->filter_level = vp9_read_literal(header_bc, 6); + pc->sharpness_level = vp9_read_literal(header_bc, 3); + +#if CONFIG_LOOP_DERING + if (vp9_read_bit(header_bc)) + pc->dering_enabled = 1 + vp9_read_literal(header_bc, 4); + else + pc->dering_enabled = 0; +#endif + + // Read in loop filter deltas applied at the MB level based on mode or ref + // frame. + xd->mode_ref_lf_delta_update = 0; + xd->mode_ref_lf_delta_enabled = vp9_read_bit(header_bc); + + if (xd->mode_ref_lf_delta_enabled) { + // Do the deltas need to be updated + xd->mode_ref_lf_delta_update = vp9_read_bit(header_bc); + + if (xd->mode_ref_lf_delta_update) { + // Send update + for (i = 0; i < MAX_REF_LF_DELTAS; i++) { + if (vp9_read_bit(header_bc)) { + // sign = vp9_read_bit( &header_bc ); + xd->ref_lf_deltas[i] = (signed char)vp9_read_literal(header_bc, 6); + + if (vp9_read_bit(header_bc)) + xd->ref_lf_deltas[i] = -xd->ref_lf_deltas[i]; // Apply sign + } + } + + // Send update + for (i = 0; i < MAX_MODE_LF_DELTAS; i++) { + if (vp9_read_bit(header_bc)) { + // sign = vp9_read_bit( &header_bc ); + xd->mode_lf_deltas[i] = (signed char)vp9_read_literal(header_bc, 6); + + if (vp9_read_bit(header_bc)) + xd->mode_lf_deltas[i] = -xd->mode_lf_deltas[i]; // Apply sign + } + } + } + } +} + + int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { BOOL_DECODER header_bc, residual_bc; VP9_COMMON *const pc = &pbi->common; @@ -1272,10 +1422,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { const uint8_t *data = (const uint8_t *)pbi->Source; const uint8_t *data_end = data + pbi->source_sz; ptrdiff_t first_partition_length_in_bytes = 0; - - 
int mb_row; - int i, j; - int corrupt_tokens = 0; + int mb_row, i, corrupt_tokens = 0; // printf("Decoding frame %d\n", pc->current_video_frame); /* start with no corruption of current frame */ @@ -1392,87 +1539,8 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { pc->clamp_type = (CLAMP_TYPE)vp9_read_bit(&header_bc); pc->error_resilient_mode = vp9_read_bit(&header_bc); - /* Is segmentation enabled */ - xd->segmentation_enabled = (unsigned char)vp9_read_bit(&header_bc); - - if (xd->segmentation_enabled) { - // Read whether or not the segmentation map is being explicitly - // updated this frame. - xd->update_mb_segmentation_map = (unsigned char)vp9_read_bit(&header_bc); - - // If so what method will be used. - if (xd->update_mb_segmentation_map) { - // Which macro block level features are enabled - - // Read the probs used to decode the segment id for each macro - // block. - for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) { - xd->mb_segment_tree_probs[i] = vp9_read_bit(&header_bc) ? - (vp9_prob)vp9_read_literal(&header_bc, 8) : 255; - } - - // Read the prediction probs needed to decode the segment id - pc->temporal_update = (unsigned char)vp9_read_bit(&header_bc); - for (i = 0; i < PREDICTION_PROBS; i++) { - if (pc->temporal_update) { - pc->segment_pred_probs[i] = vp9_read_bit(&header_bc) ? 
- (vp9_prob)vp9_read_literal(&header_bc, 8) : 255; - } else { - pc->segment_pred_probs[i] = 255; - } - } - - if (pc->temporal_update) { - int count[4]; - const vp9_prob *p = xd->mb_segment_tree_probs; - vp9_prob *p_mod = xd->mb_segment_mispred_tree_probs; - - count[0] = p[0] * p[1]; - count[1] = p[0] * (256 - p[1]); - count[2] = (256 - p[0]) * p[2]; - count[3] = (256 - p[0]) * (256 - p[2]); - - p_mod[0] = get_binary_prob(count[1], count[2] + count[3]); - p_mod[1] = get_binary_prob(count[0], count[2] + count[3]); - p_mod[2] = get_binary_prob(count[0] + count[1], count[3]); - p_mod[3] = get_binary_prob(count[0] + count[1], count[2]); - } - } - // Is the segment data being updated - xd->update_mb_segmentation_data = (unsigned char)vp9_read_bit(&header_bc); - - if (xd->update_mb_segmentation_data) { - int data; - - xd->mb_segment_abs_delta = (unsigned char)vp9_read_bit(&header_bc); - - vp9_clearall_segfeatures(xd); - - // For each segmentation... - for (i = 0; i < MAX_MB_SEGMENTS; i++) { - // For each of the segments features... - for (j = 0; j < SEG_LVL_MAX; j++) { - // Is the feature enabled - if (vp9_read_bit(&header_bc)) { - // Update the feature data and mask - vp9_enable_segfeature(xd, i, j); - - data = vp9_decode_unsigned_max(&header_bc, - vp9_seg_feature_data_max(j)); - - // Is the segment data signed.. 
- if (vp9_is_segfeature_signed(j)) { - if (vp9_read_bit(&header_bc)) - data = -data; - } - } else - data = 0; - vp9_set_segdata(xd, i, j, data); - } - } - } - } + setup_segmentation(pc, xd, &header_bc); // Read common prediction model status flag probability updates for the // reference frame @@ -1505,48 +1573,8 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { pc->prob_tx[2] = vp9_read_literal(&header_bc, 8); } } - pc->filter_type = (LOOPFILTERTYPE) vp9_read_bit(&header_bc); - pc->filter_level = vp9_read_literal(&header_bc, 6); - pc->sharpness_level = vp9_read_literal(&header_bc, 3); -#if CONFIG_LOOP_DERING - if (vp9_read_bit(&header_bc)) - pc->dering_enabled = 1 + vp9_read_literal(&header_bc, 4); - else - pc->dering_enabled = 0; -#endif - - /* Read in loop filter deltas applied at the MB level based on mode or ref frame. */ - xd->mode_ref_lf_delta_update = 0; - xd->mode_ref_lf_delta_enabled = (unsigned char)vp9_read_bit(&header_bc); - - if (xd->mode_ref_lf_delta_enabled) { - /* Do the deltas need to be updated */ - xd->mode_ref_lf_delta_update = (unsigned char)vp9_read_bit(&header_bc); - if (xd->mode_ref_lf_delta_update) { - /* Send update */ - for (i = 0; i < MAX_REF_LF_DELTAS; i++) { - if (vp9_read_bit(&header_bc)) { - /*sign = vp9_read_bit( &header_bc );*/ - xd->ref_lf_deltas[i] = (signed char)vp9_read_literal(&header_bc, 6); - - if (vp9_read_bit(&header_bc)) /* Apply sign */ - xd->ref_lf_deltas[i] = -xd->ref_lf_deltas[i]; - } - } - - /* Send update */ - for (i = 0; i < MAX_MODE_LF_DELTAS; i++) { - if (vp9_read_bit(&header_bc)) { - /*sign = vp9_read_bit( &header_bc );*/ - xd->mode_lf_deltas[i] = (signed char)vp9_read_literal(&header_bc, 6); - - if (vp9_read_bit(&header_bc)) /* Apply sign */ - xd->mode_lf_deltas[i] = -xd->mode_lf_deltas[i]; - } - } - } - } + setup_loopfilter(pc, xd, &header_bc); // Dummy read for now vp9_read_literal(&header_bc, 2); @@ -1701,6 +1729,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char 
**p_data_end) { vp9_zero(pbi->common.fc.coef_counts_8x8); vp9_zero(pbi->common.fc.coef_counts_16x16); vp9_zero(pbi->common.fc.coef_counts_32x32); + vp9_zero(pbi->common.fc.eob_branch_counts); vp9_zero(pbi->common.fc.ymode_counts); vp9_zero(pbi->common.fc.sb_ymode_counts); vp9_zero(pbi->common.fc.uv_mode_counts); diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 57eed1759..9aebcdcfc 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -218,6 +218,7 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq, input[0] = 0; vp9_add_constant_residual_8x8(out, pred, pitch, dest, stride); +#if !CONFIG_SCATTERSCAN } else if (eob <= 10) { input[1] *= dq[1]; input[2] *= dq[1]; @@ -237,6 +238,7 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq, input[24] = 0; vp9_add_residual_8x8(output, pred, pitch, dest, stride); +#endif } else { int i; @@ -301,6 +303,7 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq, input[0] = 0; vp9_add_constant_residual_16x16(out, pred, pitch, dest, stride); +#if !CONFIG_SCATTERSCAN } else if (eob <= 10) { input[0] *= dq[0]; @@ -323,6 +326,7 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq, input[48] = 0; vp9_add_residual_16x16(output, pred, pitch, dest, stride); +#endif } else { int i; @@ -352,6 +356,7 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq, vp9_short_idct1_32x32(input, output); vp9_add_constant_residual_32x32(output[0], pred, pitch, dest, stride); input[0] = 0; +#if !CONFIG_SCATTERSCAN } else if (eob <= 10) { input[1] = input[1] * dq[1] / 2; input[2] = input[2] * dq[1] / 2; @@ -372,6 +377,7 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq, input[96] = 0; vp9_add_residual_32x32(output, pred, pitch, dest, stride); +#endif } else { int i; for (i = 1; i < 1024; i++) diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 0a584d79d..b6eec373e 100644 --- 
a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -65,8 +65,11 @@ static int get_signed(BOOL_DECODER *br, int value_to_sign) { #define INCREMENT_COUNT(token) \ do { \ - coef_counts[type][ref][get_coef_band(txfm_size, c)][pt][token]++; \ - pt = vp9_get_coef_context(&recent_energy, token); \ + coef_counts[type][ref][get_coef_band(scan, txfm_size, c)] \ + [pt][token]++; \ + token_cache[c] = token; \ + pt = vp9_get_coef_context(scan, nb, pad, token_cache, \ + c, default_eob); \ } while (0) #if CONFIG_CODE_NONZEROCOUNT @@ -103,8 +106,7 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, int aidx, lidx; ENTROPY_CONTEXT above_ec, left_ec; FRAME_CONTEXT *const fc = &dx->common.fc; - int recent_energy = 0; - int pt, c = 0; + int pt, c = 0, pad, default_eob; vp9_coeff_probs *coef_probs; vp9_prob *prob; vp9_coeff_count *coef_counts; @@ -113,7 +115,8 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, uint16_t nzc = 0; uint16_t nzc_expected = xd->mode_info_context->mbmi.nzcs[block_idx]; #endif - const int *scan; + const int *scan, *nb; + uint8_t token_cache[1024]; if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) { aidx = vp9_block2above_sb64[txfm_size][block_idx]; @@ -129,7 +132,8 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, switch (txfm_size) { default: case TX_4X4: { - const TX_TYPE tx_type = get_tx_type_4x4(xd, block_idx); + const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? 
+ get_tx_type_4x4(xd, block_idx) : DCT_DCT; switch (tx_type) { default: scan = vp9_default_zig_zag1d_4x4; @@ -145,17 +149,50 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, left_ec = L0[lidx] != 0; coef_probs = fc->coef_probs_4x4; coef_counts = fc->coef_counts_4x4; + default_eob = 16; break; } - case TX_8X8: - scan = vp9_default_zig_zag1d_8x8; + case TX_8X8: { + const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; + const int sz = 3 + sb_type, x = block_idx & ((1 << sz) - 1); + const int y = block_idx - x; + const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? + get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT; + switch (tx_type) { + default: + scan = vp9_default_zig_zag1d_8x8; + break; + case ADST_DCT: + scan = vp9_row_scan_8x8; + break; + case DCT_ADST: + scan = vp9_col_scan_8x8; + break; + } coef_probs = fc->coef_probs_8x8; coef_counts = fc->coef_counts_8x8; above_ec = (A0[aidx] + A0[aidx + 1]) != 0; left_ec = (L0[lidx] + L0[lidx + 1]) != 0; + default_eob = 64; break; - case TX_16X16: - scan = vp9_default_zig_zag1d_16x16; + } + case TX_16X16: { + const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; + const int sz = 4 + sb_type, x = block_idx & ((1 << sz) - 1); + const int y = block_idx - x; + const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? 
+ get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT; + switch (tx_type) { + default: + scan = vp9_default_zig_zag1d_16x16; + break; + case ADST_DCT: + scan = vp9_row_scan_16x16; + break; + case DCT_ADST: + scan = vp9_col_scan_16x16; + break; + } coef_probs = fc->coef_probs_16x16; coef_counts = fc->coef_counts_16x16; if (type == PLANE_TYPE_UV) { @@ -167,7 +204,9 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, above_ec = (A0[aidx] + A0[aidx + 1] + A0[aidx + 2] + A0[aidx + 3]) != 0; left_ec = (L0[lidx] + L0[lidx + 1] + L0[lidx + 2] + L0[lidx + 3]) != 0; } + default_eob = 256; break; + } case TX_32X32: scan = vp9_default_zig_zag1d_32x32; coef_probs = fc->coef_probs_32x32; @@ -191,10 +230,13 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, left_ec = (L0[lidx] + L0[lidx + 1] + L0[lidx + 2] + L0[lidx + 3] + L1[lidx] + L1[lidx + 1] + L1[lidx + 2] + L1[lidx + 3]) != 0; } + default_eob = 1024; break; } VP9_COMBINEENTROPYCONTEXTS(pt, above_ec, left_ec); + nb = vp9_get_coef_neighbors_handle(scan, &pad); + while (1) { int val; const uint8_t *cat6 = cat6_prob; @@ -205,8 +247,10 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, if (nzc == nzc_expected) break; #endif - prob = coef_probs[type][ref][get_coef_band(txfm_size, c)][pt]; + prob = coef_probs[type][ref][get_coef_band(scan, txfm_size, c)][pt]; #if CONFIG_CODE_NONZEROCOUNT == 0 + fc->eob_branch_counts[txfm_size][type][ref] + [get_coef_band(scan, txfm_size, c)][pt]++; if (!vp9_read(br, prob[EOB_CONTEXT_NODE])) break; #endif @@ -222,7 +266,7 @@ SKIP_START: if (!vp9_read(br, prob[ZERO_CONTEXT_NODE])) { INCREMENT_COUNT(ZERO_TOKEN); ++c; - prob = coef_probs[type][ref][get_coef_band(txfm_size, c)][pt]; + prob = coef_probs[type][ref][get_coef_band(scan, txfm_size, c)][pt]; goto SKIP_START; } // ONE_CONTEXT_NODE_0_ @@ -287,7 +331,8 @@ SKIP_START: #if CONFIG_CODE_NONZEROCOUNT == 0 if (c < seg_eob) - coef_counts[type][ref][get_coef_band(txfm_size, c)][pt][DCT_EOB_TOKEN]++; + 
coef_counts[type][ref][get_coef_band(scan, txfm_size, c)] + [pt][DCT_EOB_TOKEN]++; #endif A0[aidx] = L0[lidx] = c > 0; diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index c0ae5ba36..ec729230a 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -1758,6 +1758,9 @@ static void print_prob_tree(vp9_coeff_probs *coef_probs, int block_types) { static void build_tree_distribution(vp9_coeff_probs *coef_probs, vp9_coeff_count *coef_counts, + unsigned int (*eob_branch_ct)[REF_TYPES] + [COEF_BANDS] + [PREV_COEF_CONTEXTS], #ifdef ENTROPY_STATS VP9_COMP *cpi, vp9_coeff_accum *context_counters, @@ -1779,10 +1782,18 @@ static void build_tree_distribution(vp9_coeff_probs *coef_probs, coef_probs[i][j][k][l], coef_branch_ct[i][j][k][l], coef_counts[i][j][k][l], 0); + coef_branch_ct[i][j][k][l][0][1] = eob_branch_ct[i][j][k][l] - + coef_branch_ct[i][j][k][l][0][0]; + coef_probs[i][j][k][l][0] = + get_binary_prob(coef_branch_ct[i][j][k][l][0][0], + coef_branch_ct[i][j][k][l][0][1]); #ifdef ENTROPY_STATS - if (!cpi->dummy_packing) - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - context_counters[i][j][k][l][t] += coef_counts[i][j][k][l][t]; + if (!cpi->dummy_packing) { + for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) + context_counters[i][j][k][l][t] += coef_counts[i][j][k][l][t]; + context_counters[i][j][k][l][MAX_ENTROPY_TOKENS] += + eob_branch_ct[i][j][k][l]; + } #endif } } @@ -1793,24 +1804,28 @@ static void build_tree_distribution(vp9_coeff_probs *coef_probs, static void build_coeff_contexts(VP9_COMP *cpi) { build_tree_distribution(cpi->frame_coef_probs_4x4, cpi->coef_counts_4x4, + cpi->common.fc.eob_branch_counts[TX_4X4], #ifdef ENTROPY_STATS cpi, context_counters_4x4, #endif cpi->frame_branch_ct_4x4, BLOCK_TYPES); build_tree_distribution(cpi->frame_coef_probs_8x8, cpi->coef_counts_8x8, + cpi->common.fc.eob_branch_counts[TX_8X8], #ifdef ENTROPY_STATS cpi, context_counters_8x8, #endif cpi->frame_branch_ct_8x8, BLOCK_TYPES); 
build_tree_distribution(cpi->frame_coef_probs_16x16, cpi->coef_counts_16x16, + cpi->common.fc.eob_branch_counts[TX_16X16], #ifdef ENTROPY_STATS cpi, context_counters_16x16, #endif cpi->frame_branch_ct_16x16, BLOCK_TYPES); build_tree_distribution(cpi->frame_coef_probs_32x32, cpi->coef_counts_32x32, + cpi->common.fc.eob_branch_counts[TX_32X32], #ifdef ENTROPY_STATS cpi, context_counters_32x32, #endif diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 439006156..491ea62b5 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -173,8 +173,8 @@ struct macroblock { void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch); void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx); void (*quantize_b_4x4_pair)(MACROBLOCK *x, int b_idx1, int b_idx2); - void (*quantize_b_16x16)(MACROBLOCK *x, int b_idx); - void (*quantize_b_8x8)(MACROBLOCK *x, int b_idx); + void (*quantize_b_16x16)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type); + void (*quantize_b_8x8)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type); }; #endif // VP9_ENCODER_VP9_BLOCK_H_ diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 5e17c552d..fc95c3274 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -757,7 +757,7 @@ static int pick_mb_modes(VP9_COMP *cpi, // as a predictor for MBs that follow in the SB if (cm->frame_type == KEY_FRAME) { int r, d; -#ifdef ENC_DEBUG +#if 0 // ENC_DEBUG if (enc_debug) printf("intra pick_mb_modes %d %d\n", mb_row, mb_col); #endif @@ -776,7 +776,7 @@ static int pick_mb_modes(VP9_COMP *cpi, } else { int seg_id, r, d; -#ifdef ENC_DEBUG +#if 0 // ENC_DEBUG if (enc_debug) printf("inter pick_mb_modes %d %d\n", mb_row, mb_col); #endif @@ -1276,6 +1276,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { vp9_zero(cpi->coef_counts_8x8); vp9_zero(cpi->coef_counts_16x16); vp9_zero(cpi->coef_counts_32x32); + vp9_zero(cm->fc.eob_branch_counts); #if CONFIG_CODE_NONZEROCOUNT vp9_zero(cm->fc.nzc_counts_4x4); 
vp9_zero(cm->fc.nzc_counts_8x8); @@ -2057,8 +2058,8 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, assert(!xd->mode_info_context->mbmi.sb_type); #ifdef ENC_DEBUG - enc_debug = (cpi->common.current_video_frame == 1 && - mb_row == 0 && mb_col == 0 && output_enabled); + enc_debug = (cpi->common.current_video_frame == 11 && cm->show_frame && + mb_row == 8 && mb_col == 0 && output_enabled); if (enc_debug) printf("Encode MB %d %d output %d\n", mb_row, mb_col, output_enabled); #endif @@ -2105,7 +2106,7 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, } if (mbmi->ref_frame == INTRA_FRAME) { -#ifdef ENC_DEBUG +#if 0 // def ENC_DEBUG if (enc_debug) { printf("Mode %d skip %d tx_size %d\n", mbmi->mode, x->skip, mbmi->txfm_size); @@ -2316,10 +2317,16 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, const int mis = cm->mode_info_stride; #ifdef ENC_DEBUG - enc_debug = (cpi->common.current_video_frame == 1 && - mb_row == 0 && mb_col == 0 && output_enabled); - if (enc_debug) + enc_debug = (cpi->common.current_video_frame == 11 && cm->show_frame && + mb_row == 8 && mb_col == 0 && output_enabled); + if (enc_debug) { printf("Encode SB32 %d %d output %d\n", mb_row, mb_col, output_enabled); + printf("Mode %d skip %d tx_size %d ref %d ref2 %d mv %d %d interp %d\n", + mi->mbmi.mode, x->skip, mi->mbmi.txfm_size, + mi->mbmi.ref_frame, mi->mbmi.second_ref_frame, + mi->mbmi.mv[0].as_mv.row, mi->mbmi.mv[0].as_mv.col, + mi->mbmi.interp_filter); + } #endif if (cm->frame_type == KEY_FRAME) { if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { @@ -2537,8 +2544,8 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, const int mis = cm->mode_info_stride; #ifdef ENC_DEBUG - enc_debug = (cpi->common.current_video_frame == 1 && - mb_row == 0 && mb_col == 0 && output_enabled); + enc_debug = (cpi->common.current_video_frame == 11 && cm->show_frame && + mb_row == 8 && mb_col == 0 && output_enabled); if (enc_debug) printf("Encode SB64 %d %d output %d\n", 
mb_row, mb_col, output_enabled); #endif diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index 9e5bcea16..eddacb872 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -155,12 +155,12 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { tx_type = get_tx_type_8x8(xd, ib); if (tx_type != DCT_DCT) { vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type); - x->quantize_b_8x8(x, idx); + x->quantize_b_8x8(x, idx, tx_type); vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 16, tx_type); } else { x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32); - x->quantize_b_8x8(x, idx); + x->quantize_b_8x8(x, idx, DCT_DCT); vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32); } } else { diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index dae177a3c..3ad429a9e 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -528,9 +528,16 @@ static const int plane_rd_mult[4] = { // This function is a place holder for now but may ultimately need // to scan previous tokens to work out the correct context. 
-static int trellis_get_coeff_context(int token) { - int recent_energy = 0; - return vp9_get_coef_context(&recent_energy, token); +static int trellis_get_coeff_context(const int *scan, + const int *nb, + int idx, int token, + uint8_t *token_cache, + int pad, int l) { + int bak = token_cache[idx], pt; + token_cache[idx] = token; + pt = vp9_get_coef_context(scan, nb, pad, token_cache, idx + 1, l); + token_cache[idx] = bak; + return pt; } static void optimize_b(VP9_COMMON *const cm, @@ -552,9 +559,10 @@ static void optimize_b(VP9_COMMON *const cm, int rate0, rate1, error0, error1, t0, t1; int best, band, pt; int err_mult = plane_rd_mult[type]; - int default_eob; - int const *scan; + int default_eob, pad; + int const *scan, *nb; const int mul = 1 + (tx_size == TX_32X32); + uint8_t token_cache[1024]; #if CONFIG_CODE_NONZEROCOUNT // TODO(debargha): the dynamic programming approach used in this function // is not compatible with the true rate cost when nzcs are used. Note @@ -590,20 +598,40 @@ static void optimize_b(VP9_COMMON *const cm, } break; } - case TX_8X8: - scan = vp9_default_zig_zag1d_8x8; + case TX_8X8: { + const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; + const int sz = 3 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x; + const TX_TYPE tx_type = get_tx_type_8x8(xd, y + (x >> 1)); + if (tx_type == DCT_ADST) { + scan = vp9_col_scan_8x8; + } else if (tx_type == ADST_DCT) { + scan = vp9_row_scan_8x8; + } else { + scan = vp9_default_zig_zag1d_8x8; + } default_eob = 64; #if CONFIG_CODE_NONZEROCOUNT nzc_cost = mb->nzc_costs_8x8[nzc_context][ref][type]; #endif break; - case TX_16X16: - scan = vp9_default_zig_zag1d_16x16; + } + case TX_16X16: { + const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; + const int sz = 4 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x; + const TX_TYPE tx_type = get_tx_type_16x16(xd, y + (x >> 2)); + if (tx_type == DCT_ADST) { + scan = vp9_col_scan_16x16; + } else if (tx_type == ADST_DCT) { + scan = 
vp9_row_scan_16x16; + } else { + scan = vp9_default_zig_zag1d_16x16; + } default_eob = 256; #if CONFIG_CODE_NONZEROCOUNT nzc_cost = mb->nzc_costs_16x16[nzc_context][ref][type]; #endif break; + } case TX_32X32: scan = vp9_default_zig_zag1d_32x32; default_eob = 1024; @@ -631,6 +659,10 @@ static void optimize_b(VP9_COMMON *const cm, tokens[eob][0].qc = 0; *(tokens[eob] + 1) = *(tokens[eob] + 0); next = eob; + for (i = 0; i < eob; i++) + token_cache[i] = vp9_dct_value_tokens_ptr[qcoeff_ptr[scan[i]]].Token; + nb = vp9_get_coef_neighbors_handle(scan, &pad); + for (i = eob; i-- > i0;) { int base_bits, d2, dx; #if CONFIG_CODE_NONZEROCOUNT @@ -650,8 +682,9 @@ static void optimize_b(VP9_COMMON *const cm, t0 = (vp9_dct_value_tokens_ptr + x)->Token; /* Consider both possible successor states. */ if (next < default_eob) { - band = get_coef_band(tx_size, i + 1); - pt = trellis_get_coeff_context(t0); + band = get_coef_band(scan, tx_size, i + 1); + pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache, + pad, default_eob); rate0 += mb->token_costs[tx_size][type][ref][band][pt][tokens[next][0].token]; rate1 += @@ -708,14 +741,16 @@ static void optimize_b(VP9_COMMON *const cm, t0 = t1 = (vp9_dct_value_tokens_ptr + x)->Token; } if (next < default_eob) { - band = get_coef_band(tx_size, i + 1); + band = get_coef_band(scan, tx_size, i + 1); if (t0 != DCT_EOB_TOKEN) { - pt = trellis_get_coeff_context(t0); + pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache, + pad, default_eob); rate0 += mb->token_costs[tx_size][type][ref][band][pt][ tokens[next][0].token]; } if (t1 != DCT_EOB_TOKEN) { - pt = trellis_get_coeff_context(t1); + pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache, + pad, default_eob); rate1 += mb->token_costs[tx_size][type][ref][band][pt][ tokens[next][1].token]; } @@ -748,7 +783,7 @@ static void optimize_b(VP9_COMMON *const cm, * add a new trellis node, but we do need to update the costs. 
*/ else { - band = get_coef_band(tx_size, i + 1); + band = get_coef_band(scan, tx_size, i + 1); t0 = tokens[next][0].token; t1 = tokens[next][1].token; /* Update the cost of each path if we're past the EOB token. */ @@ -767,7 +802,7 @@ static void optimize_b(VP9_COMMON *const cm, } /* Now pick the best path through the whole trellis. */ - band = get_coef_band(tx_size, i + 1); + band = get_coef_band(scan, tx_size, i + 1); VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 13958c03d..881fce50f 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -196,13 +196,26 @@ void vp9_quantize_mb_4x4(MACROBLOCK *x) { vp9_quantize_mbuv_4x4(x); } -void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx) { +void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { MACROBLOCKD *const xd = &mb->e_mbd; int16_t *qcoeff_ptr = xd->qcoeff + 16 * b_idx; int16_t *dqcoeff_ptr = xd->dqcoeff + 16 * b_idx; const int c_idx = plane_idx(xd, b_idx); BLOCK *const b = &mb->block[c_idx]; BLOCKD *const d = &xd->block[c_idx]; + const int *pt_scan; + + switch (tx_type) { + case ADST_DCT: + pt_scan = vp9_row_scan_8x8; + break; + case DCT_ADST: + pt_scan = vp9_col_scan_8x8; + break; + default: + pt_scan = vp9_default_zig_zag1d_8x8; + break; + } vpx_memset(qcoeff_ptr, 0, 64 * sizeof(int16_t)); vpx_memset(dqcoeff_ptr, 0, 64 * sizeof(int16_t)); @@ -254,7 +267,7 @@ void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx) { } } for (i = 1; i < 64; i++) { - rc = vp9_default_zig_zag1d_8x8[i]; + rc = pt_scan[i]; z = coeff_ptr[rc]; zbin = (zbin_ptr[1] + zbin_boost_ptr[zero_run] + zbin_oq_value); // The original code was incrementing zero_run while keeping it at @@ -303,7 +316,8 @@ void vp9_quantize_mby_8x8(MACROBLOCK *x) { } #endif for (i = 0; i < 16; i += 4) { - x->quantize_b_8x8(x, i); + TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, (i & 8) + ((i 
& 4) >> 1)); + x->quantize_b_8x8(x, i, tx_type); } } @@ -316,7 +330,7 @@ void vp9_quantize_mbuv_8x8(MACROBLOCK *x) { } #endif for (i = 16; i < 24; i += 4) - x->quantize_b_8x8(x, i); + x->quantize_b_8x8(x, i, DCT_DCT); } void vp9_quantize_mb_8x8(MACROBLOCK *x) { @@ -325,13 +339,14 @@ void vp9_quantize_mb_8x8(MACROBLOCK *x) { } void vp9_quantize_mby_16x16(MACROBLOCK *x) { + TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd, 0); #if CONFIG_CODE_NONZEROCOUNT int i; for (i = 0; i < 16; i++) { x->e_mbd.nzcs[i] = 0; } #endif - x->quantize_b_16x16(x, 0); + x->quantize_b_16x16(x, 0, tx_type); } void vp9_quantize_mb_16x16(MACROBLOCK *x) { @@ -400,11 +415,24 @@ static void quantize(int16_t *zbin_boost_orig_ptr, #endif } -void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx) { +void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { MACROBLOCKD *const xd = &mb->e_mbd; const int c_idx = plane_idx(xd, b_idx); BLOCK *const b = &mb->block[c_idx]; BLOCKD *const d = &xd->block[c_idx]; + const int *pt_scan; + + switch (tx_type) { + case ADST_DCT: + pt_scan = vp9_row_scan_16x16; + break; + case DCT_ADST: + pt_scan = vp9_col_scan_16x16; + break; + default: + pt_scan = vp9_default_zig_zag1d_16x16; + break; + } quantize(b->zrun_zbin_boost, mb->coeff + 16 * b_idx, @@ -418,7 +446,7 @@ void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx) { #if CONFIG_CODE_NONZEROCOUNT &xd->nzcs[b_idx], #endif - vp9_default_zig_zag1d_16x16, 1); + pt_scan, 1); } void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx) { @@ -450,15 +478,21 @@ void vp9_quantize_sby_32x32(MACROBLOCK *x) { void vp9_quantize_sby_16x16(MACROBLOCK *x) { int n; - for (n = 0; n < 4; n++) - x->quantize_b_16x16(x, n * 16); + for (n = 0; n < 4; n++) { + TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd, + (16 * (n & 2)) + ((n & 1) * 4)); + x->quantize_b_16x16(x, n * 16, tx_type); + } } void vp9_quantize_sby_8x8(MACROBLOCK *x) { int n; - for (n = 0; n < 16; n++) - x->quantize_b_8x8(x, n * 4); + for (n = 
0; n < 16; n++) { + TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, + (4 * (n & 12)) + ((n & 3) * 2)); + x->quantize_b_8x8(x, n * 4, tx_type); + } } void vp9_quantize_sby_4x4(MACROBLOCK *x) { @@ -476,15 +510,15 @@ void vp9_quantize_sby_4x4(MACROBLOCK *x) { } void vp9_quantize_sbuv_16x16(MACROBLOCK *x) { - x->quantize_b_16x16(x, 64); - x->quantize_b_16x16(x, 80); + x->quantize_b_16x16(x, 64, DCT_DCT); + x->quantize_b_16x16(x, 80, DCT_DCT); } void vp9_quantize_sbuv_8x8(MACROBLOCK *x) { int i; for (i = 64; i < 96; i += 4) - x->quantize_b_8x8(x, i); + x->quantize_b_8x8(x, i, DCT_DCT); } void vp9_quantize_sbuv_4x4(MACROBLOCK *x) { @@ -504,15 +538,21 @@ void vp9_quantize_sb64y_32x32(MACROBLOCK *x) { void vp9_quantize_sb64y_16x16(MACROBLOCK *x) { int n; - for (n = 0; n < 16; n++) - x->quantize_b_16x16(x, n * 16); + for (n = 0; n < 16; n++) { + TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd, + (16 * (n & 12)) + ((n & 3) * 4)); + x->quantize_b_16x16(x, n * 16, tx_type); + } } void vp9_quantize_sb64y_8x8(MACROBLOCK *x) { int n; - for (n = 0; n < 64; n++) - x->quantize_b_8x8(x, n * 4); + for (n = 0; n < 64; n++) { + TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, + (4 * (n & 56)) + ((n & 7) * 2)); + x->quantize_b_8x8(x, n * 4, tx_type); + } } void vp9_quantize_sb64y_4x4(MACROBLOCK *x) { @@ -538,14 +578,14 @@ void vp9_quantize_sb64uv_16x16(MACROBLOCK *x) { int i; for (i = 256; i < 384; i += 16) - x->quantize_b_16x16(x, i); + x->quantize_b_16x16(x, i, DCT_DCT); } void vp9_quantize_sb64uv_8x8(MACROBLOCK *x) { int i; for (i = 256; i < 384; i += 4) - x->quantize_b_8x8(x, i); + x->quantize_b_8x8(x, i, DCT_DCT); } void vp9_quantize_sb64uv_4x4(MACROBLOCK *x) { diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index 739254025..6ba6cbdd9 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -29,8 +29,8 @@ void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_ix, TX_TYPE type); void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx); void 
vp9_regular_quantize_b_4x4_pair(MACROBLOCK *mb, int b_idx1, int b_idx2); -void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx); -void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx); +void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type); +void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type); void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx); void vp9_quantize_mb_4x4(MACROBLOCK *x); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 04b732a45..ad9b96050 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -156,21 +156,9 @@ static void fill_token_costs(vp9_coeff_count *c, for (j = 0; j < REF_TYPES; j++) for (k = 0; k < COEF_BANDS; k++) for (l = 0; l < PREV_COEF_CONTEXTS; l++) { -#if CONFIG_CODE_NONZEROCOUNT - // All costs are without the EOB node vp9_cost_tokens_skip((int *)(c[i][j][k][l]), p[i][j][k][l], vp9_coef_tree); -#else - if (l == 0 && k > 0) - vp9_cost_tokens_skip((int *)(c[i][j][k][l]), - p[i][j][k][l], - vp9_coef_tree); - else - vp9_cost_tokens((int *)(c[i][j][k][l]), - p[i][j][k][l], - vp9_coef_tree); -#endif } } @@ -448,8 +436,8 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, int pt; const int eob = xd->eobs[ib]; int c = 0; - int cost = 0; - const int *scan; + int cost = 0, pad; + const int *scan, *nb; const int16_t *qcoeff_ptr = xd->qcoeff + ib * 16; const int ref = mbmi->ref_frame != INTRA_FRAME; unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = @@ -464,9 +452,12 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, int nzc_context = vp9_get_nzc_context(cm, xd, ib); unsigned int *nzc_cost; #else - int seg_eob; const int segment_id = xd->mode_info_context->mbmi.segment_id; + vp9_prob (*coef_probs)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] + [ENTROPY_NODES]; #endif + int seg_eob, default_eob; + uint8_t token_cache[1024]; // Check for consistency of tx_size with mode info if (type == 
PLANE_TYPE_Y_WITH_DC) { @@ -485,8 +476,9 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, #if CONFIG_CODE_NONZEROCOUNT nzc_cost = mb->nzc_costs_4x4[nzc_context][ref][type]; #else - seg_eob = 16; + coef_probs = cm->fc.coef_probs_4x4; #endif + seg_eob = 16; if (tx_type == ADST_DCT) { scan = vp9_row_scan_4x4; } else if (tx_type == DCT_ADST) { @@ -496,23 +488,46 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, } break; } - case TX_8X8: + case TX_8X8: { + const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; + const int sz = 3 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x; + const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? + get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT; a_ec = (a[0] + a[1]) != 0; l_ec = (l[0] + l[1]) != 0; - scan = vp9_default_zig_zag1d_8x8; + if (tx_type == ADST_DCT) { + scan = vp9_row_scan_8x8; + } else if (tx_type == DCT_ADST) { + scan = vp9_col_scan_8x8; + } else { + scan = vp9_default_zig_zag1d_8x8; + } #if CONFIG_CODE_NONZEROCOUNT nzc_cost = mb->nzc_costs_8x8[nzc_context][ref][type]; #else - seg_eob = 64; + coef_probs = cm->fc.coef_probs_8x8; #endif + seg_eob = 64; break; - case TX_16X16: - scan = vp9_default_zig_zag1d_16x16; + } + case TX_16X16: { + const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; + const int sz = 4 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x; + const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? 
+ get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT; + if (tx_type == ADST_DCT) { + scan = vp9_row_scan_16x16; + } else if (tx_type == DCT_ADST) { + scan = vp9_col_scan_16x16; + } else { + scan = vp9_default_zig_zag1d_16x16; + } #if CONFIG_CODE_NONZEROCOUNT nzc_cost = mb->nzc_costs_16x16[nzc_context][ref][type]; #else - seg_eob = 256; + coef_probs = cm->fc.coef_probs_16x16; #endif + seg_eob = 256; if (type == PLANE_TYPE_UV) { a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; @@ -521,13 +536,15 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; } break; + } case TX_32X32: scan = vp9_default_zig_zag1d_32x32; #if CONFIG_CODE_NONZEROCOUNT nzc_cost = mb->nzc_costs_32x32[nzc_context][ref][type]; #else - seg_eob = 1024; + coef_probs = cm->fc.coef_probs_32x32; #endif + seg_eob = 1024; if (type == PLANE_TYPE_UV) { ENTROPY_CONTEXT *a2, *a3, *l2, *l3; a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); @@ -551,6 +568,8 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, } VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); + nb = vp9_get_coef_neighbors_handle(scan, &pad); + default_eob = seg_eob; #if CONFIG_CODE_NONZEROCOUNT == 0 if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) @@ -558,7 +577,6 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, #endif { - int recent_energy = 0; #if CONFIG_CODE_NONZEROCOUNT int nzc = 0; #endif @@ -568,16 +586,24 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, #if CONFIG_CODE_NONZEROCOUNT nzc += (v != 0); #endif - cost += token_costs[get_coef_band(tx_size, c)][pt][t]; + token_cache[c] = t; + cost += token_costs[get_coef_band(scan, tx_size, c)][pt][t]; cost += vp9_dct_value_cost_ptr[v]; - pt = vp9_get_coef_context(&recent_energy, t); +#if !CONFIG_CODE_NONZEROCOUNT + if (!c || token_cache[c - 1]) + cost += vp9_cost_bit(coef_probs[type][ref] + [get_coef_band(scan, tx_size, c)] + 
[pt][0], 1); +#endif + pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob); } #if CONFIG_CODE_NONZEROCOUNT cost += nzc_cost[nzc]; #else if (c < seg_eob) - cost += mb->token_costs[tx_size][type][ref][get_coef_band(tx_size, c)] - [pt][DCT_EOB_TOKEN]; + cost += mb->token_costs[tx_size][type][ref] + [get_coef_band(scan, tx_size, c)] + [pt][DCT_EOB_TOKEN]; #endif } @@ -1494,7 +1520,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type); else x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32); - x->quantize_b_8x8(x, idx); + x->quantize_b_8x8(x, idx, tx_type); // compute quantization mse of 8x8 block distortion = vp9_block_error_c((x->block + idx)->coeff, @@ -2424,13 +2450,15 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, &xd->scale_factor[0], 4, 4, 0 /* no avg */, &xd->subpix); + // TODO(debargha): Make this work properly with the + // implicit-compoundinter-weight experiment when implicit + // weighting for splitmv modes is turned on. if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - vp9_build_inter_predictor(*(bd->base_second_pre) + bd->pre, - bd->pre_stride, - bd->predictor, 16, - &bd->bmi.as_mv[1], - &xd->scale_factor[1], - 4, 4, 1 /* avg */, &xd->subpix); + vp9_build_inter_predictor( + *(bd->base_second_pre) + bd->pre, bd->pre_stride, bd->predictor, 16, + &bd->bmi.as_mv[1], &xd->scale_factor[1], 4, 4, + 1 << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT) /* avg */, + &xd->subpix); } vp9_subtract_b(be, bd, 16); @@ -2486,12 +2514,14 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm, for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { uint8_t **base_pre = which_mv ? 
bd->base_second_pre : bd->base_pre; - vp9_build_inter_predictor(*base_pre + bd->pre, - bd->pre_stride, - bd->predictor, 16, - &bd->bmi.as_mv[which_mv], - &xd->scale_factor[which_mv], - 8, 8, which_mv, &xd->subpix); + // TODO(debargha): Make this work properly with the + // implicit-compoundinter-weight experiment when implicit + // weighting for splitmv modes is turned on. + vp9_build_inter_predictor( + *base_pre + bd->pre, bd->pre_stride, bd->predictor, 16, + &bd->bmi.as_mv[which_mv], &xd->scale_factor[which_mv], 8, 8, + which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), + &xd->subpix); } vp9_subtract_4b_c(be, bd, 16); @@ -2499,7 +2529,7 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm, if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) { if (otherrd) { x->fwd_txm8x8(be->src_diff, be2->coeff, 32); - x->quantize_b_8x8(x, idx); + x->quantize_b_8x8(x, idx, DCT_DCT); thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); otherdist += thisdistortion; xd->mode_info_context->mbmi.txfm_size = TX_8X8; @@ -2553,7 +2583,7 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm, } } x->fwd_txm8x8(be->src_diff, be2->coeff, 32); - x->quantize_b_8x8(x, idx); + x->quantize_b_8x8(x, idx, DCT_DCT); thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); *distortion += thisdistortion; *labelyrate += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC, @@ -3866,27 +3896,10 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, unsigned int sse, var; int tmp_rate_y, tmp_rate_u, tmp_rate_v; int tmp_dist_y, tmp_dist_u, tmp_dist_v; - // TODO(jkoleszar): these 2 y/uv should be replaced with one call to - // vp9_build_interintra_16x16_predictors_mb(). 
- vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, - mb_row, mb_col); - -#if CONFIG_COMP_INTERINTRA_PRED - if (is_comp_interintra_pred) { - vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16); - } -#endif - - vp9_build_inter16x16_predictors_mbuv(xd, xd->predictor + 256, - xd->predictor + 320, 8, - mb_row, mb_col); - -#if CONFIG_COMP_INTERINTRA_PRED - if (is_comp_interintra_pred) { - vp9_build_interintra_16x16_predictors_mbuv(xd, xd->predictor + 256, - xd->predictor + 320, 8); - } -#endif + vp9_build_inter16x16_predictors_mb(xd, xd->predictor, + xd->predictor + 256, + xd->predictor + 320, + 16, 8, mb_row, mb_col); var = vp9_variance16x16(*(b->base_src), b->src_stride, xd->predictor, 16, &sse); // Note our transform coeffs are 8 times an orthogonal transform. @@ -3986,24 +3999,10 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, xd->dst.uv_stride, mb_row, mb_col); } else { - // TODO(jkoleszar): These y/uv fns can be replaced with their mb - // equivalent - vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, - mb_row, mb_col); -#if CONFIG_COMP_INTERINTRA_PRED - if (is_comp_interintra_pred) { - vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16); - } -#endif - vp9_build_inter16x16_predictors_mbuv(xd, &xd->predictor[256], - &xd->predictor[320], 8, - mb_row, mb_col); -#if CONFIG_COMP_INTERINTRA_PRED - if (is_comp_interintra_pred) { - vp9_build_interintra_16x16_predictors_mbuv(xd, &xd->predictor[256], - &xd->predictor[320], 8); - } -#endif + vp9_build_inter16x16_predictors_mb(xd, xd->predictor, + xd->predictor + 256, + xd->predictor + 320, + 16, 8, mb_row, mb_col); } } @@ -4586,7 +4585,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, vpx_memcpy(x->partition_info, &tmp_best_partition, sizeof(PARTITION_INFO)); for (i = 0; i < 16; i++) { - xd->block[i].bmi = tmp_best_bmodes[i]; + xd->block[i].bmi = xd->mode_info_context->bmi[i] = tmp_best_bmodes[i]; } } diff --git a/vp9/encoder/vp9_temporal_filter.c 
b/vp9/encoder/vp9_temporal_filter.c index dc1d3d48d..22a12f4a8 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -54,7 +54,10 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, &pred[0], 16, &subpel_mv, &xd->scale_factor[which_mv], - 16, 16, which_mv, &xd->subpix); + 16, 16, + which_mv << + (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), + &xd->subpix); stride = (stride + 1) >> 1; @@ -62,13 +65,19 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, &pred[256], 8, &fullpel_mv, &subpel_mv, &xd->scale_factor_uv[which_mv], - 8, 8, which_mv, &xd->subpix); + 8, 8, + which_mv << + (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), + &xd->subpix); vp9_build_inter_predictor_q4(v_mb_ptr, stride, &pred[320], 8, &fullpel_mv, &subpel_mv, &xd->scale_factor_uv[which_mv], - 8, 8, which_mv, &xd->subpix); + 8, 8, + which_mv << + (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), + &xd->subpix); } void vp9_temporal_filter_apply_c(uint8_t *frame1, diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index cead2a606..6e2b8474d 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -109,6 +109,8 @@ static void fill_value_tokens() { vp9_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE; } +extern const int *vp9_get_coef_neighbors_handle(const int *scan, int *pad); + static void tokenize_b(VP9_COMP *cpi, MACROBLOCKD *xd, const int ib, @@ -119,18 +121,18 @@ static void tokenize_b(VP9_COMP *cpi, MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; int pt; /* near block/prev token context index */ int c = 0; - int recent_energy = 0; const int eob = xd->eobs[ib]; /* one beyond last nonzero coeff */ TOKENEXTRA *t = *tp; /* store tokens starting here */ int16_t *qcoeff_ptr = xd->qcoeff + 16 * ib; - int seg_eob; + int seg_eob, default_eob, pad; const int segment_id = mbmi->segment_id; const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type; - const int *scan; + const int *scan, *nb; vp9_coeff_count *counts; vp9_coeff_probs 
*probs; const int ref = mbmi->ref_frame != INTRA_FRAME; ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec; + uint8_t token_cache[1024]; #if CONFIG_CODE_NONZEROCOUNT int zerosleft, nzc = 0; if (eob == 0) @@ -179,15 +181,29 @@ static void tokenize_b(VP9_COMP *cpi, probs = cpi->common.fc.coef_probs_4x4; break; } - case TX_8X8: + case TX_8X8: { + const int sz = 3 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x; + const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? + get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT; a_ec = (a[0] + a[1]) != 0; l_ec = (l[0] + l[1]) != 0; seg_eob = 64; scan = vp9_default_zig_zag1d_8x8; + if (tx_type != DCT_DCT) { + if (tx_type == ADST_DCT) { + scan = vp9_row_scan_8x8; + } else if (tx_type == DCT_ADST) { + scan = vp9_col_scan_8x8; + } + } counts = cpi->coef_counts_8x8; probs = cpi->common.fc.coef_probs_8x8; break; - case TX_16X16: + } + case TX_16X16: { + const int sz = 4 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x; + const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? 
+ get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT; if (type != PLANE_TYPE_UV) { a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; @@ -197,9 +213,17 @@ static void tokenize_b(VP9_COMP *cpi, } seg_eob = 256; scan = vp9_default_zig_zag1d_16x16; + if (tx_type != DCT_DCT) { + if (tx_type == ADST_DCT) { + scan = vp9_row_scan_16x16; + } else if (tx_type == DCT_ADST) { + scan = vp9_col_scan_16x16; + } + } counts = cpi->coef_counts_16x16; probs = cpi->common.fc.coef_probs_16x16; break; + } case TX_32X32: if (type != PLANE_TYPE_UV) { a_ec = (a[0] + a[1] + a[2] + a[3] + @@ -220,12 +244,14 @@ static void tokenize_b(VP9_COMP *cpi, } VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); + nb = vp9_get_coef_neighbors_handle(scan, &pad); + default_eob = seg_eob; if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) seg_eob = 0; do { - const int band = get_coef_band(tx_size, c); + const int band = get_coef_band(scan, tx_size, c); int token; int v = 0; #if CONFIG_CODE_NONZEROCOUNT @@ -252,17 +278,20 @@ static void tokenize_b(VP9_COMP *cpi, // Skip zero node if there are no zeros left t->skip_eob_node = 1 + (zerosleft == 0); #else - t->skip_eob_node = (pt == 0) && (band > 0); + t->skip_eob_node = (c > 0) && (token_cache[c - 1] == 0); #endif assert(vp9_coef_encodings[t->Token].Len - t->skip_eob_node > 0); if (!dry_run) { ++counts[type][ref][band][pt][token]; + if (!t->skip_eob_node) + ++cpi->common.fc.eob_branch_counts[tx_size][type][ref][band][pt]; } #if CONFIG_CODE_NONZEROCOUNT nzc += (v != 0); #endif + token_cache[c] = token; - pt = vp9_get_coef_context(&recent_energy, token); + pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob); ++t; } while (c < eob && ++c < seg_eob); #if CONFIG_CODE_NONZEROCOUNT @@ -833,7 +862,7 @@ static void print_counter(FILE *f, vp9_coeff_accum *context_counters, assert(x == (int64_t) y); /* no overflow handling yet */ fprintf(f, "%s %d", Comma(t), y); - } while (++t < MAX_ENTROPY_TOKENS); + } while (++t < 1 + 
MAX_ENTROPY_TOKENS); fprintf(f, "}"); } while (++pt < PREV_COEF_CONTEXTS); fprintf(f, "\n }"); @@ -867,13 +896,17 @@ static void print_probs(FILE *f, vp9_coeff_accum *context_counters, pt = 0; do { unsigned int branch_ct[ENTROPY_NODES][2]; - unsigned int coef_counts[MAX_ENTROPY_TOKENS]; + unsigned int coef_counts[MAX_ENTROPY_TOKENS + 1]; vp9_prob coef_probs[ENTROPY_NODES]; - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) + if (pt >= 3 && band == 0) + break; + for (t = 0; t < MAX_ENTROPY_TOKENS + 1; ++t) coef_counts[t] = context_counters[type][ref][band][pt][t]; vp9_tree_probs_from_distribution(vp9_coef_tree, coef_probs, branch_ct, coef_counts, 0); + branch_ct[0][1] = coef_counts[MAX_ENTROPY_TOKENS] - branch_ct[0][0]; + coef_probs[0] = get_binary_prob(branch_ct[0][0], branch_ct[0][1]); fprintf(f, "%s\n {", Comma(pt)); t = 0; @@ -1025,7 +1058,7 @@ static void stuff_b(VP9_COMP *cpi, #if CONFIG_CODE_NONZEROCOUNT == 0 VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); - band = get_coef_band(tx_size, 0); + band = 0; t->Token = DCT_EOB_TOKEN; t->context_tree = probs[type][ref][band][pt]; t->skip_eob_node = 0; diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index 464d7caf6..907f814ac 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -29,7 +29,7 @@ typedef struct { } TOKENEXTRA; typedef int64_t vp9_coeff_accum[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] - [MAX_ENTROPY_TOKENS]; + [MAX_ENTROPY_TOKENS + 1]; int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd); int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd); diff --git a/vp9/encoder/vp9_treewriter.c b/vp9/encoder/vp9_treewriter.c index 951ffa798..52da3c6ce 100644 --- a/vp9/encoder/vp9_treewriter.c +++ b/vp9/encoder/vp9_treewriter.c @@ -10,6 +10,7 @@ #include "vp9/encoder/vp9_treewriter.h" +#include "vp9/common/vp9_common.h" static void cost( int *const C, @@ -35,6 +36,7 @@ void vp9_cost_tokens(int *c, const vp9_prob *p, vp9_tree t) { } void vp9_cost_tokens_skip(int *c, const vp9_prob *p, vp9_tree t) { 
- c[0] = 0; + assert(t[1] > 0 && t[0] <= 0); + c[-t[0]] = vp9_cost_bit(p[0], 0); cost(c, t, p, 2, 0); } @@ -1488,7 +1488,7 @@ static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2, const unsigned int bsize2 = bsize >> 1; unsigned int match = 1; unsigned int i, j; - yloc[0] = yloc[1] = -1; + yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1; for (i = 0, match = 1; match && i < img1->d_h; i += bsize) { for (j = 0; match && j < img1->d_w; j += bsize) { int k, l; @@ -1502,13 +1502,17 @@ static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2, (i + k) * img2->stride[VPX_PLANE_Y] + j + l)) { yloc[0] = i + k; yloc[1] = j + l; + yloc[2] = *(img1->planes[VPX_PLANE_Y] + + (i + k) * img1->stride[VPX_PLANE_Y] + j + l); + yloc[3] = *(img2->planes[VPX_PLANE_Y] + + (i + k) * img2->stride[VPX_PLANE_Y] + j + l); match = 0; break; } } } } - uloc[0] = uloc[1] = -1; + uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1; for (i = 0, match = 1; match && i < (img1->d_h + 1) / 2; i += bsize2) { for (j = 0; j < match && (img1->d_w + 1) / 2; j += bsize2) { int k, l; @@ -1522,13 +1526,17 @@ static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2, (i + k) * img2->stride[VPX_PLANE_U] + j + l)) { uloc[0] = i + k; uloc[1] = j + l; + uloc[2] = *(img1->planes[VPX_PLANE_U] + + (i + k) * img1->stride[VPX_PLANE_U] + j + l); + uloc[3] = *(img2->planes[VPX_PLANE_U] + + (i + k) * img2->stride[VPX_PLANE_U] + j + l); match = 0; break; } } } } - vloc[0] = vloc[1] = -1; + vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1; for (i = 0, match = 1; match && i < (img1->d_h + 1) / 2; i += bsize2) { for (j = 0; j < match && (img1->d_w + 1) / 2; j += bsize2) { int k, l; @@ -1542,6 +1550,10 @@ static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2, (i + k) * img2->stride[VPX_PLANE_V] + j + l)) { vloc[0] = i + k; vloc[1] = j + l; + vloc[2] = *(img1->planes[VPX_PLANE_V] + + (i + k) * img1->stride[VPX_PLANE_V] + j + l); + vloc[3] = *(img2->planes[VPX_PLANE_V] + + (i + k) * img2->stride[VPX_PLANE_V] + j + l); 
match = 0; break; } @@ -2454,14 +2466,18 @@ static void test_decode(struct stream_state *stream, ctx_exit_on_error(&stream->decoder, "Failed to get decoder reference frame"); if (!compare_img(&enc_img, &dec_img)) { - int y[2], u[2], v[2]; + int y[4], u[4], v[4]; find_mismatch(&enc_img, &dec_img, y, u, v); stream->decoder.err = 1; warn_or_exit_on_error(&stream->decoder, fatal == TEST_DECODE_FATAL, - "Stream %d: Encode/decode mismatch on frame %d" - " at Y[%d, %d], U[%d, %d], V[%d, %d]", + "Stream %d: Encode/decode mismatch on frame %d at" + " Y[%d, %d] {%d/%d}," + " U[%d, %d] {%d/%d}," + " V[%d, %d] {%d/%d}", stream->index, stream->frames_out, - y[0], y[1], u[0], u[1], v[0], v[1]); + y[0], y[1], y[2], y[3], + u[0], u[1], u[2], u[3], + v[0], v[1], v[2], v[3]); stream->mismatch_seen = stream->frames_out; } |