diff options
50 files changed, 2068 insertions, 5428 deletions
@@ -238,19 +238,17 @@ HAVE_LIST=" " EXPERIMENT_LIST=" csm - lossless new_mvref implicit_segmentation newbintramodes comp_interintra_pred tx64x64 - dwtdcthybrid cnvcontext - newcoefcontext enable_6tap abovesprefmv intht intht4x4 + intht16x16 " CONFIG_LIST=" external_build diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc index 827b13316..d14e0aa2f 100644 --- a/test/dct32x32_test.cc +++ b/test/dct32x32_test.cc @@ -36,7 +36,6 @@ static int round(double x) { } #endif -#if !CONFIG_DWTDCTHYBRID static const double kPi = 3.141592653589793238462643383279502884; static void reference2_32x32_idct_2d(double *input, double *output) { double x; @@ -127,9 +126,7 @@ TEST(VP9Idct32x32Test, AccuracyCheck) { } } } -#else // CONFIG_DWTDCTHYBRID - // TODO(rbultje/debargha): add DWT-specific tests -#endif // CONFIG_DWTDCTHYBRID + TEST(VP9Fdct32x32Test, AccuracyCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); unsigned int max_error = 0; diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c index 01fa63fdb..c3d6dae93 100644 --- a/vp9/common/vp9_alloccommon.c +++ b/vp9/common/vp9_alloccommon.c @@ -219,8 +219,4 @@ void vp9_initialize_common() { vp9_entropy_mode_init(); vp9_entropy_mv_init(); - -#if CONFIG_NEWCOEFCONTEXT - vp9_init_neighbors(); -#endif } diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c index 4ae8132bb..1eda3cc38 100644 --- a/vp9/common/vp9_blockd.c +++ b/vp9/common/vp9_blockd.c @@ -12,15 +12,15 @@ #include "vp9/common/vp9_blockd.h" #include "vpx_mem/vpx_mem.h" -const uint8_t vp9_block2left[TX_SIZE_MAX_SB][25] = { - {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8}, - {0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8} +const uint8_t vp9_block2left[TX_SIZE_MAX_SB][24] = { + {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 
4, 4, 5, 5, 6, 6, 7, 7}, + {0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6} }; -const uint8_t vp9_block2above[TX_SIZE_MAX_SB][25] = { - {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8}, - {0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8} +const uint8_t vp9_block2above[TX_SIZE_MAX_SB][24] = { + {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7}, + {0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6} }; diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index b84da812e..fb9cdfe2e 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -47,27 +47,13 @@ void vpx_log(const char *format, ...); #define MAX_MV_REFS 9 #define MAX_MV_REF_CANDIDATES 4 -#if CONFIG_DWTDCTHYBRID -#define DWT_MAX_LENGTH 64 -#define DWT_TYPE 26 // 26/53/97 -#define DWT_PRECISION_BITS 2 -#define DWT_PRECISION_RND ((1 << DWT_PRECISION_BITS) / 2) - -#define DWTDCT16X16 0 -#define DWTDCT16X16_LEAN 1 -#define DWTDCT8X8 2 -#define DWTDCT_TYPE DWTDCT16X16_LEAN -#endif - typedef struct { int r, c; } POS; -typedef enum PlaneType { - PLANE_TYPE_Y_NO_DC = 0, - PLANE_TYPE_Y2, - PLANE_TYPE_UV, +typedef enum { PLANE_TYPE_Y_WITH_DC, + PLANE_TYPE_UV, } PLANE_TYPE; typedef char ENTROPY_CONTEXT; @@ -75,10 +61,9 @@ typedef struct { ENTROPY_CONTEXT y1[4]; ENTROPY_CONTEXT u[2]; ENTROPY_CONTEXT v[2]; - ENTROPY_CONTEXT y2; } ENTROPY_CONTEXT_PLANES; -#define VP9_COMBINEENTROPYCONTEXTS( Dest, A, B) \ +#define 
VP9_COMBINEENTROPYCONTEXTS(Dest, A, B) \ Dest = ((A)!=0) + ((B)!=0); typedef enum { @@ -154,10 +139,7 @@ typedef enum { #define VP9_MVREFS (1 + SPLITMV - NEARESTMV) -#if CONFIG_LOSSLESS -#define WHT_UPSCALE_FACTOR 3 -#define Y2_WHT_UPSCALE_FACTOR 2 -#endif +#define WHT_UPSCALE_FACTOR 2 typedef enum { B_DC_PRED, /* average of above and left pixels */ @@ -300,23 +282,23 @@ typedef struct blockd { } BLOCKD; typedef struct superblockd { - /* 32x32 Y and 16x16 U/V. No 2nd order transform yet. */ + /* 32x32 Y and 16x16 U/V */ DECLARE_ALIGNED(16, int16_t, diff[32*32+16*16*2]); DECLARE_ALIGNED(16, int16_t, qcoeff[32*32+16*16*2]); DECLARE_ALIGNED(16, int16_t, dqcoeff[32*32+16*16*2]); } SUPERBLOCKD; typedef struct macroblockd { - DECLARE_ALIGNED(16, int16_t, diff[400]); /* from idct diff */ + DECLARE_ALIGNED(16, int16_t, diff[384]); /* from idct diff */ DECLARE_ALIGNED(16, uint8_t, predictor[384]); - DECLARE_ALIGNED(16, int16_t, qcoeff[400]); - DECLARE_ALIGNED(16, int16_t, dqcoeff[400]); - DECLARE_ALIGNED(16, uint16_t, eobs[25]); + DECLARE_ALIGNED(16, int16_t, qcoeff[384]); + DECLARE_ALIGNED(16, int16_t, dqcoeff[384]); + DECLARE_ALIGNED(16, uint16_t, eobs[24]); SUPERBLOCKD sb_coeff_data; - /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */ - BLOCKD block[25]; + /* 16 Y blocks, 4 U, 4 V, each with 16 entries. */ + BLOCKD block[24]; int fullpixel_mask; YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */ @@ -333,7 +315,7 @@ typedef struct macroblockd { int left_available; int right_available; - /* Y,U,V,Y2 */ + /* Y,U,V */ ENTROPY_CONTEXT_PLANES *above_context; ENTROPY_CONTEXT_PLANES *left_context; @@ -383,11 +365,19 @@ typedef struct macroblockd { unsigned int frames_since_golden; unsigned int frames_till_alt_ref_frame; + int lossless; /* Inverse transform function pointers. 
*/ - void (*inv_xform4x4_1_x8)(int16_t *input, int16_t *output, int pitch); - void (*inv_xform4x4_x8)(int16_t *input, int16_t *output, int pitch); - void (*inv_walsh4x4_1)(int16_t *in, int16_t *out); - void (*inv_walsh4x4_lossless)(int16_t *in, int16_t *out); + void (*inv_txm4x4_1)(int16_t *input, int16_t *output, int pitch); + void (*inv_txm4x4)(int16_t *input, int16_t *output, int pitch); + void (*itxm_add)(int16_t *input, const int16_t *dq, + uint8_t *pred, uint8_t *output, int pitch, int stride); + void (*dc_only_itxm_add)(int input_dc, uint8_t *pred_ptr, + uint8_t *dst_ptr, int pitch, int stride); + void (*itxm_add_y_block)(int16_t *q, const int16_t *dq, + uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs); + void (*itxm_add_uv_block)(int16_t *q, const int16_t *dq, + uint8_t *pre, uint8_t *dst_u, uint8_t *dst_v, int stride, + uint16_t *eobs); struct subpix_fn_table subpix; @@ -405,7 +395,7 @@ typedef struct macroblockd { #define ACTIVE_HT8 300 -#define ACTIVE_HT16 0 +#define ACTIVE_HT16 300 // convert MB_PREDICTION_MODE to B_PREDICTION_MODE static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) { @@ -483,8 +473,8 @@ static TX_TYPE txfm_map(B_PREDICTION_MODE bmode) { return tx_type; } -extern const uint8_t vp9_block2left[TX_SIZE_MAX_SB][25]; -extern const uint8_t vp9_block2above[TX_SIZE_MAX_SB][25]; +extern const uint8_t vp9_block2left[TX_SIZE_MAX_SB][24]; +extern const uint8_t vp9_block2above[TX_SIZE_MAX_SB][24]; #define USE_ADST_FOR_I16X16_8X8 0 #define USE_ADST_FOR_I16X16_4X4 0 @@ -498,6 +488,8 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) { int ib = (int)(b - xd->block); if (ib >= 16) return tx_type; + if (xd->lossless) + return DCT_DCT; // TODO(rbultje, debargha): Explore ADST usage for superblocks if (xd->mode_info_context->mbmi.sb_type) return tx_type; @@ -614,18 +606,8 @@ static TX_TYPE get_tx_type(const MACROBLOCKD *xd, const BLOCKD *b) { return tx_type; } -static int get_2nd_order_usage(const MACROBLOCKD *xd) 
{ - int has_2nd_order = (xd->mode_info_context->mbmi.mode != SPLITMV && - xd->mode_info_context->mbmi.mode != I8X8_PRED && - xd->mode_info_context->mbmi.mode != B_PRED && - xd->mode_info_context->mbmi.txfm_size != TX_16X16); - if (has_2nd_order) - has_2nd_order = (get_tx_type(xd, xd->block) == DCT_DCT); - return has_2nd_order; -} - -extern void vp9_build_block_doffsets(MACROBLOCKD *xd); -extern void vp9_setup_block_dptrs(MACROBLOCKD *xd); +void vp9_build_block_doffsets(MACROBLOCKD *xd); +void vp9_setup_block_dptrs(MACROBLOCKD *xd); static void update_blockd_bmi(MACROBLOCKD *xd) { int i; diff --git a/vp9/common/vp9_default_coef_probs.h b/vp9/common/vp9_default_coef_probs.h index 10d3c389f..c18712259 100644 --- a/vp9/common/vp9_default_coef_probs.h +++ b/vp9/common/vp9_default_coef_probs.h @@ -15,90 +15,48 @@ static const vp9_coeff_probs default_coef_probs_4x4[BLOCK_TYPES_4X4] = { { /* block Type 0 */ { /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 209, 89, 216, 242, 191, 190, 245, 191, 240, 235, 168 }, + { 142, 96, 196, 229, 173, 180, 233, 175, 247, 220, 174 }, + { 66, 89, 157, 205, 155, 171, 209, 156, 243, 200, 197 }, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } }, { /* Coeff Band 1 */ - { 224, 180, 254, 255, 234, 224, 255, 227, 128, 128, 128 }, - { 187, 178, 250, 255, 226, 218, 255, 229, 255, 255, 128 }, - { 145, 171, 243, 253, 219, 211, 254, 226, 255, 224, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + { 1, 159, 235, 246, 202, 197, 237, 186, 248, 223, 223 }, + { 96, 137, 223, 247, 203, 198, 242, 188, 241, 202, 209 }, + { 22, 95, 167, 243, 184, 196, 237, 187, 247, 221, 221 }, + { 3, 51, 81, 192, 125, 158, 220, 164, 242, 211, 197 } }, { /* Coeff Band 2 */ - { 1, 187, 252, 255, 231, 220, 255, 229, 255, 255, 128 }, - { 129, 174, 244, 254, 225, 216, 253, 219, 255, 255, 128 }, 
- { 16, 131, 193, 251, 205, 205, 254, 222, 255, 255, 128 }, - { 2, 93, 136, 236, 159, 179, 255, 197, 128, 128, 128 } + { 1, 145, 226, 244, 196, 194, 240, 191, 247, 225, 233 }, + { 66, 127, 203, 240, 188, 189, 239, 188, 248, 225, 220 }, + { 9, 83, 136, 224, 159, 176, 235, 177, 247, 223, 207 }, + { 2, 46, 71, 169, 121, 152, 210, 149, 241, 212, 199 } }, { /* Coeff Band 3 */ - { 1, 188, 254, 255, 241, 236, 254, 220, 255, 255, 128 }, - { 133, 165, 249, 255, 236, 220, 252, 220, 255, 255, 128 }, - { 20, 112, 203, 254, 217, 214, 255, 224, 255, 255, 128 }, - { 4, 61, 106, 240, 155, 189, 252, 202, 255, 255, 128 } + { 1, 174, 238, 249, 209, 201, 245, 198, 241, 196, 241 }, + { 76, 151, 223, 247, 203, 197, 245, 194, 243, 202, 198 }, + { 12, 102, 170, 240, 183, 187, 242, 191, 247, 225, 209 }, + { 1, 52, 85, 202, 135, 162, 225, 168, 240, 209, 221 } }, { /* Coeff Band 4 */ - { 1, 168, 252, 255, 239, 228, 253, 217, 255, 255, 128 }, - { 158, 163, 247, 255, 231, 221, 255, 242, 128, 128, 128 }, - { 23, 127, 205, 253, 212, 224, 255, 234, 255, 255, 128 }, - { 2, 83, 141, 237, 176, 210, 245, 207, 255, 255, 128 } + { 1, 140, 230, 247, 204, 198, 242, 190, 249, 209, 248 }, + { 94, 126, 213, 244, 195, 194, 240, 190, 247, 210, 237 }, + { 13, 95, 159, 232, 171, 181, 237, 179, 245, 205, 237 }, + { 1, 51, 83, 186, 128, 158, 216, 154, 240, 193, 229 } }, { /* Coeff Band 5 */ - { 1, 233, 254, 255, 243, 241, 255, 213, 128, 128, 128 }, - { 155, 213, 253, 255, 240, 221, 216, 112, 255, 255, 128 }, - { 41, 159, 237, 254, 229, 216, 255, 161, 128, 128, 128 }, - { 11, 95, 176, 244, 194, 191, 255, 167, 128, 128, 128 } + { 1, 218, 244, 251, 214, 202, 243, 199, 253, 214, 255 }, + { 91, 194, 238, 249, 210, 200, 247, 203, 251, 223, 255 }, + { 18, 140, 207, 247, 198, 194, 246, 203, 252, 213, 255 }, + { 3, 76, 126, 223, 156, 172, 233, 185, 251, 206, 255 } }, { /* Coeff Band 6 */ - { 1, 160, 253, 255, 238, 231, 255, 230, 255, 255, 128 }, - { 174, 152, 248, 255, 230, 223, 255, 223, 255, 255, 128 }, - { 86, 125, 
213, 253, 207, 207, 254, 224, 255, 171, 128 }, - { 39, 89, 156, 240, 168, 190, 251, 181, 255, 255, 128 } + { 1, 135, 235, 250, 210, 203, 246, 206, 251, 219, 241 }, + { 105, 120, 214, 246, 196, 196, 245, 195, 250, 216, 243 }, + { 24, 91, 154, 231, 166, 180, 241, 183, 250, 214, 242 }, + { 3, 53, 84, 183, 127, 157, 218, 153, 244, 195, 237 } }, { /* Coeff Band 7 */ - { 1, 101, 255, 255, 243, 244, 255, 255, 128, 128, 128 }, - { 230, 66, 255, 255, 238, 238, 128, 128, 128, 128, 128 }, - { 151, 92, 229, 255, 224, 197, 128, 128, 128, 128, 128 }, - { 109, 57, 171, 255, 73, 255, 128, 128, 128, 128, 128 } + { 1, 83, 246, 252, 215, 208, 246, 206, 255, 237, 128 }, + { 184, 61, 233, 250, 208, 204, 245, 198, 254, 227, 255 }, + { 83, 58, 190, 246, 189, 195, 244, 198, 255, 229, 128 }, + { 41, 38, 125, 214, 144, 169, 229, 171, 251, 216, 255 } } }, { /* block Type 1 */ { /* Coeff Band 0 */ - { 148, 109, 219, 239, 203, 184, 222, 172, 238, 203, 192 }, - { 101, 110, 206, 229, 181, 178, 224, 171, 250, 206, 180 }, - { 67, 108, 186, 222, 172, 174, 216, 167, 246, 195, 221 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 1 */ - { 1, 184, 249, 254, 226, 220, 253, 241, 255, 255, 128 }, - { 84, 182, 244, 254, 222, 218, 254, 217, 255, 255, 128 }, - { 56, 147, 210, 252, 208, 210, 253, 218, 255, 255, 128 }, - { 32, 124, 170, 233, 165, 178, 249, 196, 255, 253, 128 } - }, { /* Coeff Band 2 */ - { 1, 182, 242, 245, 208, 194, 239, 179, 255, 238, 128 }, - { 28, 170, 230, 241, 202, 192, 243, 171, 255, 243, 128 }, - { 16, 109, 165, 231, 182, 184, 237, 168, 255, 249, 255 }, - { 2, 76, 113, 202, 141, 172, 221, 160, 252, 227, 255 } - }, { /* Coeff Band 3 */ - { 1, 195, 249, 254, 230, 239, 251, 211, 255, 255, 128 }, - { 39, 164, 242, 254, 224, 222, 255, 235, 255, 255, 128 }, - { 16, 111, 179, 251, 204, 197, 251, 234, 255, 209, 128 }, - { 3, 84, 130, 225, 155, 176, 226, 196, 255, 238, 128 } - }, { /* Coeff Band 4 */ - { 1, 180, 248, 254, 227, 219, 254, 211, 255, 255, 128 }, - 
{ 38, 170, 242, 253, 222, 214, 254, 242, 255, 255, 128 }, - { 5, 111, 176, 250, 204, 197, 255, 208, 128, 128, 128 }, - { 1, 75, 120, 233, 146, 186, 250, 203, 255, 255, 128 } - }, { /* Coeff Band 5 */ - { 1, 183, 251, 255, 232, 223, 252, 229, 255, 255, 128 }, - { 51, 158, 245, 255, 230, 224, 255, 239, 128, 128, 128 }, - { 13, 80, 158, 253, 206, 216, 255, 233, 128, 128, 128 }, - { 4, 39, 76, 212, 107, 153, 252, 206, 255, 255, 128 } - }, { /* Coeff Band 6 */ - { 1, 181, 252, 254, 231, 214, 242, 225, 255, 236, 128 }, - { 81, 167, 247, 254, 229, 217, 252, 226, 255, 255, 128 }, - { 20, 122, 195, 253, 213, 212, 249, 211, 255, 238, 128 }, - { 18, 100, 153, 231, 158, 182, 244, 203, 255, 219, 128 } - }, { /* Coeff Band 7 */ - { 1, 100, 254, 255, 242, 246, 255, 230, 128, 128, 128 }, - { 177, 62, 250, 255, 246, 210, 255, 255, 128, 128, 128 }, - { 65, 58, 186, 255, 227, 241, 255, 219, 128, 128, 128 }, - { 45, 23, 118, 244, 162, 208, 255, 228, 128, 128, 128 } - } - }, { /* block Type 2 */ - { /* Coeff Band 0 */ { 242, 73, 238, 244, 198, 192, 241, 189, 253, 226, 247 }, { 171, 70, 204, 231, 180, 183, 228, 172, 247, 215, 221 }, { 73, 62, 144, 202, 153, 169, 207, 153, 245, 199, 230 }, @@ -139,179 +97,11 @@ static const vp9_coeff_probs default_coef_probs_4x4[BLOCK_TYPES_4X4] = { { 105, 56, 192, 248, 192, 197, 252, 212, 255, 205, 128 }, { 53, 32, 133, 228, 151, 177, 250, 192, 255, 255, 128 } } - }, { /* block Type 3 */ - { /* Coeff Band 0 */ - { 209, 89, 216, 242, 191, 190, 245, 191, 240, 235, 168 }, - { 142, 96, 196, 229, 173, 180, 233, 175, 247, 220, 174 }, - { 66, 89, 157, 205, 155, 171, 209, 156, 243, 200, 197 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 1 */ - { 1, 159, 235, 246, 202, 197, 237, 186, 248, 223, 223 }, - { 96, 137, 223, 247, 203, 198, 242, 188, 241, 202, 209 }, - { 22, 95, 167, 243, 184, 196, 237, 187, 247, 221, 221 }, - { 3, 51, 81, 192, 125, 158, 220, 164, 242, 211, 197 } - }, { /* Coeff Band 2 */ - { 1, 145, 226, 244, 196, 
194, 240, 191, 247, 225, 233 }, - { 66, 127, 203, 240, 188, 189, 239, 188, 248, 225, 220 }, - { 9, 83, 136, 224, 159, 176, 235, 177, 247, 223, 207 }, - { 2, 46, 71, 169, 121, 152, 210, 149, 241, 212, 199 } - }, { /* Coeff Band 3 */ - { 1, 174, 238, 249, 209, 201, 245, 198, 241, 196, 241 }, - { 76, 151, 223, 247, 203, 197, 245, 194, 243, 202, 198 }, - { 12, 102, 170, 240, 183, 187, 242, 191, 247, 225, 209 }, - { 1, 52, 85, 202, 135, 162, 225, 168, 240, 209, 221 } - }, { /* Coeff Band 4 */ - { 1, 140, 230, 247, 204, 198, 242, 190, 249, 209, 248 }, - { 94, 126, 213, 244, 195, 194, 240, 190, 247, 210, 237 }, - { 13, 95, 159, 232, 171, 181, 237, 179, 245, 205, 237 }, - { 1, 51, 83, 186, 128, 158, 216, 154, 240, 193, 229 } - }, { /* Coeff Band 5 */ - { 1, 218, 244, 251, 214, 202, 243, 199, 253, 214, 255 }, - { 91, 194, 238, 249, 210, 200, 247, 203, 251, 223, 255 }, - { 18, 140, 207, 247, 198, 194, 246, 203, 252, 213, 255 }, - { 3, 76, 126, 223, 156, 172, 233, 185, 251, 206, 255 } - }, { /* Coeff Band 6 */ - { 1, 135, 235, 250, 210, 203, 246, 206, 251, 219, 241 }, - { 105, 120, 214, 246, 196, 196, 245, 195, 250, 216, 243 }, - { 24, 91, 154, 231, 166, 180, 241, 183, 250, 214, 242 }, - { 3, 53, 84, 183, 127, 157, 218, 153, 244, 195, 237 } - }, { /* Coeff Band 7 */ - { 1, 83, 246, 252, 215, 208, 246, 206, 255, 237, 128 }, - { 184, 61, 233, 250, 208, 204, 245, 198, 254, 227, 255 }, - { 83, 58, 190, 246, 189, 195, 244, 198, 255, 229, 128 }, - { 41, 38, 125, 214, 144, 169, 229, 171, 251, 216, 255 } - } } }; -static const vp9_coeff_probs default_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4] = { +static const vp9_coeff_probs default_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4_HYBRID] = { { /* block Type 0 */ { /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 1 */ 
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 2 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 3 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 4 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 5 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 6 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 7 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - } - }, { /* block Type 1 */ - { /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 
128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 1 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 2 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 3 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 4 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 5 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 6 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 7 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 
128, 128, 128, 128, 128, 128, 128 } - } - }, { /* block Type 2 */ - { /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 1 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 2 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 3 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 4 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 5 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 6 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 7 */ - { 128, 128, 128, 128, 
128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - } - }, { /* block Type 3 */ - { /* Coeff Band 0 */ { 191, 34, 178, 193, 160, 173, 196, 142, 247, 191, 244 }, { 84, 45, 129, 187, 145, 170, 189, 145, 240, 186, 212 }, { 14, 36, 69, 149, 120, 154, 177, 136, 231, 177, 196 }, @@ -357,90 +147,48 @@ static const vp9_coeff_probs default_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4] = { static const vp9_coeff_probs default_coef_probs_8x8[BLOCK_TYPES_8X8] = { { /* block Type 0 */ { /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 140, 101, 214, 227, 176, 182, 218, 167, 233, 205, 164 }, + { 96, 101, 176, 204, 161, 173, 193, 152, 223, 182, 182 }, + { 27, 84, 123, 176, 140, 162, 190, 142, 238, 189, 210 }, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } }, { /* Coeff Band 1 */ - { 179, 203, 246, 252, 217, 208, 249, 197, 238, 237, 255 }, - { 136, 193, 232, 247, 202, 199, 245, 194, 255, 235, 255 }, - { 66, 170, 209, 244, 190, 191, 250, 199, 255, 242, 192 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + { 1, 178, 218, 240, 189, 189, 238, 184, 250, 232, 189 }, + { 69, 146, 204, 239, 187, 189, 238, 183, 251, 226, 221 }, + { 16, 98, 157, 234, 170, 185, 237, 183, 252, 220, 218 }, + { 3, 49, 78, 172, 122, 154, 204, 150, 242, 198, 207 } }, { /* Coeff Band 2 */ - { 1, 191, 232, 250, 204, 201, 248, 199, 254, 243, 213 }, - { 50, 161, 209, 247, 196, 197, 250, 206, 253, 240, 213 }, - { 6, 118, 160, 239, 173, 186, 249, 203, 254, 235, 255 }, - { 2, 90, 110, 211, 141, 166, 242, 181, 254, 235, 255 } + { 1, 165, 207, 230, 179, 181, 234, 172, 252, 228, 218 }, + { 25, 130, 175, 224, 169, 177, 232, 169, 252, 230, 207 }, + { 4, 81, 118, 205, 144, 167, 227, 162, 
252, 225, 219 }, + { 2, 51, 63, 150, 114, 148, 197, 138, 244, 202, 204 } }, { /* Coeff Band 3 */ - { 1, 209, 242, 254, 223, 215, 253, 218, 255, 253, 128 }, - { 58, 168, 227, 253, 216, 211, 254, 226, 255, 251, 128 }, - { 7, 111, 178, 249, 195, 202, 253, 222, 254, 240, 255 }, - { 2, 63, 103, 226, 142, 175, 250, 202, 255, 246, 128 } + { 1, 181, 222, 247, 200, 197, 246, 199, 252, 232, 228 }, + { 25, 142, 200, 244, 190, 193, 245, 195, 253, 233, 204 }, + { 3, 90, 146, 233, 166, 181, 242, 188, 252, 229, 216 }, + { 1, 47, 79, 188, 124, 157, 222, 162, 245, 213, 203 } }, { /* Coeff Band 4 */ - { 1, 207, 241, 252, 213, 205, 252, 215, 255, 228, 255 }, - { 55, 171, 225, 251, 209, 205, 251, 212, 254, 234, 255 }, - { 5, 108, 173, 247, 187, 195, 251, 211, 255, 231, 128 }, - { 2, 56, 97, 220, 138, 169, 248, 191, 253, 237, 255 } + { 1, 179, 220, 242, 195, 191, 237, 182, 251, 217, 231 }, + { 27, 144, 200, 241, 188, 190, 238, 185, 250, 224, 235 }, + { 3, 93, 149, 230, 166, 180, 235, 180, 249, 222, 221 }, + { 1, 47, 79, 181, 125, 157, 211, 154, 241, 205, 198 } }, { /* Coeff Band 5 */ - { 1, 211, 245, 255, 227, 219, 255, 233, 255, 255, 128 }, - { 58, 175, 228, 254, 217, 215, 255, 231, 255, 255, 128 }, - { 6, 124, 181, 249, 191, 199, 255, 222, 255, 251, 128 }, - { 2, 85, 122, 227, 149, 172, 250, 195, 255, 245, 128 } + { 1, 176, 222, 247, 202, 198, 247, 199, 252, 234, 219 }, + { 24, 139, 197, 244, 190, 192, 246, 196, 253, 232, 220 }, + { 2, 89, 140, 229, 161, 178, 243, 185, 253, 233, 234 }, + { 1, 49, 76, 176, 121, 154, 214, 153, 243, 209, 208 } }, { /* Coeff Band 6 */ - { 1, 216, 246, 255, 231, 217, 254, 220, 255, 250, 128 }, - { 74, 177, 236, 254, 222, 214, 254, 221, 255, 255, 128 }, - { 13, 125, 192, 250, 200, 203, 254, 217, 255, 245, 128 }, - { 2, 70, 114, 227, 147, 175, 251, 198, 255, 240, 128 } + { 1, 197, 233, 251, 213, 205, 247, 206, 249, 222, 247 }, + { 35, 159, 216, 249, 203, 201, 246, 203, 250, 222, 223 }, + { 4, 108, 167, 240, 178, 188, 244, 195, 248, 220, 235 }, + { 1, 58, 
93, 198, 133, 161, 220, 167, 233, 195, 221 } }, { /* Coeff Band 7 */ - { 1, 199, 246, 255, 238, 229, 255, 226, 255, 255, 128 }, - { 132, 162, 240, 255, 229, 222, 255, 239, 255, 255, 128 }, - { 79, 125, 207, 253, 213, 214, 255, 232, 255, 255, 128 }, - { 41, 89, 149, 240, 161, 187, 250, 216, 255, 255, 128 } + { 1, 188, 240, 253, 221, 209, 248, 207, 252, 223, 255 }, + { 84, 153, 227, 251, 212, 205, 247, 205, 254, 215, 255 }, + { 25, 117, 182, 244, 186, 192, 243, 198, 250, 209, 255 }, + { 7, 72, 108, 197, 138, 162, 203, 161, 240, 178, 247 } } }, { /* block Type 1 */ { /* Coeff Band 0 */ - { 138, 65, 189, 212, 172, 169, 200, 153, 233, 182, 214 }, - { 93, 60, 162, 203, 160, 169, 200, 153, 239, 190, 213 }, - { 66, 55, 141, 195, 152, 166, 199, 152, 238, 190, 212 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 1 */ - { 1, 102, 221, 247, 205, 198, 248, 201, 255, 235, 128 }, - { 122, 95, 215, 247, 200, 197, 248, 200, 254, 227, 255 }, - { 60, 81, 166, 241, 177, 190, 245, 193, 255, 246, 255 }, - { 32, 61, 108, 195, 133, 159, 230, 163, 254, 230, 238 } - }, { /* Coeff Band 2 */ - { 1, 58, 203, 242, 194, 193, 229, 177, 253, 225, 249 }, - { 113, 62, 192, 237, 184, 187, 231, 181, 253, 220, 249 }, - { 50, 50, 135, 225, 159, 177, 229, 172, 254, 222, 241 }, - { 24, 34, 82, 185, 125, 152, 223, 158, 253, 212, 219 } - }, { /* Coeff Band 3 */ - { 1, 1, 220, 253, 218, 209, 251, 213, 255, 255, 128 }, - { 154, 1, 216, 252, 211, 206, 252, 212, 255, 252, 128 }, - { 102, 1, 157, 249, 184, 200, 253, 214, 255, 247, 128 }, - { 68, 1, 101, 213, 129, 161, 247, 186, 255, 237, 255 } - }, { /* Coeff Band 4 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 5 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 
128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 6 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 7 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - } - }, { /* block Type 2 */ - { /* Coeff Band 0 */ { 229, 64, 235, 236, 189, 190, 227, 179, 247, 203, 226 }, { 148, 70, 194, 228, 175, 182, 216, 170, 238, 192, 224 }, { 53, 63, 134, 207, 150, 169, 213, 161, 247, 204, 232 }, @@ -481,179 +229,11 @@ static const vp9_coeff_probs default_coef_probs_8x8[BLOCK_TYPES_8X8] = { { 18, 109, 175, 247, 184, 195, 253, 211, 255, 250, 128 }, { 3, 64, 113, 219, 144, 171, 246, 187, 255, 250, 128 } } - }, { /* block Type 3 */ - { /* Coeff Band 0 */ - { 140, 101, 214, 227, 176, 182, 218, 167, 233, 205, 164 }, - { 96, 101, 176, 204, 161, 173, 193, 152, 223, 182, 182 }, - { 27, 84, 123, 176, 140, 162, 190, 142, 238, 189, 210 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 1 */ - { 1, 178, 218, 240, 189, 189, 238, 184, 250, 232, 189 }, - { 69, 146, 204, 239, 187, 189, 238, 183, 251, 226, 221 }, - { 16, 98, 157, 234, 170, 185, 237, 183, 252, 220, 218 }, - { 3, 49, 78, 172, 122, 154, 204, 150, 242, 198, 207 } - }, { /* Coeff Band 2 */ - { 1, 165, 207, 230, 179, 181, 234, 172, 252, 228, 218 }, - { 25, 130, 175, 224, 169, 177, 232, 169, 252, 230, 207 }, - { 4, 81, 118, 205, 144, 167, 227, 162, 252, 225, 219 }, - { 2, 51, 63, 150, 114, 148, 197, 138, 244, 202, 204 } - }, { /* Coeff Band 3 */ - { 1, 181, 222, 247, 200, 197, 246, 199, 
252, 232, 228 }, - { 25, 142, 200, 244, 190, 193, 245, 195, 253, 233, 204 }, - { 3, 90, 146, 233, 166, 181, 242, 188, 252, 229, 216 }, - { 1, 47, 79, 188, 124, 157, 222, 162, 245, 213, 203 } - }, { /* Coeff Band 4 */ - { 1, 179, 220, 242, 195, 191, 237, 182, 251, 217, 231 }, - { 27, 144, 200, 241, 188, 190, 238, 185, 250, 224, 235 }, - { 3, 93, 149, 230, 166, 180, 235, 180, 249, 222, 221 }, - { 1, 47, 79, 181, 125, 157, 211, 154, 241, 205, 198 } - }, { /* Coeff Band 5 */ - { 1, 176, 222, 247, 202, 198, 247, 199, 252, 234, 219 }, - { 24, 139, 197, 244, 190, 192, 246, 196, 253, 232, 220 }, - { 2, 89, 140, 229, 161, 178, 243, 185, 253, 233, 234 }, - { 1, 49, 76, 176, 121, 154, 214, 153, 243, 209, 208 } - }, { /* Coeff Band 6 */ - { 1, 197, 233, 251, 213, 205, 247, 206, 249, 222, 247 }, - { 35, 159, 216, 249, 203, 201, 246, 203, 250, 222, 223 }, - { 4, 108, 167, 240, 178, 188, 244, 195, 248, 220, 235 }, - { 1, 58, 93, 198, 133, 161, 220, 167, 233, 195, 221 } - }, { /* Coeff Band 7 */ - { 1, 188, 240, 253, 221, 209, 248, 207, 252, 223, 255 }, - { 84, 153, 227, 251, 212, 205, 247, 205, 254, 215, 255 }, - { 25, 117, 182, 244, 186, 192, 243, 198, 250, 209, 255 }, - { 7, 72, 108, 197, 138, 162, 203, 161, 240, 178, 247 } - } } }; -static const vp9_coeff_probs default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8] = { +static const vp9_coeff_probs default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8_HYBRID] = { { /* block Type 0 */ { /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 1 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 2 */ - { 128, 
128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 3 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 4 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 5 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 6 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 7 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - } - }, { /* block Type 1 */ - { /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 1 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 
128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 2 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 3 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 4 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 5 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 6 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 7 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - } - }, { /* block Type 2 */ - { /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - 
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 1 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 2 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 3 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 4 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 5 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 6 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 7 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - } - }, { /* block Type 3 */ - { /* Coeff Band 0 */ { 
118, 27, 105, 170, 137, 166, 183, 137, 243, 189, 241 }, { 44, 34, 85, 142, 127, 158, 161, 128, 232, 174, 213 }, { 8, 26, 47, 104, 108, 145, 143, 117, 226, 168, 207 }, @@ -699,90 +279,48 @@ static const vp9_coeff_probs default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8] = { static const vp9_coeff_probs default_coef_probs_16x16[BLOCK_TYPES_16X16] = { { /* block Type 0 */ { /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 14, 78, 225, 217, 173, 181, 198, 153, 228, 185, 176 }, + { 9, 74, 179, 191, 157, 171, 178, 143, 229, 175, 209 }, + { 3, 48, 92, 128, 130, 155, 135, 123, 220, 155, 219 }, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } }, { /* Coeff Band 1 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + { 1, 178, 209, 214, 173, 175, 208, 152, 252, 210, 237 }, + { 142, 151, 193, 212, 170, 175, 209, 151, 251, 208, 237 }, + { 38, 105, 150, 206, 159, 173, 208, 151, 250, 209, 238 }, + { 5, 44, 61, 128, 114, 147, 167, 125, 239, 184, 217 } }, { /* Coeff Band 2 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + { 1, 154, 195, 202, 166, 173, 184, 144, 245, 184, 236 }, + { 49, 110, 150, 188, 155, 168, 180, 141, 244, 183, 239 }, + { 4, 63, 90, 158, 132, 157, 171, 134, 243, 179, 239 }, + { 1, 25, 37, 93, 104, 141, 133, 114, 231, 161, 226 } }, { /* Coeff Band 3 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 
128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + { 1, 184, 201, 223, 173, 177, 224, 164, 253, 220, 238 }, + { 42, 127, 170, 215, 164, 173, 223, 162, 253, 219, 233 }, + { 4, 75, 114, 195, 142, 164, 218, 155, 253, 217, 235 }, + { 1, 32, 50, 128, 108, 144, 180, 127, 247, 197, 219 } }, { /* Coeff Band 4 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + { 1, 190, 207, 232, 181, 184, 228, 172, 251, 216, 212 }, + { 35, 136, 180, 227, 173, 180, 227, 171, 251, 216, 218 }, + { 2, 85, 131, 214, 154, 173, 224, 166, 250, 214, 225 }, + { 1, 44, 71, 162, 120, 153, 195, 143, 240, 195, 197 } }, { /* Coeff Band 5 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + { 1, 185, 201, 230, 177, 180, 232, 172, 253, 225, 235 }, + { 27, 122, 165, 221, 164, 175, 230, 169, 253, 224, 220 }, + { 1, 72, 108, 197, 139, 163, 224, 159, 253, 224, 226 }, + { 1, 33, 51, 132, 107, 144, 186, 130, 245, 201, 206 } }, { /* Coeff Band 6 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + { 1, 203, 214, 240, 193, 191, 235, 178, 252, 225, 224 }, + { 20, 140, 188, 235, 182, 186, 234, 177, 252, 226, 226 }, + { 1, 85, 132, 218, 155, 174, 230, 170, 251, 224, 227 }, + { 1, 39, 62, 154, 114, 150, 199, 141, 241, 203, 214 } }, { /* Coeff Band 7 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 
128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + { 1, 217, 224, 244, 202, 193, 241, 187, 252, 227, 239 }, + { 22, 151, 200, 239, 187, 188, 240, 184, 252, 226, 237 }, + { 2, 90, 138, 222, 158, 174, 237, 176, 252, 226, 239 }, + { 1, 41, 66, 163, 116, 151, 206, 146, 243, 201, 230 } } }, { /* block Type 1 */ { /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 1 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 2 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 3 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 4 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 5 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 6 */ - { 128, 128, 128, 128, 128, 128, 
128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 7 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - } - }, { /* block Type 2 */ - { /* Coeff Band 0 */ { 223, 34, 236, 234, 193, 185, 216, 169, 239, 189, 229 }, { 125, 40, 195, 221, 173, 175, 209, 165, 220, 181, 196 }, { 41, 37, 127, 185, 145, 162, 191, 150, 227, 180, 219 }, @@ -823,179 +361,11 @@ static const vp9_coeff_probs default_coef_probs_16x16[BLOCK_TYPES_16X16] = { { 6, 106, 163, 240, 176, 188, 247, 198, 251, 222, 255 }, { 1, 51, 88, 196, 127, 159, 232, 169, 252, 214, 255 } } - }, { /* block Type 3 */ - { /* Coeff Band 0 */ - { 14, 78, 225, 217, 173, 181, 198, 153, 228, 185, 176 }, - { 9, 74, 179, 191, 157, 171, 178, 143, 229, 175, 209 }, - { 3, 48, 92, 128, 130, 155, 135, 123, 220, 155, 219 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 1 */ - { 1, 178, 209, 214, 173, 175, 208, 152, 252, 210, 237 }, - { 142, 151, 193, 212, 170, 175, 209, 151, 251, 208, 237 }, - { 38, 105, 150, 206, 159, 173, 208, 151, 250, 209, 238 }, - { 5, 44, 61, 128, 114, 147, 167, 125, 239, 184, 217 } - }, { /* Coeff Band 2 */ - { 1, 154, 195, 202, 166, 173, 184, 144, 245, 184, 236 }, - { 49, 110, 150, 188, 155, 168, 180, 141, 244, 183, 239 }, - { 4, 63, 90, 158, 132, 157, 171, 134, 243, 179, 239 }, - { 1, 25, 37, 93, 104, 141, 133, 114, 231, 161, 226 } - }, { /* Coeff Band 3 */ - { 1, 184, 201, 223, 173, 177, 224, 164, 253, 220, 238 }, - { 42, 127, 170, 215, 164, 173, 223, 162, 253, 219, 233 }, - { 4, 75, 114, 195, 142, 164, 218, 155, 253, 217, 235 }, - { 1, 32, 50, 128, 108, 144, 180, 127, 247, 197, 219 } - }, { /* Coeff Band 
4 */ - { 1, 190, 207, 232, 181, 184, 228, 172, 251, 216, 212 }, - { 35, 136, 180, 227, 173, 180, 227, 171, 251, 216, 218 }, - { 2, 85, 131, 214, 154, 173, 224, 166, 250, 214, 225 }, - { 1, 44, 71, 162, 120, 153, 195, 143, 240, 195, 197 } - }, { /* Coeff Band 5 */ - { 1, 185, 201, 230, 177, 180, 232, 172, 253, 225, 235 }, - { 27, 122, 165, 221, 164, 175, 230, 169, 253, 224, 220 }, - { 1, 72, 108, 197, 139, 163, 224, 159, 253, 224, 226 }, - { 1, 33, 51, 132, 107, 144, 186, 130, 245, 201, 206 } - }, { /* Coeff Band 6 */ - { 1, 203, 214, 240, 193, 191, 235, 178, 252, 225, 224 }, - { 20, 140, 188, 235, 182, 186, 234, 177, 252, 226, 226 }, - { 1, 85, 132, 218, 155, 174, 230, 170, 251, 224, 227 }, - { 1, 39, 62, 154, 114, 150, 199, 141, 241, 203, 214 } - }, { /* Coeff Band 7 */ - { 1, 217, 224, 244, 202, 193, 241, 187, 252, 227, 239 }, - { 22, 151, 200, 239, 187, 188, 240, 184, 252, 226, 237 }, - { 2, 90, 138, 222, 158, 174, 237, 176, 252, 226, 239 }, - { 1, 41, 66, 163, 116, 151, 206, 146, 243, 201, 230 } - } } }; -static const vp9_coeff_probs default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16] = { +static const vp9_coeff_probs default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16_HYBRID] = { { /* block Type 0 */ { /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 1 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 2 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 
128, 128, 128, 128, 128 } - }, { /* Coeff Band 3 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 4 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 5 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 6 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 7 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - } - }, { /* block Type 1 */ - { /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 1 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 2 */ - { 128, 128, 128, 128, 128, 128, 
128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 3 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 4 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 5 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 6 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 7 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - } - }, { /* block Type 2 */ - { /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 1 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - 
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 2 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 3 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 4 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 5 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 6 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 7 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - } - }, { /* block Type 3 */ - { /* Coeff Band 0 */ { 3, 29, 86, 140, 130, 163, 135, 131, 190, 148, 186 }, { 1, 26, 61, 105, 124, 156, 105, 119, 178, 138, 173 }, { 1, 15, 28, 60, 105, 142, 80, 105, 173, 128, 178 }, @@ -1041,132 +411,6 @@ static const 
vp9_coeff_probs default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16] static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES_32X32] = { { /* block Type 0 */ { /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 1 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 2 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 3 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 4 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 5 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 6 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 
128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 7 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - } - }, { /* block Type 1 */ - { /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 1 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 2 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 3 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 4 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 5 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 6 */ - { 128, 128, 
128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 7 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - } - }, { /* block Type 2 */ - { /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 1 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 2 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 3 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 4 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 5 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 
128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 6 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 7 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - } - }, { /* block Type 3 */ - { /* Coeff Band 0 */ { 8, 40, 224, 217, 183, 181, 180, 148, 200, 180, 123 }, { 6, 37, 178, 193, 173, 171, 160, 139, 205, 166, 173 }, { 3, 27, 93, 133, 143, 159, 115, 125, 183, 141, 178 }, diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index 352e17c0c..ec96b4489 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license @@ -41,8 +41,12 @@ DECLARE_ALIGNED(16, const uint8_t, vp9_norm[256]) = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -DECLARE_ALIGNED(16, const int, vp9_coef_bands_4x4[16]) = { - 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7 +// Unified coefficient band structure used by all block sizes +DECLARE_ALIGNED(16, const int, vp9_coef_bands[32]) = { + 0, 1, 2, 3, 5, 4, 4, 5, + 5, 3, 6, 6, 6, 6, 6, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7 }; DECLARE_ALIGNED(16, const uint8_t, vp9_prev_token_class[MAX_ENTROPY_TOKENS]) = { @@ -70,17 +74,6 @@ DECLARE_ALIGNED(16, const int, vp9_row_scan_4x4[16]) = { 12, 13, 14, 15 }; -DECLARE_ALIGNED(64, const int, vp9_coef_bands_8x8[64]) = { - 0, 1, 2, 3, 5, 4, 4, 5, - 5, 3, 6, 3, 5, 4, 6, 6, - 6, 5, 5, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7 -}; - DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]) = { 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, @@ -88,26 +81,6 @@ DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]) = { 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, }; -// Table can be optimized. 
-DECLARE_ALIGNED(16, const int, vp9_coef_bands_16x16[256]) = { - 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, - 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -}; - DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = { 0, 1, 16, 32, 17, 2, 3, 18, 33, 48, 64, 49, 34, 19, 4, 5, @@ -143,694 +116,6 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = { 237, 252, 253, 238, 223, 239, 254, 255, }; -#if CONFIG_DWTDCTHYBRID - -#if DWTDCT_TYPE == DWTDCT16X16_LEAN -DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { - 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, - 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -}; - -DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { - 0, 1, 32, 64, 33, 2, 3, 34, - 65, 96, 128, 97, 66, 35, 4, 5, - 36, 67, 98, 129, 160, 192, 161, 130, - 99, 68, 37, 6, 7, 38, 69, 100, - 131, 162, 193, 224, 256, 225, 194, 163, - 132, 101, 70, 39, 8, 9, 40, 71, - 102, 133, 164, 195, 226, 257, 288, 320, - 289, 258, 227, 196, 165, 134, 103, 72, - 41, 10, 11, 42, 73, 104, 135, 166, - 197, 228, 259, 290, 321, 352, 384, 353, - 322, 291, 260, 229, 198, 167, 136, 105, - 74, 43, 12, 13, 44, 75, 106, 137, - 168, 199, 230, 261, 292, 323, 354, 385, - 416, 448, 417, 386, 355, 324, 293, 262, - 231, 200, 169, 138, 107, 76, 45, 14, - 15, 46, 77, 108, 139, 170, 201, 232, - 263, 294, 325, 356, 387, 418, 449, 480, - 481, 450, 419, 388, 357, 326, 295, 264, - 233, 202, 171, 140, 109, 78, 47, 79, - 110, 141, 172, 203, 234, 265, 296, 327, - 358, 389, 420, 451, 482, 483, 452, 421, - 390, 359, 328, 297, 266, 235, 204, 173, - 142, 111, 143, 174, 205, 236, 267, 298, - 329, 360, 391, 422, 453, 484, 485, 454, - 423, 392, 361, 330, 299, 268, 237, 206, - 175, 207, 238, 269, 300, 331, 362, 393, - 424, 455, 486, 487, 456, 425, 394, 363, - 332, 301, 270, 239, 271, 302, 333, 364, - 395, 426, 457, 488, 489, 458, 427, 396, - 365, 334, 303, 335, 366, 397, 428, 459, - 490, 491, 460, 429, 398, 367, 399, 430, - 461, 492, 493, 462, 431, 463, 494, 495, - - 16, 512, 528, 17, 513, 529, 48, 544, - 560, 80, 576, 592, 49, 545, 561, 18, - 514, 530, 19, 515, 531, 50, 546, 562, - 81, 577, 593, 112, 608, 624, 144, 640, - 656, 113, 
609, 625, 82, 578, 594, 51, - 547, 563, 20, 516, 532, 21, 517, 533, - 52, 548, 564, 83, 579, 595, 114, 610, - 626, 145, 641, 657, 176, 672, 688, 208, - 704, 720, 177, 673, 689, 146, 642, 658, - 115, 611, 627, 84, 580, 596, 53, 549, - 565, 22, 518, 534, 23, 519, 535, 54, - 550, 566, 85, 581, 597, 116, 612, 628, - 147, 643, 659, 178, 674, 690, 209, 705, - 721, 240, 736, 752, 272, 768, 784, 241, - 737, 753, 210, 706, 722, 179, 675, 691, - 148, 644, 660, 117, 613, 629, 86, 582, - 598, 55, 551, 567, 24, 520, 536, 25, - 521, 537, 56, 552, 568, 87, 583, 599, - 118, 614, 630, 149, 645, 661, 180, 676, - 692, 211, 707, 723, 242, 738, 754, 273, - 769, 785, 304, 800, 816, 336, 832, 848, - 305, 801, 817, 274, 770, 786, 243, 739, - 755, 212, 708, 724, 181, 677, 693, 150, - 646, 662, 119, 615, 631, 88, 584, 600, - 57, 553, 569, 26, 522, 538, 27, 523, - 539, 58, 554, 570, 89, 585, 601, 120, - 616, 632, 151, 647, 663, 182, 678, 694, - 213, 709, 725, 244, 740, 756, 275, 771, - 787, 306, 802, 818, 337, 833, 849, 368, - 864, 880, 400, 896, 912, 369, 865, 881, - 338, 834, 850, 307, 803, 819, 276, 772, - 788, 245, 741, 757, 214, 710, 726, 183, - - 679, 695, 152, 648, 664, 121, 617, 633, - 90, 586, 602, 59, 555, 571, 28, 524, - 540, 29, 525, 541, 60, 556, 572, 91, - 587, 603, 122, 618, 634, 153, 649, 665, - 184, 680, 696, 215, 711, 727, 246, 742, - 758, 277, 773, 789, 308, 804, 820, 339, - 835, 851, 370, 866, 882, 401, 897, 913, - 432, 928, 944, 464, 960, 976, 433, 929, - 945, 402, 898, 914, 371, 867, 883, 340, - 836, 852, 309, 805, 821, 278, 774, 790, - 247, 743, 759, 216, 712, 728, 185, 681, - 697, 154, 650, 666, 123, 619, 635, 92, - 588, 604, 61, 557, 573, 30, 526, 542, - 31, 527, 543, 62, 558, 574, 93, 589, - 605, 124, 620, 636, 155, 651, 667, 186, - 682, 698, 217, 713, 729, 248, 744, 760, - 279, 775, 791, 310, 806, 822, 341, 837, - 853, 372, 868, 884, 403, 899, 915, 434, - 930, 946, 465, 961, 977, 496, 992, 1008, - 497, 993, 1009, 466, 962, 978, 435, 931, - 947, 404, 900, 916, 373, 
869, 885, 342, - 838, 854, 311, 807, 823, 280, 776, 792, - 249, 745, 761, 218, 714, 730, 187, 683, - 699, 156, 652, 668, 125, 621, 637, 94, - 590, 606, 63, 559, 575, 95, 591, 607, - 126, 622, 638, 157, 653, 669, 188, 684, - 700, 219, 715, 731, 250, 746, 762, 281, - 777, 793, 312, 808, 824, 343, 839, 855, - 374, 870, 886, 405, 901, 917, 436, 932, - 948, 467, 963, 979, 498, 994, 1010, 499, - 995, 1011, 468, 964, 980, 437, 933, 949, - 406, 902, 918, 375, 871, 887, 344, 840, - - 856, 313, 809, 825, 282, 778, 794, 251, - 747, 763, 220, 716, 732, 189, 685, 701, - 158, 654, 670, 127, 623, 639, 159, 655, - 671, 190, 686, 702, 221, 717, 733, 252, - 748, 764, 283, 779, 795, 314, 810, 826, - 345, 841, 857, 376, 872, 888, 407, 903, - 919, 438, 934, 950, 469, 965, 981, 500, - 996, 1012, 501, 997, 1013, 470, 966, 982, - 439, 935, 951, 408, 904, 920, 377, 873, - 889, 346, 842, 858, 315, 811, 827, 284, - 780, 796, 253, 749, 765, 222, 718, 734, - 191, 687, 703, 223, 719, 735, 254, 750, - 766, 285, 781, 797, 316, 812, 828, 347, - 843, 859, 378, 874, 890, 409, 905, 921, - 440, 936, 952, 471, 967, 983, 502, 998, - 1014, 503, 999, 1015, 472, 968, 984, 441, - 937, 953, 410, 906, 922, 379, 875, 891, - 348, 844, 860, 317, 813, 829, 286, 782, - 798, 255, 751, 767, 287, 783, 799, 318, - 814, 830, 349, 845, 861, 380, 876, 892, - 411, 907, 923, 442, 938, 954, 473, 969, - 985, 504, 1000, 1016, 505, 1001, 1017, 474, - 970, 986, 443, 939, 955, 412, 908, 924, - 381, 877, 893, 350, 846, 862, 319, 815, - 831, 351, 847, 863, 382, 878, 894, 413, - 909, 925, 444, 940, 956, 475, 971, 987, - 506, 1002, 1018, 507, 1003, 1019, 476, 972, - 988, 445, 941, 957, 414, 910, 926, 383, - 879, 895, 415, 911, 927, 446, 942, 958, - 477, 973, 989, 508, 1004, 1020, 509, 1005, - 1021, 478, 974, 990, 447, 943, 959, 479, - 975, 991, 510, 1006, 1022, 511, 1007, 1023, -}; - -#elif DWTDCT_TYPE == DWTDCT16X16 - -DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { - 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, - 6, 
6, 6, - 6, - 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -}; - -DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { - 0, 1, 32, 64, 33, 2, 3, 34, - 65, 96, 128, 97, 66, 35, 4, - 16, 512, 528, - 5, - 36, 67, 98, 129, 160, 192, 161, 130, - 99, 68, 37, 6, 7, 38, 69, 100, - 131, 162, 193, 224, 256, 225, 194, 163, - 132, 101, 70, 39, 8, 9, 40, 71, - 102, 133, 164, 195, 226, 257, 288, 320, - 289, 258, 227, 196, 165, 134, 103, 72, - 41, 10, 11, 42, 73, 104, 135, 166, - 197, 228, 259, 290, 321, 352, 384, 353, - 322, 291, 260, 229, 198, 167, 136, 105, - 74, 43, 12, 13, 44, 75, 106, 137, - 168, 199, 230, 261, 292, 323, 354, 385, - 416, 448, 417, 386, 355, 324, 293, 262, - 231, 200, 169, 138, 107, 76, 45, 14, - 15, 46, 77, 108, 139, 170, 201, 232, - 263, 294, 325, 356, 387, 418, 449, 480, - 481, 450, 419, 388, 357, 326, 295, 264, - 233, 202, 171, 140, 109, 78, 47, 79, - 110, 
141, 172, 203, 234, 265, 296, 327, - 358, 389, 420, 451, 482, 483, 452, 421, - 390, 359, 328, 297, 266, 235, 204, 173, - 142, 111, 143, 174, 205, 236, 267, 298, - 329, 360, 391, 422, 453, 484, 485, 454, - 423, 392, 361, 330, 299, 268, 237, 206, - 175, 207, 238, 269, 300, 331, 362, 393, - 424, 455, 486, 487, 456, 425, 394, 363, - 332, 301, 270, 239, 271, 302, 333, 364, - 395, 426, 457, 488, 489, 458, 427, 396, - 365, 334, 303, 335, 366, 397, 428, 459, - 490, 491, 460, 429, 398, 367, 399, 430, - 461, 492, 493, 462, 431, 463, 494, 495, - - 17, 513, 529, 48, 544, - 560, 80, 576, 592, 49, 545, 561, 18, - 514, 530, 19, 515, 531, 50, 546, 562, - 81, 577, 593, 112, 608, 624, 144, 640, - 656, 113, 609, 625, 82, 578, 594, 51, - 547, 563, 20, 516, 532, 21, 517, 533, - 52, 548, 564, 83, 579, 595, 114, 610, - 626, 145, 641, 657, 176, 672, 688, 208, - 704, 720, 177, 673, 689, 146, 642, 658, - 115, 611, 627, 84, 580, 596, 53, 549, - 565, 22, 518, 534, 23, 519, 535, 54, - 550, 566, 85, 581, 597, 116, 612, 628, - 147, 643, 659, 178, 674, 690, 209, 705, - 721, 240, 736, 752, 272, 768, 784, 241, - 737, 753, 210, 706, 722, 179, 675, 691, - 148, 644, 660, 117, 613, 629, 86, 582, - 598, 55, 551, 567, 24, 520, 536, 25, - 521, 537, 56, 552, 568, 87, 583, 599, - 118, 614, 630, 149, 645, 661, 180, 676, - 692, 211, 707, 723, 242, 738, 754, 273, - 769, 785, 304, 800, 816, 336, 832, 848, - 305, 801, 817, 274, 770, 786, 243, 739, - 755, 212, 708, 724, 181, 677, 693, 150, - 646, 662, 119, 615, 631, 88, 584, 600, - 57, 553, 569, 26, 522, 538, 27, 523, - 539, 58, 554, 570, 89, 585, 601, 120, - 616, 632, 151, 647, 663, 182, 678, 694, - 213, 709, 725, 244, 740, 756, 275, 771, - 787, 306, 802, 818, 337, 833, 849, 368, - 864, 880, 400, 896, 912, 369, 865, 881, - 338, 834, 850, 307, 803, 819, 276, 772, - 788, 245, 741, 757, 214, 710, 726, 183, - - 679, 695, 152, 648, 664, 121, 617, 633, - 90, 586, 602, 59, 555, 571, 28, 524, - 540, 29, 525, 541, 60, 556, 572, 91, - 587, 603, 122, 618, 634, 153, 649, 
665, - 184, 680, 696, 215, 711, 727, 246, 742, - 758, 277, 773, 789, 308, 804, 820, 339, - 835, 851, 370, 866, 882, 401, 897, 913, - 432, 928, 944, 464, 960, 976, 433, 929, - 945, 402, 898, 914, 371, 867, 883, 340, - 836, 852, 309, 805, 821, 278, 774, 790, - 247, 743, 759, 216, 712, 728, 185, 681, - 697, 154, 650, 666, 123, 619, 635, 92, - 588, 604, 61, 557, 573, 30, 526, 542, - 31, 527, 543, 62, 558, 574, 93, 589, - 605, 124, 620, 636, 155, 651, 667, 186, - 682, 698, 217, 713, 729, 248, 744, 760, - 279, 775, 791, 310, 806, 822, 341, 837, - 853, 372, 868, 884, 403, 899, 915, 434, - 930, 946, 465, 961, 977, 496, 992, 1008, - 497, 993, 1009, 466, 962, 978, 435, 931, - 947, 404, 900, 916, 373, 869, 885, 342, - 838, 854, 311, 807, 823, 280, 776, 792, - 249, 745, 761, 218, 714, 730, 187, 683, - 699, 156, 652, 668, 125, 621, 637, 94, - 590, 606, 63, 559, 575, 95, 591, 607, - 126, 622, 638, 157, 653, 669, 188, 684, - 700, 219, 715, 731, 250, 746, 762, 281, - 777, 793, 312, 808, 824, 343, 839, 855, - 374, 870, 886, 405, 901, 917, 436, 932, - 948, 467, 963, 979, 498, 994, 1010, 499, - 995, 1011, 468, 964, 980, 437, 933, 949, - 406, 902, 918, 375, 871, 887, 344, 840, - - 856, 313, 809, 825, 282, 778, 794, 251, - 747, 763, 220, 716, 732, 189, 685, 701, - 158, 654, 670, 127, 623, 639, 159, 655, - 671, 190, 686, 702, 221, 717, 733, 252, - 748, 764, 283, 779, 795, 314, 810, 826, - 345, 841, 857, 376, 872, 888, 407, 903, - 919, 438, 934, 950, 469, 965, 981, 500, - 996, 1012, 501, 997, 1013, 470, 966, 982, - 439, 935, 951, 408, 904, 920, 377, 873, - 889, 346, 842, 858, 315, 811, 827, 284, - 780, 796, 253, 749, 765, 222, 718, 734, - 191, 687, 703, 223, 719, 735, 254, 750, - 766, 285, 781, 797, 316, 812, 828, 347, - 843, 859, 378, 874, 890, 409, 905, 921, - 440, 936, 952, 471, 967, 983, 502, 998, - 1014, 503, 999, 1015, 472, 968, 984, 441, - 937, 953, 410, 906, 922, 379, 875, 891, - 348, 844, 860, 317, 813, 829, 286, 782, - 798, 255, 751, 767, 287, 783, 799, 318, - 814, 830, 349, 
845, 861, 380, 876, 892, - 411, 907, 923, 442, 938, 954, 473, 969, - 985, 504, 1000, 1016, 505, 1001, 1017, 474, - 970, 986, 443, 939, 955, 412, 908, 924, - 381, 877, 893, 350, 846, 862, 319, 815, - 831, 351, 847, 863, 382, 878, 894, 413, - 909, 925, 444, 940, 956, 475, 971, 987, - 506, 1002, 1018, 507, 1003, 1019, 476, 972, - 988, 445, 941, 957, 414, 910, 926, 383, - 879, 895, 415, 911, 927, 446, 942, 958, - 477, 973, 989, 508, 1004, 1020, 509, 1005, - 1021, 478, 974, 990, 447, 943, 959, 479, - 975, 991, 510, 1006, 1022, 511, 1007, 1023, -}; - -#elif DWTDCT_TYPE == DWTDCT8X8 - -DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { - 0, 1, 2, 3, 5, 4, 4, 5, - 5, 3, 6, 3, 5, 4, 6, 6, - 6, 5, 5, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, - - 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -}; - -DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { - 0, 1, 32, 64, 33, 2, 3, 34, - 65, 96, 128, 97, 66, 35, 4, 5, - 
36, 67, 98, 129, 160, 192, 161, 130, - 99, 68, 37, 6, 7, 38, 69, 100, - 131, 162, 193, 224, 225, 194, 163, 132, - 101, 70, 39, 71, 102, 133, 164, 195, - 226, 227, 196, 165, 134, 103, 135, 166, - 197, 228, 229, 198, 167, 199, 230, 231, - - 8, 256, 264, 9, 257, 265, 40, 288, 296, 72, 320, 328, - 41, 289, 297, 10, 258, 266, 11, 259, 267, 42, 290, 298, - 73, 321, 329, 104, 352, 360, 136, 384, 392, 105, 353, 361, - 74, 322, 330, 43, 291, 299, 12, 260, 268, 13, 261, 269, - 44, 292, 300, 75, 323, 331, 106, 354, 362, 137, 385, 393, - 168, 416, 424, 200, 448, 456, 169, 417, 425, 138, 386, 394, - 107, 355, 363, 76, 324, 332, 45, 293, 301, 14, 262, 270, - 15, 263, 271, 46, 294, 302, 77, 325, 333, 108, 356, 364, - 139, 387, 395, 170, 418, 426, 201, 449, 457, 232, 480, 488, - 233, 481, 489, 202, 450, 458, 171, 419, 427, 140, 388, 396, - 109, 357, 365, 78, 326, 334, 47, 295, 303, 79, 327, 335, - 110, 358, 366, 141, 389, 397, 172, 420, 428, 203, 451, 459, - 234, 482, 490, 235, 483, 491, 204, 452, 460, 173, 421, 429, - 142, 390, 398, 111, 359, 367, 143, 391, 399, 174, 422, 430, - 205, 453, 461, 236, 484, 492, 237, 485, 493, 206, 454, 462, - 175, 423, 431, 207, 455, 463, 238, 486, 494, 239, 487, 495, - - 16, 512, 528, 17, 513, 529, 18, 514, - 530, 19, 515, 531, 20, 516, 532, 21, - 517, 533, 22, 518, 534, 23, 519, 535, - 24, 520, 536, 25, 521, 537, 26, 522, - 538, 27, 523, 539, 28, 524, 540, 29, - 525, 541, 30, 526, 542, 31, 527, 543, - 48, 544, 560, 49, 545, 561, 50, 546, - 562, 51, 547, 563, 52, 548, 564, 53, - 549, 565, 54, 550, 566, 55, 551, 567, - 56, 552, 568, 57, 553, 569, 58, 554, - 570, 59, 555, 571, 60, 556, 572, 61, - 557, 573, 62, 558, 574, 63, 559, 575, - 80, 576, 592, 81, 577, 593, 82, 578, - 594, 83, 579, 595, 84, 580, 596, 85, - 581, 597, 86, 582, 598, 87, 583, 599, - 88, 584, 600, 89, 585, 601, 90, 586, - 602, 91, 587, 603, 92, 588, 604, 93, - 589, 605, 94, 590, 606, 95, 591, 607, - 112, 608, 624, 113, 609, 625, 114, 610, - 626, 115, 611, 627, 116, 612, 628, 117, - 
613, 629, 118, 614, 630, 119, 615, 631, - 120, 616, 632, 121, 617, 633, 122, 618, - 634, 123, 619, 635, 124, 620, 636, 125, - 621, 637, 126, 622, 638, 127, 623, 639, - 144, 640, 656, 145, 641, 657, 146, 642, - 658, 147, 643, 659, 148, 644, 660, 149, - 645, 661, 150, 646, 662, 151, 647, 663, - 152, 648, 664, 153, 649, 665, 154, 650, - 666, 155, 651, 667, 156, 652, 668, 157, - 653, 669, 158, 654, 670, 159, 655, 671, - 176, 672, 688, 177, 673, 689, 178, 674, - 690, 179, 675, 691, 180, 676, 692, 181, - 677, 693, 182, 678, 694, 183, 679, 695, - 184, 680, 696, 185, 681, 697, 186, 682, - 698, 187, 683, 699, 188, 684, 700, 189, - 685, 701, 190, 686, 702, 191, 687, 703, - 208, 704, 720, 209, 705, 721, 210, 706, - 722, 211, 707, 723, 212, 708, 724, 213, - 709, 725, 214, 710, 726, 215, 711, 727, - 216, 712, 728, 217, 713, 729, 218, 714, - 730, 219, 715, 731, 220, 716, 732, 221, - 717, 733, 222, 718, 734, 223, 719, 735, - 240, 736, 752, 241, 737, 753, 242, 738, - 754, 243, 739, 755, 244, 740, 756, 245, - 741, 757, 246, 742, 758, 247, 743, 759, - 248, 744, 760, 249, 745, 761, 250, 746, - 762, 251, 747, 763, 252, 748, 764, 253, - 749, 765, 254, 750, 766, 255, 751, 767, - 272, 768, 784, 273, 769, 785, 274, 770, - 786, 275, 771, 787, 276, 772, 788, 277, - 773, 789, 278, 774, 790, 279, 775, 791, - 280, 776, 792, 281, 777, 793, 282, 778, - 794, 283, 779, 795, 284, 780, 796, 285, - 781, 797, 286, 782, 798, 287, 783, 799, - 304, 800, 816, 305, 801, 817, 306, 802, - 818, 307, 803, 819, 308, 804, 820, 309, - 805, 821, 310, 806, 822, 311, 807, 823, - 312, 808, 824, 313, 809, 825, 314, 810, - 826, 315, 811, 827, 316, 812, 828, 317, - 813, 829, 318, 814, 830, 319, 815, 831, - 336, 832, 848, 337, 833, 849, 338, 834, - 850, 339, 835, 851, 340, 836, 852, 341, - 837, 853, 342, 838, 854, 343, 839, 855, - 344, 840, 856, 345, 841, 857, 346, 842, - 858, 347, 843, 859, 348, 844, 860, 349, - 845, 861, 350, 846, 862, 351, 847, 863, - 368, 864, 880, 369, 865, 881, 370, 866, - 882, 371, 867, 883, 372, 
868, 884, 373, - 869, 885, 374, 870, 886, 375, 871, 887, - 376, 872, 888, 377, 873, 889, 378, 874, - 890, 379, 875, 891, 380, 876, 892, 381, - 877, 893, 382, 878, 894, 383, 879, 895, - 400, 896, 912, 401, 897, 913, 402, 898, - 914, 403, 899, 915, 404, 900, 916, 405, - 901, 917, 406, 902, 918, 407, 903, 919, - 408, 904, 920, 409, 905, 921, 410, 906, - 922, 411, 907, 923, 412, 908, 924, 413, - 909, 925, 414, 910, 926, 415, 911, 927, - 432, 928, 944, 433, 929, 945, 434, 930, - 946, 435, 931, 947, 436, 932, 948, 437, - 933, 949, 438, 934, 950, 439, 935, 951, - 440, 936, 952, 441, 937, 953, 442, 938, - 954, 443, 939, 955, 444, 940, 956, 445, - 941, 957, 446, 942, 958, 447, 943, 959, - 464, 960, 976, 465, 961, 977, 466, 962, - 978, 467, 963, 979, 468, 964, 980, 469, - 965, 981, 470, 966, 982, 471, 967, 983, - 472, 968, 984, 473, 969, 985, 474, 970, - 986, 475, 971, 987, 476, 972, 988, 477, - 973, 989, 478, 974, 990, 479, 975, 991, - 496, 992, 1008, 497, 993, 1009, 498, 994, - 1010, 499, 995, 1011, 500, 996, 1012, 501, - 997, 1013, 502, 998, 1014, 503, 999, 1015, - 504, 1000, 1016, 505, 1001, 1017, 506, 1002, - 1018, 507, 1003, 1019, 508, 1004, 1020, 509, - 1005, 1021, 510, 1006, 1022, 511, 1007, 1023, -}; -#endif - -#else - -DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { - 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, - 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -}; - DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { 0, 1, 32, 64, 33, 2, 3, 34, 65, 96, 128, 97, 66, 35, 4, 5, 36, 67, 98, 129, 160, 192, 161, 130, 99, 68, 37, 6, 7, 38, 69, 100, 131, 162, 193, 224, 256, 225, 194, 163, 132, 101, 70, 39, 8, 9, 40, 71, 102, 133, 164, 195, 226, 257, 288, 320, 289, 258, 227, 196, 165, 134, 103, 72, @@ -865,7 +150,6 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { 951, 920, 889, 858, 827, 796, 765, 734, 703, 735, 766, 797, 828, 859, 890, 921, 952, 983, 1014, 1015, 984, 953, 922, 891, 860, 829, 798, 767, 799, 830, 861, 892, 923, 954, 985, 1016, 1017, 986, 955, 924, 893, 862, 831, 863, 894, 925, 956, 987, 1018, 1019, 988, 957, 926, 895, 927, 958, 989, 1020, 1021, 990, 959, 991, 1022, 1023, }; -#endif // CONFIG_DWTDCTHYBRID /* Array indices are identical to previously-existing CONTEXT_NODE indices */ @@ -937,145 +221,28 @@ vp9_extra_bit_struct vp9_extra_bits[12] = { #include "vp9/common/vp9_default_coef_probs.h" -#if CONFIG_NEWCOEFCONTEXT - -// Neighborhood 5-tuples for various scans and blocksizes, -// in {top, left, topleft, topright, bottomleft} order -// for each position in raster scan order. -// -1 indicates the neighbor does not exist. 
-DECLARE_ALIGNED(16, int, - vp9_default_zig_zag1d_4x4_neighbors[16 * MAX_NEIGHBORS]); -DECLARE_ALIGNED(16, int, - vp9_col_scan_4x4_neighbors[16 * MAX_NEIGHBORS]); -DECLARE_ALIGNED(16, int, - vp9_row_scan_4x4_neighbors[16 * MAX_NEIGHBORS]); -DECLARE_ALIGNED(16, int, - vp9_default_zig_zag1d_8x8_neighbors[64 * MAX_NEIGHBORS]); -DECLARE_ALIGNED(16, int, - vp9_default_zig_zag1d_16x16_neighbors[256 * MAX_NEIGHBORS]); -DECLARE_ALIGNED(16, int, - vp9_default_zig_zag1d_32x32_neighbors[1024 * MAX_NEIGHBORS]); - -static int find_in_scan(const int *scan, int l, int m) { - int i, l2 = l * l; - for (i = 0; i < l2; ++i) { - if (scan[i] == m) - return i; - } - return -1; -} - -static void init_scan_neighbors(const int *scan, int l, int *neighbors) { - int l2 = l * l; - int m, n, i, j, k; - for (n = 0; n < l2; ++n) { - int locn = find_in_scan(scan, l, n); - int z = -1; - i = n / l; - j = n % l; - for (k = 0; k < MAX_NEIGHBORS; ++k) - neighbors[MAX_NEIGHBORS * n + k] = -1; - if (i - 1 >= 0) { - m = (i - 1) * l + j; - if (find_in_scan(scan, l, m) < locn) { - neighbors[MAX_NEIGHBORS * n] = m; - if (m == 0) z = 0; - } - } - if (j - 1 >= 0) { - m = i * l + j - 1; - if (find_in_scan(scan, l, m) < locn) { - neighbors[MAX_NEIGHBORS * n + 1] = m; - if (m == 0) z = 1; - } - } - if (i - 1 >= 0 && j - 1 >= 0) { - m = (i - 1) * l + j - 1; - if (find_in_scan(scan, l, m) < locn) { - neighbors[MAX_NEIGHBORS * n + 2] = m; - if (m == 0) z = 2; - } - } - if (i - 1 >= 0 && j + 1 < l) { - m = (i - 1) * l + j + 1; - if (find_in_scan(scan, l, m) < locn) { - neighbors[MAX_NEIGHBORS * n + 3] = m; - if (m == 0) z = 3; - } - } - if (i + 1 < l && j - 1 >= 0) { - m = (i + 1) * l + j - 1; - if (find_in_scan(scan, l, m) < locn) { - neighbors[MAX_NEIGHBORS * n + 4] = m; - if (m == 0) z = 4; - } - } - if (z != -1) { // zero exists - int v = 0; - for (k = 0; k < MAX_NEIGHBORS; ++k) - v += (neighbors[MAX_NEIGHBORS * n + k] > 0); - if (v) { - neighbors[MAX_NEIGHBORS * n + z] = -1; - } - } - } -} - -void 
vp9_init_neighbors() { - init_scan_neighbors(vp9_default_zig_zag1d_4x4, 4, - vp9_default_zig_zag1d_4x4_neighbors); - init_scan_neighbors(vp9_row_scan_4x4, 4, - vp9_row_scan_4x4_neighbors); - init_scan_neighbors(vp9_col_scan_4x4, 4, - vp9_col_scan_4x4_neighbors); - init_scan_neighbors(vp9_default_zig_zag1d_8x8, 8, - vp9_default_zig_zag1d_8x8_neighbors); - init_scan_neighbors(vp9_default_zig_zag1d_16x16, 16, - vp9_default_zig_zag1d_16x16_neighbors); - init_scan_neighbors(vp9_default_zig_zag1d_32x32, 32, - vp9_default_zig_zag1d_32x32_neighbors); -} - -const int *vp9_get_coef_neighbors_handle(const int *scan) { - if (scan == vp9_default_zig_zag1d_4x4) { - return vp9_default_zig_zag1d_4x4_neighbors; - } else if (scan == vp9_row_scan_4x4) { - return vp9_row_scan_4x4_neighbors; - } else if (scan == vp9_col_scan_4x4) { - return vp9_col_scan_4x4_neighbors; - } else if (scan == vp9_default_zig_zag1d_8x8) { - return vp9_default_zig_zag1d_8x8_neighbors; - } else if (scan == vp9_default_zig_zag1d_16x16) { - return vp9_default_zig_zag1d_16x16_neighbors; - } else if (scan == vp9_default_zig_zag1d_32x32) { - return vp9_default_zig_zag1d_32x32_neighbors; +// This function updates and then returns an AC coefficient context +// This is currently a placeholder function to allow experimentation +// using various context models based on the energy of earlier tokens +// within the current block. +// +// For now it just returns the previously used context. +int vp9_get_coef_context(int * recent_energy, int token) { + // int token_energy; + // int av_energy; + + // Placeholder code for experiments with token energy + // as a coefficient context. + /*token_energy = ((token != DCT_EOB_TOKEN) ?
token : 0); + if (token_energy) { + av_energy = (token_energy + *recent_energy + 1) >> 1; + } else { + av_energy = 0; } - return vp9_default_zig_zag1d_4x4_neighbors; -} + *recent_energy = token_energy;*/ -int vp9_get_coef_neighbor_context(const short int *qcoeff_ptr, int nodc, - const int *neigbor_handle, int rc) { - static int neighbors_used = MAX_NEIGHBORS; // maximum is MAX_NEIGHBORS - const int *nb = neigbor_handle + rc * MAX_NEIGHBORS; - int i, v, val = 0, n = 0; - for (i = 0; i < neighbors_used; ++i) { - if (nb[i] == -1 || (nb[i] == 0 && nodc)) { - continue; - } - v = abs(qcoeff_ptr[nb[i]]); - val = (v > val ? v : val); - n++; - } - if (n == 0) - return 0; - else if (val <= 1) - return val; - else if (val < 4) - return 2; - else - return 3; -} -#endif /* CONFIG_NEWCOEFCONTEXT */ + return vp9_prev_token_class[token]; +}; void vp9_default_coef_probs(VP9_COMMON *pc) { vpx_memcpy(pc->fc.coef_probs_4x4, default_coef_probs_4x4, @@ -1123,7 +290,7 @@ static void update_coef_probs(vp9_coeff_probs *dst_coef_probs, for (i = 0; i < block_types; ++i) for (j = 0; j < COEF_BANDS; ++j) for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) + if (k >= 3 && j == 0) continue; vp9_tree_probs_from_distribution(MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, @@ -1222,21 +389,21 @@ void vp9_adapt_coef_probs(VP9_COMMON *cm) { count_sat, update_factor); update_coef_probs(cm->fc.hybrid_coef_probs_4x4, cm->fc.pre_hybrid_coef_probs_4x4, - BLOCK_TYPES_4X4, cm->fc.hybrid_coef_counts_4x4, + BLOCK_TYPES_4X4_HYBRID, cm->fc.hybrid_coef_counts_4x4, count_sat, update_factor); update_coef_probs(cm->fc.coef_probs_8x8, cm->fc.pre_coef_probs_8x8, BLOCK_TYPES_8X8, cm->fc.coef_counts_8x8, count_sat, update_factor); update_coef_probs(cm->fc.hybrid_coef_probs_8x8, cm->fc.pre_hybrid_coef_probs_8x8, - BLOCK_TYPES_8X8, cm->fc.hybrid_coef_counts_8x8, + BLOCK_TYPES_8X8_HYBRID, cm->fc.hybrid_coef_counts_8x8, count_sat, update_factor); 
update_coef_probs(cm->fc.coef_probs_16x16, cm->fc.pre_coef_probs_16x16, BLOCK_TYPES_16X16, cm->fc.coef_counts_16x16, count_sat, update_factor); update_coef_probs(cm->fc.hybrid_coef_probs_16x16, cm->fc.pre_hybrid_coef_probs_16x16, - BLOCK_TYPES_16X16, cm->fc.hybrid_coef_counts_16x16, + BLOCK_TYPES_16X16_HYBRID, cm->fc.hybrid_coef_counts_16x16, count_sat, update_factor); update_coef_probs(cm->fc.coef_probs_32x32, cm->fc.pre_coef_probs_32x32, BLOCK_TYPES_32X32, cm->fc.coef_counts_32x32, diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index 84e5255c2..8e9f60b26 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -59,23 +59,22 @@ extern vp9_extra_bit_struct vp9_extra_bits[12]; /* indexed by token value */ /* Coefficients are predicted via a 3-dimensional probability table. */ -/* Outside dimension. 0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */ -#define BLOCK_TYPES_4X4 4 +/* Outside dimension. 0 = Y with DC, 1 = UV */ +#define BLOCK_TYPES_4X4 2 +#define BLOCK_TYPES_4X4_HYBRID 1 -#define BLOCK_TYPES_8X8 4 +#define BLOCK_TYPES_8X8 2 +#define BLOCK_TYPES_8X8_HYBRID 1 -#define BLOCK_TYPES_16X16 4 +#define BLOCK_TYPES_16X16 2 +#define BLOCK_TYPES_16X16_HYBRID 1 -#define BLOCK_TYPES_32X32 4 +#define BLOCK_TYPES_32X32 1 /* Middle dimension is a coarsening of the coefficient's position within the 4x4 DCT. */ #define COEF_BANDS 8 -extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_4x4[16]); -extern DECLARE_ALIGNED(64, const int, vp9_coef_bands_8x8[64]); -extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_16x16[256]); -extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]); /* Inside dimension is 3-valued measure of nearby complexity, that is, the extent to which nearby coefficients are nonzero. 
For the first @@ -106,9 +105,6 @@ typedef vp9_prob vp9_coeff_probs[COEF_BANDS][PREV_COEF_CONTEXTS] #define SUBEXP_PARAM 4 /* Subexponential code parameter */ #define MODULUS_PARAM 13 /* Modulus parameter */ -extern DECLARE_ALIGNED(16, const uint8_t, - vp9_prev_token_class[MAX_ENTROPY_TOKENS]); - struct VP9Common; void vp9_default_coef_probs(struct VP9Common *); extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_4x4[16]); @@ -129,26 +125,12 @@ static void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) { vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); } -#if CONFIG_NEWCOEFCONTEXT - -#define MAX_NEIGHBORS 5 -#define NEWCOEFCONTEXT_BAND_COND(b) ((b) >= 1) -void vp9_init_neighbors(void); - -const int *vp9_get_coef_neighbors_handle(const int *scan); -int vp9_get_coef_neighbor_context(const short int *qcoeff_ptr, int nodc, - const int *neigbor_handle, int rc); -extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_4x4_neighbors[ - 16 * MAX_NEIGHBORS]); -extern DECLARE_ALIGNED(16, int, vp9_row_scan_4x4_neighbors[ - 16 * MAX_NEIGHBORS]); -extern DECLARE_ALIGNED(16, int, vp9_col_scan_4x4_neighbors[ - 16 * MAX_NEIGHBORS]); -extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_8x8_neighbors[ - 64 * MAX_NEIGHBORS]); -extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_16x16_neighbors[ - 256 * MAX_NEIGHBORS]); -extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_32x32_neighbors[ - 1024 * MAX_NEIGHBORS]); -#endif // CONFIG_NEWCOEFCONTEXT +extern const int vp9_coef_bands[32]; +static int get_coef_band(int coef_index) { + if (coef_index < 32) + return vp9_coef_bands[coef_index]; + else + return 7; +} +extern int vp9_get_coef_context(int * recent_energy, int token); #endif // VP9_COMMON_VP9_ENTROPY_H_ diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h index 01e8ea3c2..a9e663458 100644 --- a/vp9/common/vp9_idct.h +++ b/vp9/common/vp9_idct.h @@ -60,7 +60,7 @@ static const int sinpi_4_9 = 15212; static INLINE int dct_const_round_shift(int 
input) { int rv = (input + DCT_CONST_ROUNDING) >> DCT_CONST_BITS; - assert((rv <= INT16_MAX) && (rv >= INT16_MIN)); + assert(INT16_MIN <= rv && rv <= INT16_MAX); return rv; } #endif diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c index 2f847dc78..8eb98011f 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@ -298,122 +298,6 @@ void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch, } } -void vp9_short_inv_walsh4x4_c(int16_t *input, int16_t *output) { - int i; - int a1, b1, c1, d1; - int16_t *ip = input; - int16_t *op = output; - - for (i = 0; i < 4; i++) { - a1 = ((ip[0] + ip[3])); - b1 = ((ip[1] + ip[2])); - c1 = ((ip[1] - ip[2])); - d1 = ((ip[0] - ip[3])); - - op[0] = (a1 + b1 + 1) >> 1; - op[1] = (c1 + d1) >> 1; - op[2] = (a1 - b1) >> 1; - op[3] = (d1 - c1) >> 1; - - ip += 4; - op += 4; - } - - ip = output; - op = output; - for (i = 0; i < 4; i++) { - a1 = ip[0] + ip[12]; - b1 = ip[4] + ip[8]; - c1 = ip[4] - ip[8]; - d1 = ip[0] - ip[12]; - op[0] = (a1 + b1 + 1) >> 1; - op[4] = (c1 + d1) >> 1; - op[8] = (a1 - b1) >> 1; - op[12] = (d1 - c1) >> 1; - ip++; - op++; - } -} - -void vp9_short_inv_walsh4x4_1_c(int16_t *in, int16_t *out) { - int i; - int16_t tmp[4]; - int16_t *ip = in; - int16_t *op = tmp; - - op[0] = (ip[0] + 1) >> 1; - op[1] = op[2] = op[3] = (ip[0] >> 1); - - ip = tmp; - op = out; - for (i = 0; i < 4; i++) { - op[0] = (ip[0] + 1) >> 1; - op[4] = op[8] = op[12] = (ip[0] >> 1); - ip++; - op++; - } -} - -#if CONFIG_LOSSLESS -void vp9_short_inv_walsh4x4_lossless_c(int16_t *input, int16_t *output) { - int i; - int a1, b1, c1, d1; - int16_t *ip = input; - int16_t *op = output; - - for (i = 0; i < 4; i++) { - a1 = ((ip[0] + ip[3])) >> Y2_WHT_UPSCALE_FACTOR; - b1 = ((ip[1] + ip[2])) >> Y2_WHT_UPSCALE_FACTOR; - c1 = ((ip[1] - ip[2])) >> Y2_WHT_UPSCALE_FACTOR; - d1 = ((ip[0] - ip[3])) >> Y2_WHT_UPSCALE_FACTOR; - - op[0] = (a1 + b1 + 1) >> 1; - op[1] = (c1 + d1) >> 1; - op[2] = (a1 - b1) >> 1; - op[3] = (d1 - c1) >> 
1; - - ip += 4; - op += 4; - } - - ip = output; - op = output; - for (i = 0; i < 4; i++) { - a1 = ip[0] + ip[12]; - b1 = ip[4] + ip[8]; - c1 = ip[4] - ip[8]; - d1 = ip[0] - ip[12]; - - - op[0] = ((a1 + b1 + 1) >> 1) << Y2_WHT_UPSCALE_FACTOR; - op[4] = ((c1 + d1) >> 1) << Y2_WHT_UPSCALE_FACTOR; - op[8] = ((a1 - b1) >> 1) << Y2_WHT_UPSCALE_FACTOR; - op[12] = ((d1 - c1) >> 1) << Y2_WHT_UPSCALE_FACTOR; - - ip++; - op++; - } -} - -void vp9_short_inv_walsh4x4_1_lossless_c(int16_t *in, int16_t *out) { - int i; - int16_t tmp[4]; - int16_t *ip = in; - int16_t *op = tmp; - - op[0] = ((ip[0] >> Y2_WHT_UPSCALE_FACTOR) + 1) >> 1; - op[1] = op[2] = op[3] = ((ip[0] >> Y2_WHT_UPSCALE_FACTOR) >> 1); - - ip = tmp; - op = out; - for (i = 0; i < 4; i++) { - op[0] = ((ip[0] + 1) >> 1) << Y2_WHT_UPSCALE_FACTOR; - op[4] = op[8] = op[12] = ((ip[0] >> 1)) << Y2_WHT_UPSCALE_FACTOR; - ip++; - op++; - } -} - void vp9_short_inv_walsh4x4_x8_c(int16_t *input, int16_t *output, int pitch) { int i; int a1, b1, c1, d1; @@ -476,12 +360,13 @@ void vp9_short_inv_walsh4x4_1_x8_c(int16_t *in, int16_t *out, int pitch) { } } -void vp9_dc_only_inv_walsh_add_c(short input_dc, uint8_t *pred_ptr, +void vp9_dc_only_inv_walsh_add_c(int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride) { int r, c; - short tmp[16]; - vp9_short_inv_walsh4x4_1_x8_c(&input_dc, tmp, 4 << 1); + int16_t dc = input_dc; + int16_t tmp[16]; + vp9_short_inv_walsh4x4_1_x8_c(&dc, tmp, 4 << 1); for (r = 0; r < 4; r++) { for (c = 0; c < 4; c++) { @@ -492,7 +377,6 @@ void vp9_dc_only_inv_walsh_add_c(short input_dc, uint8_t *pred_ptr, pred_ptr += pitch; } } -#endif void idct4_1d(int16_t *input, int16_t *output) { int16_t step[4]; @@ -912,20 +796,6 @@ void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output) { *output = (out + 16) >> 5; } -void vp9_short_ihaar2x2_c(int16_t *input, int16_t *output, int pitch) { - int i; - int16_t *ip = input; // 0, 1, 4, 8 - int16_t *op = output; - for (i = 0; i < 16; i++) { - op[i] = 0; - } - 
- op[0] = (ip[0] + ip[1] + ip[4] + ip[8] + 1) >> 1; - op[1] = (ip[0] - ip[1] + ip[4] - ip[8]) >> 1; - op[4] = (ip[0] + ip[1] - ip[4] - ip[8]) >> 1; - op[8] = (ip[0] - ip[1] - ip[4] + ip[8]) >> 1; -} - void idct16_1d(int16_t *input, int16_t *output) { int16_t step1[16], step2[16]; int temp1, temp2; @@ -1114,6 +984,231 @@ void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) { } } +#if CONFIG_INTHT16X16 +void iadst16_1d(int16_t *input, int16_t *output) { + int x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; + + x0 = input[15]; + x1 = input[0]; + x2 = input[13]; + x3 = input[2]; + x4 = input[11]; + x5 = input[4]; + x6 = input[9]; + x7 = input[6]; + x8 = input[7]; + x9 = input[8]; + x10 = input[5]; + x11 = input[10]; + x12 = input[3]; + x13 = input[12]; + x14 = input[1]; + x15 = input[14]; + + if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 + | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { + output[0] = output[1] = output[2] = output[3] = output[4] + = output[5] = output[6] = output[7] = output[8] + = output[9] = output[10] = output[11] = output[12] + = output[13] = output[14] = output[15] = 0; + return; + } + + // stage 1 + s0 = x0 * cospi_1_64 + x1 * cospi_31_64; + s1 = x0 * cospi_31_64 - x1 * cospi_1_64; + s2 = x2 * cospi_5_64 + x3 * cospi_27_64; + s3 = x2 * cospi_27_64 - x3 * cospi_5_64; + s4 = x4 * cospi_9_64 + x5 * cospi_23_64; + s5 = x4 * cospi_23_64 - x5 * cospi_9_64; + s6 = x6 * cospi_13_64 + x7 * cospi_19_64; + s7 = x6 * cospi_19_64 - x7 * cospi_13_64; + s8 = x8 * cospi_17_64 + x9 * cospi_15_64; + s9 = x8 * cospi_15_64 - x9 * cospi_17_64; + s10 = x10 * cospi_21_64 + x11 * cospi_11_64; + s11 = x10 * cospi_11_64 - x11 * cospi_21_64; + s12 = x12 * cospi_25_64 + x13 * cospi_7_64; + s13 = x12 * cospi_7_64 - x13 * cospi_25_64; + s14 = x14 * cospi_29_64 + x15 * cospi_3_64; + s15 = x14 * cospi_3_64 - x15 * cospi_29_64; + + x0 = dct_const_round_shift(s0 + 
s8); + x1 = dct_const_round_shift(s1 + s9); + x2 = dct_const_round_shift(s2 + s10); + x3 = dct_const_round_shift(s3 + s11); + x4 = dct_const_round_shift(s4 + s12); + x5 = dct_const_round_shift(s5 + s13); + x6 = dct_const_round_shift(s6 + s14); + x7 = dct_const_round_shift(s7 + s15); + x8 = dct_const_round_shift(s0 - s8); + x9 = dct_const_round_shift(s1 - s9); + x10 = dct_const_round_shift(s2 - s10); + x11 = dct_const_round_shift(s3 - s11); + x12 = dct_const_round_shift(s4 - s12); + x13 = dct_const_round_shift(s5 - s13); + x14 = dct_const_round_shift(s6 - s14); + x15 = dct_const_round_shift(s7 - s15); + + // stage 2 + s0 = x0; + s1 = x1; + s2 = x2; + s3 = x3; + s4 = x4; + s5 = x5; + s6 = x6; + s7 = x7; + s8 = x8 * cospi_4_64 + x9 * cospi_28_64; + s9 = x8 * cospi_28_64 - x9 * cospi_4_64; + s10 = x10 * cospi_20_64 + x11 * cospi_12_64; + s11 = x10 * cospi_12_64 - x11 * cospi_20_64; + s12 = - x12 * cospi_28_64 + x13 * cospi_4_64; + s13 = x12 * cospi_4_64 + x13 * cospi_28_64; + s14 = - x14 * cospi_12_64 + x15 * cospi_20_64; + s15 = x14 * cospi_20_64 + x15 * cospi_12_64; + + x0 = s0 + s4; + x1 = s1 + s5; + x2 = s2 + s6; + x3 = s3 + s7; + x4 = s0 - s4; + x5 = s1 - s5; + x6 = s2 - s6; + x7 = s3 - s7; + x8 = dct_const_round_shift(s8 + s12); + x9 = dct_const_round_shift(s9 + s13); + x10 = dct_const_round_shift(s10 + s14); + x11 = dct_const_round_shift(s11 + s15); + x12 = dct_const_round_shift(s8 - s12); + x13 = dct_const_round_shift(s9 - s13); + x14 = dct_const_round_shift(s10 - s14); + x15 = dct_const_round_shift(s11 - s15); + + // stage 3 + s0 = x0; + s1 = x1; + s2 = x2; + s3 = x3; + s4 = x4 * cospi_8_64 + x5 * cospi_24_64; + s5 = x4 * cospi_24_64 - x5 * cospi_8_64; + s6 = - x6 * cospi_24_64 + x7 * cospi_8_64; + s7 = x6 * cospi_8_64 + x7 * cospi_24_64; + s8 = x8; + s9 = x9; + s10 = x10; + s11 = x11; + s12 = x12 * cospi_8_64 + x13 * cospi_24_64; + s13 = x12 * cospi_24_64 - x13 * cospi_8_64; + s14 = - x14 * cospi_24_64 + x15 * cospi_8_64; + s15 = x14 * cospi_8_64 + x15 * 
cospi_24_64; + + x0 = s0 + s2; + x1 = s1 + s3; + x2 = s0 - s2; + x3 = s1 - s3; + x4 = dct_const_round_shift(s4 + s6); + x5 = dct_const_round_shift(s5 + s7); + x6 = dct_const_round_shift(s4 - s6); + x7 = dct_const_round_shift(s5 - s7); + x8 = s8 + s10; + x9 = s9 + s11; + x10 = s8 - s10; + x11 = s9 - s11; + x12 = dct_const_round_shift(s12 + s14); + x13 = dct_const_round_shift(s13 + s15); + x14 = dct_const_round_shift(s12 - s14); + x15 = dct_const_round_shift(s13 - s15); + + // stage 4 + s2 = (- cospi_16_64) * (x2 + x3); + s3 = cospi_16_64 * (x2 - x3); + s6 = cospi_16_64 * (x6 + x7); + s7 = cospi_16_64 * (- x6 + x7); + s10 = cospi_16_64 * (x10 + x11); + s11 = cospi_16_64 * (- x10 + x11); + s14 = (- cospi_16_64) * (x14 + x15); + s15 = cospi_16_64 * (x14 - x15); + + x2 = dct_const_round_shift(s2); + x3 = dct_const_round_shift(s3); + x6 = dct_const_round_shift(s6); + x7 = dct_const_round_shift(s7); + x10 = dct_const_round_shift(s10); + x11 = dct_const_round_shift(s11); + x14 = dct_const_round_shift(s14); + x15 = dct_const_round_shift(s15); + + output[0] = x0; + output[1] = - x8; + output[2] = x12; + output[3] = - x4; + output[4] = x6; + output[5] = x14; + output[6] = x10; + output[7] = x2; + output[8] = x3; + output[9] = x11; + output[10] = x15; + output[11] = x7; + output[12] = x5; + output[13] = - x13; + output[14] = x9; + output[15] = - x1; +} + +void vp9_short_iht16x16_c(int16_t *input, int16_t *output, + int pitch, TX_TYPE tx_type) { + int16_t out[16 * 16]; + int16_t *outptr = &out[0]; + const int short_pitch = pitch >> 1; + int i, j; + int16_t temp_in[16], temp_out[16]; + + void (*invr)(int16_t*, int16_t*); + void (*invc)(int16_t*, int16_t*); + + switch (tx_type) { + case ADST_ADST: + invc = &iadst16_1d; + invr = &iadst16_1d; + break; + case ADST_DCT: + invc = &iadst16_1d; + invr = &idct16_1d; + break; + case DCT_ADST: + invc = &idct16_1d; + invr = &iadst16_1d; + break; + case DCT_DCT: + invc = &idct16_1d; + invr = &idct16_1d; + break; + default: + assert(0); + } + 
+ // inverse transform row vectors + for (i = 0; i < 16; ++i) { + invr(input, outptr); + input += short_pitch; + outptr += 16; + } + + // inverse transform column vectors + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = out[j * 16 + i]; + invc(temp_in, temp_out); + for (j = 0; j < 16; ++j) + output[j * 16 + i] = (temp_out[j] + 32) >> 6; + } +} +#endif + + + void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) { int16_t out[16 * 16]; int16_t *outptr = &out[0]; @@ -1152,8 +1247,6 @@ void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output) { *output = (out + 32) >> 6; } - -#if !CONFIG_DWTDCTHYBRID void idct32_1d(int16_t *input, int16_t *output) { int16_t step1[32], step2[32]; int temp1, temp2; @@ -1521,7 +1614,6 @@ void idct32_1d(int16_t *input, int16_t *output) { output[31] = step1[0] - step1[31]; } - void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { int16_t out[32 * 32]; int16_t *outptr = &out[0]; @@ -1554,792 +1646,3 @@ void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) { out = dct_const_round_shift(tmp); *output = (out + 32) >> 6; } - -#else // !CONFIG_DWTDCTHYBRID - -#if DWT_TYPE == 53 - -// Note: block length must be even for this implementation -static void synthesis_53_row(int length, int16_t *lowpass, int16_t *highpass, - int16_t *x) { - int16_t r, *a, *b; - int n; - - n = length >> 1; - b = highpass; - a = lowpass; - r = *highpass; - while (n--) { - *a++ -= (r + (*b) + 1) >> 1; - r = *b++; - } - - n = length >> 1; - b = highpass; - a = lowpass; - while (--n) { - *x++ = ((r = *a++) + 1) >> 1; - *x++ = *b++ + ((r + (*a) + 2) >> 2); - } - *x++ = ((r = *a) + 1) >> 1; - *x++ = *b + ((r + 1) >> 1); -} - -static void synthesis_53_col(int length, int16_t *lowpass, int16_t *highpass, - int16_t *x) { - int16_t r, *a, *b; - int n; - - n = length >> 1; - b = highpass; - a = lowpass; - r = *highpass; - while (n--) { - *a++ -= (r + (*b) + 1) >> 1; - r = *b++; - } - - n = length >> 1; - b = 
highpass; - a = lowpass; - while (--n) { - r = *a++; - *x++ = r; - *x++ = ((*b++) << 1) + ((r + (*a) + 1) >> 1); - } - *x++ = *a; - *x++ = ((*b) << 1) + *a; -} - -static void dyadic_synthesize_53(int levels, int width, int height, int16_t *c, - int pitch_c, int16_t *x, int pitch_x) { - int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width; - short buffer[2 * DWT_MAX_LENGTH]; - - th[0] = hh; - tw[0] = hw; - for (i = 1; i <= levels; i++) { - th[i] = (th[i - 1] + 1) >> 1; - tw[i] = (tw[i - 1] + 1) >> 1; - } - for (lv = levels - 1; lv >= 0; lv--) { - nh = th[lv]; - nw = tw[lv]; - hh = th[lv + 1]; - hw = tw[lv + 1]; - if ((nh < 2) || (nw < 2)) continue; - for (j = 0; j < nw; j++) { - for (i = 0; i < nh; i++) - buffer[i] = c[i * pitch_c + j]; - synthesis_53_col(nh, buffer, buffer + hh, buffer + nh); - for (i = 0; i < nh; i++) - c[i * pitch_c + j] = buffer[i + nh]; - } - for (i = 0; i < nh; i++) { - memcpy(buffer, &c[i * pitch_c], nw * sizeof(*buffer)); - synthesis_53_row(nw, buffer, buffer + hw, &c[i * pitch_c]); - } - } - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - x[i * pitch_x + j] = c[i * pitch_c + j] >= 0 ? 
- ((c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS) : - -((-c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS); - } - } -} - -#elif DWT_TYPE == 26 - -// Note: block length must be even for this implementation -static void synthesis_26_row(int length, int16_t *lowpass, int16_t *highpass, - int16_t *x) { - int16_t r, s, *a, *b; - int i, n = length >> 1; - - if (n >= 4) { - a = lowpass; - b = highpass; - r = *lowpass; - while (--n) { - *b++ += (r - a[1] + 4) >> 3; - r = *a++; - } - *b += (r - *a + 4) >> 3; - } - a = lowpass; - b = highpass; - for (i = length >> 1; i; i--) { - s = *b++; - r = *a++; - *x++ = (r + s + 1) >> 1; - *x++ = (r - s + 1) >> 1; - } -} - -static void synthesis_26_col(int length, int16_t *lowpass, int16_t *highpass, - int16_t *x) { - int16_t r, s, *a, *b; - int i, n = length >> 1; - - if (n >= 4) { - a = lowpass; - b = highpass; - r = *lowpass; - while (--n) { - *b++ += (r - a[1] + 4) >> 3; - r = *a++; - } - *b += (r - *a + 4) >> 3; - } - a = lowpass; - b = highpass; - for (i = length >> 1; i; i--) { - s = *b++; - r = *a++; - *x++ = r + s; - *x++ = r - s; - } -} - -static void dyadic_synthesize_26(int levels, int width, int height, int16_t *c, - int pitch_c, int16_t *x, int pitch_x) { - int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width; - int16_t buffer[2 * DWT_MAX_LENGTH]; - - th[0] = hh; - tw[0] = hw; - for (i = 1; i <= levels; i++) { - th[i] = (th[i - 1] + 1) >> 1; - tw[i] = (tw[i - 1] + 1) >> 1; - } - for (lv = levels - 1; lv >= 0; lv--) { - nh = th[lv]; - nw = tw[lv]; - hh = th[lv + 1]; - hw = tw[lv + 1]; - if ((nh < 2) || (nw < 2)) continue; - for (j = 0; j < nw; j++) { - for (i = 0; i < nh; i++) - buffer[i] = c[i * pitch_c + j]; - synthesis_26_col(nh, buffer, buffer + hh, buffer + nh); - for (i = 0; i < nh; i++) - c[i * pitch_c + j] = buffer[i + nh]; - } - for (i = 0; i < nh; i++) { - memcpy(buffer, &c[i * pitch_c], nw * sizeof(*buffer)); - synthesis_26_row(nw, buffer, buffer + hw, &c[i * pitch_c]); - 
} - } - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - x[i * pitch_x + j] = c[i * pitch_c + j] >= 0 ? - ((c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS) : - -((-c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS); - } - } -} - -#elif DWT_TYPE == 97 - -static void synthesis_97(int length, double *lowpass, double *highpass, - double *x) { - static const double a_predict1 = -1.586134342; - static const double a_update1 = -0.05298011854; - static const double a_predict2 = 0.8829110762; - static const double a_update2 = 0.4435068522; - static const double s_low = 1.149604398; - static const double s_high = 1/1.149604398; - static const double inv_s_low = 1 / s_low; - static const double inv_s_high = 1 / s_high; - int i; - double y[DWT_MAX_LENGTH]; - // Undo pack and scale - for (i = 0; i < length / 2; i++) { - y[i * 2] = lowpass[i] * inv_s_low; - y[i * 2 + 1] = highpass[i] * inv_s_high; - } - memcpy(x, y, sizeof(*y) * length); - // Undo update 2 - for (i = 2; i < length; i += 2) { - x[i] -= a_update2 * (x[i-1] + x[i+1]); - } - x[0] -= 2 * a_update2 * x[1]; - // Undo predict 2 - for (i = 1; i < length - 2; i += 2) { - x[i] -= a_predict2 * (x[i - 1] + x[i + 1]); - } - x[length - 1] -= 2 * a_predict2 * x[length - 2]; - // Undo update 1 - for (i = 2; i < length; i += 2) { - x[i] -= a_update1 * (x[i - 1] + x[i + 1]); - } - x[0] -= 2 * a_update1 * x[1]; - // Undo predict 1 - for (i = 1; i < length - 2; i += 2) { - x[i] -= a_predict1 * (x[i - 1] + x[i + 1]); - } - x[length - 1] -= 2 * a_predict1 * x[length - 2]; -} - -static void dyadic_synthesize_97(int levels, int width, int height, int16_t *c, - int pitch_c, int16_t *x, int pitch_x) { - int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width; - double buffer[2 * DWT_MAX_LENGTH]; - double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH]; - - th[0] = hh; - tw[0] = hw; - for (i = 1; i <= levels; i++) { - th[i] = (th[i - 1] + 1) >> 1; - tw[i] = (tw[i - 1] + 1) >> 1; - } - for (lv = levels - 
1; lv >= 0; lv--) { - nh = th[lv]; - nw = tw[lv]; - hh = th[lv + 1]; - hw = tw[lv + 1]; - if ((nh < 2) || (nw < 2)) continue; - for (j = 0; j < nw; j++) { - for (i = 0; i < nh; i++) - buffer[i] = c[i * pitch_c + j]; - synthesis_97(nh, buffer, buffer + hh, buffer + nh); - for (i = 0; i < nh; i++) - y[i * DWT_MAX_LENGTH + j] = buffer[i + nh]; - } - for (i = 0; i < nh; i++) { - memcpy(buffer, &y[i * DWT_MAX_LENGTH], nw * sizeof(*buffer)); - synthesis_97(nw, buffer, buffer + hw, &y[i * DWT_MAX_LENGTH]); - } - } - for (i = 0; i < height; i++) - for (j = 0; j < width; j++) - x[i * pitch_x + j] = round(y[i * DWT_MAX_LENGTH + j] / - (1 << DWT_PRECISION_BITS)); -} - -#endif // DWT_TYPE - -// TODO(debargha): Implement scaling differently so as not to have to use the -// floating point 16x16 dct -static void butterfly_16x16_idct_1d_f(double input[16], double output[16]) { - static const double C1 = 0.995184726672197; - static const double C2 = 0.98078528040323; - static const double C3 = 0.956940335732209; - static const double C4 = 0.923879532511287; - static const double C5 = 0.881921264348355; - static const double C6 = 0.831469612302545; - static const double C7 = 0.773010453362737; - static const double C8 = 0.707106781186548; - static const double C9 = 0.634393284163646; - static const double C10 = 0.555570233019602; - static const double C11 = 0.471396736825998; - static const double C12 = 0.38268343236509; - static const double C13 = 0.290284677254462; - static const double C14 = 0.195090322016128; - static const double C15 = 0.098017140329561; - - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - double step[16]; - double intermediate[16]; - double temp1, temp2; - - - // step 1 and 2 - step[ 0] = input[0] + input[8]; - step[ 1] = input[0] - input[8]; - - temp1 = input[4]*C12; - temp2 = input[12]*C4; - - temp1 -= temp2; - temp1 *= C8; - - step[ 2] = 2*(temp1); - - temp1 = input[4]*C4; - temp2 = input[12]*C12; - temp1 += temp2; - temp1 = (temp1); - 
temp1 *= C8; - step[ 3] = 2*(temp1); - - temp1 = input[2]*C8; - temp1 = 2*(temp1); - temp2 = input[6] + input[10]; - - step[ 4] = temp1 + temp2; - step[ 5] = temp1 - temp2; - - temp1 = input[14]*C8; - temp1 = 2*(temp1); - temp2 = input[6] - input[10]; - - step[ 6] = temp2 - temp1; - step[ 7] = temp2 + temp1; - - // for odd input - temp1 = input[3]*C12; - temp2 = input[13]*C4; - temp1 += temp2; - temp1 = (temp1); - temp1 *= C8; - intermediate[ 8] = 2*(temp1); - - temp1 = input[3]*C4; - temp2 = input[13]*C12; - temp2 -= temp1; - temp2 = (temp2); - temp2 *= C8; - intermediate[ 9] = 2*(temp2); - - intermediate[10] = 2*(input[9]*C8); - intermediate[11] = input[15] - input[1]; - intermediate[12] = input[15] + input[1]; - intermediate[13] = 2*((input[7]*C8)); - - temp1 = input[11]*C12; - temp2 = input[5]*C4; - temp2 -= temp1; - temp2 = (temp2); - temp2 *= C8; - intermediate[14] = 2*(temp2); - - temp1 = input[11]*C4; - temp2 = input[5]*C12; - temp1 += temp2; - temp1 = (temp1); - temp1 *= C8; - intermediate[15] = 2*(temp1); - - step[ 8] = intermediate[ 8] + intermediate[14]; - step[ 9] = intermediate[ 9] + intermediate[15]; - step[10] = intermediate[10] + intermediate[11]; - step[11] = intermediate[10] - intermediate[11]; - step[12] = intermediate[12] + intermediate[13]; - step[13] = intermediate[12] - intermediate[13]; - step[14] = intermediate[ 8] - intermediate[14]; - step[15] = intermediate[ 9] - intermediate[15]; - - // step 3 - output[0] = step[ 0] + step[ 3]; - output[1] = step[ 1] + step[ 2]; - output[2] = step[ 1] - step[ 2]; - output[3] = step[ 0] - step[ 3]; - - temp1 = step[ 4]*C14; - temp2 = step[ 7]*C2; - temp1 -= temp2; - output[4] = (temp1); - - temp1 = step[ 4]*C2; - temp2 = step[ 7]*C14; - temp1 += temp2; - output[7] = (temp1); - - temp1 = step[ 5]*C10; - temp2 = step[ 6]*C6; - temp1 -= temp2; - output[5] = (temp1); - - temp1 = step[ 5]*C6; - temp2 = step[ 6]*C10; - temp1 += temp2; - output[6] = (temp1); - - output[8] = step[ 8] + step[11]; - output[9] = 
step[ 9] + step[10]; - output[10] = step[ 9] - step[10]; - output[11] = step[ 8] - step[11]; - output[12] = step[12] + step[15]; - output[13] = step[13] + step[14]; - output[14] = step[13] - step[14]; - output[15] = step[12] - step[15]; - - // output 4 - step[ 0] = output[0] + output[7]; - step[ 1] = output[1] + output[6]; - step[ 2] = output[2] + output[5]; - step[ 3] = output[3] + output[4]; - step[ 4] = output[3] - output[4]; - step[ 5] = output[2] - output[5]; - step[ 6] = output[1] - output[6]; - step[ 7] = output[0] - output[7]; - - temp1 = output[8]*C7; - temp2 = output[15]*C9; - temp1 -= temp2; - step[ 8] = (temp1); - - temp1 = output[9]*C11; - temp2 = output[14]*C5; - temp1 += temp2; - step[ 9] = (temp1); - - temp1 = output[10]*C3; - temp2 = output[13]*C13; - temp1 -= temp2; - step[10] = (temp1); - - temp1 = output[11]*C15; - temp2 = output[12]*C1; - temp1 += temp2; - step[11] = (temp1); - - temp1 = output[11]*C1; - temp2 = output[12]*C15; - temp2 -= temp1; - step[12] = (temp2); - - temp1 = output[10]*C13; - temp2 = output[13]*C3; - temp1 += temp2; - step[13] = (temp1); - - temp1 = output[9]*C5; - temp2 = output[14]*C11; - temp2 -= temp1; - step[14] = (temp2); - - temp1 = output[8]*C9; - temp2 = output[15]*C7; - temp1 += temp2; - step[15] = (temp1); - - // step 5 - output[0] = (step[0] + step[15]); - output[1] = (step[1] + step[14]); - output[2] = (step[2] + step[13]); - output[3] = (step[3] + step[12]); - output[4] = (step[4] + step[11]); - output[5] = (step[5] + step[10]); - output[6] = (step[6] + step[ 9]); - output[7] = (step[7] + step[ 8]); - - output[15] = (step[0] - step[15]); - output[14] = (step[1] - step[14]); - output[13] = (step[2] - step[13]); - output[12] = (step[3] - step[12]); - output[11] = (step[4] - step[11]); - output[10] = (step[5] - step[10]); - output[9] = (step[6] - step[ 9]); - output[8] = (step[7] - step[ 8]); - } - vp9_clear_system_state(); // Make it simd safe : __asm emms; -} - -static void vp9_short_idct16x16_c_f(int16_t 
*input, int16_t *output, int pitch, - int scale) { - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - double out[16*16], out2[16*16]; - const int short_pitch = pitch >> 1; - int i, j; - // First transform rows - for (i = 0; i < 16; ++i) { - double temp_in[16], temp_out[16]; - for (j = 0; j < 16; ++j) - temp_in[j] = input[j + i*short_pitch]; - butterfly_16x16_idct_1d_f(temp_in, temp_out); - for (j = 0; j < 16; ++j) - out[j + i*16] = temp_out[j]; - } - // Then transform columns - for (i = 0; i < 16; ++i) { - double temp_in[16], temp_out[16]; - for (j = 0; j < 16; ++j) - temp_in[j] = out[j*16 + i]; - butterfly_16x16_idct_1d_f(temp_in, temp_out); - for (j = 0; j < 16; ++j) - out2[j*16 + i] = temp_out[j]; - } - for (i = 0; i < 16*16; ++i) - output[i] = round(out2[i] / (128 >> scale)); - } - vp9_clear_system_state(); // Make it simd safe : __asm emms; -} - -static void idct8_1d_f(double *x) { - int i, j; - double t[8]; - static const double idctmat[64] = { - 0.35355339059327, 0.49039264020162, 0.46193976625564, 0.41573480615127, - 0.35355339059327, 0.2777851165098, 0.19134171618254, 0.097545161008064, - 0.35355339059327, 0.41573480615127, 0.19134171618254, -0.097545161008064, - -0.35355339059327, -0.49039264020161, -0.46193976625564, -0.2777851165098, - 0.35355339059327, 0.2777851165098, -0.19134171618254, -0.49039264020162, - -0.35355339059327, 0.097545161008064, 0.46193976625564, 0.41573480615127, - 0.35355339059327, 0.097545161008063, -0.46193976625564, -0.2777851165098, - 0.35355339059327, 0.41573480615127, -0.19134171618254, -0.49039264020162, - 0.35355339059327, -0.097545161008063, -0.46193976625564, 0.2777851165098, - 0.35355339059327, -0.41573480615127, -0.19134171618255, 0.49039264020162, - 0.35355339059327, -0.2777851165098, -0.19134171618254, 0.49039264020161, - -0.35355339059327, -0.097545161008064, 0.46193976625564, -0.41573480615127, - 0.35355339059327, -0.41573480615127, 0.19134171618254, 0.097545161008065, - -0.35355339059327, 
0.49039264020162, -0.46193976625564, 0.2777851165098, - 0.35355339059327, -0.49039264020162, 0.46193976625564, -0.41573480615127, - 0.35355339059327, -0.2777851165098, 0.19134171618255, -0.097545161008064 - }; - for (i = 0; i < 8; ++i) { - t[i] = 0; - for (j = 0; j < 8; ++j) - t[i] += idctmat[i * 8 + j] * x[j]; - } - for (i = 0; i < 8; ++i) { - x[i] = t[i]; - } -} - -static void vp9_short_idct8x8_c_f(int16_t *coefs, int16_t *block, int pitch, - int scale) { - double X[8 * 8], Y[8]; - int i, j; - int shortpitch = pitch >> 1; - - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) { - X[i * 8 + j] = (double)coefs[i * shortpitch + j]; - } - } - for (i = 0; i < 8; i++) - idct8_1d_f(X + 8 * i); - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; ++j) - Y[j] = X[i + 8 * j]; - idct8_1d_f(Y); - for (j = 0; j < 8; ++j) - X[i + 8 * j] = Y[j]; - } - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) { - block[i * 8 + j] = (int16_t)round(X[i * 8 + j] / (8 >> scale)); - } - } - } - vp9_clear_system_state(); // Make it simd safe : __asm emms; -} - -#define multiply_bits(d, n) ((n) < 0 ? 
(d) >> (n) : (d) << (n)) - -#if DWTDCT_TYPE == DWTDCT16X16_LEAN - -void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { - // assume output is a 32x32 buffer - // Temporary buffer to hold a 16x16 block for 16x16 inverse dct - int16_t buffer[16 * 16]; - // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt - int16_t buffer2[32 * 32]; - // Note: pitch is in bytes, short_pitch is in short units - const int short_pitch = pitch >> 1; - int i, j; - - // TODO(debargha): Implement more efficiently by adding output pitch - // argument to the idct16x16 function - vp9_short_idct16x16_c_f(input, buffer, pitch, - 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) { - vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(*buffer2) * 16); - } - for (i = 0; i < 16; ++i) { - for (j = 16; j < 32; ++j) { - buffer2[i * 32 + j] = - multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2); - } - } - for (i = 16; i < 32; ++i) { - for (j = 0; j < 32; ++j) { - buffer2[i * 32 + j] = - multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2); - } - } -#if DWT_TYPE == 26 - dyadic_synthesize_26(1, 32, 32, buffer2, 32, output, 32); -#elif DWT_TYPE == 97 - dyadic_synthesize_97(1, 32, 32, buffer2, 32, output, 32); -#elif DWT_TYPE == 53 - dyadic_synthesize_53(1, 32, 32, buffer2, 32, output, 32); -#endif -} - -#elif DWTDCT_TYPE == DWTDCT16X16 - -void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { - // assume output is a 32x32 buffer - // Temporary buffer to hold a 16x16 block for 16x16 inverse dct - int16_t buffer[16 * 16]; - // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt - int16_t buffer2[32 * 32]; - // Note: pitch is in bytes, short_pitch is in short units - const int short_pitch = pitch >> 1; - int i, j; - - // TODO(debargha): Implement more efficiently by adding output pitch - // argument to the idct16x16 function - vp9_short_idct16x16_c_f(input, buffer, pitch, - 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; 
++i) { - vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(*buffer2) * 16); - } - vp9_short_idct16x16_c_f(input + 16, buffer, pitch, - 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) { - vpx_memcpy(buffer2 + i * 32 + 16, buffer + i * 16, sizeof(*buffer2) * 16); - } - vp9_short_idct16x16_c_f(input + 16 * short_pitch, buffer, pitch, - 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) { - vpx_memcpy(buffer2 + i * 32 + 16 * 32, buffer + i * 16, - sizeof(*buffer2) * 16); - } - vp9_short_idct16x16_c_f(input + 16 * short_pitch + 16, buffer, pitch, - 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) { - vpx_memcpy(buffer2 + i * 32 + 16 * 33, buffer + i * 16, - sizeof(*buffer2) * 16); - } -#if DWT_TYPE == 26 - dyadic_synthesize_26(1, 32, 32, buffer2, 32, output, 32); -#elif DWT_TYPE == 97 - dyadic_synthesize_97(1, 32, 32, buffer2, 32, output, 32); -#elif DWT_TYPE == 53 - dyadic_synthesize_53(1, 32, 32, buffer2, 32, output, 32); -#endif -} - -#elif DWTDCT_TYPE == DWTDCT8X8 - -void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { - // assume output is a 32x32 buffer - // Temporary buffer to hold a 16x16 block for 16x16 inverse dct - int16_t buffer[8 * 8]; - // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt - int16_t buffer2[32 * 32]; - // Note: pitch is in bytes, short_pitch is in short units - const int short_pitch = pitch >> 1; - int i, j; - - // TODO(debargha): Implement more efficiently by adding output pitch - // argument to the idct16x16 function - vp9_short_idct8x8_c_f(input, buffer, pitch, - 1 + DWT_PRECISION_BITS); - for (i = 0; i < 8; ++i) { - vpx_memcpy(buffer2 + i * 32, buffer + i * 8, sizeof(*buffer2) * 8); - } - vp9_short_idct8x8_c_f(input + 8, buffer, pitch, - 1 + DWT_PRECISION_BITS); - for (i = 0; i < 8; ++i) { - vpx_memcpy(buffer2 + i * 32 + 8, buffer + i * 8, sizeof(*buffer2) * 8); - } - vp9_short_idct8x8_c_f(input + 8 * short_pitch, buffer, pitch, - 1 + DWT_PRECISION_BITS); - for (i = 0; i < 8; ++i) { - 
vpx_memcpy(buffer2 + i * 32 + 8 * 32, buffer + i * 8, - sizeof(*buffer2) * 8); - } - vp9_short_idct8x8_c_f(input + 8 * short_pitch + 8, buffer, pitch, - 1 + DWT_PRECISION_BITS); - for (i = 0; i < 8; ++i) { - vpx_memcpy(buffer2 + i * 32 + 8 * 33, buffer + i * 8, - sizeof(*buffer2) * 8); - } - for (i = 0; i < 16; ++i) { - for (j = 16; j < 32; ++j) { - buffer2[i * 32 + j] = - multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2); - } - } - for (i = 16; i < 32; ++i) { - for (j = 0; j < 32; ++j) { - buffer2[i * 32 + j] = - multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2); - } - } -#if DWT_TYPE == 26 - dyadic_synthesize_26(2, 32, 32, buffer2, 32, output, 32); -#elif DWT_TYPE == 97 - dyadic_synthesize_97(2, 32, 32, buffer2, 32, output, 32); -#elif DWT_TYPE == 53 - dyadic_synthesize_53(2, 32, 32, buffer2, 32, output, 32); -#endif -} - -#endif - -#if CONFIG_TX64X64 -void vp9_short_idct64x64_c(int16_t *input, int16_t *output, int pitch) { - // assume output is a 64x64 buffer - // Temporary buffer to hold a 16x16 block for 16x16 inverse dct - int16_t buffer[16 * 16]; - // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt - int16_t buffer2[64 * 64]; - // Note: pitch is in bytes, short_pitch is in short units - const int short_pitch = pitch >> 1; - int i, j; - - // TODO(debargha): Implement more efficiently by adding output pitch - // argument to the idct16x16 function - vp9_short_idct16x16_c_f(input, buffer, pitch, - 2 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) { - vpx_memcpy(buffer2 + i * 64, buffer + i * 16, sizeof(*buffer2) * 16); - } -#if DWTDCT_TYPE == DWTDCT16X16_LEAN - for (i = 0; i < 16; ++i) { - for (j = 16; j < 64; ++j) { - buffer2[i * 64 + j] = - multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1); - } - } - for (i = 16; i < 64; ++i) { - for (j = 0; j < 64; ++j) { - buffer2[i * 64 + j] = - multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1); - } - } -#elif DWTDCT_TYPE == DWTDCT16X16 - 
vp9_short_idct16x16_c_f(input + 16, buffer, pitch, - 2 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) { - vpx_memcpy(buffer2 + i * 64 + 16, buffer + i * 16, sizeof(*buffer2) * 16); - } - vp9_short_idct16x16_c_f(input + 16 * short_pitch, buffer, pitch, - 2 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) { - vpx_memcpy(buffer2 + i * 64 + 16 * 64, buffer + i * 16, - sizeof(*buffer2) * 16); - } - vp9_short_idct16x16_c_f(input + 16 * short_pitch + 16, buffer, pitch, - 2 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) { - vpx_memcpy(buffer2 + i * 64 + 16 * 65, buffer + i * 16, - sizeof(*buffer2) * 16); - } - - // Copying and scaling highest bands into buffer2 - for (i = 0; i < 32; ++i) { - for (j = 32; j < 64; ++j) { - buffer2[i * 64 + j] = - multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1); - } - } - for (i = 32; i < 64; ++i) { - for (j = 0; j < 64; ++j) { - buffer2[i * 64 + j] = - multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1); - } - } -#endif // DWTDCT_TYPE - -#if DWT_TYPE == 26 - dyadic_synthesize_26(2, 64, 64, buffer2, 64, output, 64); -#elif DWT_TYPE == 97 - dyadic_synthesize_97(2, 64, 64, buffer2, 64, output, 64); -#elif DWT_TYPE == 53 - dyadic_synthesize_53(2, 64, 64, buffer2, 64, output, 64); -#endif -} -#endif // CONFIG_TX64X64 -#endif // !CONFIG_DWTDCTHYBRID diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c index e7cfe207b..25b59cc5d 100644 --- a/vp9/common/vp9_invtrans.c +++ b/vp9/common/vp9_invtrans.c @@ -11,42 +11,16 @@ #include "vp9/common/vp9_invtrans.h" #include "./vp9_rtcd.h" -static void recon_dcblock(MACROBLOCKD *xd) { - BLOCKD *b = &xd->block[24]; - int i; - - for (i = 0; i < 16; i++) { - xd->block[i].dqcoeff[0] = b->diff[i]; - } -} - -static void recon_dcblock_8x8(MACROBLOCKD *xd) { - BLOCKD *b = &xd->block[24]; // for coeff 0, 2, 8, 10 - - xd->block[0].dqcoeff[0] = b->diff[0]; - xd->block[4].dqcoeff[0] = b->diff[1]; - xd->block[8].dqcoeff[0] = b->diff[4]; - xd->block[12].dqcoeff[0] = 
b->diff[8]; -} - void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch) { BLOCKD *b = &xd->block[block]; if (b->eob <= 1) - xd->inv_xform4x4_1_x8(b->dqcoeff, b->diff, pitch); + xd->inv_txm4x4_1(b->dqcoeff, b->diff, pitch); else - xd->inv_xform4x4_x8(b->dqcoeff, b->diff, pitch); + xd->inv_txm4x4(b->dqcoeff, b->diff, pitch); } void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) { int i; - BLOCKD *blockd = xd->block; - int has_2nd_order = get_2nd_order_usage(xd); - - if (has_2nd_order) { - /* do 2nd order transform on the dc block */ - vp9_short_inv_walsh4x4(blockd[24].dqcoeff, blockd[24].diff); - recon_dcblock(xd); - } for (i = 0; i < 16; i++) { TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]); @@ -85,13 +59,6 @@ void vp9_inverse_transform_b_8x8(int16_t *input_dqcoeff, int16_t *output_coeff, void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) { int i; BLOCKD *blockd = xd->block; - int has_2nd_order = get_2nd_order_usage(xd); - - if (has_2nd_order) { - // do 2nd order transform on the dc block - vp9_short_ihaar2x2(blockd[24].dqcoeff, blockd[24].diff, 8); - recon_dcblock_8x8(xd); // need to change for 8x8 - } for (i = 0; i < 9; i += 8) { TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]); @@ -149,7 +116,11 @@ void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd) { BLOCKD *bd = &xd->block[0]; TX_TYPE tx_type = get_tx_type_16x16(xd, bd); if (tx_type != DCT_DCT) { +#if CONFIG_INTHT16X16 + vp9_short_iht16x16(bd->dqcoeff, bd->diff, 32, tx_type); +#else vp9_ihtllm(bd->dqcoeff, bd->diff, 32, tx_type, 16, bd->eob); +#endif } else { vp9_inverse_transform_b_16x16(&xd->block[0].dqcoeff[0], &xd->block[0].diff[0], 32); diff --git a/vp9/common/vp9_invtrans.h b/vp9/common/vp9_invtrans.h index fd0eb3020..abd5b0fad 100644 --- a/vp9/common/vp9_invtrans.h +++ b/vp9/common/vp9_invtrans.h @@ -15,31 +15,31 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_blockd.h" -extern void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch); +void 
vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch); -extern void vp9_inverse_transform_mb_4x4(MACROBLOCKD *xd); +void vp9_inverse_transform_mb_4x4(MACROBLOCKD *xd); -extern void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd); +void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd); -extern void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd); +void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd); -extern void vp9_inverse_transform_b_8x8(int16_t *input_dqcoeff, +void vp9_inverse_transform_b_8x8(int16_t *input_dqcoeff, int16_t *output_coeff, int pitch); -extern void vp9_inverse_transform_mb_8x8(MACROBLOCKD *xd); +void vp9_inverse_transform_mb_8x8(MACROBLOCKD *xd); -extern void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd); +void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd); -extern void vp9_inverse_transform_mbuv_8x8(MACROBLOCKD *xd); +void vp9_inverse_transform_mbuv_8x8(MACROBLOCKD *xd); -extern void vp9_inverse_transform_b_16x16(int16_t *input_dqcoeff, +void vp9_inverse_transform_b_16x16(int16_t *input_dqcoeff, int16_t *output_coeff, int pitch); -extern void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd); +void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd); -extern void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd); +void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd); -extern void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb); -extern void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb); +void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb); +void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb); #endif // VP9_COMMON_VP9_INVTRANS_H_ diff --git a/vp9/common/vp9_mbpitch.c b/vp9/common/vp9_mbpitch.c index e94144813..ed96292a4 100644 --- a/vp9/common/vp9_mbpitch.c +++ b/vp9/common/vp9_mbpitch.c @@ -102,9 +102,7 @@ void vp9_setup_block_dptrs(MACROBLOCKD *xd) { } } - blockd[24].diff = &xd->diff[384]; - - for (r = 0; r < 25; r++) { + for (r = 0; r < 24; r++) { blockd[r].qcoeff = xd->qcoeff + r * 16; blockd[r].dqcoeff = 
xd->dqcoeff + r * 16; } diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h index 0b7d98a58..d93b7d5fb 100644 --- a/vp9/common/vp9_onyx.h +++ b/vp9/common/vp9_onyx.h @@ -177,6 +177,7 @@ extern "C" int arnr_type; int tile_columns; + int tile_rows; struct vpx_fixed_buf two_pass_stats_in; struct vpx_codec_pkt_list *output_pkt_list; diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index a333a4b02..6fee493f1 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -55,11 +55,11 @@ typedef struct frame_contexts { vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1]; vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1]; vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES_4X4]; - vp9_coeff_probs hybrid_coef_probs_4x4[BLOCK_TYPES_4X4]; + vp9_coeff_probs hybrid_coef_probs_4x4[BLOCK_TYPES_4X4_HYBRID]; vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES_8X8]; - vp9_coeff_probs hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]; + vp9_coeff_probs hybrid_coef_probs_8x8[BLOCK_TYPES_8X8_HYBRID]; vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES_16X16]; - vp9_coeff_probs hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]; + vp9_coeff_probs hybrid_coef_probs_16x16[BLOCK_TYPES_16X16_HYBRID]; vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES_32X32]; nmv_context nmvc; @@ -80,19 +80,19 @@ typedef struct frame_contexts { unsigned int mbsplit_counts[VP9_NUMMBSPLITS]; vp9_coeff_probs pre_coef_probs_4x4[BLOCK_TYPES_4X4]; - vp9_coeff_probs pre_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4]; + vp9_coeff_probs pre_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4_HYBRID]; vp9_coeff_probs pre_coef_probs_8x8[BLOCK_TYPES_8X8]; - vp9_coeff_probs pre_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]; + vp9_coeff_probs pre_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8_HYBRID]; vp9_coeff_probs pre_coef_probs_16x16[BLOCK_TYPES_16X16]; - vp9_coeff_probs pre_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]; + vp9_coeff_probs pre_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16_HYBRID]; vp9_coeff_probs pre_coef_probs_32x32[BLOCK_TYPES_32X32]; vp9_coeff_count 
coef_counts_4x4[BLOCK_TYPES_4X4]; - vp9_coeff_count hybrid_coef_counts_4x4[BLOCK_TYPES_4X4]; + vp9_coeff_count hybrid_coef_counts_4x4[BLOCK_TYPES_4X4_HYBRID]; vp9_coeff_count coef_counts_8x8[BLOCK_TYPES_8X8]; - vp9_coeff_count hybrid_coef_counts_8x8[BLOCK_TYPES_8X8]; + vp9_coeff_count hybrid_coef_counts_8x8[BLOCK_TYPES_8X8_HYBRID]; vp9_coeff_count coef_counts_16x16[BLOCK_TYPES_16X16]; - vp9_coeff_count hybrid_coef_counts_16x16[BLOCK_TYPES_16X16]; + vp9_coeff_count hybrid_coef_counts_16x16[BLOCK_TYPES_16X16_HYBRID]; vp9_coeff_count coef_counts_32x32[BLOCK_TYPES_32X32]; nmv_context_counts NMVcount; @@ -133,7 +133,6 @@ typedef struct VP9Common { struct vpx_internal_error_info error; DECLARE_ALIGNED(16, int16_t, Y1dequant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, int16_t, Y2dequant[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, int16_t, UVdequant[QINDEX_RANGE][16]); int Width; @@ -184,8 +183,6 @@ typedef struct VP9Common { int last_kf_gf_q; /* Q used on the last GF or KF */ int y1dc_delta_q; - int y2dc_delta_q; - int y2ac_delta_q; int uvdc_delta_q; int uvac_delta_q; @@ -217,7 +214,7 @@ typedef struct VP9Common { int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */ - /* Y,U,V,Y2 */ + /* Y,U,V */ ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */ ENTROPY_CONTEXT_PLANES left_context[4]; /* (up to) 4 contexts "" */ @@ -280,7 +277,9 @@ typedef struct VP9Common { int frame_parallel_decoding_mode; int tile_columns, log2_tile_columns; - int cur_tile_mb_col_start, cur_tile_mb_col_end, cur_tile_idx; + int cur_tile_mb_col_start, cur_tile_mb_col_end, cur_tile_col_idx; + int tile_rows, log2_tile_rows; + int cur_tile_mb_row_start, cur_tile_mb_row_end, cur_tile_row_idx; } VP9_COMMON; static int get_free_fb(VP9_COMMON *cm) { diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index 64456a766..eb8de2126 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -18,6 +18,23 @@ * and 
vp9_build_intra_predictors_mbuv_s(MACROBLOCKD *xd). */ +/* Using multiplication and shifting instead of division in diagonal prediction. + * iscale table is calculated from ((1<<16) + (i+2)/2) / (i+2) and used as + * ((A + B) * iscale[i] + (1<<15)) >> 16; + * where A and B are weighted pixel values. + */ +static const unsigned int iscale[64] = { + 32768, 21845, 16384, 13107, 10923, 9362, 8192, 7282, + 6554, 5958, 5461, 5041, 4681, 4369, 4096, 3855, + 3641, 3449, 3277, 3121, 2979, 2849, 2731, 2621, + 2521, 2427, 2341, 2260, 2185, 2114, 2048, 1986, + 1928, 1872, 1820, 1771, 1725, 1680, 1638, 1598, + 1560, 1524, 1489, 1456, 1425, 1394, 1365, 1337, + 1311, 1285, 1260, 1237, 1214, 1192, 1170, 1150, + 1130, 1111, 1092, 1074, 1057, 1040, 1024, 1008, +}; + + static void d27_predictor(uint8_t *ypred_ptr, int y_stride, int n, uint8_t *yabove_row, uint8_t *yleft_col) { int r, c, h, w, v; @@ -29,7 +46,7 @@ static void d27_predictor(uint8_t *ypred_ptr, int y_stride, int n, else a = (yleft_col[r] + yleft_col[r + 1] + 1) >> 1; b = yabove_row[c + 2]; - ypred_ptr[c] = (2 * a + (c + 1) * b + (c + 3) / 2) / (c + 3); + ypred_ptr[c] = ((2 * a + (c + 1) * b) * iscale[1+c] + (1<<15)) >> 16; } for (r = 1; r < n / 2 - 1; r++) { for (c = 0; c < n - 2 - 2 * r; c++) { @@ -38,7 +55,8 @@ static void d27_predictor(uint8_t *ypred_ptr, int y_stride, int n, else a = (yleft_col[r] + yleft_col[r + 1] + 1) >> 1; b = ypred_ptr[(r - 1) * y_stride + c + 2]; - ypred_ptr[r * y_stride + c] = (2 * a + (c + 1) * b + (c + 3) / 2) / (c + 3); + ypred_ptr[r * y_stride + c] = + ((2 * a + (c + 1) * b) * iscale[1+c] + (1<<15)) >> 16; } } for (; r < n - 1; ++r) { @@ -77,7 +95,8 @@ static void d63_predictor(uint8_t *ypred_ptr, int y_stride, int n, else a = (yabove_row[c] + yabove_row[c + 1] + 1) >> 1; b = yleft_col[r + 2]; - ypred_ptr[r * y_stride] = (2 * a + (r + 1) * b + (r + 3) / 2) / (r + 3); + ypred_ptr[r * y_stride] = ((2 * a + (r + 1) * b) * iscale[1+r] + + (1<<15)) >> 16; } for (c = 1; c < n / 2 - 1; c++) { 
for (r = 0; r < n - 2 - 2 * c; r++) { @@ -86,7 +105,8 @@ static void d63_predictor(uint8_t *ypred_ptr, int y_stride, int n, else a = (yabove_row[c] + yabove_row[c + 1] + 1) >> 1; b = ypred_ptr[(r + 2) * y_stride + c - 1]; - ypred_ptr[r * y_stride + c] = (2 * a + (c + 1) * b + (c + 3) / 2) / (c + 3); + ypred_ptr[r * y_stride + c] = ((2 * a + (c + 1) * b) * iscale[1+c] + + (1<<15)) >> 16; } } for (; c < n - 1; ++c) { @@ -119,8 +139,8 @@ static void d45_predictor(uint8_t *ypred_ptr, int y_stride, int n, for (r = 0; r < n - 1; ++r) { for (c = 0; c <= r; ++c) { ypred_ptr[(r - c) * y_stride + c] = - (yabove_row[r + 1] * (c + 1) + - yleft_col[r + 1] * (r - c + 1) + r / 2 + 1) / (r + 2); + ((yabove_row[r + 1] * (c + 1) + + yleft_col[r + 1] * (r - c + 1)) * iscale[r] + (1<<15)) >> 16; } } for (c = 0; c <= r; ++c) { @@ -129,8 +149,8 @@ static void d45_predictor(uint8_t *ypred_ptr, int y_stride, int n, int yleft_ext = yleft_col[r]; // clip_pixel(2 * yleft_col[r] - // yleft_col[r-1]); ypred_ptr[(r - c) * y_stride + c] = - (yabove_ext * (c + 1) + - yleft_ext * (r - c + 1) + r / 2 + 1) / (r + 2); + ((yabove_ext * (c + 1) + + yleft_ext * (r - c + 1)) * iscale[r] + (1<<15)) >> 16; } for (r = 1; r < n; ++r) { for (c = n - r; c < n; ++c) { diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 066989272..066e22dc7 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -29,12 +29,6 @@ forward_decls vp9_common_forward_decls prototype void vp9_dequantize_b "struct blockd *x" specialize vp9_dequantize_b -prototype void vp9_dequantize_b_2x2 "struct blockd *x" -specialize vp9_dequantize_b_2x2 - -prototype void vp9_dequant_dc_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs, const int16_t *dc, struct macroblockd *xd" -specialize vp9_dequant_dc_idct_add_y_block_8x8 - prototype void vp9_dequant_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, 
uint16_t *eobs, struct macroblockd *xd" specialize vp9_dequant_idct_add_y_block_8x8 @@ -44,18 +38,12 @@ specialize vp9_dequant_idct_add_uv_block_8x8 prototype void vp9_dequant_idct_add_16x16 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob" specialize vp9_dequant_idct_add_16x16 -prototype void vp9_dequant_idct_add_8x8 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int dc, int eob" +prototype void vp9_dequant_idct_add_8x8 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob" specialize vp9_dequant_idct_add_8x8 prototype void vp9_dequant_idct_add "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride" specialize vp9_dequant_idct_add -prototype void vp9_dequant_dc_idct_add "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int dc" -specialize vp9_dequant_dc_idct_add - -prototype void vp9_dequant_dc_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs, const int16_t *dcs" -specialize vp9_dequant_dc_idct_add_y_block - prototype void vp9_dequant_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs" specialize vp9_dequant_idct_add_y_block @@ -254,13 +242,13 @@ prototype void vp9_convolve8_vert "const uint8_t *src, int src_stride, uint8_t * specialize vp9_convolve8_vert ssse3 prototype void vp9_convolve8_avg "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8_avg +specialize vp9_convolve8_avg ssse3 prototype void vp9_convolve8_avg_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8_avg_horiz 
+specialize vp9_convolve8_avg_horiz ssse3 prototype void vp9_convolve8_avg_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8_avg_vert +specialize vp9_convolve8_avg_vert ssse3 # # dct @@ -280,9 +268,6 @@ specialize vp9_short_idct10_8x8 prototype void vp9_short_idct1_8x8 "int16_t *input, int16_t *output" specialize vp9_short_idct1_8x8 -prototype void vp9_short_ihaar2x2 "int16_t *input, int16_t *output, int pitch" -specialize vp9_short_ihaar2x2 - prototype void vp9_short_idct16x16 "int16_t *input, int16_t *output, int pitch" specialize vp9_short_idct16x16 @@ -309,31 +294,26 @@ prototype void vp9_short_iht4x4 "int16_t *input, int16_t *output, int pitch, int specialize vp9_short_iht4x4 #endif +#if CONFIG_INTHT16X16 +prototype void vp9_short_iht16x16 "int16_t *input, int16_t *output, int pitch, int tx_type" +specialize vp9_short_iht16x16 +#endif + prototype void vp9_ihtllm "const int16_t *input, int16_t *output, int pitch, int tx_type, int tx_dim, int16_t eobs" specialize vp9_ihtllm -# -# 2nd order -# -prototype void vp9_short_inv_walsh4x4_1 "int16_t *in, int16_t *out" -specialize vp9_short_inv_walsh4x4_1 - -prototype void vp9_short_inv_walsh4x4 "int16_t *in, int16_t *out" -specialize vp9_short_inv_walsh4x4_ - # dct and add prototype void vp9_dc_only_idct_add "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride" specialize vp9_dc_only_idct_add -if [ "$CONFIG_LOSSLESS" = "yes" ]; then prototype void vp9_short_inv_walsh4x4_1_x8 "int16_t *input, int16_t *output, int pitch" +specialize vp9_short_inv_walsh4x4_1_x8 prototype void vp9_short_inv_walsh4x4_x8 "int16_t *input, int16_t *output, int pitch" +specialize vp9_short_inv_walsh4x4_x8 prototype void vp9_dc_only_inv_walsh_add "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride" -prototype void vp9_short_inv_walsh4x4_1_lossless "int16_t *in, 
int16_t *out" -prototype void vp9_short_inv_walsh4x4_lossless "int16_t *in, int16_t *out" -fi +specialize vp9_dc_only_inv_walsh_add prototype unsigned int vp9_sad32x3 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, int max_sad" specialize vp9_sad32x3 @@ -547,7 +527,7 @@ specialize vp9_sub_pixel_mse32x32 prototype unsigned int vp9_get_mb_ss "const int16_t *" specialize vp9_get_mb_ss mmx sse2 # ENCODEMB INVOKE -prototype int vp9_mbblock_error "struct macroblock *mb, int dc" +prototype int vp9_mbblock_error "struct macroblock *mb" specialize vp9_mbblock_error mmx sse2 vp9_mbblock_error_sse2=vp9_mbblock_error_xmm @@ -591,27 +571,18 @@ specialize vp9_fht prototype void vp9_short_fdct8x8 "int16_t *InputData, int16_t *OutputData, int pitch" specialize vp9_short_fdct8x8 -prototype void vp9_short_fhaar2x2 "int16_t *InputData, int16_t *OutputData, int pitch" -specialize vp9_short_fhaar2x2 - prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int pitch" specialize vp9_short_fdct4x4 prototype void vp9_short_fdct8x4 "int16_t *InputData, int16_t *OutputData, int pitch" specialize vp9_short_fdct8x4 -prototype void vp9_short_walsh4x4 "int16_t *InputData, int16_t *OutputData, int pitch" -specialize vp9_short_walsh4x4 - prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int pitch" specialize vp9_short_fdct32x32 prototype void vp9_short_fdct16x16 "int16_t *InputData, int16_t *OutputData, int pitch" specialize vp9_short_fdct16x16 -prototype void vp9_short_walsh4x4_lossless "int16_t *InputData, int16_t *OutputData, int pitch" -specialize vp9_short_walsh4x4_lossless - prototype void vp9_short_walsh4x4_x8 "int16_t *InputData, int16_t *OutputData, int pitch" specialize vp9_short_walsh4x4_x8 diff --git a/vp9/common/vp9_tile_common.c b/vp9/common/vp9_tile_common.c index 02e0d1461..29f89b618 100644 --- a/vp9/common/vp9_tile_common.c +++ b/vp9/common/vp9_tile_common.c @@ -10,17 +10,29 @@ #include 
"vp9/common/vp9_tile_common.h" -void vp9_get_tile_offsets(VP9_COMMON *cm, int *min_tile_off, - int *max_tile_off) { - const int log2_n_tiles = cm->log2_tile_columns; - const int tile_idx = cm->cur_tile_idx; - const int mb_cols = cm->mb_cols; - const int sb_cols = (mb_cols + 3) >> 2; - const int sb_off1 = (tile_idx * sb_cols) >> log2_n_tiles; - const int sb_off2 = ((tile_idx + 1) * sb_cols) >> log2_n_tiles; - - *min_tile_off = (sb_off1 << 2) > mb_cols ? mb_cols : (sb_off1 << 2); - *max_tile_off = (sb_off2 << 2) > mb_cols ? mb_cols : (sb_off2 << 2); +static void vp9_get_tile_offsets(VP9_COMMON *cm, int *min_tile_off, + int *max_tile_off, int tile_idx, + int log2_n_tiles, int n_mbs) { + const int n_sbs = (n_mbs + 3) >> 2; + const int sb_off1 = (tile_idx * n_sbs) >> log2_n_tiles; + const int sb_off2 = ((tile_idx + 1) * n_sbs) >> log2_n_tiles; + + *min_tile_off = (sb_off1 << 2) > n_mbs ? n_mbs : (sb_off1 << 2); + *max_tile_off = (sb_off2 << 2) > n_mbs ? n_mbs : (sb_off2 << 2); +} + +void vp9_get_tile_col_offsets(VP9_COMMON *cm, int tile_col_idx) { + cm->cur_tile_col_idx = tile_col_idx; + vp9_get_tile_offsets(cm, &cm->cur_tile_mb_col_start, + &cm->cur_tile_mb_col_end, tile_col_idx, + cm->log2_tile_columns, cm->mb_cols); +} + +void vp9_get_tile_row_offsets(VP9_COMMON *cm, int tile_row_idx) { + cm->cur_tile_row_idx = tile_row_idx; + vp9_get_tile_offsets(cm, &cm->cur_tile_mb_row_start, + &cm->cur_tile_mb_row_end, tile_row_idx, + cm->log2_tile_rows, cm->mb_rows); } #define MIN_TILE_WIDTH_SBS (MIN_TILE_WIDTH >> 6) diff --git a/vp9/common/vp9_tile_common.h b/vp9/common/vp9_tile_common.h index 653b6b4f6..92bf50897 100644 --- a/vp9/common/vp9_tile_common.h +++ b/vp9/common/vp9_tile_common.h @@ -16,8 +16,9 @@ #define MIN_TILE_WIDTH 256 #define MAX_TILE_WIDTH 4096 -extern void vp9_get_tile_offsets(VP9_COMMON *cm, int *min_tile_off, - int *max_tile_off); +extern void vp9_get_tile_col_offsets(VP9_COMMON *cm, int tile_col_idx); + +extern void vp9_get_tile_row_offsets(VP9_COMMON *cm, 
int tile_row_idx); extern void vp9_get_tile_n_bits(VP9_COMMON *cm, int *min_log2_n_tiles, int *delta_log2_n_tiles); diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c index fbc95b6ce..6d3bb021a 100644 --- a/vp9/common/x86/vp9_asm_stubs.c +++ b/vp9/common/x86/vp9_asm_stubs.c @@ -79,6 +79,48 @@ void vp9_filter_block1d4_h8_ssse3(const unsigned char *src_ptr, unsigned int output_height, const short *filter); +void vp9_filter_block1d16_v8_avg_ssse3(const unsigned char *src_ptr, + const unsigned int src_pitch, + unsigned char *output_ptr, + unsigned int out_pitch, + unsigned int output_height, + const short *filter); + +void vp9_filter_block1d16_h8_avg_ssse3(const unsigned char *src_ptr, + const unsigned int src_pitch, + unsigned char *output_ptr, + unsigned int out_pitch, + unsigned int output_height, + const short *filter); + +void vp9_filter_block1d8_v8_avg_ssse3(const unsigned char *src_ptr, + const unsigned int src_pitch, + unsigned char *output_ptr, + unsigned int out_pitch, + unsigned int output_height, + const short *filter); + +void vp9_filter_block1d8_h8_avg_ssse3(const unsigned char *src_ptr, + const unsigned int src_pitch, + unsigned char *output_ptr, + unsigned int out_pitch, + unsigned int output_height, + const short *filter); + +void vp9_filter_block1d4_v8_avg_ssse3(const unsigned char *src_ptr, + const unsigned int src_pitch, + unsigned char *output_ptr, + unsigned int out_pitch, + unsigned int output_height, + const short *filter); + +void vp9_filter_block1d4_h8_avg_ssse3(const unsigned char *src_ptr, + const unsigned int src_pitch, + unsigned char *output_ptr, + unsigned int out_pitch, + unsigned int output_height, + const short *filter); + void vp9_convolve8_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, @@ -155,6 +197,82 @@ void vp9_convolve8_vert_ssse3(const uint8_t *src, int src_stride, } } +void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, int 
src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + if (x_step_q4 == 16 && filter_x[3] != 128) { + while (w >= 16) { + vp9_filter_block1d16_h8_avg_ssse3(src, src_stride, + dst, dst_stride, + h, filter_x); + src += 16; + dst += 16; + w -= 16; + } + while (w >= 8) { + vp9_filter_block1d8_h8_avg_ssse3(src, src_stride, + dst, dst_stride, + h, filter_x); + src += 8; + dst += 8; + w -= 8; + } + while (w >= 4) { + vp9_filter_block1d4_h8_avg_ssse3(src, src_stride, + dst, dst_stride, + h, filter_x); + src += 4; + dst += 4; + w -= 4; + } + } + if (w) { + vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); + } +} + +void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + if (y_step_q4 == 16 && filter_y[3] != 128) { + while (w >= 16) { + vp9_filter_block1d16_v8_avg_ssse3(src - src_stride * 3, src_stride, + dst, dst_stride, + h, filter_y); + src += 16; + dst += 16; + w -= 16; + } + while (w >= 8) { + vp9_filter_block1d8_v8_avg_ssse3(src - src_stride * 3, src_stride, + dst, dst_stride, + h, filter_y); + src += 8; + dst += 8; + w -= 8; + } + while (w >= 4) { + vp9_filter_block1d4_v8_avg_ssse3(src - src_stride * 3, src_stride, + dst, dst_stride, + h, filter_y); + src += 4; + dst += 4; + w -= 4; + } + } + if (w) { + vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); + } +} + void vp9_convolve8_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, @@ -200,4 +318,50 @@ void vp9_convolve8_ssse3(const uint8_t *src, int src_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h); } + +void vp9_convolve8_avg_ssse3(const uint8_t *src, int src_stride, + uint8_t *dst, int 
dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*23); + + // check w/h due to fixed size fdata2 array + assert(w <= 16); + assert(h <= 16); + + if (x_step_q4 == 16 && y_step_q4 == 16 && + filter_x[3] != 128 && filter_y[3] != 128) { + if (w == 16) { + vp9_filter_block1d16_h8_ssse3(src - 3 * src_stride, src_stride, + fdata2, 16, + h + 7, filter_x); + vp9_filter_block1d16_v8_avg_ssse3(fdata2, 16, + dst, dst_stride, + h, filter_y); + return; + } + if (w == 8) { + vp9_filter_block1d8_h8_ssse3(src - 3 * src_stride, src_stride, + fdata2, 16, + h + 7, filter_x); + vp9_filter_block1d8_v8_avg_ssse3(fdata2, 16, + dst, dst_stride, + h, filter_y); + return; + } + if (w == 4) { + vp9_filter_block1d4_h8_ssse3(src - 3 * src_stride, src_stride, + fdata2, 16, + h + 7, filter_x); + vp9_filter_block1d4_v8_avg_ssse3(fdata2, 16, + dst, dst_stride, + h, filter_y); + return; + } + } + vp9_convolve8_avg_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); +} #endif diff --git a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm index 5f039454a..fa24f4cd0 100644 --- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm +++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm @@ -21,34 +21,8 @@ ; ;*************************************************************************************/ -;void vp9_filter_block1d8_v8_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vp9_filter_block1d4_v8_ssse3) PRIVATE -sym(vp9_filter_block1d4_v8_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16*5 - %define k0k1 [rsp + 16*0] - %define k2k3 [rsp + 16*1] - %define k4k5 [rsp + 16*2] - %define k6k7 [rsp + 16*3] - 
%define krd [rsp + 16*4] +%macro VERTx4 1 mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr @@ -86,7 +60,7 @@ sym(vp9_filter_block1d4_v8_ssse3): lea rbx, [rdx + rdx*4] add rbx, rdx ;pitch * 6 -.vp9_filter_block1d4_v8_ssse3_loop: +.loop: movd xmm0, [rsi] ;A movd xmm1, [rsi + rdx] ;B movd xmm2, [rsi + rdx * 2] ;C @@ -117,7 +91,10 @@ sym(vp9_filter_block1d4_v8_ssse3): add rsi, rdx add rax, rdx - +%if %1 + movd xmm1, [rdi] + pavgb xmm0, xmm1 +%endif movd [rdi], xmm0 %if ABI_IS_32BIT @@ -126,47 +103,10 @@ sym(vp9_filter_block1d4_v8_ssse3): add rdi, r8 %endif dec rcx - jnz .vp9_filter_block1d4_v8_ssse3_loop - - add rsp, 16*5 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_filter_block1d8_v8_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vp9_filter_block1d8_v8_ssse3) PRIVATE -sym(vp9_filter_block1d8_v8_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16*5 - %define k0k1 [rsp + 16*0] - %define k2k3 [rsp + 16*1] - %define k4k5 [rsp + 16*2] - %define k6k7 [rsp + 16*3] - %define krd [rsp + 16*4] + jnz .loop +%endm +%macro VERTx8 1 mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr @@ -204,7 +144,7 @@ sym(vp9_filter_block1d8_v8_ssse3): lea rbx, [rdx + rdx*4] add rbx, rdx ;pitch * 6 -.vp9_filter_block1d8_v8_ssse3_loop: +.loop: movq xmm0, [rsi] ;A movq xmm1, [rsi + rdx] ;B movq xmm2, [rsi + rdx * 2] ;C @@ -235,7 +175,10 @@ sym(vp9_filter_block1d8_v8_ssse3): add rsi, rdx add rax, rdx - +%if %1 + movq xmm1, [rdi] + pavgb xmm0, xmm1 +%endif movq [rdi], xmm0 %if ABI_IS_32BIT @@ -244,47 +187,11 @@ sym(vp9_filter_block1d8_v8_ssse3): add rdi, r8 %endif dec rcx - jnz .vp9_filter_block1d8_v8_ssse3_loop - - add rsp, 16*5 - 
pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret + jnz .loop +%endm -;void vp9_filter_block1d16_v8_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vp9_filter_block1d16_v8_ssse3) PRIVATE -sym(vp9_filter_block1d16_v8_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16*5 - %define k0k1 [rsp + 16*0] - %define k2k3 [rsp + 16*1] - %define k4k5 [rsp + 16*2] - %define k6k7 [rsp + 16*3] - %define krd [rsp + 16*4] +%macro VERTx16 1 mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr @@ -322,7 +229,7 @@ sym(vp9_filter_block1d16_v8_ssse3): lea rbx, [rdx + rdx*4] add rbx, rdx ;pitch * 6 -.vp9_filter_block1d16_v8_ssse3_loop: +.loop: movq xmm0, [rsi] ;A movq xmm1, [rsi + rdx] ;B movq xmm2, [rsi + rdx * 2] ;C @@ -350,7 +257,10 @@ sym(vp9_filter_block1d16_v8_ssse3): psraw xmm0, 7 packuswb xmm0, xmm0 - +%if %1 + movq xmm1, [rdi] + pavgb xmm0, xmm1 +%endif movq [rdi], xmm0 movq xmm0, [rsi + 8] ;A @@ -385,6 +295,10 @@ sym(vp9_filter_block1d16_v8_ssse3): add rsi, rdx add rax, rdx +%if %1 + movq xmm1, [rdi+8] + pavgb xmm0, xmm1 +%endif movq [rdi+8], xmm0 @@ -394,7 +308,38 @@ sym(vp9_filter_block1d16_v8_ssse3): add rdi, r8 %endif dec rcx - jnz .vp9_filter_block1d16_v8_ssse3_loop + jnz .loop +%endm + +;void vp9_filter_block1d8_v8_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pitch, +; unsigned char *output_ptr, +; unsigned int out_pitch, +; unsigned int output_height, +; short *filter +;) +global sym(vp9_filter_block1d4_v8_ssse3) PRIVATE +sym(vp9_filter_block1d4_v8_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + push rbx + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16*5 + %define k0k1 [rsp + 16*0] + %define 
k2k3 [rsp + 16*1] + %define k4k5 [rsp + 16*2] + %define k6k7 [rsp + 16*3] + %define krd [rsp + 16*4] + + VERTx4 0 add rsp, 16*5 pop rsp @@ -407,24 +352,100 @@ sym(vp9_filter_block1d16_v8_ssse3): pop rbp ret -;void vp9_filter_block1d4_h8_ssse3 +;void vp9_filter_block1d8_v8_ssse3 ;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, +; unsigned char *src_ptr, +; unsigned int src_pitch, +; unsigned char *output_ptr, +; unsigned int out_pitch, +; unsigned int output_height, ; short *filter ;) -global sym(vp9_filter_block1d4_h8_ssse3) PRIVATE -sym(vp9_filter_block1d4_h8_ssse3): +global sym(vp9_filter_block1d8_v8_ssse3) PRIVATE +sym(vp9_filter_block1d8_v8_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + push rbx + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16*5 + %define k0k1 [rsp + 16*0] + %define k2k3 [rsp + 16*1] + %define k4k5 [rsp + 16*2] + %define k6k7 [rsp + 16*3] + %define krd [rsp + 16*4] + + VERTx8 0 + + add rsp, 16*5 + pop rsp + pop rbx + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +;void vp9_filter_block1d16_v8_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pitch, +; unsigned char *output_ptr, +; unsigned int out_pitch, +; unsigned int output_height, +; short *filter +;) +global sym(vp9_filter_block1d16_v8_ssse3) PRIVATE +sym(vp9_filter_block1d16_v8_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + push rbx + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16*5 + %define k0k1 [rsp + 16*0] + %define k2k3 [rsp + 16*1] + %define k4k5 [rsp + 16*2] + %define k6k7 [rsp + 16*3] + %define krd [rsp + 16*4] + + VERTx16 0 + + add rsp, 16*5 + pop rsp + pop rbx + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +global 
sym(vp9_filter_block1d4_v8_avg_ssse3) PRIVATE +sym(vp9_filter_block1d4_v8_avg_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 - GET_GOT rbx push rsi push rdi + push rbx ; end prolog ALIGN_STACK 16, rax @@ -435,6 +456,86 @@ sym(vp9_filter_block1d4_h8_ssse3): %define k6k7 [rsp + 16*3] %define krd [rsp + 16*4] + VERTx4 1 + + add rsp, 16*5 + pop rsp + pop rbx + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vp9_filter_block1d8_v8_avg_ssse3) PRIVATE +sym(vp9_filter_block1d8_v8_avg_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + push rbx + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16*5 + %define k0k1 [rsp + 16*0] + %define k2k3 [rsp + 16*1] + %define k4k5 [rsp + 16*2] + %define k6k7 [rsp + 16*3] + %define krd [rsp + 16*4] + + VERTx8 1 + + add rsp, 16*5 + pop rsp + pop rbx + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vp9_filter_block1d16_v8_avg_ssse3) PRIVATE +sym(vp9_filter_block1d16_v8_avg_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + push rbx + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16*5 + %define k0k1 [rsp + 16*0] + %define k2k3 [rsp + 16*1] + %define k4k5 [rsp + 16*2] + %define k6k7 [rsp + 16*3] + %define krd [rsp + 16*4] + + VERTx16 1 + + add rsp, 16*5 + pop rsp + pop rbx + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +%macro HORIZx4 1 mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr @@ -464,7 +565,7 @@ sym(vp9_filter_block1d4_h8_ssse3): movsxd rdx, dword ptr arg(3) ;output_pitch movsxd rcx, dword ptr arg(4) ;output_height -.filter_block1d4_h8_rowloop_ssse3: +.loop: movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4 movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12 @@ -491,54 +592,19 @@ sym(vp9_filter_block1d4_h8_ssse3): paddsw 
xmm0, krd psraw xmm0, 7 packuswb xmm0, xmm0 - +%if %1 + movd xmm1, [rdi] + pavgb xmm0, xmm1 +%endif lea rsi, [rsi + rax] movd [rdi], xmm0 lea rdi, [rdi + rdx] dec rcx - jnz .filter_block1d4_h8_rowloop_ssse3 - - add rsp, 16*5 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_filter_block1d8_h8_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vp9_filter_block1d8_h8_ssse3) PRIVATE -sym(vp9_filter_block1d8_h8_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16*5 - %define k0k1 [rsp + 16*0] - %define k2k3 [rsp + 16*1] - %define k4k5 [rsp + 16*2] - %define k6k7 [rsp + 16*3] - %define krd [rsp + 16*4] + jnz .loop +%endm +%macro HORIZx8 1 mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr @@ -568,7 +634,7 @@ sym(vp9_filter_block1d8_h8_ssse3): movsxd rdx, dword ptr arg(3) ;output_pitch movsxd rcx, dword ptr arg(4) ;output_height -.filter_block1d8_h8_rowloop_ssse3: +.loop: movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4 movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12 @@ -595,54 +661,20 @@ sym(vp9_filter_block1d8_h8_ssse3): paddsw xmm0, krd psraw xmm0, 7 packuswb xmm0, xmm0 +%if %1 + movq xmm1, [rdi] + pavgb xmm0, xmm1 +%endif lea rsi, [rsi + rax] movq [rdi], xmm0 lea rdi, [rdi + rdx] dec rcx - jnz .filter_block1d8_h8_rowloop_ssse3 - - add rsp, 16*5 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_filter_block1d16_h8_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vp9_filter_block1d16_h8_ssse3) PRIVATE 
-sym(vp9_filter_block1d16_h8_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16*5 - %define k0k1 [rsp + 16*0] - %define k2k3 [rsp + 16*1] - %define k4k5 [rsp + 16*2] - %define k6k7 [rsp + 16*3] - %define krd [rsp + 16*4] + jnz .loop +%endm +%macro HORIZx16 1 mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr @@ -672,7 +704,7 @@ sym(vp9_filter_block1d16_h8_ssse3): movsxd rdx, dword ptr arg(3) ;output_pitch movsxd rcx, dword ptr arg(4) ;output_height -.filter_block1d16_h8_rowloop_ssse3: +.loop: movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4 movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12 @@ -727,13 +759,48 @@ sym(vp9_filter_block1d16_h8_ssse3): psraw xmm3, 7 packuswb xmm3, xmm3 punpcklqdq xmm0, xmm3 +%if %1 + movdqa xmm1, [rdi] + pavgb xmm0, xmm1 +%endif lea rsi, [rsi + rax] movdqa [rdi], xmm0 lea rdi, [rdi + rdx] dec rcx - jnz .filter_block1d16_h8_rowloop_ssse3 + jnz .loop +%endm + +;void vp9_filter_block1d4_h8_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; unsigned int output_pitch, +; unsigned int output_height, +; short *filter +;) +global sym(vp9_filter_block1d4_h8_ssse3) PRIVATE +sym(vp9_filter_block1d4_h8_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16*5 + %define k0k1 [rsp + 16*0] + %define k2k3 [rsp + 16*1] + %define k4k5 [rsp + 16*2] + %define k6k7 [rsp + 16*3] + %define krd [rsp + 16*4] + + HORIZx4 0 add rsp, 16*5 pop rsp @@ -747,7 +814,188 @@ sym(vp9_filter_block1d16_h8_ssse3): pop rbp ret +;void vp9_filter_block1d8_h8_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; unsigned int output_pitch, +; unsigned int output_height, +; short *filter +;) +global sym(vp9_filter_block1d8_h8_ssse3) PRIVATE 
+sym(vp9_filter_block1d8_h8_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + ALIGN_STACK 16, rax + sub rsp, 16*5 + %define k0k1 [rsp + 16*0] + %define k2k3 [rsp + 16*1] + %define k4k5 [rsp + 16*2] + %define k6k7 [rsp + 16*3] + %define krd [rsp + 16*4] + + HORIZx8 0 + + add rsp, 16*5 + pop rsp + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +;void vp9_filter_block1d16_h8_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; unsigned int output_pitch, +; unsigned int output_height, +; short *filter +;) +global sym(vp9_filter_block1d16_h8_ssse3) PRIVATE +sym(vp9_filter_block1d16_h8_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16*5 + %define k0k1 [rsp + 16*0] + %define k2k3 [rsp + 16*1] + %define k4k5 [rsp + 16*2] + %define k6k7 [rsp + 16*3] + %define krd [rsp + 16*4] + + HORIZx16 0 + + add rsp, 16*5 + pop rsp + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vp9_filter_block1d4_h8_avg_ssse3) PRIVATE +sym(vp9_filter_block1d4_h8_avg_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16*5 + %define k0k1 [rsp + 16*0] + %define k2k3 [rsp + 16*1] + %define k4k5 [rsp + 16*2] + %define k6k7 [rsp + 16*3] + %define krd [rsp + 16*4] + + HORIZx4 1 + + add rsp, 16*5 + pop rsp + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vp9_filter_block1d8_h8_avg_ssse3) PRIVATE +sym(vp9_filter_block1d8_h8_avg_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16*5 + 
%define k0k1 [rsp + 16*0] + %define k2k3 [rsp + 16*1] + %define k4k5 [rsp + 16*2] + %define k6k7 [rsp + 16*3] + %define krd [rsp + 16*4] + + HORIZx8 1 + + add rsp, 16*5 + pop rsp + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vp9_filter_block1d16_h8_avg_ssse3) PRIVATE +sym(vp9_filter_block1d16_h8_avg_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16*5 + %define k0k1 [rsp + 16*0] + %define k2k3 [rsp + 16*1] + %define k4k5 [rsp + 16*2] + %define k6k7 [rsp + 16*3] + %define krd [rsp + 16*4] + + HORIZx16 1 + + add rsp, 16*5 + pop rsp + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret SECTION_RODATA align 16 shuf_t0t1: diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index b8e867a0e..483332288 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -83,7 +83,6 @@ void vp9_init_de_quantizer(VP9D_COMP *pbi) { for (Q = 0; Q < QINDEX_RANGE; Q++) { pc->Y1dequant[Q][0] = (int16_t)vp9_dc_quant(Q, pc->y1dc_delta_q); - pc->Y2dequant[Q][0] = (int16_t)vp9_dc2quant(Q, pc->y2dc_delta_q); pc->UVdequant[Q][0] = (int16_t)vp9_dc_uv_quant(Q, pc->uvdc_delta_q); /* all the ac values =; */ @@ -91,7 +90,6 @@ void vp9_init_de_quantizer(VP9D_COMP *pbi) { int rc = vp9_default_zig_zag1d_4x4[i]; pc->Y1dequant[Q][rc] = (int16_t)vp9_ac_yquant(Q); - pc->Y2dequant[Q][rc] = (int16_t)vp9_ac2quant(Q, pc->y2ac_delta_q); pc->UVdequant[Q][rc] = (int16_t)vp9_ac_uv_quant(Q, pc->uvac_delta_q); } } @@ -124,46 +122,25 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) { xd->block[i].dequant = pc->Y1dequant[QIndex]; } -#if CONFIG_LOSSLESS - if (!QIndex) { - pbi->mb.inv_xform4x4_1_x8 = vp9_short_inv_walsh4x4_1_x8; - pbi->mb.inv_xform4x4_x8 = vp9_short_inv_walsh4x4_x8; - pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1_lossless; 
- pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless; - pbi->idct_add = vp9_dequant_idct_add_lossless_c; - pbi->dc_idct_add = vp9_dequant_dc_idct_add_lossless_c; - pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c; - pbi->idct_add_y_block = vp9_dequant_idct_add_y_block_lossless_c; - pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c; - } else { - pbi->mb.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1; - pbi->mb.inv_xform4x4_x8 = vp9_short_idct4x4llm; - pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1; - pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4; - pbi->idct_add = vp9_dequant_idct_add; - pbi->dc_idct_add = vp9_dequant_dc_idct_add; - pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block; - pbi->idct_add_y_block = vp9_dequant_idct_add_y_block; - pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block; + xd->inv_txm4x4_1 = vp9_short_idct4x4llm_1; + xd->inv_txm4x4 = vp9_short_idct4x4llm; + xd->itxm_add = vp9_dequant_idct_add; + xd->dc_only_itxm_add = vp9_dc_only_idct_add_c; + xd->itxm_add_y_block = vp9_dequant_idct_add_y_block; + xd->itxm_add_uv_block = vp9_dequant_idct_add_uv_block; + if (xd->lossless) { + assert(QIndex == 0); + xd->inv_txm4x4_1 = vp9_short_inv_walsh4x4_1_x8; + xd->inv_txm4x4 = vp9_short_inv_walsh4x4_x8; + xd->itxm_add = vp9_dequant_idct_add_lossless_c; + xd->dc_only_itxm_add = vp9_dc_only_inv_walsh_add_c; + xd->itxm_add_y_block = vp9_dequant_idct_add_y_block_lossless_c; + xd->itxm_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c; } -#else - pbi->mb.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1; - pbi->mb.inv_xform4x4_x8 = vp9_short_idct4x4llm; - pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1; - pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4; - pbi->idct_add = vp9_dequant_idct_add; - pbi->dc_idct_add = vp9_dequant_dc_idct_add; - pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block; - pbi->idct_add_y_block = vp9_dequant_idct_add_y_block; - pbi->idct_add_uv_block = 
vp9_dequant_idct_add_uv_block; -#endif for (i = 16; i < 24; i++) { xd->block[i].dequant = pc->UVdequant[QIndex]; } - - xd->block[24].dequant = pc->Y2dequant[QIndex]; - } /* skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it @@ -230,7 +207,6 @@ static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd, BOOL_DECODER* const bc) { BLOCKD *bd = &xd->block[0]; TX_TYPE tx_type = get_tx_type_16x16(xd, bd); - assert(get_2nd_order_usage(xd) == 0); #ifdef DEC_DEBUG if (dec_debug) { int i; @@ -282,7 +258,6 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, #endif if (tx_type != DCT_DCT || xd->mode_info_context->mbmi.mode == I8X8_PRED) { int i; - assert(get_2nd_order_usage(xd) == 0); for (i = 0; i < 4; i++) { int ib = vp9_i8x8_block[i]; int idx = (ib & 0x02) ? (ib + 2) : ib; @@ -302,38 +277,16 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->eobs[idx]); } else { vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, - 0, xd->eobs[idx]); + xd->eobs[idx]); } } - } else if (xd->mode_info_context->mbmi.mode == SPLITMV) { - assert(get_2nd_order_usage(xd) == 0); + } else { vp9_dequant_idct_add_y_block_8x8(xd->qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, xd->dst.y_stride, xd->eobs, xd); - } else { - BLOCKD *b = &xd->block[24]; - assert(get_2nd_order_usage(xd) == 1); - vp9_dequantize_b_2x2(b); - vp9_short_ihaar2x2(&b->dqcoeff[0], b->diff, 8); - ((int *)b->qcoeff)[0] = 0; // 2nd order block are set to 0 after idct - ((int *)b->qcoeff)[1] = 0; - ((int *)b->qcoeff)[2] = 0; - ((int *)b->qcoeff)[3] = 0; - ((int *)b->qcoeff)[4] = 0; - ((int *)b->qcoeff)[5] = 0; - ((int *)b->qcoeff)[6] = 0; - ((int *)b->qcoeff)[7] = 0; - vp9_dequant_dc_idct_add_y_block_8x8(xd->qcoeff, - xd->block[0].dequant, - xd->predictor, - xd->dst.y_buffer, - xd->dst.y_stride, - xd->eobs, - xd->block[24].diff, - xd); } // Now do UV @@ -345,15 +298,15 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, int i8x8mode = 
b->bmi.as_mode.first; b = &xd->block[16 + i]; vp9_intra_uv4x4_predict(xd, &xd->block[16 + i], i8x8mode, b->predictor); - pbi->idct_add(b->qcoeff, b->dequant, b->predictor, + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride); b = &xd->block[20 + i]; vp9_intra_uv4x4_predict(xd, &xd->block[20 + i], i8x8mode, b->predictor); - pbi->idct_add(b->qcoeff, b->dequant, b->predictor, + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride); } } else if (xd->mode_info_context->mbmi.mode == SPLITMV) { - pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, + xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs + 16); } else { @@ -381,7 +334,6 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, int i, eobtotal = 0; MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode; if (mode == I8X8_PRED) { - assert(get_2nd_order_usage(xd) == 0); for (i = 0; i < 4; i++) { int ib = vp9_i8x8_block[i]; const int iblock[4] = {0, 1, 4, 5}; @@ -400,21 +352,20 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, *(b->base_dst) + b->dst, 16, b->dst_stride, b->eob); } else { - vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride); } } b = &xd->block[16 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); - pbi->idct_add(b->qcoeff, b->dequant, b->predictor, + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride); b = &xd->block[20 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); - pbi->idct_add(b->qcoeff, b->dequant, b->predictor, + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride); } } else if (mode == B_PRED) { - 
assert(get_2nd_order_usage(xd) == 0); for (i = 0; i < 16; i++) { int b_mode; BLOCKD *b = &xd->block[i]; @@ -434,32 +385,29 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, *(b->base_dst) + b->dst, 16, b->dst_stride, b->eob); } else { - vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride); } } if (!xd->mode_info_context->mbmi.mb_skip_coeff) { vp9_decode_mb_tokens_4x4_uv(pbi, xd, bc); } - xd->above_context->y2 = 0; - xd->left_context->y2 = 0; vp9_build_intra_predictors_mbuv(xd); - pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, + xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs + 16); - } else if (mode == SPLITMV) { - assert(get_2nd_order_usage(xd) == 0); - pbi->idct_add_y_block(xd->qcoeff, + } else if (mode == SPLITMV || get_tx_type_4x4(xd, &xd->block[0]) == DCT_DCT) { + xd->itxm_add_y_block(xd->qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, xd->dst.y_stride, xd->eobs); - pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, + xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, @@ -484,50 +432,20 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, } } #endif - tx_type = get_tx_type_4x4(xd, &xd->block[0]); - if (tx_type != DCT_DCT) { - assert(get_2nd_order_usage(xd) == 0); - for (i = 0; i < 16; i++) { - BLOCKD *b = &xd->block[i]; - tx_type = get_tx_type_4x4(xd, b); - if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, - b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, - b->dst_stride, b->eob); - } else { - vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); - } - } - } else { - BLOCKD *b = &xd->block[24]; - assert(get_2nd_order_usage(xd) == 1); - 
vp9_dequantize_b(b); - if (xd->eobs[24] > 1) { - vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; - ((int *)b->qcoeff)[1] = 0; - ((int *)b->qcoeff)[2] = 0; - ((int *)b->qcoeff)[3] = 0; - ((int *)b->qcoeff)[4] = 0; - ((int *)b->qcoeff)[5] = 0; - ((int *)b->qcoeff)[6] = 0; - ((int *)b->qcoeff)[7] = 0; + for (i = 0; i < 16; i++) { + BLOCKD *b = &xd->block[i]; + tx_type = get_tx_type_4x4(xd, b); + if (tx_type != DCT_DCT) { + vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, + b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, + b->dst_stride, b->eob); } else { - xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride); } - vp9_dequantize_b(b); - pbi->dc_idct_add_y_block(xd->qcoeff, - xd->block[0].dequant, - xd->predictor, - xd->dst.y_buffer, - xd->dst.y_stride, - xd->eobs, - xd->block[24].diff); } - pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, + xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, @@ -567,7 +485,6 @@ static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, BOOL_DECODER* const bc, int n, int maska, int shiftb) { int x_idx = n & maska, y_idx = n >> shiftb; - BLOCKD *b = &xd->block[24]; TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[0]); if (tx_type != DCT_DCT) { int i; @@ -594,42 +511,26 @@ static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, + x_idx * 16 + (i & 1) * 8, xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride + x_idx * 16 + (i & 1) * 8, - stride, stride, 0, b->eob); + stride, stride, b->eob); } - vp9_dequant_idct_add_uv_block_8x8_inplace_c( - xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.uv_stride, xd->eobs + 16, xd); } } else { - vp9_dequantize_b_2x2(b); - 
vp9_short_ihaar2x2(&b->dqcoeff[0], b->diff, 8); - ((int *)b->qcoeff)[0] = 0; // 2nd order block are set to 0 after idct - ((int *)b->qcoeff)[1] = 0; - ((int *)b->qcoeff)[2] = 0; - ((int *)b->qcoeff)[3] = 0; - ((int *)b->qcoeff)[4] = 0; - ((int *)b->qcoeff)[5] = 0; - ((int *)b->qcoeff)[6] = 0; - ((int *)b->qcoeff)[7] = 0; - vp9_dequant_dc_idct_add_y_block_8x8_inplace_c( + vp9_dequant_idct_add_y_block_8x8_inplace_c( xd->qcoeff, xd->block[0].dequant, xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); - vp9_dequant_idct_add_uv_block_8x8_inplace_c( - xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.uv_stride, xd->eobs + 16, xd); + xd->dst.y_stride, xd->eobs, xd); } + vp9_dequant_idct_add_uv_block_8x8_inplace_c( + xd->qcoeff + 16 * 16, xd->block[16].dequant, + xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.uv_stride, xd->eobs + 16, xd); }; static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, BOOL_DECODER* const bc, int n, int maska, int shiftb) { int x_idx = n & maska, y_idx = n >> shiftb; - BLOCKD *b = &xd->block[24]; TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[0]); if (tx_type != DCT_DCT) { int i; @@ -645,7 +546,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, + x_idx * 16 + (i & 3) * 4, xd->dst.y_stride, xd->dst.y_stride, b->eob); } else { - vp9_dequant_idct_add_c( + xd->itxm_add( b->qcoeff, b->dequant, xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride + x_idx * 16 + (i & 3) * 4, @@ -655,25 +556,10 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, } } } else { - vp9_dequantize_b(b); - if (xd->eobs[24] > 1) { - vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; - ((int *)b->qcoeff)[1] = 0; - ((int 
*)b->qcoeff)[2] = 0; - ((int *)b->qcoeff)[3] = 0; - ((int *)b->qcoeff)[4] = 0; - ((int *)b->qcoeff)[5] = 0; - ((int *)b->qcoeff)[6] = 0; - ((int *)b->qcoeff)[7] = 0; - } else { - xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; - } - vp9_dequant_dc_idct_add_y_block_4x4_inplace_c( + vp9_dequant_idct_add_y_block_4x4_inplace_c( xd->qcoeff, xd->block[0].dequant, xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); + xd->dst.y_stride, xd->eobs, xd); } vp9_dequant_idct_add_uv_block_4x4_inplace_c( xd->qcoeff + 16 * 16, xd->block[16].dequant, @@ -782,7 +668,7 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->above_context = pc->above_context + mb_col + x_idx; xd->left_context = pc->left_context + y_idx; xd->mode_info_context = orig_mi + x_idx + y_idx * mis; - for (i = 0; i < 25; i++) { + for (i = 0; i < 24; i++) { xd->block[i].eob = 0; xd->eobs[i] = 0; } @@ -887,7 +773,7 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->above_context = pc->above_context + mb_col + x_idx; xd->left_context = pc->left_context + y_idx + (mb_row & 2); xd->mode_info_context = orig_mi + x_idx + y_idx * mis; - for (i = 0; i < 25; i++) { + for (i = 0; i < 24; i++) { xd->block[i].eob = 0; xd->eobs[i] = 0; } @@ -933,7 +819,7 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, if (xd->mode_info_context->mbmi.mb_skip_coeff) { vp9_reset_mb_tokens_context(xd); } else if (!bool_error(bc)) { - for (i = 0; i < 25; i++) { + for (i = 0; i < 24; i++) { xd->block[i].eob = 0; xd->eobs[i] = 0; } @@ -1291,11 +1177,10 @@ static void read_coef_probs_common(BOOL_DECODER* const bc, if (vp9_read_bit(bc)) { for (i = 0; i < block_types; i++) { - for (j = !i; j < COEF_BANDS; j++) { + for (j = 0; j < COEF_BANDS; j++) { /* NB: This j loop starts from 1 on block type i == 0 */ for (k = 0; k < PREV_COEF_CONTEXTS; k++) { - if (k >= 3 && ((i == 0 && j == 1) || - (i > 0 && j == 0))) 
+ if (k >= 3 && j == 0) continue; for (l = 0; l < ENTROPY_NODES; l++) { vp9_prob *const p = coef_probs[i][j][k] + l; @@ -1314,16 +1199,18 @@ static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) { VP9_COMMON *const pc = &pbi->common; read_coef_probs_common(bc, pc->fc.coef_probs_4x4, BLOCK_TYPES_4X4); - read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_4x4, BLOCK_TYPES_4X4); + read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_4x4, + BLOCK_TYPES_4X4_HYBRID); if (pbi->common.txfm_mode != ONLY_4X4) { read_coef_probs_common(bc, pc->fc.coef_probs_8x8, BLOCK_TYPES_8X8); - read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_8x8, BLOCK_TYPES_8X8); + read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_8x8, + BLOCK_TYPES_8X8_HYBRID); } if (pbi->common.txfm_mode > ALLOW_8X8) { read_coef_probs_common(bc, pc->fc.coef_probs_16x16, BLOCK_TYPES_16X16); read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_16x16, - BLOCK_TYPES_16X16); + BLOCK_TYPES_16X16_HYBRID); } if (pbi->common.txfm_mode > ALLOW_16X16) { read_coef_probs_common(bc, pc->fc.coef_probs_32x32, BLOCK_TYPES_32X32); @@ -1529,17 +1416,20 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { pc->sb64_coded = vp9_read_literal(&header_bc, 8); pc->sb32_coded = vp9_read_literal(&header_bc, 8); - - /* Read the loop filter level and type */ - pc->txfm_mode = vp9_read_literal(&header_bc, 2); - if (pc->txfm_mode == 3) - pc->txfm_mode += vp9_read_bit(&header_bc); - if (pc->txfm_mode == TX_MODE_SELECT) { - pc->prob_tx[0] = vp9_read_literal(&header_bc, 8); - pc->prob_tx[1] = vp9_read_literal(&header_bc, 8); - pc->prob_tx[2] = vp9_read_literal(&header_bc, 8); + xd->lossless = vp9_read_bit(&header_bc); + if (xd->lossless) { + pc->txfm_mode = ONLY_4X4; + } else { + /* Read the loop filter level and type */ + pc->txfm_mode = vp9_read_literal(&header_bc, 2); + if (pc->txfm_mode == 3) + pc->txfm_mode += vp9_read_bit(&header_bc); + if (pc->txfm_mode == TX_MODE_SELECT) { + pc->prob_tx[0] = 
vp9_read_literal(&header_bc, 8); + pc->prob_tx[1] = vp9_read_literal(&header_bc, 8); + pc->prob_tx[2] = vp9_read_literal(&header_bc, 8); + } } - pc->filter_type = (LOOPFILTERTYPE) vp9_read_bit(&header_bc); pc->filter_level = vp9_read_literal(&header_bc, 6); pc->sharpness_level = vp9_read_literal(&header_bc, 3); @@ -1589,8 +1479,6 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { q_update = 0; /* AC 1st order Q = default */ pc->y1dc_delta_q = get_delta_q(&header_bc, pc->y1dc_delta_q, &q_update); - pc->y2dc_delta_q = get_delta_q(&header_bc, pc->y2dc_delta_q, &q_update); - pc->y2ac_delta_q = get_delta_q(&header_bc, pc->y2ac_delta_q, &q_update); pc->uvdc_delta_q = get_delta_q(&header_bc, pc->uvdc_delta_q, &q_update); pc->uvac_delta_q = get_delta_q(&header_bc, pc->uvac_delta_q, &q_update); @@ -1771,7 +1659,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { /* tile info */ { const unsigned char *data_ptr = data + first_partition_length_in_bytes; - int tile, delta_log2_tiles; + int tile_row, tile_col, delta_log2_tiles; vp9_get_tile_n_bits(pc, &pc->log2_tile_columns, &delta_log2_tiles); while (delta_log2_tiles--) { @@ -1781,55 +1669,80 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { break; } } + pc->log2_tile_rows = vp9_read_bit(&header_bc); + if (pc->log2_tile_rows) + pc->log2_tile_rows += vp9_read_bit(&header_bc); pc->tile_columns = 1 << pc->log2_tile_columns; + pc->tile_rows = 1 << pc->log2_tile_rows; vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols); if (pbi->oxcf.inv_tile_order) { - const unsigned char *data_ptr2[4]; + const int n_cols = pc->tile_columns; + const unsigned char *data_ptr2[4][1 << 6]; BOOL_DECODER UNINITIALIZED_IS_SAFE(bc_bak); - data_ptr2[0] = data_ptr; - for (tile = 1; tile < pc->tile_columns; tile++) { - int size = data_ptr2[tile - 1][0] + (data_ptr2[tile - 1][1] << 8) + - (data_ptr2[tile - 1][2] << 16) + (data_ptr2[tile - 1][3] << 24); - 
data_ptr2[tile - 1] += 4; - data_ptr2[tile] = data_ptr2[tile - 1] + size; + // pre-initialize the offsets, we're going to read in inverse order + data_ptr2[0][0] = data_ptr; + for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) { + if (tile_row) { + int size = data_ptr2[tile_row - 1][n_cols - 1][0] + + (data_ptr2[tile_row - 1][n_cols - 1][1] << 8) + + (data_ptr2[tile_row - 1][n_cols - 1][2] << 16) + + (data_ptr2[tile_row - 1][n_cols - 1][3] << 24); + data_ptr2[tile_row - 1][n_cols - 1] += 4; + data_ptr2[tile_row][0] = data_ptr2[tile_row - 1][n_cols - 1] + size; + } + + for (tile_col = 1; tile_col < n_cols; tile_col++) { + int size = data_ptr2[tile_row][tile_col - 1][0] + + (data_ptr2[tile_row][tile_col - 1][1] << 8) + + (data_ptr2[tile_row][tile_col - 1][2] << 16) + + (data_ptr2[tile_row][tile_col - 1][3] << 24); + data_ptr2[tile_row][tile_col - 1] += 4; + data_ptr2[tile_row][tile_col] = + data_ptr2[tile_row][tile_col - 1] + size; + } } - for (tile = pc->tile_columns - 1; tile >= 0; tile--) { - pc->cur_tile_idx = tile; - vp9_get_tile_offsets(pc, &pc->cur_tile_mb_col_start, - &pc->cur_tile_mb_col_end); - setup_token_decoder(pbi, data_ptr2[tile], &residual_bc); - - /* Decode a row of superblocks */ - for (mb_row = 0; mb_row < pc->mb_rows; mb_row += 4) { - decode_sb_row(pbi, pc, mb_row, xd, &residual_bc); + + for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) { + vp9_get_tile_row_offsets(pc, tile_row); + for (tile_col = n_cols - 1; tile_col >= 0; tile_col--) { + vp9_get_tile_col_offsets(pc, tile_col); + setup_token_decoder(pbi, data_ptr2[tile_row][tile_col], &residual_bc); + + /* Decode a row of superblocks */ + for (mb_row = pc->cur_tile_mb_row_start; + mb_row < pc->cur_tile_mb_row_end; mb_row += 4) { + decode_sb_row(pbi, pc, mb_row, xd, &residual_bc); + } + if (tile_row == pc->tile_rows - 1 && tile_col == n_cols - 1) + bc_bak = residual_bc; } - if (tile == pc->tile_columns - 1) - bc_bak = residual_bc; } residual_bc = bc_bak; } else { - for (tile = 0; tile 
< pc->tile_columns; tile++) { - pc->cur_tile_idx = tile; - vp9_get_tile_offsets(pc, &pc->cur_tile_mb_col_start, - &pc->cur_tile_mb_col_end); - - if (tile < pc->tile_columns - 1) - setup_token_decoder(pbi, data_ptr + 4, &residual_bc); - else - setup_token_decoder(pbi, data_ptr, &residual_bc); - - /* Decode a row of superblocks */ - for (mb_row = 0; mb_row < pc->mb_rows; mb_row += 4) { - decode_sb_row(pbi, pc, mb_row, xd, &residual_bc); - } - if (tile < pc->tile_columns - 1) { - int size = data_ptr[0] + (data_ptr[1] << 8) + (data_ptr[2] << 16) + - (data_ptr[3] << 24); - data_ptr += 4 + size; + for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) { + vp9_get_tile_row_offsets(pc, tile_row); + for (tile_col = 0; tile_col < pc->tile_columns; tile_col++) { + vp9_get_tile_col_offsets(pc, tile_col); + + if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) + setup_token_decoder(pbi, data_ptr + 4, &residual_bc); + else + setup_token_decoder(pbi, data_ptr, &residual_bc); + + /* Decode a row of superblocks */ + for (mb_row = pc->cur_tile_mb_row_start; + mb_row < pc->cur_tile_mb_row_end; mb_row += 4) { + decode_sb_row(pbi, pc, mb_row, xd, &residual_bc); + } + if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) { + int size = data_ptr[0] + (data_ptr[1] << 8) + (data_ptr[2] << 16) + + (data_ptr[3] << 24); + data_ptr += 4 + size; + } } } } diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 1f64767fa..3807a8c39 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -145,7 +145,6 @@ void vp9_dequant_dc_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred, add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4); } -#if CONFIG_LOSSLESS void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride) { @@ -183,22 +182,10 @@ void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq, add_residual(diff_ptr, pred, pitch, 
dest, stride, 4, 4); } -#endif - -void vp9_dequantize_b_2x2_c(BLOCKD *d) { - int i; - int16_t *DQ = d->dqcoeff; - const int16_t *Q = d->qcoeff; - const int16_t *DQC = d->dequant; - - for (i = 0; i < 16; i++) { - DQ[i] = (int16_t)((Q[i] * DQC[i])); - } -} void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, - int stride, int dc, int eob) { + int stride, int eob) { int16_t output[64]; int16_t *diff_ptr = output; int i; @@ -206,8 +193,7 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq, /* If dc is 1, then input[0] is the reconstructed value, do not need * dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. */ - if (!dc) - input[0] *= dq[0]; + input[0] *= dq[0]; /* The calculation can be simplified if there are not many non-zero dct * coefficients. Use eobs to decide what to do. @@ -279,7 +265,11 @@ void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input, input[i] = input[i] * dq[1]; // inverse hybrid transform +#if CONFIG_INTHT16X16 + vp9_short_iht16x16(input, output, 32, tx_type); +#else vp9_ihtllm(input, output, 32, tx_type, 16, eobs); +#endif // the idct halves ( >> 1) the pitch // vp9_short_idct16x16_c(input, output, 32); @@ -357,21 +347,17 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq, if (eob) { input[0] = input[0] * dq[0] / 2; -#if !CONFIG_DWTDCTHYBRID if (eob == 1) { vp9_short_idct1_32x32_c(input, output); add_constant_residual(output[0], pred, pitch, dest, stride, 32, 32); input[0] = 0; } else { -#endif for (i = 1; i < 1024; i++) input[i] = input[i] * dq[1] / 2; vp9_short_idct32x32_c(input, output, 64); vpx_memset(input, 0, 2048); add_residual(output, pred, pitch, dest, stride, 32, 32); -#if !CONFIG_DWTDCTHYBRID } -#endif } } diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h index 2a0ae80e8..0207e79d5 100644 --- a/vp9/decoder/vp9_dequantize.h +++ b/vp9/decoder/vp9_dequantize.h @@ -13,7 +13,7 @@ #define 
VP9_DECODER_VP9_DEQUANTIZE_H_ #include "vp9/common/vp9_blockd.h" -#if CONFIG_LOSSLESS + extern void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq, unsigned char *pred, unsigned char *output, @@ -40,21 +40,6 @@ extern void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t * unsigned char *dst_v, int stride, uint16_t *eobs); -#endif - -typedef void (*vp9_dequant_idct_add_fn_t)(int16_t *input, const int16_t *dq, - unsigned char *pred, unsigned char *output, int pitch, int stride); -typedef void(*vp9_dequant_dc_idct_add_fn_t)(int16_t *input, const int16_t *dq, - unsigned char *pred, unsigned char *output, int pitch, int stride, int dc); - -typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq, - unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs, - const int16_t *dc); -typedef void(*vp9_dequant_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq, - unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs); -typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(int16_t *q, const int16_t *dq, - unsigned char *pre, unsigned char *dst_u, unsigned char *dst_v, int stride, - uint16_t *eobs); void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, unsigned char *pred, unsigned char *dest, @@ -77,6 +62,12 @@ void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q, const int16_t *dq const int16_t *dc, MACROBLOCKD *xd); +void vp9_dequant_idct_add_y_block_8x8_inplace_c(int16_t *q, const int16_t *dq, + unsigned char *dst, + int stride, + uint16_t *eobs, + MACROBLOCKD *xd); + void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, const int16_t *dq, unsigned char *dst, int stride, @@ -84,6 +75,12 @@ void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, const int16_t *dq const int16_t *dc, MACROBLOCKD *xd); +void vp9_dequant_idct_add_y_block_4x4_inplace_c(int16_t *q, const int16_t *dq, + unsigned char *dst, + int stride, + uint16_t *eobs, + 
MACROBLOCKD *xd); + void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq, unsigned char *dstu, unsigned char *dstv, diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index acf69d906..0786e2965 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -63,24 +63,11 @@ static int get_signed(BOOL_DECODER *br, int value_to_sign) { return decode_bool(br, 128) ? -value_to_sign : value_to_sign; } -#if CONFIG_NEWCOEFCONTEXT -#define PT pn -#define INCREMENT_COUNT(token) \ - do { \ - coef_counts[type][coef_bands[c]][pn][token]++; \ - pn = pt = vp9_prev_token_class[token]; \ - if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(coef_bands[c + 1])) \ - pn = vp9_get_coef_neighbor_context( \ - qcoeff_ptr, nodc, neighbors, scan[c + 1]); \ - } while (0) -#else -#define PT pt #define INCREMENT_COUNT(token) \ do { \ - coef_counts[type][coef_bands[c]][pt][token]++; \ - pt = vp9_prev_token_class[token]; \ + coef_counts[type][get_coef_band(c)][pt][token]++; \ + pt = vp9_get_coef_context(&recent_energy, token); \ } while (0) -#endif /* CONFIG_NEWCOEFCONTEXT */ #define WRITE_COEF_CONTINUE(val, token) \ { \ @@ -100,20 +87,15 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, BOOL_DECODER* const br, int block_idx, PLANE_TYPE type, TX_TYPE tx_type, int seg_eob, int16_t *qcoeff_ptr, - const int *const scan, TX_SIZE txfm_size, - const int *coef_bands) { + const int *const scan, TX_SIZE txfm_size) { ENTROPY_CONTEXT* const A0 = (ENTROPY_CONTEXT *) xd->above_context; ENTROPY_CONTEXT* const L0 = (ENTROPY_CONTEXT *) xd->left_context; const int aidx = vp9_block2above[txfm_size][block_idx]; const int lidx = vp9_block2left[txfm_size][block_idx]; ENTROPY_CONTEXT above_ec = A0[aidx] != 0, left_ec = L0[lidx] != 0; FRAME_CONTEXT *const fc = &dx->common.fc; -#if CONFIG_NEWCOEFCONTEXT - const int *neighbors; - int pn; -#endif - int nodc = (type == PLANE_TYPE_Y_NO_DC); - int pt, c = nodc; + int recent_energy = 0; + int pt, c = 
0; vp9_coeff_probs *coef_probs; vp9_prob *prob; vp9_coeff_count *coef_counts; @@ -138,10 +120,8 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, coef_counts = fc->hybrid_coef_counts_8x8; } #if CONFIG_CNVCONTEXT - if (type != PLANE_TYPE_Y2) { - above_ec = (A0[aidx] + A0[aidx + 1]) != 0; - left_ec = (L0[lidx] + L0[lidx + 1]) != 0; - } + above_ec = (A0[aidx] + A0[aidx + 1]) != 0; + left_ec = (L0[lidx] + L0[lidx + 1]) != 0; #endif break; case TX_16X16: @@ -158,7 +138,7 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1); above_ec = (A0[aidx] + A0[aidx + 1] + A1[aidx] + A1[aidx + 1]) != 0; left_ec = (L0[lidx] + L0[lidx + 1] + L1[lidx] + L1[lidx + 1]) != 0; - } else if (type != PLANE_TYPE_Y2) { + } else { above_ec = (A0[aidx] + A0[aidx + 1] + A0[aidx + 2] + A0[aidx + 3]) != 0; left_ec = (L0[lidx] + L0[lidx + 1] + L0[lidx + 2] + L0[lidx + 3]) != 0; } @@ -179,7 +159,7 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, A2[aidx] + A2[aidx + 1] + A3[aidx] + A3[aidx + 1]) != 0; left_ec = (L0[lidx] + L0[lidx + 1] + L1[lidx] + L1[lidx + 1] + L2[lidx] + L2[lidx + 1] + L3[lidx] + L3[lidx + 1]) != 0; - } else if (type != PLANE_TYPE_Y2) { + } else { ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1); ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1); above_ec = (A0[aidx] + A0[aidx + 1] + A0[aidx + 2] + A0[aidx + 3] + @@ -192,15 +172,11 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, } VP9_COMBINEENTROPYCONTEXTS(pt, above_ec, left_ec); -#if CONFIG_NEWCOEFCONTEXT - pn = pt; - neighbors = vp9_get_coef_neighbors_handle(scan); -#endif while (1) { int val; const uint8_t *cat6 = cat6_prob; if (c >= seg_eob) break; - prob = coef_probs[type][coef_bands[c]][PT]; + prob = coef_probs[type][get_coef_band(c)][pt]; if (!vp9_read(br, prob[EOB_CONTEXT_NODE])) break; SKIP_START: @@ -208,7 +184,7 @@ SKIP_START: if (!vp9_read(br, 
prob[ZERO_CONTEXT_NODE])) { INCREMENT_COUNT(ZERO_TOKEN); ++c; - prob = coef_probs[type][coef_bands[c]][PT]; + prob = coef_probs[type][get_coef_band(c)][pt]; goto SKIP_START; } // ONE_CONTEXT_NODE_0_ @@ -272,10 +248,10 @@ SKIP_START: } if (c < seg_eob) - coef_counts[type][coef_bands[c]][PT][DCT_EOB_TOKEN]++; + coef_counts[type][get_coef_band(c)][pt][DCT_EOB_TOKEN]++; - A0[aidx] = L0[lidx] = (c > !type); - if (txfm_size >= TX_8X8 && type != PLANE_TYPE_Y2) { + A0[aidx] = L0[lidx] = (c > 0); + if (txfm_size >= TX_8X8) { A0[aidx + 1] = L0[lidx + 1] = A0[aidx]; if (txfm_size >= TX_16X16) { if (type == PLANE_TYPE_UV) { @@ -319,10 +295,6 @@ static int get_eob(MACROBLOCKD* const xd, int segment_id, int eob_max) { int vp9_decode_sb_tokens(VP9D_COMP* const pbi, MACROBLOCKD* const xd, BOOL_DECODER* const bc) { - ENTROPY_CONTEXT* const A0 = (ENTROPY_CONTEXT *) xd->above_context; - ENTROPY_CONTEXT* const L0 = (ENTROPY_CONTEXT *) xd->left_context; - ENTROPY_CONTEXT* const A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1); - ENTROPY_CONTEXT* const L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1); uint16_t *const eobs = xd->eobs; const int segment_id = xd->mode_info_context->mbmi.segment_id; int c, i, eobtotal = 0, seg_eob; @@ -332,7 +304,7 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi, DCT_DCT, get_eob(xd, segment_id, 1024), xd->sb_coeff_data.qcoeff, vp9_default_zig_zag1d_32x32, - TX_32X32, vp9_coef_bands_32x32); + TX_32X32); eobtotal += c; // 16x16 chroma blocks @@ -341,21 +313,16 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi, eobs[i] = c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, xd->sb_coeff_data.qcoeff + 1024 + (i - 16) * 64, vp9_default_zig_zag1d_16x16, - TX_16X16, vp9_coef_bands_16x16); + TX_16X16); eobtotal += c; } - // no Y2 block - A0[8] = L0[8] = A1[8] = L1[8] = 0; - return eobtotal; } static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi, MACROBLOCKD* const xd, BOOL_DECODER* const bc) { - ENTROPY_CONTEXT* const A = (ENTROPY_CONTEXT 
*)xd->above_context; - ENTROPY_CONTEXT* const L = (ENTROPY_CONTEXT *)xd->left_context; uint16_t *const eobs = xd->eobs; const int segment_id = xd->mode_info_context->mbmi.segment_id; int c, i, eobtotal = 0, seg_eob; @@ -365,7 +332,7 @@ static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi, get_tx_type(xd, &xd->block[0]), get_eob(xd, segment_id, 256), xd->qcoeff, vp9_default_zig_zag1d_16x16, - TX_16X16, vp9_coef_bands_16x16); + TX_16X16); eobtotal += c; // 8x8 chroma blocks @@ -374,11 +341,9 @@ static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi, eobs[i] = c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, xd->block[i].qcoeff, vp9_default_zig_zag1d_8x8, - TX_8X8, vp9_coef_bands_8x8); + TX_8X8); eobtotal += c; } - A[8] = 0; - L[8] = 0; return eobtotal; } @@ -386,36 +351,16 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, MACROBLOCKD* const xd, BOOL_DECODER* const bc) { uint16_t *const eobs = xd->eobs; - PLANE_TYPE type; int c, i, eobtotal = 0, seg_eob; const int segment_id = xd->mode_info_context->mbmi.segment_id; - int has_2nd_order = get_2nd_order_usage(xd); - // 2nd order DC block - if (has_2nd_order) { - eobs[24] = c = decode_coefs(pbi, xd, bc, 24, PLANE_TYPE_Y2, - DCT_DCT, get_eob(xd, segment_id, 4), - xd->block[24].qcoeff, - vp9_default_zig_zag1d_4x4, TX_8X8, - vp9_coef_bands_4x4); - eobtotal += c - 4; - type = PLANE_TYPE_Y_NO_DC; - } else { - xd->above_context->y2 = 0; - xd->left_context->y2 = 0; - eobs[24] = 0; - type = PLANE_TYPE_Y_WITH_DC; - } - // luma blocks seg_eob = get_eob(xd, segment_id, 64); for (i = 0; i < 16; i += 4) { - eobs[i] = c = decode_coefs(pbi, xd, bc, i, type, - type == PLANE_TYPE_Y_WITH_DC ? 
- get_tx_type(xd, xd->block + i) : DCT_DCT, + eobs[i] = c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, + get_tx_type(xd, xd->block + i), seg_eob, xd->block[i].qcoeff, - vp9_default_zig_zag1d_8x8, - TX_8X8, vp9_coef_bands_8x8); + vp9_default_zig_zag1d_8x8, TX_8X8); eobtotal += c; } @@ -427,16 +372,14 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, for (i = 16; i < 24; i++) { eobs[i] = c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, xd->block[i].qcoeff, - vp9_default_zig_zag1d_4x4, TX_4X4, - vp9_coef_bands_4x4); + vp9_default_zig_zag1d_4x4, TX_4X4); eobtotal += c; } } else { for (i = 16; i < 24; i += 4) { eobs[i] = c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, xd->block[i].qcoeff, - vp9_default_zig_zag1d_8x8, - TX_8X8, vp9_coef_bands_8x8); + vp9_default_zig_zag1d_8x8, TX_8X8); eobtotal += c; } } @@ -452,7 +395,7 @@ static int decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd, int c; c = decode_coefs(dx, xd, bc, i, type, tx_type, seg_eob, - xd->block[i].qcoeff, scan, TX_4X4, vp9_coef_bands_4x4); + xd->block[i].qcoeff, scan, TX_4X4); eobs[i] = c; return c; @@ -517,26 +460,13 @@ static int vp9_decode_mb_tokens_4x4(VP9D_COMP* const dx, MACROBLOCKD* const xd, BOOL_DECODER* const bc) { int i, eobtotal = 0; - PLANE_TYPE type; const int segment_id = xd->mode_info_context->mbmi.segment_id; const int seg_eob = get_eob(xd, segment_id, 16); - const int has_2nd_order = get_2nd_order_usage(xd); - - // 2nd order DC block - if (has_2nd_order) { - eobtotal += decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_Y2, 24, seg_eob, - DCT_DCT, vp9_default_zig_zag1d_4x4) - 16; - type = PLANE_TYPE_Y_NO_DC; - } else { - xd->above_context->y2 = 0; - xd->left_context->y2 = 0; - xd->eobs[24] = 0; - type = PLANE_TYPE_Y_WITH_DC; - } // luma blocks for (i = 0; i < 16; ++i) { - eobtotal += decode_coefs_4x4_y(dx, xd, bc, type, i, seg_eob); + eobtotal += decode_coefs_4x4_y(dx, xd, bc, + PLANE_TYPE_Y_WITH_DC, i, seg_eob); } // chroma blocks diff --git 
a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c index 152527cff..85bbdc6c7 100644 --- a/vp9/decoder/vp9_idct_blk.c +++ b/vp9/decoder/vp9_idct_blk.c @@ -10,54 +10,27 @@ #include "vp9_rtcd.h" #include "vp9/common/vp9_blockd.h" -#if CONFIG_LOSSLESS #include "vp9/decoder/vp9_dequantize.h" -#endif -void vp9_dequant_dc_idct_add_y_block_c(int16_t *q, const int16_t *dq, - uint8_t *pre, - uint8_t *dst, - int stride, uint16_t *eobs, - const int16_t *dc) { - int i, j; - - for (i = 0; i < 4; i++) { - for (j = 0; j < 4; j++) { - if (*eobs++ > 1) - vp9_dequant_dc_idct_add_c(q, dq, pre, dst, 16, stride, dc[0]); - else - vp9_dc_only_idct_add_c(dc[0], pre, dst, 16, stride); - - q += 16; - pre += 4; - dst += 4; - dc++; - } - - pre += 64 - 16; - dst += 4 * stride - 16; - } -} - -void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, - const int16_t *dq, - uint8_t *dst, - int stride, - uint16_t *eobs, - const int16_t *dc, - MACROBLOCKD *xd) { +void vp9_dequant_idct_add_y_block_4x4_inplace_c(int16_t *q, + const int16_t *dq, + uint8_t *dst, + int stride, + uint16_t *eobs, + MACROBLOCKD *xd) { int i, j; for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { - if (*eobs++ > 1) - vp9_dequant_dc_idct_add_c(q, dq, dst, dst, stride, stride, dc[0]); - else - vp9_dc_only_idct_add_c(dc[0], dst, dst, stride, stride); + if (*eobs++ > 1) { + xd->itxm_add(q, dq, dst, dst, stride, stride); + } else { + xd->dc_only_itxm_add(q[0]*dq[0], dst, dst, stride, stride); + ((int *)q)[0] = 0; + } q += 16; dst += 4; - dc++; } dst += 4 * stride - 16; @@ -143,9 +116,9 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq, for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { if (*eobs++ > 1) { - vp9_dequant_idct_add_c(q, dq, dstu, dstu, stride, stride); + xd->itxm_add(q, dq, dstu, dstu, stride, stride); } else { - vp9_dc_only_idct_add_c(q[0]*dq[0], dstu, dstu, stride, stride); + xd->dc_only_itxm_add(q[0]*dq[0], dstu, dstu, stride, stride); ((int *)q)[0] = 0; } @@ -159,9 
+132,9 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq, for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { if (*eobs++ > 1) { - vp9_dequant_idct_add_c(q, dq, dstv, dstv, stride, stride); + xd->itxm_add(q, dq, dstv, dstv, stride, stride); } else { - vp9_dc_only_idct_add_c(q[0]*dq[0], dstv, dstv, stride, stride); + xd->dc_only_itxm_add(q[0]*dq[0], dstv, dstv, stride, stride); ((int *)q)[0] = 0; } @@ -173,52 +146,24 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq, } } -void vp9_dequant_dc_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq, - uint8_t *pre, - uint8_t *dst, - int stride, uint16_t *eobs, - const int16_t *dc, - MACROBLOCKD *xd) { - q[0] = dc[0]; - vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, 1, xd->eobs[0]); - - q[64] = dc[1]; - vp9_dequant_idct_add_8x8_c(&q[64], dq, pre + 8, dst + 8, 16, stride, 1, - xd->eobs[4]); - - q[128] = dc[4]; - vp9_dequant_idct_add_8x8_c(&q[128], dq, pre + 8 * 16, - dst + 8 * stride, 16, stride, 1, xd->eobs[8]); - - q[192] = dc[8]; - vp9_dequant_idct_add_8x8_c(&q[192], dq, pre + 8 * 16 + 8, - dst + 8 * stride + 8, 16, stride, 1, - xd->eobs[12]); -} +void vp9_dequant_idct_add_y_block_8x8_inplace_c(int16_t *q, + const int16_t *dq, + uint8_t *dst, + int stride, + uint16_t *eobs, + MACROBLOCKD *xd) { + vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride, xd->eobs[0]); -void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q, - const int16_t *dq, - uint8_t *dst, - int stride, - uint16_t *eobs, - const int16_t *dc, - MACROBLOCKD *xd) { - q[0] = dc[0]; - vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride, 1, xd->eobs[0]); - - q[64] = dc[1]; vp9_dequant_idct_add_8x8_c(&q[64], dq, dst + 8, - dst + 8, stride, stride, 1, xd->eobs[4]); + dst + 8, stride, stride, xd->eobs[4]); - q[128] = dc[4]; vp9_dequant_idct_add_8x8_c(&q[128], dq, dst + 8 * stride, - dst + 8 * stride, stride, stride, 1, - xd->eobs[8]); + dst + 8 * stride, stride, stride, + 
xd->eobs[8]); - q[192] = dc[8]; vp9_dequant_idct_add_8x8_c(&q[192], dq, dst + 8 * stride + 8, - dst + 8 * stride + 8, stride, stride, 1, - xd->eobs[12]); + dst + 8 * stride + 8, stride, stride, + xd->eobs[12]); } void vp9_dequant_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq, @@ -229,13 +174,13 @@ void vp9_dequant_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq, uint8_t *origdest = dst; uint8_t *origpred = pre; - vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, 0, xd->eobs[0]); + vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, xd->eobs[0]); vp9_dequant_idct_add_8x8_c(&q[64], dq, origpred + 8, - origdest + 8, 16, stride, 0, xd->eobs[4]); + origdest + 8, 16, stride, xd->eobs[4]); vp9_dequant_idct_add_8x8_c(&q[128], dq, origpred + 8 * 16, - origdest + 8 * stride, 16, stride, 0, xd->eobs[8]); + origdest + 8 * stride, 16, stride, xd->eobs[8]); vp9_dequant_idct_add_8x8_c(&q[192], dq, origpred + 8 * 16 + 8, - origdest + 8 * stride + 8, 16, stride, 0, + origdest + 8 * stride + 8, 16, stride, xd->eobs[12]); } @@ -245,12 +190,12 @@ void vp9_dequant_idct_add_uv_block_8x8_c(int16_t *q, const int16_t *dq, uint8_t *dstv, int stride, uint16_t *eobs, MACROBLOCKD *xd) { - vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride, 0, xd->eobs[16]); + vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride, xd->eobs[16]); q += 64; pre += 64; - vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride, 0, xd->eobs[20]); + vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride, xd->eobs[20]); } void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq, @@ -259,40 +204,14 @@ void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq, int stride, uint16_t *eobs, MACROBLOCKD *xd) { - vp9_dequant_idct_add_8x8_c(q, dq, dstu, dstu, stride, stride, 0, + vp9_dequant_idct_add_8x8_c(q, dq, dstu, dstu, stride, stride, xd->eobs[16]); q += 64; - vp9_dequant_idct_add_8x8_c(q, dq, dstv, dstv, stride, stride, 0, + 
vp9_dequant_idct_add_8x8_c(q, dq, dstv, dstv, stride, stride, xd->eobs[20]); } -#if CONFIG_LOSSLESS -void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, - uint8_t *pre, - uint8_t *dst, - int stride, - uint16_t *eobs, - const int16_t *dc) { - int i, j; - - for (i = 0; i < 4; i++) { - for (j = 0; j < 4; j++) { - if (*eobs++ > 1) - vp9_dequant_dc_idct_add_lossless_c(q, dq, pre, dst, 16, stride, dc[0]); - else - vp9_dc_only_inv_walsh_add_c(dc[0], pre, dst, 16, stride); - - q += 16; - pre += 4; - dst += 4; - dc++; - } - - pre += 64 - 16; - dst += 4 * stride - 16; - } -} void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, uint8_t *pre, @@ -363,5 +282,4 @@ void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, dstv += 4 * stride - 8; } } -#endif diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h index e04b9f5e4..8c1f76e73 100644 --- a/vp9/decoder/vp9_onyxd_int.h +++ b/vp9/decoder/vp9_onyxd_int.h @@ -18,41 +18,6 @@ // #define DEC_DEBUG -typedef struct { - int ithread; - void *ptr1; - void *ptr2; -} DECODETHREAD_DATA; - -typedef struct { - MACROBLOCKD mbd; - int mb_row; - int current_mb_col; - short *coef_ptr; -} MB_ROW_DEC; - -typedef struct { - int const *scan; - int const *scan_8x8; - uint8_t const *ptr_block2leftabove; - vp9_tree_index const *vp9_coef_tree_ptr; - unsigned char *norm_ptr; - uint8_t *ptr_coef_bands_x; - uint8_t *ptr_coef_bands_x_8x8; - - ENTROPY_CONTEXT_PLANES *A; - ENTROPY_CONTEXT_PLANES *L; - - int16_t *qcoeff_start_ptr; - - vp9_prob const *coef_probs_4x4[BLOCK_TYPES_4X4]; - vp9_prob const *coef_probs_8x8[BLOCK_TYPES_8X8]; - vp9_prob const *coef_probs_16X16[BLOCK_TYPES_16X16]; - - uint8_t eob[25]; - -} DETOK; - typedef struct VP9Decompressor { DECLARE_ALIGNED(16, MACROBLOCKD, mb); @@ -68,14 +33,6 @@ typedef struct VP9Decompressor { int64_t last_time_stamp; int ready_for_new_data; - DETOK detoken; - - vp9_dequant_idct_add_fn_t idct_add; - 
vp9_dequant_dc_idct_add_fn_t dc_idct_add; - vp9_dequant_dc_idct_add_y_block_fn_t dc_idct_add_y_block; - vp9_dequant_idct_add_y_block_fn_t idct_add_y_block; - vp9_dequant_idct_add_uv_block_fn_t idct_add_uv_block; - int refresh_frame_flags; vp9_prob prob_skip_false; diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index c5f2a70c6..6ad277b84 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -43,11 +43,11 @@ int intra_mode_stats[VP9_KF_BINTRAMODES] [VP9_KF_BINTRAMODES] [VP9_KF_BINTRAMODES]; vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES_4X4]; -vp9_coeff_stats hybrid_tree_update_hist_4x4[BLOCK_TYPES_4X4]; +vp9_coeff_stats hybrid_tree_update_hist_4x4[BLOCK_TYPES_4X4_HYBRID]; vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES_8X8]; -vp9_coeff_stats hybrid_tree_update_hist_8x8[BLOCK_TYPES_8X8]; +vp9_coeff_stats hybrid_tree_update_hist_8x8[BLOCK_TYPES_8X8_HYBRID]; vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES_16X16]; -vp9_coeff_stats hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16]; +vp9_coeff_stats hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16_HYBRID]; vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES_32X32]; extern unsigned int active_section; @@ -810,7 +810,6 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_mv_ref_probs(&cpi->common, mv_ref_p, mi->mb_mode_context[rf]); #ifdef ENTROPY_STATS - accum_mv_refs(mode, ct); active_section = 3; #endif @@ -1089,14 +1088,15 @@ static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, } static void write_modes(VP9_COMP *cpi, vp9_writer* const bc, - TOKENEXTRA **tok) { + TOKENEXTRA **tok, TOKENEXTRA *tok_end) { VP9_COMMON *const c = &cpi->common; const int mis = c->mode_info_stride; - MODE_INFO *m, *m_ptr = c->mi + c->cur_tile_mb_col_start; + MODE_INFO *m, *m_ptr = c->mi; int i, mb_row, mb_col; - TOKENEXTRA *tok_end = *tok + cpi->tok_count; - for (mb_row = 0; mb_row < c->mb_rows; mb_row += 4, m_ptr += 4 * mis) { + m_ptr += 
c->cur_tile_mb_col_start + c->cur_tile_mb_row_start * mis; + for (mb_row = c->cur_tile_mb_row_start; + mb_row < c->cur_tile_mb_row_end; mb_row += 4, m_ptr += 4 * mis) { m = m_ptr; for (mb_col = c->cur_tile_mb_col_start; mb_col < c->cur_tile_mb_col_end; mb_col += 4, m += 4) { @@ -1186,7 +1186,7 @@ static void build_tree_distribution(vp9_coeff_probs *coef_probs, for (i = 0; i < block_types; ++i) { for (j = 0; j < COEF_BANDS; ++j) { for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) + if (k >= 3 && j == 0) continue; vp9_tree_probs_from_distribution(MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, @@ -1215,7 +1215,8 @@ static void build_coeff_contexts(VP9_COMP *cpi) { #ifdef ENTROPY_STATS cpi, hybrid_context_counters_4x4, #endif - cpi->frame_hybrid_branch_ct_4x4, BLOCK_TYPES_4X4); + cpi->frame_hybrid_branch_ct_4x4, + BLOCK_TYPES_4X4_HYBRID); build_tree_distribution(cpi->frame_coef_probs_8x8, cpi->coef_counts_8x8, #ifdef ENTROPY_STATS @@ -1227,7 +1228,8 @@ static void build_coeff_contexts(VP9_COMP *cpi) { #ifdef ENTROPY_STATS cpi, hybrid_context_counters_8x8, #endif - cpi->frame_hybrid_branch_ct_8x8, BLOCK_TYPES_8X8); + cpi->frame_hybrid_branch_ct_8x8, + BLOCK_TYPES_8X8_HYBRID); build_tree_distribution(cpi->frame_coef_probs_16x16, cpi->coef_counts_16x16, #ifdef ENTROPY_STATS @@ -1239,7 +1241,8 @@ static void build_coeff_contexts(VP9_COMP *cpi) { #ifdef ENTROPY_STATS cpi, hybrid_context_counters_16x16, #endif - cpi->frame_hybrid_branch_ct_16x16, BLOCK_TYPES_16X16); + cpi->frame_hybrid_branch_ct_16x16, + BLOCK_TYPES_16X16_HYBRID); build_tree_distribution(cpi->frame_coef_probs_32x32, cpi->coef_counts_32x32, #ifdef ENTROPY_STATS @@ -1265,7 +1268,7 @@ static void update_coef_probs_common(vp9_writer* const bc, /* dry run to see if there is any udpate at all needed */ savings = 0; for (i = 0; i < block_types; ++i) { - for (j = !i; j < COEF_BANDS; ++j) { + for (j = 0; j < COEF_BANDS; ++j) { int 
prev_coef_savings[ENTROPY_NODES] = {0}; for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { for (t = 0; t < ENTROPY_NODES; ++t) { @@ -1274,7 +1277,7 @@ static void update_coef_probs_common(vp9_writer* const bc, const vp9_prob upd = COEF_UPDATE_PROB; int s = prev_coef_savings[t]; int u = 0; - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) + if (k >= 3 && j == 0) continue; #if defined(SEARCH_NEWP) s = prob_diff_update_savings_search( @@ -1309,7 +1312,7 @@ static void update_coef_probs_common(vp9_writer* const bc, } else { vp9_write_bit(bc, 1); for (i = 0; i < block_types; ++i) { - for (j = !i; j < COEF_BANDS; ++j) { + for (j = 0; j < COEF_BANDS; ++j) { int prev_coef_savings[ENTROPY_NODES] = {0}; for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { // calc probs and branch cts for this frame only @@ -1319,7 +1322,7 @@ static void update_coef_probs_common(vp9_writer* const bc, const vp9_prob upd = COEF_UPDATE_PROB; int s = prev_coef_savings[t]; int u = 0; - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) + if (k >= 3 && j == 0) continue; #if defined(SEARCH_NEWP) @@ -1376,7 +1379,7 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) { cpi->frame_hybrid_coef_probs_4x4, cpi->common.fc.hybrid_coef_probs_4x4, cpi->frame_hybrid_branch_ct_4x4, - BLOCK_TYPES_4X4); + BLOCK_TYPES_4X4_HYBRID); /* do not do this if not even allowed */ if (cpi->common.txfm_mode != ONLY_4X4) { @@ -1398,7 +1401,7 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) { cpi->frame_hybrid_coef_probs_8x8, cpi->common.fc.hybrid_coef_probs_8x8, cpi->frame_hybrid_branch_ct_8x8, - BLOCK_TYPES_8X8); + BLOCK_TYPES_8X8_HYBRID); } if (cpi->common.txfm_mode > ALLOW_8X8) { @@ -1419,7 +1422,7 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) { cpi->frame_hybrid_coef_probs_16x16, cpi->common.fc.hybrid_coef_probs_16x16, cpi->frame_hybrid_branch_ct_16x16, - BLOCK_TYPES_16X16); + BLOCK_TYPES_16X16_HYBRID); } if (cpi->common.txfm_mode > ALLOW_16X16) 
{ @@ -1669,7 +1672,10 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, pc->sb32_coded = get_binary_prob(cpi->sb32_count[0], cpi->sb32_count[1]); vp9_write_literal(&header_bc, pc->sb32_coded, 8); - { + vp9_write_bit(&header_bc, cpi->mb.e_mbd.lossless); + if (cpi->mb.e_mbd.lossless) { + pc->txfm_mode = ONLY_4X4; + } else { if (pc->txfm_mode == TX_MODE_SELECT) { pc->prob_tx[0] = get_prob(cpi->txfm_count_32x32p[TX_4X4] + cpi->txfm_count_16x16p[TX_4X4] + @@ -1779,8 +1785,6 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, // Transmit Dc, Second order and Uv quantizer delta information put_delta_q(&header_bc, pc->y1dc_delta_q); - put_delta_q(&header_bc, pc->y2dc_delta_q); - put_delta_q(&header_bc, pc->y2ac_delta_q); put_delta_q(&header_bc, pc->uvdc_delta_q); put_delta_q(&header_bc, pc->uvac_delta_q); @@ -2040,6 +2044,9 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, break; } } + vp9_write_bit(&header_bc, pc->log2_tile_rows != 0); + if (pc->log2_tile_rows != 0) + vp9_write_bit(&header_bc, pc->log2_tile_rows != 1); } vp9_stop_encode(&header_bc); @@ -2069,33 +2076,45 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, } { - int tile, total_size = 0; + int tile_row, tile_col, total_size = 0; unsigned char *data_ptr = cx_data + header_bc.pos; - TOKENEXTRA *tok = cpi->tok; - - for (tile = 0; tile < pc->tile_columns; tile++) { - pc->cur_tile_idx = tile; - vp9_get_tile_offsets(pc, &pc->cur_tile_mb_col_start, - &pc->cur_tile_mb_col_end); - - if (tile < pc->tile_columns - 1) - vp9_start_encode(&residual_bc, data_ptr + total_size + 4); - else - vp9_start_encode(&residual_bc, data_ptr + total_size); - write_modes(cpi, &residual_bc, &tok); - vp9_stop_encode(&residual_bc); - if (tile < pc->tile_columns - 1) { - /* size of this tile */ - data_ptr[total_size + 0] = residual_bc.pos; - data_ptr[total_size + 1] = residual_bc.pos >> 8; - data_ptr[total_size + 2] = residual_bc.pos >> 16; - data_ptr[total_size + 3] = residual_bc.pos >> 24; 
- total_size += 4; - } + TOKENEXTRA *tok[1 << 6], *tok_end; + + tok[0] = cpi->tok; + for (tile_col = 1; tile_col < pc->tile_columns; tile_col++) + tok[tile_col] = tok[tile_col - 1] + cpi->tok_count[tile_col - 1]; + + for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) { + vp9_get_tile_row_offsets(pc, tile_row); + tok_end = cpi->tok + cpi->tok_count[0]; + for (tile_col = 0; tile_col < pc->tile_columns; + tile_col++, tok_end += cpi->tok_count[tile_col]) { + vp9_get_tile_col_offsets(pc, tile_col); + + if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) + vp9_start_encode(&residual_bc, data_ptr + total_size + 4); + else + vp9_start_encode(&residual_bc, data_ptr + total_size); + write_modes(cpi, &residual_bc, &tok[tile_col], tok_end); + vp9_stop_encode(&residual_bc); + if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) { + /* size of this tile */ + data_ptr[total_size + 0] = residual_bc.pos; + data_ptr[total_size + 1] = residual_bc.pos >> 8; + data_ptr[total_size + 2] = residual_bc.pos >> 16; + data_ptr[total_size + 3] = residual_bc.pos >> 24; + total_size += 4; + } - total_size += residual_bc.pos; + total_size += residual_bc.pos; + } } + assert((unsigned int)(tok[0] - cpi->tok) == cpi->tok_count[0]); + for (tile_col = 1; tile_col < pc->tile_columns; tile_col++) + assert((unsigned int)(tok[tile_col] - tok[tile_col - 1]) == + cpi->tok_count[tile_col]); + *size += total_size; } } @@ -2133,17 +2152,22 @@ void print_tree_update_probs() { print_tree_update_for_type(f, tree_update_hist_4x4, BLOCK_TYPES_4X4, "vp9_coef_update_probs_4x4[BLOCK_TYPES_4X4]"); - print_tree_update_for_type(f, hybrid_tree_update_hist_4x4, BLOCK_TYPES_4X4, - "vp9_coef_update_probs_4x4[BLOCK_TYPES_4X4]"); + print_tree_update_for_type(f, hybrid_tree_update_hist_4x4, + BLOCK_TYPES_4X4_HYBRID, + "vp9_coef_update_probs_4x4" + "[BLOCK_TYPES_4X4_HYBRID]"); print_tree_update_for_type(f, tree_update_hist_8x8, BLOCK_TYPES_8X8, "vp9_coef_update_probs_8x8[BLOCK_TYPES_8X8]"); 
- print_tree_update_for_type(f, hybrid_tree_update_hist_8x8, BLOCK_TYPES_8X8, - "vp9_coef_update_probs_8x8[BLOCK_TYPES_8X8]"); + print_tree_update_for_type(f, hybrid_tree_update_hist_8x8, + BLOCK_TYPES_8X8_HYBRID, + "vp9_coef_update_probs_8x8" + "[BLOCK_TYPES_8X8_HYBRID]"); print_tree_update_for_type(f, tree_update_hist_16x16, BLOCK_TYPES_16X16, "vp9_coef_update_probs_16x16[BLOCK_TYPES_16X16]"); print_tree_update_for_type(f, hybrid_tree_update_hist_16x16, - BLOCK_TYPES_16X16, - "vp9_coef_update_probs_16x16[BLOCK_TYPES_16X16]"); + BLOCK_TYPES_16X16_HYBRID, + "vp9_coef_update_probs_16x16" + "[BLOCK_TYPES_16X16_HYBRID]"); print_tree_update_for_type(f, tree_update_hist_32x32, BLOCK_TYPES_32X32, "vp9_coef_update_probs_32x32[BLOCK_TYPES_32X32]"); diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 91d4c4530..6c4645561 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -89,11 +89,10 @@ typedef struct superblock { } SUPERBLOCK; typedef struct macroblock { - DECLARE_ALIGNED(16, int16_t, src_diff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y - DECLARE_ALIGNED(16, int16_t, coeff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y + DECLARE_ALIGNED(16, int16_t, src_diff[384]); // 16x16 Y 8x8 U 8x8 V + DECLARE_ALIGNED(16, int16_t, coeff[384]); // 16x16 Y 8x8 U 8x8 V // 16 Y blocks, 4 U blocks, 4 V blocks, - // 1 DC 2nd order block each with 16 entries - BLOCK block[25]; + BLOCK block[24]; SUPERBLOCK sb_coeff_data; @@ -158,7 +157,7 @@ typedef struct macroblock { unsigned char *active_ptr; vp9_coeff_count token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES_4X4]; - vp9_coeff_count hybrid_token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES_4X4]; + vp9_coeff_count hybrid_token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES_4X4_HYBRID]; int optimize; @@ -169,17 +168,14 @@ typedef struct macroblock { PICK_MODE_CONTEXT sb32_context[4]; PICK_MODE_CONTEXT sb64_context; - void (*vp9_short_fdct4x4)(int16_t *input, int16_t *output, int pitch); - void (*vp9_short_fdct8x4)(int16_t *input, int16_t *output, int 
pitch); - void (*short_walsh4x4)(int16_t *input, int16_t *output, int pitch); + void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch); + void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch); + void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch); + void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch); void (*quantize_b_4x4)(BLOCK *b, BLOCKD *d); void (*quantize_b_4x4_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1); - void (*vp9_short_fdct8x8)(int16_t *input, int16_t *output, int pitch); - void (*vp9_short_fdct16x16)(int16_t *input, int16_t *output, int pitch); - void (*short_fhaar2x2)(int16_t *input, int16_t *output, int pitch); void (*quantize_b_16x16)(BLOCK *b, BLOCKD *d); void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d); - void (*quantize_b_2x2)(BLOCK *b, BLOCKD *d); } MACROBLOCK; #endif // VP9_ENCODER_VP9_BLOCK_H_ diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 7af044fe4..b9648638d 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -323,6 +323,8 @@ static const int16_t adst_i16[256] = { }; #endif +#define NEW_FDCT8x8 1 +#if !NEW_FDCT8x8 static const int xC1S7 = 16069; static const int xC2S6 = 15137; static const int xC3S5 = 13623; @@ -560,22 +562,7 @@ void vp9_short_fdct8x8_c(short *InputData, short *OutputData, int pitch) { op++; } } - -void vp9_short_fhaar2x2_c(short *input, short *output, int pitch) { - /* [1 1; 1 -1] orthogonal transform */ - /* use position: 0,1, 4, 8 */ - int i; - short *ip1 = input; - short *op1 = output; - for (i = 0; i < 16; i++) { - op1[i] = 0; - } - - op1[0] = (ip1[0] + ip1[1] + ip1[4] + ip1[8] + 1) >> 1; - op1[1] = (ip1[0] - ip1[1] + ip1[4] - ip1[8]) >> 1; - op1[4] = (ip1[0] + ip1[1] - ip1[4] - ip1[8]) >> 1; - op1[8] = (ip1[0] - ip1[1] - ip1[4] + ip1[8]) >> 1; -} +#endif /* For test */ #define TEST_INT 1 @@ -836,86 +823,78 @@ void vp9_short_fdct8x4_c(short *input, short *output, int pitch) vp9_short_fdct4x4_c(input + 4, output + 16, pitch); } -void 
vp9_short_walsh4x4_c(short *input, short *output, int pitch) { - int i; - int a1, b1, c1, d1; - short *ip = input; - short *op = output; - int pitch_short = pitch >> 1; +#if NEW_FDCT8x8 +static void fdct8_1d(int16_t *input, int16_t *output) { + int16_t step[8]; + int temp1, temp2; - for (i = 0; i < 4; i++) { - a1 = ip[0 * pitch_short] + ip[3 * pitch_short]; - b1 = ip[1 * pitch_short] + ip[2 * pitch_short]; - c1 = ip[1 * pitch_short] - ip[2 * pitch_short]; - d1 = ip[0 * pitch_short] - ip[3 * pitch_short]; + // stage 1 + step[0] = input[0] + input[7]; + step[1] = input[1] + input[6]; + step[2] = input[2] + input[5]; + step[3] = input[3] + input[4]; + step[4] = input[3] - input[4]; + step[5] = input[2] - input[5]; + step[6] = input[1] - input[6]; + step[7] = input[0] - input[7]; - op[0] = (a1 + b1 + 1) >> 1; - op[4] = (c1 + d1) >> 1; - op[8] = (a1 - b1) >> 1; - op[12] = (d1 - c1) >> 1; + fdct4_1d(step, step); - ip++; - op++; - } - ip = output; - op = output; + // Stage 2 + output[4] = step[4]; + temp1 = (-step[5] + step[6]) * cospi_16_64; + temp2 = (step[6] + step[5]) * cospi_16_64; + output[5] = dct_const_round_shift(temp1); + output[6] = dct_const_round_shift(temp2); + output[7] = step[7]; - for (i = 0; i < 4; i++) { - a1 = ip[0] + ip[3]; - b1 = ip[1] + ip[2]; - c1 = ip[1] - ip[2]; - d1 = ip[0] - ip[3]; + // Stage 3 + step[4] = output[4] + output[5]; + step[5] = -output[5] + output[4]; + step[6] = -output[6] + output[7]; + step[7] = output[7] + output[6]; - op[0] = (a1 + b1 + 1) >> 1; - op[1] = (c1 + d1) >> 1; - op[2] = (a1 - b1) >> 1; - op[3] = (d1 - c1) >> 1; + // Stage 4 + output[0] = step[0]; + output[4] = step[2]; + output[2] = step[1]; + output[6] = step[3]; - ip += 4; - op += 4; - } + temp1 = step[4] * cospi_28_64 + step[7] * cospi_4_64; + temp2 = step[5] * cospi_12_64 + step[6] * cospi_20_64; + output[1] = dct_const_round_shift(temp1); + output[5] = dct_const_round_shift(temp2); + temp1 = step[6] * cospi_12_64 + step[5] * -cospi_20_64; + temp2 = step[7] * 
cospi_28_64 + step[4] * -cospi_4_64; + output[3] = dct_const_round_shift(temp1); + output[7] = dct_const_round_shift(temp2); } -#if CONFIG_LOSSLESS -void vp9_short_walsh4x4_lossless_c(short *input, short *output, int pitch) { - int i; - int a1, b1, c1, d1; - short *ip = input; - short *op = output; - int pitch_short = pitch >> 1; - - for (i = 0; i < 4; i++) { - a1 = (ip[0 * pitch_short] + ip[3 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR; - b1 = (ip[1 * pitch_short] + ip[2 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR; - c1 = (ip[1 * pitch_short] - ip[2 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR; - d1 = (ip[0 * pitch_short] - ip[3 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR; - - op[0] = (a1 + b1 + 1) >> 1; - op[4] = (c1 + d1) >> 1; - op[8] = (a1 - b1) >> 1; - op[12] = (d1 - c1) >> 1; +void vp9_short_fdct8x8_c(int16_t *input, int16_t *output, int pitch) { + int shortpitch = pitch >> 1; + int i, j; + int16_t out[64]; + int16_t temp_in[8], temp_out[8]; - ip++; - op++; + // First transform columns + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; j++) + temp_in[j] = input[j * shortpitch + i] << 2; + fdct8_1d(temp_in, temp_out); + for (j = 0; j < 8; j++) + out[j * 8 + i] = temp_out[j]; } - ip = output; - op = output; - - for (i = 0; i < 4; i++) { - a1 = ip[0] + ip[3]; - b1 = ip[1] + ip[2]; - c1 = ip[1] - ip[2]; - d1 = ip[0] - ip[3]; - op[0] = ((a1 + b1 + 1) >> 1) << Y2_WHT_UPSCALE_FACTOR; - op[1] = ((c1 + d1) >> 1) << Y2_WHT_UPSCALE_FACTOR; - op[2] = ((a1 - b1) >> 1) << Y2_WHT_UPSCALE_FACTOR; - op[3] = ((d1 - c1) >> 1) << Y2_WHT_UPSCALE_FACTOR; - - ip += 4; - op += 4; + // Then transform rows + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) + temp_in[j] = out[j + i * 8]; + fdct8_1d(temp_in, temp_out); + for (j = 0; j < 8; ++j) + output[j + i * 8] = temp_out[j] >> 1; } } +#endif void vp9_short_walsh4x4_x8_c(short *input, short *output, int pitch) { int i; @@ -961,7 +940,6 @@ void vp9_short_walsh8x4_x8_c(short *input, short *output, int pitch) { vp9_short_walsh4x4_x8_c(input, 
output, pitch); vp9_short_walsh4x4_x8_c(input + 4, output + 16, pitch); } -#endif #define TEST_INT_16x16_DCT 1 #if !TEST_INT_16x16_DCT @@ -1174,6 +1152,9 @@ void vp9_short_fdct16x16_c(short *input, short *out, int pitch) { } #else + +#define NEW_FDCT16 1 +#if !NEW_FDCT16 static const int16_t C1 = 16305; static const int16_t C2 = 16069; static const int16_t C3 = 15679; @@ -1393,9 +1374,138 @@ void vp9_short_fdct16x16_c(int16_t *input, int16_t *out, int pitch) { } #undef RIGHT_SHIFT #undef ROUNDING -#endif -#if !CONFIG_DWTDCTHYBRID +#else +// Rewrote to use same algorithm as others. +static void fdct16_1d(int16_t input[16], int16_t output[16]) { + int16_t step[16]; + int temp1, temp2; + + // step 1 + step[ 0] = input[0] + input[15]; + step[ 1] = input[1] + input[14]; + step[ 2] = input[2] + input[13]; + step[ 3] = input[3] + input[12]; + step[ 4] = input[4] + input[11]; + step[ 5] = input[5] + input[10]; + step[ 6] = input[6] + input[ 9]; + step[ 7] = input[7] + input[ 8]; + step[ 8] = input[7] - input[ 8]; + step[ 9] = input[6] - input[ 9]; + step[10] = input[5] - input[10]; + step[11] = input[4] - input[11]; + step[12] = input[3] - input[12]; + step[13] = input[2] - input[13]; + step[14] = input[1] - input[14]; + step[15] = input[0] - input[15]; + + fdct8_1d(step, step); + + // step 2 + output[8] = step[8]; + output[9] = step[9]; + temp1 = (-step[10] + step[13]) * cospi_16_64; + temp2 = (-step[11] + step[12]) * cospi_16_64; + output[10] = dct_const_round_shift(temp1); + output[11] = dct_const_round_shift(temp2); + temp1 = (step[11] + step[12]) * cospi_16_64; + temp2 = (step[10] + step[13]) * cospi_16_64; + output[12] = dct_const_round_shift(temp1); + output[13] = dct_const_round_shift(temp2); + output[14] = step[14]; + output[15] = step[15]; + + // step 3 + step[ 8] = output[8] + output[11]; + step[ 9] = output[9] + output[10]; + step[ 10] = output[9] - output[10]; + step[ 11] = output[8] - output[11]; + step[ 12] = -output[12] + output[15]; + step[ 13] = 
-output[13] + output[14]; + step[ 14] = output[13] + output[14]; + step[ 15] = output[12] + output[15]; + + // step 4 + output[8] = step[8]; + temp1 = -step[9] * cospi_8_64 + step[14] * cospi_24_64; + temp2 = -step[10] * cospi_24_64 - step[13] * cospi_8_64; + output[9] = dct_const_round_shift(temp1); + output[10] = dct_const_round_shift(temp2); + output[11] = step[11]; + output[12] = step[12]; + temp1 = -step[10] * cospi_8_64 + step[13] * cospi_24_64; + temp2 = step[9] * cospi_24_64 + step[14] * cospi_8_64; + output[13] = dct_const_round_shift(temp1); + output[14] = dct_const_round_shift(temp2); + output[15] = step[15]; + + // step 5 + step[8] = output[8] + output[9]; + step[9] = output[8] - output[9]; + step[10] = -output[10] + output[11]; + step[11] = output[10] + output[11]; + step[12] = output[12] + output[13]; + step[13] = output[12] - output[13]; + step[14] = -output[14] + output[15]; + step[15] = output[14] + output[15]; + + // step 6 + output[0] = step[0]; + output[8] = step[4]; + output[4] = step[2]; + output[12] = step[6]; + output[2] = step[1]; + output[10] = step[5]; + output[6] = step[3]; + output[14] = step[7]; + + temp1 = step[8] * cospi_30_64 + step[15] * cospi_2_64; + temp2 = step[9] * cospi_14_64 + step[14] * cospi_18_64; + output[1] = dct_const_round_shift(temp1); + output[9] = dct_const_round_shift(temp2); + + temp1 = step[10] * cospi_22_64 + step[13] * cospi_10_64; + temp2 = step[11] * cospi_6_64 + step[12] * cospi_26_64; + output[5] = dct_const_round_shift(temp1); + output[13] = dct_const_round_shift(temp2); + + temp1 = -step[11] * cospi_26_64 + step[12] * cospi_6_64; + temp2 = -step[10] * cospi_10_64 + step[13] * cospi_22_64; + output[3] = dct_const_round_shift(temp1); + output[11] = dct_const_round_shift(temp2); + + temp1 = -step[9] * cospi_18_64 + step[14] * cospi_14_64; + temp2 = -step[8] * cospi_2_64 + step[15] * cospi_30_64; + output[7] = dct_const_round_shift(temp1); + output[15] = dct_const_round_shift(temp2); +} + +void 
vp9_short_fdct16x16_c(int16_t *input, int16_t *out, int pitch) { + int shortpitch = pitch >> 1; + int i, j; + int16_t output[256]; + int16_t temp_in[16], temp_out[16]; + + // First transform columns + for (i = 0; i < 16; i++) { + for (j = 0; j < 16; j++) + temp_in[j] = input[j * shortpitch + i]; + fdct16_1d(temp_in, temp_out); + for (j = 0; j < 16; j++) + output[j * 16 + i] = temp_out[j]; + } + + // Then transform rows + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = output[j + i * 16]; + fdct16_1d(temp_in, temp_out); + for (j = 0; j < 16; ++j) + out[j + i * 16] = temp_out[j]; + } +} +#endif +#endif #define TEST_INT_32x32_DCT 1 @@ -2134,706 +2244,3 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) { } #endif - -#else // CONFIG_DWTDCTHYBRID - -#if DWT_TYPE == 53 - -// Note: block length must be even for this implementation -static void analysis_53_row(int length, short *x, - short *lowpass, short *highpass) { - int n; - short r, *a, *b; - - n = length >> 1; - b = highpass; - a = lowpass; - while (--n) { - *a++ = (r = *x++) << 1; - *b++ = *x - ((r + x[1] + 1) >> 1); - x++; - } - *a = (r = *x++) << 1; - *b = *x - r; - - n = length >> 1; - b = highpass; - a = lowpass; - r = *highpass; - while (n--) { - *a++ += (r + (*b) + 1) >> 1; - r = *b++; - } -} - -static void analysis_53_col(int length, short *x, - short *lowpass, short *highpass) { - int n; - short r, *a, *b; - - n = length >> 1; - b = highpass; - a = lowpass; - while (--n) { - *a++ = (r = *x++); - *b++ = (((*x) << 1) - (r + x[1]) + 2) >> 2; - x++; - } - *a = (r = *x++); - *b = (*x - r + 1) >> 1; - - n = length >> 1; - b = highpass; - a = lowpass; - r = *highpass; - while (n--) { - *a++ += (r + (*b) + 1) >> 1; - r = *b++; - } -} - -static void dyadic_analyze_53(int levels, int width, int height, - short *x, int pitch_x, short *c, int pitch_c) { - int lv, i, j, nh, nw, hh = height, hw = width; - short buffer[2 * DWT_MAX_LENGTH]; - for (i = 0; i < height; i++) { - for (j = 
0; j < width; j++) { - c[i * pitch_c + j] = x[i * pitch_x + j] << DWT_PRECISION_BITS; - } - } - for (lv = 0; lv < levels; lv++) { - nh = hh; - hh = (hh + 1) >> 1; - nw = hw; - hw = (hw + 1) >> 1; - if ((nh < 2) || (nw < 2)) return; - for (i = 0; i < nh; i++) { - memcpy(buffer, &c[i * pitch_c], nw * sizeof(short)); - analysis_53_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw); - } - for (j = 0; j < nw; j++) { - for (i = 0; i < nh; i++) - buffer[i + nh] = c[i * pitch_c + j]; - analysis_53_col(nh, buffer + nh, buffer, buffer + hh); - for (i = 0; i < nh; i++) - c[i * pitch_c + j] = buffer[i]; - } - } -} - -#elif DWT_TYPE == 26 - -static void analysis_26_row(int length, short *x, - short *lowpass, short *highpass) { - int i, n; - short r, s, *a, *b; - a = lowpass; - b = highpass; - for (i = length >> 1; i; i--) { - r = *x++; - s = *x++; - *a++ = r + s; - *b++ = r - s; - } - n = length >> 1; - if (n >= 4) { - a = lowpass; - b = highpass; - r = *lowpass; - while (--n) { - *b++ -= (r - a[1] + 4) >> 3; - r = *a++; - } - *b -= (r - *a + 4) >> 3; - } -} - -static void analysis_26_col(int length, short *x, - short *lowpass, short *highpass) { - int i, n; - short r, s, *a, *b; - a = lowpass; - b = highpass; - for (i = length >> 1; i; i--) { - r = *x++; - s = *x++; - *a++ = (r + s + 1) >> 1; - *b++ = (r - s + 1) >> 1; - } - n = length >> 1; - if (n >= 4) { - a = lowpass; - b = highpass; - r = *lowpass; - while (--n) { - *b++ -= (r - a[1] + 4) >> 3; - r = *a++; - } - *b -= (r - *a + 4) >> 3; - } -} - -static void dyadic_analyze_26(int levels, int width, int height, - short *x, int pitch_x, short *c, int pitch_c) { - int lv, i, j, nh, nw, hh = height, hw = width; - short buffer[2 * DWT_MAX_LENGTH]; - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - c[i * pitch_c + j] = x[i * pitch_x + j] << DWT_PRECISION_BITS; - } - } - for (lv = 0; lv < levels; lv++) { - nh = hh; - hh = (hh + 1) >> 1; - nw = hw; - hw = (hw + 1) >> 1; - if ((nh < 2) || (nw < 2)) return; - 
for (i = 0; i < nh; i++) { - memcpy(buffer, &c[i * pitch_c], nw * sizeof(short)); - analysis_26_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw); - } - for (j = 0; j < nw; j++) { - for (i = 0; i < nh; i++) - buffer[i + nh] = c[i * pitch_c + j]; - analysis_26_col(nh, buffer + nh, buffer, buffer + hh); - for (i = 0; i < nh; i++) - c[i * pitch_c + j] = buffer[i]; - } - } -} - -#elif DWT_TYPE == 97 - -static void analysis_97(int length, double *x, - double *lowpass, double *highpass) { - static const double a_predict1 = -1.586134342; - static const double a_update1 = -0.05298011854; - static const double a_predict2 = 0.8829110762; - static const double a_update2 = 0.4435068522; - static const double s_low = 1.149604398; - static const double s_high = 1/1.149604398; - int i; - double y[DWT_MAX_LENGTH]; - // Predict 1 - for (i = 1; i < length - 2; i += 2) { - x[i] += a_predict1 * (x[i - 1] + x[i + 1]); - } - x[length - 1] += 2 * a_predict1 * x[length - 2]; - // Update 1 - for (i = 2; i < length; i += 2) { - x[i] += a_update1 * (x[i - 1] + x[i + 1]); - } - x[0] += 2 * a_update1 * x[1]; - // Predict 2 - for (i = 1; i < length - 2; i += 2) { - x[i] += a_predict2 * (x[i - 1] + x[i + 1]); - } - x[length - 1] += 2 * a_predict2 * x[length - 2]; - // Update 2 - for (i = 2; i < length; i += 2) { - x[i] += a_update2 * (x[i - 1] + x[i + 1]); - } - x[0] += 2 * a_update2 * x[1]; - memcpy(y, x, sizeof(*y) * length); - // Scale and pack - for (i = 0; i < length / 2; i++) { - lowpass[i] = y[2 * i] * s_low; - highpass[i] = y[2 * i + 1] * s_high; - } -} - -static void dyadic_analyze_97(int levels, int width, int height, - short *x, int pitch_x, short *c, int pitch_c) { - int lv, i, j, nh, nw, hh = height, hw = width; - double buffer[2 * DWT_MAX_LENGTH]; - double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH]; - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - y[i * DWT_MAX_LENGTH + j] = x[i * pitch_x + j] << DWT_PRECISION_BITS; - } - } - for (lv = 0; lv < levels; lv++) { - nh = 
hh; - hh = (hh + 1) >> 1; - nw = hw; - hw = (hw + 1) >> 1; - if ((nh < 2) || (nw < 2)) return; - for (i = 0; i < nh; i++) { - memcpy(buffer, &y[i * DWT_MAX_LENGTH], nw * sizeof(*buffer)); - analysis_97(nw, buffer, &y[i * DWT_MAX_LENGTH], - &y[i * DWT_MAX_LENGTH] + hw); - } - for (j = 0; j < nw; j++) { - for (i = 0; i < nh; i++) - buffer[i + nh] = y[i * DWT_MAX_LENGTH + j]; - analysis_97(nh, buffer + nh, buffer, buffer + hh); - for (i = 0; i < nh; i++) - c[i * pitch_c + j] = round(buffer[i]); - } - } -} - -#endif // DWT_TYPE - -// TODO(debargha): Implement the scaling differently so as not to have to -// use the floating point dct -static void dct16x16_1d_f(double input[16], double output[16]) { - static const double C1 = 0.995184726672197; - static const double C2 = 0.98078528040323; - static const double C3 = 0.956940335732209; - static const double C4 = 0.923879532511287; - static const double C5 = 0.881921264348355; - static const double C6 = 0.831469612302545; - static const double C7 = 0.773010453362737; - static const double C8 = 0.707106781186548; - static const double C9 = 0.634393284163646; - static const double C10 = 0.555570233019602; - static const double C11 = 0.471396736825998; - static const double C12 = 0.38268343236509; - static const double C13 = 0.290284677254462; - static const double C14 = 0.195090322016128; - static const double C15 = 0.098017140329561; - - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - double step[16]; - double intermediate[16]; - double temp1, temp2; - - // step 1 - step[ 0] = input[0] + input[15]; - step[ 1] = input[1] + input[14]; - step[ 2] = input[2] + input[13]; - step[ 3] = input[3] + input[12]; - step[ 4] = input[4] + input[11]; - step[ 5] = input[5] + input[10]; - step[ 6] = input[6] + input[ 9]; - step[ 7] = input[7] + input[ 8]; - step[ 8] = input[7] - input[ 8]; - step[ 9] = input[6] - input[ 9]; - step[10] = input[5] - input[10]; - step[11] = input[4] - input[11]; - step[12] = input[3] - 
input[12]; - step[13] = input[2] - input[13]; - step[14] = input[1] - input[14]; - step[15] = input[0] - input[15]; - - // step 2 - output[0] = step[0] + step[7]; - output[1] = step[1] + step[6]; - output[2] = step[2] + step[5]; - output[3] = step[3] + step[4]; - output[4] = step[3] - step[4]; - output[5] = step[2] - step[5]; - output[6] = step[1] - step[6]; - output[7] = step[0] - step[7]; - - temp1 = step[ 8]*C7; - temp2 = step[15]*C9; - output[ 8] = temp1 + temp2; - - temp1 = step[ 9]*C11; - temp2 = step[14]*C5; - output[ 9] = temp1 - temp2; - - temp1 = step[10]*C3; - temp2 = step[13]*C13; - output[10] = temp1 + temp2; - - temp1 = step[11]*C15; - temp2 = step[12]*C1; - output[11] = temp1 - temp2; - - temp1 = step[11]*C1; - temp2 = step[12]*C15; - output[12] = temp2 + temp1; - - temp1 = step[10]*C13; - temp2 = step[13]*C3; - output[13] = temp2 - temp1; - - temp1 = step[ 9]*C5; - temp2 = step[14]*C11; - output[14] = temp2 + temp1; - - temp1 = step[ 8]*C9; - temp2 = step[15]*C7; - output[15] = temp2 - temp1; - - // step 3 - step[ 0] = output[0] + output[3]; - step[ 1] = output[1] + output[2]; - step[ 2] = output[1] - output[2]; - step[ 3] = output[0] - output[3]; - - temp1 = output[4]*C14; - temp2 = output[7]*C2; - step[ 4] = temp1 + temp2; - - temp1 = output[5]*C10; - temp2 = output[6]*C6; - step[ 5] = temp1 + temp2; - - temp1 = output[5]*C6; - temp2 = output[6]*C10; - step[ 6] = temp2 - temp1; - - temp1 = output[4]*C2; - temp2 = output[7]*C14; - step[ 7] = temp2 - temp1; - - step[ 8] = output[ 8] + output[11]; - step[ 9] = output[ 9] + output[10]; - step[10] = output[ 9] - output[10]; - step[11] = output[ 8] - output[11]; - - step[12] = output[12] + output[15]; - step[13] = output[13] + output[14]; - step[14] = output[13] - output[14]; - step[15] = output[12] - output[15]; - - // step 4 - output[ 0] = (step[ 0] + step[ 1]); - output[ 8] = (step[ 0] - step[ 1]); - - temp1 = step[2]*C12; - temp2 = step[3]*C4; - temp1 = temp1 + temp2; - output[ 4] = 2*(temp1*C8); - 
- temp1 = step[2]*C4; - temp2 = step[3]*C12; - temp1 = temp2 - temp1; - output[12] = 2*(temp1*C8); - - output[ 2] = 2*((step[4] + step[ 5])*C8); - output[14] = 2*((step[7] - step[ 6])*C8); - - temp1 = step[4] - step[5]; - temp2 = step[6] + step[7]; - output[ 6] = (temp1 + temp2); - output[10] = (temp1 - temp2); - - intermediate[8] = step[8] + step[14]; - intermediate[9] = step[9] + step[15]; - - temp1 = intermediate[8]*C12; - temp2 = intermediate[9]*C4; - temp1 = temp1 - temp2; - output[3] = 2*(temp1*C8); - - temp1 = intermediate[8]*C4; - temp2 = intermediate[9]*C12; - temp1 = temp2 + temp1; - output[13] = 2*(temp1*C8); - - output[ 9] = 2*((step[10] + step[11])*C8); - - intermediate[11] = step[10] - step[11]; - intermediate[12] = step[12] + step[13]; - intermediate[13] = step[12] - step[13]; - intermediate[14] = step[ 8] - step[14]; - intermediate[15] = step[ 9] - step[15]; - - output[15] = (intermediate[11] + intermediate[12]); - output[ 1] = -(intermediate[11] - intermediate[12]); - - output[ 7] = 2*(intermediate[13]*C8); - - temp1 = intermediate[14]*C12; - temp2 = intermediate[15]*C4; - temp1 = temp1 - temp2; - output[11] = -2*(temp1*C8); - - temp1 = intermediate[14]*C4; - temp2 = intermediate[15]*C12; - temp1 = temp2 + temp1; - output[ 5] = 2*(temp1*C8); - } - vp9_clear_system_state(); // Make it simd safe : __asm emms; -} - -static void vp9_short_fdct16x16_c_f(short *input, short *out, int pitch, - int scale) { - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - int shortpitch = pitch >> 1; - int i, j; - double output[256]; - // First transform columns - for (i = 0; i < 16; i++) { - double temp_in[16], temp_out[16]; - for (j = 0; j < 16; j++) - temp_in[j] = input[j*shortpitch + i]; - dct16x16_1d_f(temp_in, temp_out); - for (j = 0; j < 16; j++) - output[j*16 + i] = temp_out[j]; - } - // Then transform rows - for (i = 0; i < 16; ++i) { - double temp_in[16], temp_out[16]; - for (j = 0; j < 16; ++j) - temp_in[j] = output[j + i*16]; - 
dct16x16_1d_f(temp_in, temp_out); - for (j = 0; j < 16; ++j) - output[j + i*16] = temp_out[j]; - } - // Scale by some magic number - for (i = 0; i < 256; i++) - out[i] = (short)round(output[i] / (2 << scale)); - } - vp9_clear_system_state(); // Make it simd safe : __asm emms; -} - -void vp9_short_fdct8x8_c_f(short *block, short *coefs, int pitch, int scale) { - int j1, i, j, k; - float b[8]; - float b1[8]; - float d[8][8]; - float f0 = (float) .7071068; - float f1 = (float) .4903926; - float f2 = (float) .4619398; - float f3 = (float) .4157348; - float f4 = (float) .3535534; - float f5 = (float) .2777851; - float f6 = (float) .1913417; - float f7 = (float) .0975452; - pitch = pitch / 2; - for (i = 0, k = 0; i < 8; i++, k += pitch) { - for (j = 0; j < 8; j++) { - b[j] = (float)(block[k + j] << (3 - scale)); - } - /* Horizontal transform */ - for (j = 0; j < 4; j++) { - j1 = 7 - j; - b1[j] = b[j] + b[j1]; - b1[j1] = b[j] - b[j1]; - } - b[0] = b1[0] + b1[3]; - b[1] = b1[1] + b1[2]; - b[2] = b1[1] - b1[2]; - b[3] = b1[0] - b1[3]; - b[4] = b1[4]; - b[5] = (b1[6] - b1[5]) * f0; - b[6] = (b1[6] + b1[5]) * f0; - b[7] = b1[7]; - d[i][0] = (b[0] + b[1]) * f4; - d[i][4] = (b[0] - b[1]) * f4; - d[i][2] = b[2] * f6 + b[3] * f2; - d[i][6] = b[3] * f6 - b[2] * f2; - b1[4] = b[4] + b[5]; - b1[7] = b[7] + b[6]; - b1[5] = b[4] - b[5]; - b1[6] = b[7] - b[6]; - d[i][1] = b1[4] * f7 + b1[7] * f1; - d[i][5] = b1[5] * f3 + b1[6] * f5; - d[i][7] = b1[7] * f7 - b1[4] * f1; - d[i][3] = b1[6] * f3 - b1[5] * f5; - } - /* Vertical transform */ - for (i = 0; i < 8; i++) { - for (j = 0; j < 4; j++) { - j1 = 7 - j; - b1[j] = d[j][i] + d[j1][i]; - b1[j1] = d[j][i] - d[j1][i]; - } - b[0] = b1[0] + b1[3]; - b[1] = b1[1] + b1[2]; - b[2] = b1[1] - b1[2]; - b[3] = b1[0] - b1[3]; - b[4] = b1[4]; - b[5] = (b1[6] - b1[5]) * f0; - b[6] = (b1[6] + b1[5]) * f0; - b[7] = b1[7]; - d[0][i] = (b[0] + b[1]) * f4; - d[4][i] = (b[0] - b[1]) * f4; - d[2][i] = b[2] * f6 + b[3] * f2; - d[6][i] = b[3] * f6 - b[2] * f2; 
- b1[4] = b[4] + b[5]; - b1[7] = b[7] + b[6]; - b1[5] = b[4] - b[5]; - b1[6] = b[7] - b[6]; - d[1][i] = b1[4] * f7 + b1[7] * f1; - d[5][i] = b1[5] * f3 + b1[6] * f5; - d[7][i] = b1[7] * f7 - b1[4] * f1; - d[3][i] = b1[6] * f3 - b1[5] * f5; - } - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) { - *(coefs + j + i * 8) = (short) floor(d[i][j] + 0.5); - } - } - return; -} - -#define divide_bits(d, n) ((n) < 0 ? (d) << (n) : (d) >> (n)) - -#if DWTDCT_TYPE == DWTDCT16X16_LEAN - -void vp9_short_fdct32x32_c(short *input, short *out, int pitch) { - // assume out is a 32x32 buffer - short buffer[16 * 16]; - int i, j; - const int short_pitch = pitch >> 1; -#if DWT_TYPE == 26 - dyadic_analyze_26(1, 32, 32, input, short_pitch, out, 32); -#elif DWT_TYPE == 97 - dyadic_analyze_97(1, 32, 32, input, short_pitch, out, 32); -#elif DWT_TYPE == 53 - dyadic_analyze_53(1, 32, 32, input, short_pitch, out, 32); -#endif - // TODO(debargha): Implement more efficiently by adding output pitch - // argument to the dct16x16 function - vp9_short_fdct16x16_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) - vpx_memcpy(out + i * 32, buffer + i * 16, sizeof(short) * 16); - for (i = 0; i < 16; ++i) { - for (j = 16; j < 32; ++j) { - out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2); - } - } - for (i = 16; i < 32; ++i) { - for (j = 0; j < 32; ++j) { - out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2); - } - } -} - -#elif DWTDCT_TYPE == DWTDCT16X16 - -void vp9_short_fdct32x32_c(short *input, short *out, int pitch) { - // assume out is a 32x32 buffer - short buffer[16 * 16]; - int i, j; - const int short_pitch = pitch >> 1; -#if DWT_TYPE == 26 - dyadic_analyze_26(1, 32, 32, input, short_pitch, out, 32); -#elif DWT_TYPE == 97 - dyadic_analyze_97(1, 32, 32, input, short_pitch, out, 32); -#elif DWT_TYPE == 53 - dyadic_analyze_53(1, 32, 32, input, short_pitch, out, 32); -#endif - // TODO(debargha): Implement more efficiently by adding 
output pitch - // argument to the dct16x16 function - vp9_short_fdct16x16_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) - vpx_memcpy(out + i * 32, buffer + i * 16, sizeof(short) * 16); - vp9_short_fdct16x16_c_f(out + 16, buffer, 64, 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) - vpx_memcpy(out + i * 32 + 16, buffer + i * 16, sizeof(short) * 16); - - vp9_short_fdct16x16_c_f(out + 32 * 16, buffer, 64, 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) - vpx_memcpy(out + i * 32 + 32 * 16, buffer + i * 16, sizeof(short) * 16); - - vp9_short_fdct16x16_c_f(out + 33 * 16, buffer, 64, 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) - vpx_memcpy(out + i * 32 + 33 * 16, buffer + i * 16, sizeof(short) * 16); -} - -#elif DWTDCT_TYPE == DWTDCT8X8 - -void vp9_short_fdct32x32_c(short *input, short *out, int pitch) { - // assume out is a 32x32 buffer - short buffer[8 * 8]; - int i, j; - const int short_pitch = pitch >> 1; -#if DWT_TYPE == 26 - dyadic_analyze_26(2, 32, 32, input, short_pitch, out, 32); -#elif DWT_TYPE == 97 - dyadic_analyze_97(2, 32, 32, input, short_pitch, out, 32); -#elif DWT_TYPE == 53 - dyadic_analyze_53(2, 32, 32, input, short_pitch, out, 32); -#endif - // TODO(debargha): Implement more efficiently by adding output pitch - // argument to the dct16x16 function - vp9_short_fdct8x8_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS); - for (i = 0; i < 8; ++i) - vpx_memcpy(out + i * 32, buffer + i * 8, sizeof(short) * 8); - - vp9_short_fdct8x8_c_f(out + 8, buffer, 64, 1 + DWT_PRECISION_BITS); - for (i = 0; i < 8; ++i) - vpx_memcpy(out + i * 32 + 8, buffer + i * 8, sizeof(short) * 8); - - vp9_short_fdct8x8_c_f(out + 32 * 8, buffer, 64, 1 + DWT_PRECISION_BITS); - for (i = 0; i < 8; ++i) - vpx_memcpy(out + i * 32 + 32 * 8, buffer + i * 8, sizeof(short) * 8); - - vp9_short_fdct8x8_c_f(out + 33 * 8, buffer, 64, 1 + DWT_PRECISION_BITS); - for (i = 0; i < 8; ++i) - vpx_memcpy(out + i * 32 + 33 * 8, buffer + i * 8, sizeof(short) * 8); - - 
for (i = 0; i < 16; ++i) { - for (j = 16; j < 32; ++j) { - out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2); - } - } - for (i = 16; i < 32; ++i) { - for (j = 0; j < 32; ++j) { - out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2); - } - } -} - -#endif - -#if CONFIG_TX64X64 -void vp9_short_fdct64x64_c(short *input, short *out, int pitch) { - // assume out is a 64x64 buffer - short buffer[16 * 16]; - int i, j; - const int short_pitch = pitch >> 1; -#if DWT_TYPE == 26 - dyadic_analyze_26(2, 64, 64, input, short_pitch, out, 64); -#elif DWT_TYPE == 97 - dyadic_analyze_97(2, 64, 64, input, short_pitch, out, 64); -#elif DWT_TYPE == 53 - dyadic_analyze_53(2, 64, 64, input, short_pitch, out, 64); -#endif - // TODO(debargha): Implement more efficiently by adding output pitch - // argument to the dct16x16 function - vp9_short_fdct16x16_c_f(out, buffer, 128, 2 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) - vpx_memcpy(out + i * 64, buffer + i * 16, sizeof(short) * 16); - -#if DWTDCT_TYPE == DWTDCT16X16_LEAN - for (i = 0; i < 16; ++i) { - for (j = 16; j < 48; ++j) { - out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1); - } - } - for (i = 16; i < 64; ++i) { - for (j = 0; j < 64; ++j) { - out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1); - } - } -#elif DWTDCT_TYPE == DWTDCT16X16 - vp9_short_fdct16x16_c_f(out + 16, buffer, 128, 2 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) - vpx_memcpy(out + i * 64 + 16, buffer + i * 16, sizeof(short) * 16); - - vp9_short_fdct16x16_c_f(out + 64 * 16, buffer, 128, 2 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) - vpx_memcpy(out + i * 64 + 64 * 16, buffer + i * 16, sizeof(short) * 16); - - vp9_short_fdct16x16_c_f(out + 65 * 16, buffer, 128, 2 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) - vpx_memcpy(out + i * 64 + 65 * 16, buffer + i * 16, sizeof(short) * 16); - - // There is no dct used on the highest bands for now. 
- // Need to scale these coeffs by a factor of 2/2^DWT_PRECISION_BITS - // TODO(debargha): experiment with turning these coeffs to 0 - for (i = 0; i < 32; ++i) { - for (j = 32; j < 64; ++j) { - out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1); - } - } - for (i = 32; i < 64; ++i) { - for (j = 0; j < 64; ++j) { - out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1); - } - } -#endif // DWTDCT_TYPE -} -#endif // CONFIG_TX64X64 -#endif // CONFIG_DWTDCTHYBRID diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index c5f717f5f..bedd10f1f 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -30,7 +30,7 @@ #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_tile_common.h" #include "vp9/encoder/vp9_tokenize.h" -#include "vp9_rtcd.h" +#include "./vp9_rtcd.h" #include <stdio.h> #include <math.h> #include <limits.h> @@ -1226,13 +1226,30 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { xd->fullpixel_mask = 0xfffffff8; } +static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { + if (lossless) { + cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4_x8; + cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4_x8; + cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_inv_walsh4x4_1_x8; + cpi->mb.e_mbd.inv_txm4x4 = vp9_short_inv_walsh4x4_x8; + cpi->mb.optimize = 0; + cpi->common.filter_level = 0; + cpi->zbin_mode_boost_enabled = FALSE; + cpi->common.txfm_mode = ONLY_4X4; + } else { + cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4; + cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4; + cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_idct4x4llm_1; + cpi->mb.e_mbd.inv_txm4x4 = vp9_short_idct4x4llm; + } +} + + static void encode_frame_internal(VP9_COMP *cpi) { int mb_row; MACROBLOCK *const x = &cpi->mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - - TOKENEXTRA *tp = cpi->tok; int totalrate; // printf("encode_frame_internal frame %d (%d)\n", @@ -1284,6 +1301,14 @@ static void 
encode_frame_internal(VP9_COMP *cpi) { vp9_zero(cpi->mb_mv_ref_count); #endif + + // force lossless mode when Q0 is selected + cpi->mb.e_mbd.lossless = (cm->base_qindex == 0 && + cm->y1dc_delta_q == 0 && + cm->uvdc_delta_q == 0 && + cm->uvac_delta_q == 0); + switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless); + vp9_frame_init_quantizer(cpi); vp9_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q); @@ -1313,19 +1338,19 @@ static void encode_frame_internal(VP9_COMP *cpi) { { // Take tiles into account and give start/end MB - int tile; + int tile_col; + TOKENEXTRA *tp = cpi->tok; + + for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) { + TOKENEXTRA *tp_old = tp; - for (tile = 0; tile < cm->tile_columns; tile++) { // For each row of SBs in the frame - cm->cur_tile_idx = tile; - vp9_get_tile_offsets(cm, &cm->cur_tile_mb_col_start, - &cm->cur_tile_mb_col_end); + vp9_get_tile_col_offsets(cm, tile_col); for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4) { encode_sb_row(cpi, mb_row, &tp, &totalrate); } + cpi->tok_count[tile_col] = (unsigned int)(tp - tp_old); } - - cpi->tok_count = (unsigned int)(tp - cpi->tok); } vpx_usec_timer_mark(&emr_timer); @@ -1536,11 +1561,12 @@ void vp9_encode_frame(VP9_COMP *cpi) { pred_type = HYBRID_PREDICTION; /* transform size (4x4, 8x8, 16x16 or select-per-mb) selection */ -#if CONFIG_LOSSLESS + + cpi->mb.e_mbd.lossless = 0; if (cpi->oxcf.lossless) { txfm_type = ONLY_4X4; + cpi->mb.e_mbd.lossless = 1; } else -#endif /* FIXME (rbultje) * this is a hack (no really), basically to work around the complete * nonsense coefficient cost prediction for keyframes. 
The probabilities @@ -1688,10 +1714,7 @@ void vp9_setup_block_ptrs(MACROBLOCK *x) { } } - x->block[24].src_diff = x->src_diff + 384; - - - for (i = 0; i < 25; i++) { + for (i = 0; i < 24; i++) { x->block[i].coeff = x->coeff + i * 16; } } diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index 1dd30130a..a52763080 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -62,7 +62,7 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) { vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob); #endif } else { - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); + x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(be, b) ; vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 32); } @@ -165,7 +165,7 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { tx_type, 8, xd->block[idx].eob); #endif } else { - x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); + x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32); x->quantize_b_8x8(x->block + idx, xd->block + idx); vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32); } @@ -183,13 +183,13 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob); #endif } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) { - x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32); + x->fwd_txm8x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4_pair(be, be + 1, b, b + 1); vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32); vp9_inverse_transform_b_4x4(xd, ib + iblock[i] + 1, 32); i++; } else { - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); + x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(be, b); vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32); } @@ -222,7 +222,7 @@ static void encode_intra_uv4x4(MACROBLOCK *x, int ib, vp9_subtract_b(be, b, 8); - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 16); + x->fwd_txm4x4(be->src_diff, be->coeff, 16); x->quantize_b_4x4(be, b); 
vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 16); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 52eabf129..efe95c911 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -166,54 +166,31 @@ static void subtract_mb(MACROBLOCK *x) { x->e_mbd.predictor, x->src.uv_stride); } -static void build_dcblock_4x4(MACROBLOCK *x) { - int16_t *src_diff_ptr = &x->src_diff[384]; - int i; - - for (i = 0; i < 16; i++) { - src_diff_ptr[i] = x->coeff[i * 16]; - x->coeff[i * 16] = 0; - } -} - void vp9_transform_mby_4x4(MACROBLOCK *x) { int i; MACROBLOCKD *xd = &x->e_mbd; - int has_2nd_order = get_2nd_order_usage(xd); for (i = 0; i < 16; i++) { BLOCK *b = &x->block[i]; TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]); if (tx_type != DCT_DCT) { - assert(has_2nd_order == 0); vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 4); } else if (!(i & 1) && get_tx_type_4x4(xd, &xd->block[i + 1]) == DCT_DCT) { - x->vp9_short_fdct8x4(&x->block[i].src_diff[0], + x->fwd_txm8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); i++; } else { - x->vp9_short_fdct4x4(&x->block[i].src_diff[0], + x->fwd_txm4x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); } } - - if (has_2nd_order) { - // build dc block from 16 y dc values - build_dcblock_4x4(x); - - // do 2nd order transform on the dc block - x->short_walsh4x4(&x->block[24].src_diff[0], - &x->block[24].coeff[0], 8); - } else { - vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0])); - } } void vp9_transform_mbuv_4x4(MACROBLOCK *x) { int i; for (i = 16; i < 24; i += 2) { - x->vp9_short_fdct8x4(&x->block[i].src_diff[0], + x->fwd_txm8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16); } } @@ -223,37 +200,18 @@ static void transform_mb_4x4(MACROBLOCK *x) { vp9_transform_mbuv_4x4(x); } -static void build_dcblock_8x8(MACROBLOCK *x) { - int16_t *src_diff_ptr = x->block[24].src_diff; - int i; - - for (i = 0; i < 16; i++) { - src_diff_ptr[i] = 0; - } - src_diff_ptr[0] = x->coeff[0 * 
16]; - src_diff_ptr[1] = x->coeff[4 * 16]; - src_diff_ptr[4] = x->coeff[8 * 16]; - src_diff_ptr[8] = x->coeff[12 * 16]; - x->coeff[0 * 16] = 0; - x->coeff[4 * 16] = 0; - x->coeff[8 * 16] = 0; - x->coeff[12 * 16] = 0; -} - void vp9_transform_mby_8x8(MACROBLOCK *x) { int i; MACROBLOCKD *xd = &x->e_mbd; TX_TYPE tx_type; - int has_2nd_order = get_2nd_order_usage(xd); for (i = 0; i < 9; i += 8) { BLOCK *b = &x->block[i]; tx_type = get_tx_type_8x8(xd, &xd->block[i]); if (tx_type != DCT_DCT) { - assert(has_2nd_order == 0); vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 8); } else { - x->vp9_short_fdct8x8(&x->block[i].src_diff[0], + x->fwd_txm8x8(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); } } @@ -261,31 +219,19 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) { BLOCK *b = &x->block[i]; tx_type = get_tx_type_8x8(xd, &xd->block[i]); if (tx_type != DCT_DCT) { - assert(has_2nd_order == 0); vp9_fht_c(b->src_diff, 32, (b + 2)->coeff, tx_type, 8); } else { - x->vp9_short_fdct8x8(&x->block[i].src_diff[0], + x->fwd_txm8x8(&x->block[i].src_diff[0], &x->block[i + 2].coeff[0], 32); } } - - if (has_2nd_order) { - // build dc block from 2x2 y dc values - build_dcblock_8x8(x); - - // do 2nd order transform on the dc block - x->short_fhaar2x2(&x->block[24].src_diff[0], - &x->block[24].coeff[0], 8); - } else { - vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0])); - } } void vp9_transform_mbuv_8x8(MACROBLOCK *x) { int i; for (i = 16; i < 24; i += 4) { - x->vp9_short_fdct8x8(&x->block[i].src_diff[0], + x->fwd_txm8x8(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16); } } @@ -303,7 +249,7 @@ void vp9_transform_mby_16x16(MACROBLOCK *x) { if (tx_type != DCT_DCT) { vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 16); } else { - x->vp9_short_fdct16x16(&x->block[0].src_diff[0], + x->fwd_txm16x16(&x->block[0].src_diff[0], &x->block[0].coeff[0], 32); } } @@ -321,9 +267,9 @@ void vp9_transform_sby_32x32(MACROBLOCK *x) { void vp9_transform_sbuv_16x16(MACROBLOCK *x) { 
SUPERBLOCK * const x_sb = &x->sb_coeff_data; vp9_clear_system_state(); - x->vp9_short_fdct16x16(x_sb->src_diff + 1024, + x->fwd_txm16x16(x_sb->src_diff + 1024, x_sb->coeff + 1024, 32); - x->vp9_short_fdct16x16(x_sb->src_diff + 1280, + x->fwd_txm16x16(x_sb->src_diff + 1280, x_sb->coeff + 1280, 32); } @@ -342,13 +288,10 @@ struct vp9_token_state { // TODO: experiments to find optimal multiple numbers #define Y1_RD_MULT 4 #define UV_RD_MULT 2 -#define Y2_RD_MULT 4 static const int plane_rd_mult[4] = { Y1_RD_MULT, - Y2_RD_MULT, UV_RD_MULT, - Y1_RD_MULT }; #define UPDATE_RD_COST()\ @@ -361,6 +304,13 @@ static const int plane_rd_mult[4] = { }\ } +// This function is a place holder for now but may ultimately need +// to scan previous tokens to work out the correct context. +static int trellis_get_coeff_context(int token) { + int recent_energy = 0; + return vp9_get_coef_context(&recent_energy, token); +} + static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int tx_size) { @@ -372,23 +322,19 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, int16_t *qcoeff_ptr = d->qcoeff; int16_t *dqcoeff_ptr = d->dqcoeff; int eob = d->eob, final_eob, sz = 0; - int i0 = (type == PLANE_TYPE_Y_NO_DC); + const int i0 = 0; int rc, x, next; int64_t rdmult, rddiv, rd_cost0, rd_cost1; int rate0, rate1, error0, error1, t0, t1; int best, band, pt; int err_mult = plane_rd_mult[type]; int default_eob; - int const *scan, *bands; -#if CONFIG_NEWCOEFCONTEXT - const int *neighbors; -#endif + int const *scan; switch (tx_size) { default: case TX_4X4: scan = vp9_default_zig_zag1d_4x4; - bands = vp9_coef_bands_4x4; default_eob = 16; // TODO: this isn't called (for intra4x4 modes), but will be left in // since it could be used later @@ -415,18 +361,13 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, break; case TX_8X8: scan = vp9_default_zig_zag1d_8x8; - bands = vp9_coef_bands_8x8; default_eob = 64; break; case TX_16X16: scan 
= vp9_default_zig_zag1d_16x16; - bands = vp9_coef_bands_16x16; default_eob = 256; break; } -#if CONFIG_NEWCOEFCONTEXT - neighbors = vp9_get_coef_neighbors_handle(scan); -#endif /* Now set up a Viterbi trellis to evaluate alternative roundings. */ rdmult = mb->rdmult * err_mult; @@ -458,13 +399,8 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, t0 = (vp9_dct_value_tokens_ptr + x)->Token; /* Consider both possible successor states. */ if (next < default_eob) { - band = bands[i + 1]; - pt = vp9_prev_token_class[t0]; -#if CONFIG_NEWCOEFCONTEXT - if (NEWCOEFCONTEXT_BAND_COND(band)) - pt = vp9_get_coef_neighbor_context( - qcoeff_ptr, i0, neighbors, scan[i + 1]); -#endif + band = get_coef_band(i + 1); + pt = trellis_get_coeff_context(t0); rate0 += mb->token_costs[tx_size][type][band][pt][tokens[next][0].token]; rate1 += @@ -510,36 +446,14 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, t0 = t1 = (vp9_dct_value_tokens_ptr + x)->Token; } if (next < default_eob) { - band = bands[i + 1]; + band = get_coef_band(i + 1); if (t0 != DCT_EOB_TOKEN) { -#if CONFIG_NEWCOEFCONTEXT - int tmp = qcoeff_ptr[scan[i]]; - qcoeff_ptr[scan[i]] = x; - if (NEWCOEFCONTEXT_BAND_COND(band)) - pt = vp9_get_coef_neighbor_context( - qcoeff_ptr, i0, neighbors, scan[i + 1]); - else - pt = vp9_prev_token_class[t0]; - qcoeff_ptr[scan[i]] = tmp; -#else - pt = vp9_prev_token_class[t0]; -#endif + pt = trellis_get_coeff_context(t0); rate0 += mb->token_costs[tx_size][type][band][pt][ tokens[next][0].token]; } if (t1 != DCT_EOB_TOKEN) { -#if CONFIG_NEWCOEFCONTEXT - int tmp = qcoeff_ptr[scan[i]]; - qcoeff_ptr[scan[i]] = x; - if (NEWCOEFCONTEXT_BAND_COND(band)) - pt = vp9_get_coef_neighbor_context( - qcoeff_ptr, i0, neighbors, scan[i + 1]); - else - pt = vp9_prev_token_class[t1]; - qcoeff_ptr[scan[i]] = tmp; -#else - pt = vp9_prev_token_class[t1]; -#endif + pt = trellis_get_coeff_context(t1); rate1 += mb->token_costs[tx_size][type][band][pt][ tokens[next][1].token]; } @@ -567,7 
+481,7 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, * add a new trellis node, but we do need to update the costs. */ else { - band = bands[i + 1]; + band = get_coef_band(i + 1); t0 = tokens[next][0].token; t1 = tokens[next][1].token; /* Update the cost of each path if we're past the EOB token. */ @@ -584,7 +498,7 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, } /* Now pick the best path through the whole trellis. */ - band = bands[i + 1]; + band = get_coef_band(i + 1); VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; @@ -611,80 +525,11 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, final_eob++; d->eob = final_eob; - *a = *l = (d->eob > !type); -} - -/************************************************************************** -our inverse hadamard transform effectively is weighted sum of all 16 inputs -with weight either 1 or -1. It has a last stage scaling of (sum+1)>>2. And -dc only idct is (dc+16)>>5. So if all the sums are between -65 and 63 the -output after inverse wht and idct will be all zero. A sum of absolute value -smaller than 65 guarantees all 16 different (+1/-1) weighted sums in wht -fall between -65 and +65. -**************************************************************************/ -#define SUM_2ND_COEFF_THRESH 65 - -static void check_reset_2nd_coeffs(MACROBLOCKD *xd, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) { - int sum = 0; - int i; - BLOCKD *bd = &xd->block[24]; - if (bd->dequant[0] >= SUM_2ND_COEFF_THRESH - && bd->dequant[1] >= SUM_2ND_COEFF_THRESH) - return; - - for (i = 0; i < bd->eob; i++) { - int coef = bd->dqcoeff[vp9_default_zig_zag1d_4x4[i]]; - sum += (coef >= 0) ? 
coef : -coef; - if (sum >= SUM_2ND_COEFF_THRESH) - return; - } - - if (sum < SUM_2ND_COEFF_THRESH) { - for (i = 0; i < bd->eob; i++) { - int rc = vp9_default_zig_zag1d_4x4[i]; - bd->qcoeff[rc] = 0; - bd->dqcoeff[rc] = 0; - } - bd->eob = 0; - *a = *l = (bd->eob != 0); - } -} - -#define SUM_2ND_COEFF_THRESH_8X8 32 -static void check_reset_8x8_2nd_coeffs(MACROBLOCKD *xd, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) { - int sum = 0; - BLOCKD *bd = &xd->block[24]; - int coef; - - coef = bd->dqcoeff[0]; - sum += (coef >= 0) ? coef : -coef; - coef = bd->dqcoeff[1]; - sum += (coef >= 0) ? coef : -coef; - coef = bd->dqcoeff[4]; - sum += (coef >= 0) ? coef : -coef; - coef = bd->dqcoeff[8]; - sum += (coef >= 0) ? coef : -coef; - - if (sum < SUM_2ND_COEFF_THRESH_8X8) { - bd->qcoeff[0] = 0; - bd->dqcoeff[0] = 0; - bd->qcoeff[1] = 0; - bd->dqcoeff[1] = 0; - bd->qcoeff[4] = 0; - bd->dqcoeff[4] = 0; - bd->qcoeff[8] = 0; - bd->dqcoeff[8] = 0; - bd->eob = 0; - *a = *l = (bd->eob != 0); - } + *a = *l = (d->eob > 0); } void vp9_optimize_mby_4x4(MACROBLOCK *x) { int b; - PLANE_TYPE type; - int has_2nd_order; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; @@ -698,25 +543,11 @@ void vp9_optimize_mby_4x4(MACROBLOCK *x) { ta = (ENTROPY_CONTEXT *)&t_above; tl = (ENTROPY_CONTEXT *)&t_left; - has_2nd_order = get_2nd_order_usage(&x->e_mbd); - - type = has_2nd_order ? 
PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC; - for (b = 0; b < 16; b++) { - optimize_b(x, b, type, + optimize_b(x, b, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_4X4][b], tl + vp9_block2left[TX_4X4][b], TX_4X4); } - - if (has_2nd_order) { - b = 24; - optimize_b(x, b, PLANE_TYPE_Y2, - ta + vp9_block2above[TX_4X4][b], - tl + vp9_block2left[TX_4X4][b], TX_4X4); - check_reset_2nd_coeffs(&x->e_mbd, - ta + vp9_block2above[TX_4X4][b], - tl + vp9_block2left[TX_4X4][b]); - } } void vp9_optimize_mbuv_4x4(MACROBLOCK *x) { @@ -748,11 +579,9 @@ static void optimize_mb_4x4(MACROBLOCK *x) { void vp9_optimize_mby_8x8(MACROBLOCK *x) { int b; - PLANE_TYPE type; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; - int has_2nd_order = get_2nd_order_usage(&x->e_mbd); if (!x->e_mbd.above_context || !x->e_mbd.left_context) return; @@ -762,7 +591,6 @@ void vp9_optimize_mby_8x8(MACROBLOCK *x) { ta = (ENTROPY_CONTEXT *)&t_above; tl = (ENTROPY_CONTEXT *)&t_left; - type = has_2nd_order ? 
PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC; for (b = 0; b < 16; b += 4) { ENTROPY_CONTEXT *const a = ta + vp9_block2above[TX_8X8][b]; ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b]; @@ -773,17 +601,10 @@ void vp9_optimize_mby_8x8(MACROBLOCK *x) { ENTROPY_CONTEXT above_ec = a[0]; ENTROPY_CONTEXT left_ec = l[0]; #endif - optimize_b(x, b, type, &above_ec, &left_ec, TX_8X8); + optimize_b(x, b, PLANE_TYPE_Y_WITH_DC, &above_ec, &left_ec, TX_8X8); a[1] = a[0] = above_ec; l[1] = l[0] = left_ec; } - - // 8x8 always have 2nd order block - if (has_2nd_order) { - check_reset_8x8_2nd_coeffs(&x->e_mbd, - ta + vp9_block2above[TX_8X8][24], - tl + vp9_block2left[TX_8X8][24]); - } } void vp9_optimize_mbuv_8x8(MACROBLOCK *x) { diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 3791737d2..64d8d7d6c 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -19,11 +19,6 @@ #include "vp9/common/vp9_findnearmv.h" #include "vp9/common/vp9_common.h" -#ifdef ENTROPY_STATS -static int mv_ref_ct [31] [4] [2]; -static int mv_mode_cts [4] [2]; -#endif - void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv) { int col_min = (ref_mv->as_mv.col >> 3) - MAX_FULL_PEL_VAL + ((ref_mv->as_mv.col & 7) ? 
1 : 0); @@ -2103,21 +2098,22 @@ int vp9_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, #ifdef ENTROPY_STATS -void print_mode_context(void) { +void print_mode_context(VP9_COMMON *pc) { FILE *f = fopen("vp9_modecont.c", "a"); int i, j; fprintf(f, "#include \"vp9_entropy.h\"\n"); - fprintf(f, "const int vp9_mode_contexts[6][4] ="); + fprintf(f, "const int vp9_mode_contexts[INTER_MODE_CONTEXTS][4] ="); fprintf(f, "{\n"); - for (j = 0; j < 6; j++) { + for (j = 0; j < INTER_MODE_CONTEXTS; j++) { fprintf(f, " {/* %d */ ", j); fprintf(f, " "); for (i = 0; i < 4; i++) { int this_prob; // context probs - this_prob = get_binary_prob(mv_ref_ct[j][i][0], mv_ref_ct[j][i][1]); + this_prob = get_binary_prob(pc->fc.mv_ref_ct[j][i][0], + pc->fc.mv_ref_ct[j][i][1]); fprintf(f, "%5d, ", this_prob); } @@ -2128,44 +2124,4 @@ void print_mode_context(void) { fclose(f); } -/* MV ref count ENTROPY_STATS stats code */ -void init_mv_ref_counts() { - vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct)); - vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts)); -} - -void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) { - if (m == ZEROMV) { - ++mv_ref_ct [ct[0]] [0] [0]; - ++mv_mode_cts[0][0]; - } else { - ++mv_ref_ct [ct[0]] [0] [1]; - ++mv_mode_cts[0][1]; - - if (m == NEARESTMV) { - ++mv_ref_ct [ct[1]] [1] [0]; - ++mv_mode_cts[1][0]; - } else { - ++mv_ref_ct [ct[1]] [1] [1]; - ++mv_mode_cts[1][1]; - - if (m == NEARMV) { - ++mv_ref_ct [ct[2]] [2] [0]; - ++mv_mode_cts[2][0]; - } else { - ++mv_ref_ct [ct[2]] [2] [1]; - ++mv_mode_cts[2][1]; - - if (m == NEWMV) { - ++mv_ref_ct [ct[3]] [3] [0]; - ++mv_mode_cts[3][0]; - } else { - ++mv_ref_ct [ct[3]] [3] [1]; - ++mv_mode_cts[3][1]; - } - } - } - } -} - #endif/* END MV ref count ENTROPY_STATS stats code */ diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index 358d10bc6..06acdbe58 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -16,9 +16,7 @@ #include "vp9/encoder/vp9_variance.h" #ifdef ENTROPY_STATS -extern void 
init_mv_ref_counts(); -extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]); -void print_mode_context(void); +void print_mode_context(VP9_COMMON *pc); #endif diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 73b7b1f5f..9d1ae1131 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -753,12 +753,12 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->quarter_pixel_search = 1; sf->half_pixel_search = 1; sf->iterative_sub_pixel = 1; -#if CONFIG_LOSSLESS - sf->optimize_coefficients = 0; -#else - sf->optimize_coefficients = 1; -#endif sf->no_skip_block4x4_search = 1; + if (cpi->oxcf.lossless) + sf->optimize_coefficients = 0; + else + sf->optimize_coefficients = 1; + sf->first_step = 0; sf->max_step_search_steps = MAX_MVSEARCH_STEPS; sf->static_segmentation = 1; @@ -841,28 +841,19 @@ void vp9_set_speed_features(VP9_COMP *cpi) { } } - cpi->mb.vp9_short_fdct16x16 = vp9_short_fdct16x16; - cpi->mb.vp9_short_fdct8x8 = vp9_short_fdct8x8; - cpi->mb.vp9_short_fdct8x4 = vp9_short_fdct8x4; - cpi->mb.vp9_short_fdct4x4 = vp9_short_fdct4x4; - cpi->mb.short_walsh4x4 = vp9_short_walsh4x4; - cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2; - -#if CONFIG_LOSSLESS - if (cpi->oxcf.lossless) { - cpi->mb.vp9_short_fdct8x4 = vp9_short_walsh8x4_x8; - cpi->mb.vp9_short_fdct4x4 = vp9_short_walsh4x4_x8; - cpi->mb.short_walsh4x4 = vp9_short_walsh4x4; - cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2; - cpi->mb.short_walsh4x4 = vp9_short_walsh4x4_lossless; + cpi->mb.fwd_txm16x16 = vp9_short_fdct16x16; + cpi->mb.fwd_txm8x8 = vp9_short_fdct8x8; + cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4; + cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4; + if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) { + cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4_x8; + cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4_x8; } -#endif cpi->mb.quantize_b_4x4 = vp9_regular_quantize_b_4x4; cpi->mb.quantize_b_4x4_pair = vp9_regular_quantize_b_4x4_pair; cpi->mb.quantize_b_8x8 = 
vp9_regular_quantize_b_8x8; cpi->mb.quantize_b_16x16 = vp9_regular_quantize_b_16x16; - cpi->mb.quantize_b_2x2 = vp9_regular_quantize_b_2x2; vp9_init_quantizer(cpi); @@ -953,7 +944,7 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { vpx_free(cpi->tok); { - unsigned int tokens = cm->mb_rows * cm->mb_cols * 24 * 16; + unsigned int tokens = cm->mb_rows * cm->mb_cols * (24 * 16 + 1); CHECK_MEM_ERROR(cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok))); } @@ -1107,9 +1098,13 @@ rescale(int val, int num, int denom) { return (int)(llval * llnum / llden); } -static void set_tile_limits(VP9_COMMON *cm) { +static void set_tile_limits(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; int min_log2_tiles, max_log2_tiles; + cm->log2_tile_columns = cpi->oxcf.tile_columns; + cm->log2_tile_rows = cpi->oxcf.tile_rows; + vp9_get_tile_n_bits(cm, &min_log2_tiles, &max_log2_tiles); max_log2_tiles += min_log2_tiles; if (cm->log2_tile_columns < min_log2_tiles) @@ -1117,6 +1112,7 @@ static void set_tile_limits(VP9_COMMON *cm) { else if (cm->log2_tile_columns > max_log2_tiles) cm->log2_tile_columns = max_log2_tiles; cm->tile_columns = 1 << cm->log2_tile_columns; + cm->tile_rows = 1 << cm->log2_tile_rows; } static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { @@ -1156,8 +1152,7 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->gld_fb_idx = 1; cpi->alt_fb_idx = 2; - cm->log2_tile_columns = cpi->oxcf.tile_columns; - set_tile_limits(cm); + set_tile_limits(cpi); #if VP9_TEMPORAL_ALT_REF { @@ -1218,20 +1213,14 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q]; cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level]; - cpi->mb.e_mbd.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1; - cpi->mb.e_mbd.inv_xform4x4_x8 = vp9_short_idct4x4llm; - cpi->mb.e_mbd.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1; - cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4; - -#if CONFIG_LOSSLESS cpi->oxcf.lossless = oxcf->lossless; if 
(cpi->oxcf.lossless) { - cpi->mb.e_mbd.inv_xform4x4_1_x8 = vp9_short_inv_walsh4x4_1_x8; - cpi->mb.e_mbd.inv_xform4x4_x8 = vp9_short_inv_walsh4x4_x8; - cpi->mb.e_mbd.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1_lossless; - cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless; + cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_inv_walsh4x4_1_x8; + cpi->mb.e_mbd.inv_txm4x4 = vp9_short_inv_walsh4x4_x8; + } else { + cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_idct4x4llm_1; + cpi->mb.e_mbd.inv_txm4x4 = vp9_short_idct4x4llm; } -#endif cpi->baseline_gf_interval = DEFAULT_GF_INTERVAL; @@ -1384,8 +1373,7 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->last_frame_distortion = 0; #endif - cm->log2_tile_columns = cpi->oxcf.tile_columns; - set_tile_limits(cm); + set_tile_limits(cpi); } #define M_LOG2_E 0.693147180559945309417 @@ -1655,10 +1643,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->rd_thresh_mult[i] = 128; } -#ifdef ENTROPY_STATS - init_mv_ref_counts(); -#endif - #define BFP(BT, SDF, VF, SVF, SVFHH, SVFHV, SVFHHV, SDX3F, SDX8F, SDX4DF) \ cpi->fn_ptr[BT].sdf = SDF; \ cpi->fn_ptr[BT].vf = VF; \ @@ -1745,7 +1729,7 @@ void vp9_remove_compressor(VP9_PTR *ptr) { if (cpi->pass != 1) { print_context_counters(); print_tree_update_probs(); - print_mode_context(); + print_mode_context(&cpi->common); } #endif #ifdef NMV_STATS @@ -2472,15 +2456,9 @@ static void update_reference_frames(VP9_COMP * const cpi) { } static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { - if (cm->no_lpf) { - cm->filter_level = 0; - } -#if CONFIG_LOSSLESS - else if (cpi->oxcf.lossless) { + if (cm->no_lpf || cpi->mb.e_mbd.lossless) { cm->filter_level = 0; - } -#endif - else { + } else { struct vpx_usec_timer timer; vp9_clear_system_state(); @@ -2632,11 +2610,12 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // For 2 Pass Only used where GF/ARF prediction quality // is above a threshold cpi->zbin_mode_boost = 0; -#if CONFIG_LOSSLESS - cpi->zbin_mode_boost_enabled = 
FALSE; -#else - cpi->zbin_mode_boost_enabled = TRUE; -#endif + + if (cpi->oxcf.lossless) + cpi->zbin_mode_boost_enabled = FALSE; + else + cpi->zbin_mode_boost_enabled = TRUE; + if (cpi->gfu_boost <= 400) { cpi->zbin_mode_boost_enabled = FALSE; } diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 7acaef472..813003388 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -87,11 +87,11 @@ typedef struct { signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES_4X4]; - vp9_coeff_probs hybrid_coef_probs_4x4[BLOCK_TYPES_4X4]; + vp9_coeff_probs hybrid_coef_probs_4x4[BLOCK_TYPES_4X4_HYBRID]; vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES_8X8]; - vp9_coeff_probs hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]; + vp9_coeff_probs hybrid_coef_probs_8x8[BLOCK_TYPES_8X8_HYBRID]; vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES_16X16]; - vp9_coeff_probs hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]; + vp9_coeff_probs hybrid_coef_probs_16x16[BLOCK_TYPES_16X16_HYBRID]; vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES_32X32]; vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1]; @@ -303,18 +303,12 @@ typedef struct VP9_COMP { DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, unsigned char, Y2quant_shift[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, unsigned char, UVquant_shift[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv[QINDEX_RANGE][16]); MACROBLOCK mb; @@ -347,7 +341,7 @@ 
typedef struct VP9_COMP { YV12_BUFFER_CONFIG last_frame_uf; TOKENEXTRA *tok; - unsigned int tok_count; + unsigned int tok_count[1 << 6]; unsigned int frames_since_key; @@ -473,23 +467,23 @@ typedef struct VP9_COMP { vp9_coeff_count coef_counts_4x4[BLOCK_TYPES_4X4]; vp9_coeff_probs frame_coef_probs_4x4[BLOCK_TYPES_4X4]; vp9_coeff_stats frame_branch_ct_4x4[BLOCK_TYPES_4X4]; - vp9_coeff_count hybrid_coef_counts_4x4[BLOCK_TYPES_4X4]; - vp9_coeff_probs frame_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4]; - vp9_coeff_stats frame_hybrid_branch_ct_4x4[BLOCK_TYPES_4X4]; + vp9_coeff_count hybrid_coef_counts_4x4[BLOCK_TYPES_4X4_HYBRID]; + vp9_coeff_probs frame_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4_HYBRID]; + vp9_coeff_stats frame_hybrid_branch_ct_4x4[BLOCK_TYPES_4X4_HYBRID]; vp9_coeff_count coef_counts_8x8[BLOCK_TYPES_8X8]; vp9_coeff_probs frame_coef_probs_8x8[BLOCK_TYPES_8X8]; vp9_coeff_stats frame_branch_ct_8x8[BLOCK_TYPES_8X8]; - vp9_coeff_count hybrid_coef_counts_8x8[BLOCK_TYPES_8X8]; - vp9_coeff_probs frame_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]; - vp9_coeff_stats frame_hybrid_branch_ct_8x8[BLOCK_TYPES_8X8]; + vp9_coeff_count hybrid_coef_counts_8x8[BLOCK_TYPES_8X8_HYBRID]; + vp9_coeff_probs frame_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8_HYBRID]; + vp9_coeff_stats frame_hybrid_branch_ct_8x8[BLOCK_TYPES_8X8_HYBRID]; vp9_coeff_count coef_counts_16x16[BLOCK_TYPES_16X16]; vp9_coeff_probs frame_coef_probs_16x16[BLOCK_TYPES_16X16]; vp9_coeff_stats frame_branch_ct_16x16[BLOCK_TYPES_16X16]; - vp9_coeff_count hybrid_coef_counts_16x16[BLOCK_TYPES_16X16]; - vp9_coeff_probs frame_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]; - vp9_coeff_stats frame_hybrid_branch_ct_16x16[BLOCK_TYPES_16X16]; + vp9_coeff_count hybrid_coef_counts_16x16[BLOCK_TYPES_16X16_HYBRID]; + vp9_coeff_probs frame_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16_HYBRID]; + vp9_coeff_stats frame_hybrid_branch_ct_16x16[BLOCK_TYPES_16X16_HYBRID]; vp9_coeff_count coef_counts_32x32[BLOCK_TYPES_32X32]; vp9_coeff_probs 
frame_coef_probs_32x32[BLOCK_TYPES_32X32]; diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index b5dbef0b3..aed379a5b 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -140,26 +140,15 @@ void vp9_regular_quantize_b_4x4(BLOCK *b, BLOCKD *d) { void vp9_quantize_mby_4x4_c(MACROBLOCK *x) { int i; - int has_2nd_order = get_2nd_order_usage(&x->e_mbd); for (i = 0; i < 16; i++) { TX_TYPE tx_type = get_tx_type_4x4(&x->e_mbd, &x->e_mbd.block[i]); if (tx_type != DCT_DCT) { - assert(has_2nd_order == 0); vp9_ht_quantize_b_4x4(&x->block[i], &x->e_mbd.block[i], tx_type); } else { x->quantize_b_4x4(&x->block[i], &x->e_mbd.block[i]); } } - if (has_2nd_order) { - x->quantize_b_4x4(&x->block[24], &x->e_mbd.block[24]); - } else { - vpx_memset(x->e_mbd.block[24].qcoeff, 0, - 16 * sizeof(x->e_mbd.block[24].qcoeff[0])); - vpx_memset(x->e_mbd.block[24].dqcoeff, 0, - 16 * sizeof(x->e_mbd.block[24].dqcoeff[0])); - x->e_mbd.block[24].eob = 0; - } } void vp9_quantize_mbuv_4x4_c(MACROBLOCK *x) { @@ -174,97 +163,72 @@ void vp9_quantize_mb_4x4_c(MACROBLOCK *x) { vp9_quantize_mbuv_4x4_c(x); } -void vp9_regular_quantize_b_2x2(BLOCK *b, BLOCKD *d) { - int i, rc, eob; - int zbin; - int x, y, z, sz; - int16_t *zbin_boost_ptr = b->zrun_zbin_boost; - int zbin_zrun_index = 0; - int16_t *coeff_ptr = b->coeff; - int16_t *zbin_ptr = b->zbin; - int16_t *round_ptr = b->round; - int16_t *quant_ptr = b->quant; - uint8_t *quant_shift_ptr = b->quant_shift; +void vp9_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) { int16_t *qcoeff_ptr = d->qcoeff; int16_t *dqcoeff_ptr = d->dqcoeff; - int16_t *dequant_ptr = d->dequant; - int zbin_oq_value = b->zbin_extra; - - // double q2nd = 4; - vpx_memset(qcoeff_ptr, 0, 32); - vpx_memset(dqcoeff_ptr, 0, 32); - eob = -1; + vpx_memset(qcoeff_ptr, 0, 64 * sizeof(int16_t)); + vpx_memset(dqcoeff_ptr, 0, 64 * sizeof(int16_t)); if (!b->skip_block) { - for (i = 0; i < 4; i++) { - rc = vp9_default_zig_zag1d_4x4[i]; - z = coeff_ptr[rc]; - - 
zbin_boost_ptr = &b->zrun_zbin_boost[zbin_zrun_index]; - zbin_zrun_index += 4; - zbin = (zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value); + int i, rc, eob; + int zbin; + int x, y, z, sz; + int zero_run; + int16_t *zbin_boost_ptr = b->zrun_zbin_boost; + int16_t *coeff_ptr = b->coeff; + int16_t *zbin_ptr = b->zbin; + int16_t *round_ptr = b->round; + int16_t *quant_ptr = b->quant; + uint8_t *quant_shift_ptr = b->quant_shift; + int16_t *dequant_ptr = d->dequant; + int zbin_oq_value = b->zbin_extra; + + eob = -1; + + // Special case for DC as it is the one triggering access in various + // tables: {zbin, quant, quant_shift, dequant}_ptr[rc != 0] + { + z = coeff_ptr[0]; + zbin = (zbin_ptr[0] + zbin_boost_ptr[0] + zbin_oq_value); + zero_run = 1; - sz = (z >> 31); // sign of z - x = (z ^ sz) - sz; // x = abs(z) + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) if (x >= zbin) { - x += (round_ptr[rc]); - y = ((int)((int)(x * quant_ptr[rc]) >> 16) + x) - >> quant_shift_ptr[rc]; // quantize (x) - x = (y ^ sz) - sz; // get the sign back - qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + x += (round_ptr[0]); + y = ((int)(((int)(x * quant_ptr[0]) >> 16) + x)) + >> quant_shift_ptr[0]; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[0] = x; // write to destination + dqcoeff_ptr[0] = x * dequant_ptr[0]; // dequantized value if (y) { - eob = i; // last nonzero coeffs - zbin_zrun_index = 0; + eob = 0; // last nonzero coeffs + zero_run = 0; } } } - } - - d->eob = eob + 1; -} - -void vp9_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) { - int i, rc, eob; - int zbin; - int x, y, z, sz; - int zero_run = 0; - int16_t *zbin_boost_ptr = b->zrun_zbin_boost; - int16_t *coeff_ptr = b->coeff; - int16_t *zbin_ptr = b->zbin; - int16_t *round_ptr = b->round; - int16_t *quant_ptr = b->quant; - uint8_t *quant_shift_ptr = b->quant_shift; - int16_t *qcoeff_ptr = d->qcoeff; - int16_t *dqcoeff_ptr = d->dqcoeff; 
- int16_t *dequant_ptr = d->dequant; - int zbin_oq_value = b->zbin_extra; - - vpx_memset(qcoeff_ptr, 0, 64 * sizeof(int16_t)); - vpx_memset(dqcoeff_ptr, 0, 64 * sizeof(int16_t)); - - eob = -1; - - if (!b->skip_block) { - for (i = 0; i < 64; i++) { + for (i = 1; i < 64; i++) { rc = vp9_default_zig_zag1d_8x8[i]; z = coeff_ptr[rc]; - zbin = (zbin_ptr[rc != 0] + zbin_boost_ptr[zero_run] + zbin_oq_value); - zero_run += (zero_run < 15); + zbin = (zbin_ptr[1] + zbin_boost_ptr[zero_run] + zbin_oq_value); + // The original code was incrementing zero_run while keeping it at + // maximum 15 by adding "(zero_run < 15)". The same is achieved by + // removing the opposite of the sign mask of "(zero_run - 15)". + zero_run -= (zero_run - 15) >> 31; sz = (z >> 31); // sign of z x = (z ^ sz) - sz; // x = abs(z) if (x >= zbin) { x += (round_ptr[rc != 0]); - y = ((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x)) - >> quant_shift_ptr[rc != 0]; // quantize (x) + y = ((int)(((int)(x * quant_ptr[1]) >> 16) + x)) + >> quant_shift_ptr[1]; // quantize (x) x = (y ^ sz) - sz; // get the sign back qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0]; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[1]; // dequantized value if (y) { eob = i; // last nonzero coeffs @@ -272,35 +236,21 @@ void vp9_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) { } } } + d->eob = eob + 1; + } else { + d->eob = 0; } - d->eob = eob + 1; } void vp9_quantize_mby_8x8(MACROBLOCK *x) { int i; - int has_2nd_order = get_2nd_order_usage(&x->e_mbd); for (i = 0; i < 16; i ++) { x->e_mbd.block[i].eob = 0; } - x->e_mbd.block[24].eob = 0; for (i = 0; i < 16; i += 4) { - int ib = (i & 8) + ((i & 4) >> 1); - TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, &x->e_mbd.block[ib]); - if (tx_type != DCT_DCT) - assert(has_2nd_order == 0); x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]); } - - if (has_2nd_order) { - x->quantize_b_2x2(&x->block[24], &x->e_mbd.block[24]); - } else { - 
vpx_memset(x->e_mbd.block[24].qcoeff, 0, - 16 * sizeof(x->e_mbd.block[24].qcoeff[0])); - vpx_memset(x->e_mbd.block[24].dqcoeff, 0, - 16 * sizeof(x->e_mbd.block[24].dqcoeff[0])); - x->e_mbd.block[24].eob = 0; - } } void vp9_quantize_mbuv_8x8(MACROBLOCK *x) { @@ -322,7 +272,6 @@ void vp9_quantize_mby_16x16(MACROBLOCK *x) { for (i = 0; i < 16; i++) x->e_mbd.block[i].eob = 0; - x->e_mbd.block[24].eob = 0; x->quantize_b_16x16(&x->block[0], &x->e_mbd.block[0]); } @@ -460,21 +409,13 @@ void vp9_init_quantizer(VP9_COMP *cpi) { static const int zbin_boost[16] = { 0, 0, 0, 8, 8, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40 }; - - int qrounding_factor = 48; - for (Q = 0; Q < QINDEX_RANGE; Q++) { int qzbin_factor = (vp9_dc_quant(Q, 0) < 148) ? 84 : 80; - -#if CONFIG_LOSSLESS - if (cpi->oxcf.lossless) { - if (Q == 0) { - qzbin_factor = 64; - qrounding_factor = 64; - } + int qrounding_factor = 48; + if (Q == 0) { + qzbin_factor = 64; + qrounding_factor = 64; } -#endif - // dc values quant_val = vp9_dc_quant(Q, cpi->common.y1dc_delta_q); invert_quant(cpi->Y1quant[Q] + 0, @@ -484,13 +425,6 @@ void vp9_init_quantizer(VP9_COMP *cpi) { cpi->common.Y1dequant[Q][0] = quant_val; cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7; - quant_val = vp9_dc2quant(Q, cpi->common.y2dc_delta_q); - invert_quant(cpi->Y2quant[Q] + 0, - cpi->Y2quant_shift[Q] + 0, quant_val); - cpi->Y2zbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->Y2round[Q][0] = (qrounding_factor * quant_val) >> 7; - cpi->common.Y2dequant[Q][0] = quant_val; - cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7; quant_val = vp9_dc_uv_quant(Q, cpi->common.uvdc_delta_q); invert_quant(cpi->UVquant[Q] + 0, cpi->UVquant_shift[Q] + 0, quant_val); @@ -512,15 +446,6 @@ void vp9_init_quantizer(VP9_COMP *cpi) { cpi->zrun_zbin_boost_y1[Q][i] = ((quant_val * zbin_boost[i]) + 64) >> 7; - quant_val = vp9_ac2quant(Q, cpi->common.y2ac_delta_q); - invert_quant(cpi->Y2quant[Q] + rc, - cpi->Y2quant_shift[Q] + rc, 
quant_val); - cpi->Y2zbin[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->Y2round[Q][rc] = (qrounding_factor * quant_val) >> 7; - cpi->common.Y2dequant[Q][rc] = quant_val; - cpi->zrun_zbin_boost_y2[Q][i] = - ((quant_val * zbin_boost[i]) + 64) >> 7; - quant_val = vp9_ac_uv_quant(Q, cpi->common.uvac_delta_q); invert_quant(cpi->UVquant[Q] + rc, cpi->UVquant_shift[Q] + rc, quant_val); @@ -597,25 +522,6 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) { vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); } - // Y2 - zbin_extra = (cpi->common.Y2dequant[QIndex][1] * - ((cpi->zbin_over_quant / 2) + - cpi->zbin_mode_boost + - x->act_zbin_adj)) >> 7; - - x->block[24].quant = cpi->Y2quant[QIndex]; - x->block[24].quant_shift = cpi->Y2quant_shift[QIndex]; - x->block[24].zbin = cpi->Y2zbin[QIndex]; - x->block[24].round = cpi->Y2round[QIndex]; - x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex]; - x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex]; - x->block[24].zbin_extra = (int16_t)zbin_extra; - - // TBD perhaps not use for Y2 - // Segment skip feature. - x->block[24].skip_block = - vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); - /* save this macroblock QIndex for vp9_update_zbin_extra() */ x->e_mbd.q_index = QIndex; } @@ -643,14 +549,6 @@ void vp9_update_zbin_extra(VP9_COMP *cpi, MACROBLOCK *x) { for (i = 16; i < 24; i++) { x->block[i].zbin_extra = (int16_t)zbin_extra; } - - // Y2 - zbin_extra = (cpi->common.Y2dequant[QIndex][1] * - ((cpi->zbin_over_quant / 2) + - cpi->zbin_mode_boost + - x->act_zbin_adj)) >> 7; - - x->block[24].zbin_extra = (int16_t)zbin_extra; } void vp9_frame_init_quantizer(VP9_COMP *cpi) { @@ -669,10 +567,8 @@ void vp9_set_quantizer(struct VP9_COMP *cpi, int Q) { // if any of the delta_q values are changing update flag will // have to be set. 
cm->y1dc_delta_q = 0; - cm->y2ac_delta_q = 0; cm->uvdc_delta_q = 0; cm->uvac_delta_q = 0; - cm->y2dc_delta_q = 0; // quantizer has to be reinitialized if any delta_q changes. // As there are not any here for now this is inactive code. diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index ac44a751c..9a8e35d2c 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -50,11 +50,6 @@ extern prototype_quantize_block(vp9_quantize_quantb_8x8); #endif extern prototype_quantize_block(vp9_quantize_quantb_16x16); -#ifndef vp9_quantize_quantb_2x2 -#define vp9_quantize_quantb_2x2 vp9_regular_quantize_b_2x2 -#endif -extern prototype_quantize_block(vp9_quantize_quantb_2x2); - #ifndef vp9_quantize_mb_4x4 #define vp9_quantize_mb_4x4 vp9_quantize_mb_4x4_c #endif diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index bfa4a81ca..14b9a13db 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -155,7 +155,7 @@ static void fill_token_costs(vp9_coeff_count *c, for (i = 0; i < block_type_counts; i++) for (j = 0; j < COEF_BANDS; j++) for (k = 0; k < PREV_COEF_CONTEXTS; k++) { - if (k == 0 && ((j > 0 && i > 0) || (j > 1 && i == 0))) + if (k == 0 && j > 0) vp9_cost_tokens_skip((int *)(c[i][j][k]), p[i][j][k], vp9_coef_tree); @@ -280,17 +280,20 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) { fill_token_costs(cpi->mb.token_costs[TX_4X4], cpi->common.fc.coef_probs_4x4, BLOCK_TYPES_4X4); fill_token_costs(cpi->mb.hybrid_token_costs[TX_4X4], - cpi->common.fc.hybrid_coef_probs_4x4, BLOCK_TYPES_4X4); + cpi->common.fc.hybrid_coef_probs_4x4, + BLOCK_TYPES_4X4_HYBRID); fill_token_costs(cpi->mb.token_costs[TX_8X8], cpi->common.fc.coef_probs_8x8, BLOCK_TYPES_8X8); fill_token_costs(cpi->mb.hybrid_token_costs[TX_8X8], - cpi->common.fc.hybrid_coef_probs_8x8, BLOCK_TYPES_8X8); + cpi->common.fc.hybrid_coef_probs_8x8, + BLOCK_TYPES_8X8_HYBRID); fill_token_costs(cpi->mb.token_costs[TX_16X16], cpi->common.fc.coef_probs_16x16, 
BLOCK_TYPES_16X16); fill_token_costs(cpi->mb.hybrid_token_costs[TX_16X16], - cpi->common.fc.hybrid_coef_probs_16x16, BLOCK_TYPES_16X16); + cpi->common.fc.hybrid_coef_probs_16x16, + BLOCK_TYPES_16X16_HYBRID); fill_token_costs(cpi->mb.token_costs[TX_32X32], cpi->common.fc.coef_probs_32x32, BLOCK_TYPES_32X32); @@ -320,26 +323,7 @@ int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) { return error; } -int vp9_mbblock_error_8x8_c(MACROBLOCK *mb, int dc) { - BLOCK *be; - BLOCKD *bd; - int i, j; - int berror, error = 0; - - for (i = 0; i < 16; i+=4) { - be = &mb->block[i]; - bd = &mb->e_mbd.block[i]; - berror = 0; - for (j = dc; j < 64; j++) { - int this_diff = be->coeff[j] - bd->dqcoeff[j]; - berror += this_diff * this_diff; - } - error += berror; - } - return error; -} - -int vp9_mbblock_error_c(MACROBLOCK *mb, int dc) { +int vp9_mbblock_error_c(MACROBLOCK *mb) { BLOCK *be; BLOCKD *bd; int i, j; @@ -349,7 +333,7 @@ int vp9_mbblock_error_c(MACROBLOCK *mb, int dc) { be = &mb->block[i]; bd = &mb->e_mbd.block[i]; berror = 0; - for (j = dc; j < 16; j++) { + for (j = 0; j < 16; j++) { int this_diff = be->coeff[j] - bd->dqcoeff[j]; berror += this_diff * this_diff; } @@ -419,11 +403,6 @@ int vp9_uvsse(MACROBLOCK *x) { } -#if CONFIG_NEWCOEFCONTEXT -#define PT pn -#else -#define PT pt -#endif static INLINE int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, ENTROPY_CONTEXT *a, @@ -433,27 +412,21 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, const int eob = b->eob; MACROBLOCKD *xd = &mb->e_mbd; const int ib = (int)(b - xd->block); - int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0; + int c = 0; int cost = 0, seg_eob; const int segment_id = xd->mode_info_context->mbmi.segment_id; - const int *scan, *band; + const int *scan; int16_t *qcoeff_ptr = b->qcoeff; const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? get_tx_type(xd, b) : DCT_DCT; unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = (tx_type == DCT_DCT) ? 
mb->token_costs[tx_size][type] : mb->hybrid_token_costs[tx_size][type]; -#if CONFIG_NEWCOEFCONTEXT - const int *neighbors; - int pn; -#endif - ENTROPY_CONTEXT a_ec = *a, l_ec = *l; switch (tx_size) { case TX_4X4: scan = vp9_default_zig_zag1d_4x4; - band = vp9_coef_bands_4x4; seg_eob = 16; if (type == PLANE_TYPE_Y_WITH_DC) { if (tx_type == ADST_DCT) { @@ -464,19 +437,11 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, } break; case TX_8X8: - if (type == PLANE_TYPE_Y2) { - scan = vp9_default_zig_zag1d_4x4; - band = vp9_coef_bands_4x4; - seg_eob = 4; - } else { - scan = vp9_default_zig_zag1d_8x8; - band = vp9_coef_bands_8x8; - seg_eob = 64; - } + scan = vp9_default_zig_zag1d_8x8; + seg_eob = 64; break; case TX_16X16: scan = vp9_default_zig_zag1d_16x16; - band = vp9_coef_bands_16x16; seg_eob = 256; if (type == PLANE_TYPE_UV) { const int uv_idx = ib - 16; @@ -485,7 +450,6 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, break; case TX_32X32: scan = vp9_default_zig_zag1d_32x32; - band = vp9_coef_bands_32x32; seg_eob = 1024; qcoeff_ptr = xd->sb_coeff_data.qcoeff; break; @@ -495,59 +459,43 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, } VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); -#if CONFIG_NEWCOEFCONTEXT - neighbors = vp9_get_coef_neighbors_handle(scan); - pn = pt; -#endif if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) seg_eob = 0; if (tx_type != DCT_DCT) { + int recent_energy = 0; for (; c < eob; c++) { int v = qcoeff_ptr[scan[c]]; int t = vp9_dct_value_tokens_ptr[v].Token; - cost += token_costs[band[c]][PT][t]; + cost += token_costs[get_coef_band(c)][pt][t]; cost += vp9_dct_value_cost_ptr[v]; - pt = vp9_prev_token_class[t]; -#if CONFIG_NEWCOEFCONTEXT - if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(band[c + 1])) - pn = vp9_get_coef_neighbor_context( - qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]); - else - pn = pt; -#endif + pt = vp9_get_coef_context(&recent_energy, t); } if (c < seg_eob) - cost += 
mb->hybrid_token_costs[tx_size][type][band[c]] - [PT][DCT_EOB_TOKEN]; + cost += mb->hybrid_token_costs[tx_size][type][get_coef_band(c)] + [pt][DCT_EOB_TOKEN]; } else { + int recent_energy = 0; for (; c < eob; c++) { int v = qcoeff_ptr[scan[c]]; int t = vp9_dct_value_tokens_ptr[v].Token; - cost += token_costs[band[c]][pt][t]; + cost += token_costs[get_coef_band(c)][pt][t]; cost += vp9_dct_value_cost_ptr[v]; - pt = vp9_prev_token_class[t]; -#if CONFIG_NEWCOEFCONTEXT - if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(band[c + 1])) - pn = vp9_get_coef_neighbor_context( - qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]); - else - pn = pt; -#endif + pt = vp9_get_coef_context(&recent_energy, t); } if (c < seg_eob) - cost += mb->token_costs[tx_size][type][band[c]] - [PT][DCT_EOB_TOKEN]; + cost += mb->token_costs[tx_size][type][get_coef_band(c)] + [pt][DCT_EOB_TOKEN]; } // is eob first coefficient; - pt = (c > !type); + pt = (c > 0); *a = *l = pt; return cost; } -static int rdcost_mby_4x4(MACROBLOCK *mb, int has_2nd_order, int backup) { +static int rdcost_mby_4x4(MACROBLOCK *mb, int backup) { int cost = 0; int b; MACROBLOCKD *xd = &mb->e_mbd; @@ -567,19 +515,11 @@ static int rdcost_mby_4x4(MACROBLOCK *mb, int has_2nd_order, int backup) { } for (b = 0; b < 16; b++) - cost += cost_coeffs(mb, xd->block + b, - (has_2nd_order ? 
- PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC), + cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_4X4][b], tl + vp9_block2left[TX_4X4][b], TX_4X4); - if (has_2nd_order) - cost += cost_coeffs(mb, xd->block + 24, PLANE_TYPE_Y2, - ta + vp9_block2above[TX_4X4][24], - tl + vp9_block2left[TX_4X4][24], - TX_4X4); - return cost; } @@ -588,26 +528,17 @@ static void macro_block_yrd_4x4(MACROBLOCK *mb, int *Distortion, int *skippable, int backup) { MACROBLOCKD *const xd = &mb->e_mbd; - BLOCK *const mb_y2 = mb->block + 24; - BLOCKD *const x_y2 = xd->block + 24; - int d, has_2nd_order; xd->mode_info_context->mbmi.txfm_size = TX_4X4; - has_2nd_order = get_2nd_order_usage(xd); - // Fdct and building the 2nd order block vp9_transform_mby_4x4(mb); vp9_quantize_mby_4x4(mb); - d = vp9_mbblock_error(mb, has_2nd_order); - if (has_2nd_order) - d += vp9_block_error(mb_y2->coeff, x_y2->dqcoeff, 16); - - *Distortion = (d >> 2); - // rate - *Rate = rdcost_mby_4x4(mb, has_2nd_order, backup); - *skippable = vp9_mby_is_skippable_4x4(&mb->e_mbd, has_2nd_order); + + *Distortion = vp9_mbblock_error(mb) >> 2; + *Rate = rdcost_mby_4x4(mb, backup); + *skippable = vp9_mby_is_skippable_4x4(xd); } -static int rdcost_mby_8x8(MACROBLOCK *mb, int has_2nd_order, int backup) { +static int rdcost_mby_8x8(MACROBLOCK *mb, int backup) { int cost = 0; int b; MACROBLOCKD *xd = &mb->e_mbd; @@ -627,18 +558,11 @@ static int rdcost_mby_8x8(MACROBLOCK *mb, int has_2nd_order, int backup) { } for (b = 0; b < 16; b += 4) - cost += cost_coeffs(mb, xd->block + b, - (has_2nd_order ? 
- PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC), + cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_8X8][b], tl + vp9_block2left[TX_8X8][b], TX_8X8); - if (has_2nd_order) - cost += cost_coeffs(mb, xd->block + 24, PLANE_TYPE_Y2, - ta + vp9_block2above[TX_8X8][24], - tl + vp9_block2left[TX_8X8][24], - TX_8X8); return cost; } @@ -647,23 +571,14 @@ static void macro_block_yrd_8x8(MACROBLOCK *mb, int *Distortion, int *skippable, int backup) { MACROBLOCKD *const xd = &mb->e_mbd; - BLOCK *const mb_y2 = mb->block + 24; - BLOCKD *const x_y2 = xd->block + 24; - int d, has_2nd_order; xd->mode_info_context->mbmi.txfm_size = TX_8X8; - vp9_transform_mby_8x8(mb); vp9_quantize_mby_8x8(mb); - has_2nd_order = get_2nd_order_usage(xd); - d = vp9_mbblock_error_8x8_c(mb, has_2nd_order); - if (has_2nd_order) - d += vp9_block_error(mb_y2->coeff, x_y2->dqcoeff, 16); - - *Distortion = (d >> 2); - // rate - *Rate = rdcost_mby_8x8(mb, has_2nd_order, backup); - *skippable = vp9_mby_is_skippable_8x8(&mb->e_mbd, has_2nd_order); + + *Distortion = vp9_mbblock_error(mb) >> 2; + *Rate = rdcost_mby_8x8(mb, backup); + *skippable = vp9_mby_is_skippable_8x8(xd); } static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) { @@ -689,7 +604,6 @@ static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) { static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion, int *skippable, int backup) { - int d; MACROBLOCKD *xd = &mb->e_mbd; xd->mode_info_context->mbmi.txfm_size = TX_16X16; @@ -698,15 +612,13 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion, // TODO(jingning) is it possible to quickly determine whether to force // trailing coefficients to be zero, instead of running trellis // optimization in the rate-distortion optimization loop? 
- if (mb->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED) + if (mb->optimize && + xd->mode_info_context->mbmi.mode < I8X8_PRED) vp9_optimize_mby_16x16(mb); - d = vp9_mbblock_error(mb, 0); - - *Distortion = (d >> 2); - // rate + *Distortion = vp9_mbblock_error(mb) >> 2; *Rate = rdcost_mby_16x16(mb, backup); - *skippable = vp9_mby_is_skippable_16x16(&mb->e_mbd); + *skippable = vp9_mby_is_skippable_16x16(xd); } static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, @@ -859,21 +771,18 @@ static void super_block_yrd_32x32(MACROBLOCK *x, SUPERBLOCK * const x_sb = &x->sb_coeff_data; MACROBLOCKD * const xd = &x->e_mbd; SUPERBLOCKD * const xd_sb = &xd->sb_coeff_data; -#if DEBUG_ERROR || CONFIG_DWTDCTHYBRID +#if DEBUG_ERROR int16_t out[1024]; #endif vp9_transform_sby_32x32(x); vp9_quantize_sby_32x32(x); -#if DEBUG_ERROR || CONFIG_DWTDCTHYBRID +#if DEBUG_ERROR vp9_short_idct32x32(xd_sb->dqcoeff, out, 64); #endif -#if !CONFIG_DWTDCTHYBRID *distortion = vp9_sb_block_error_c(x_sb->coeff, xd_sb->dqcoeff, 1024); -#else - *distortion = vp9_block_error_c(x_sb->src_diff, out, 1024) << 4; -#endif + #if DEBUG_ERROR printf("IDCT/FDCT error 32x32: %d (d: %d)\n", vp9_block_error_c(x_sb->src_diff, out, 1024), *distortion); @@ -1140,7 +1049,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4); vp9_ht_quantize_b_4x4(be, b, tx_type); } else { - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); + x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(be, b); } @@ -1176,7 +1085,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, vp9_ihtllm(best_dqcoeff, b->diff, 32, best_tx_type, 4, b->eob); #endif else - xd->inv_xform4x4_x8(best_dqcoeff, b->diff, 32); + xd->inv_txm4x4(best_dqcoeff, b->diff, 32); vp9_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); @@ -1434,13 +1343,12 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int 
ib, vp9_subtract_4b_c(be, b, 16); - assert(get_2nd_order_usage(xd) == 0); if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { TX_TYPE tx_type = get_tx_type_8x8(xd, b); if (tx_type != DCT_DCT) vp9_fht(be->src_diff, 32, (x->block + idx)->coeff, tx_type, 8); else - x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); + x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32); x->quantize_b_8x8(x->block + idx, xd->block + idx); // compute quantization mse of 8x8 block @@ -1474,11 +1382,11 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4); vp9_ht_quantize_b_4x4(be, b, tx_type); } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) { - x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32); + x->fwd_txm8x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4_pair(be, be + 1, b, b + 1); do_two = 1; } else { - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); + x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(be, b); } distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two); @@ -2248,7 +2156,7 @@ static int64_t encode_inter_mb_segment(MACROBLOCK *x, if (xd->mode_info_context->mbmi.second_ref_frame > 0) vp9_build_2nd_inter_predictors_b(bd, 16, &xd->subpix); vp9_subtract_b(be, bd, 16); - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); + x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(be, bd); thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16); *distortion += thisdistortion; @@ -2300,7 +2208,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) { if (otherrd) { - x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32); + x->fwd_txm8x8(be->src_diff, be2->coeff, 32); x->quantize_b_8x8(be2, bd2); thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); otherdist += thisdistortion; @@ -2312,7 +2220,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, for (j = 0; 
j < 4; j += 2) { bd = &xd->block[ib + iblock[j]]; be = &x->block[ib + iblock[j]]; - x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32); + x->fwd_txm8x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1); thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); *distortion += thisdistortion; @@ -2330,7 +2238,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, for (j = 0; j < 4; j += 2) { BLOCKD *bd = &xd->block[ib + iblock[j]]; BLOCK *be = &x->block[ib + iblock[j]]; - x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32); + x->fwd_txm8x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1); thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); otherdist += thisdistortion; @@ -2344,7 +2252,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, TX_4X4); } } - x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32); + x->fwd_txm8x8(be->src_diff, be2->coeff, 32); x->quantize_b_8x8(be2, bd2); thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); *distortion += thisdistortion; @@ -2747,8 +2655,9 @@ static void rd_check_segment(VP9_COMP *cpi, MACROBLOCK *x, if (base_rd < txfm_cache[ONLY_4X4]) { txfm_cache[ONLY_4X4] = base_rd; } - if (base_rd + diff < txfm_cache[1]) { - txfm_cache[ALLOW_8X8] = txfm_cache[ALLOW_16X16] = base_rd + diff; + if (base_rd + diff < txfm_cache[ALLOW_8X8]) { + txfm_cache[ALLOW_8X8] = txfm_cache[ALLOW_16X16] = + txfm_cache[ALLOW_32X32] = base_rd + diff; } if (diff < 0) { base_rd += diff + RDCOST(x->rdmult, x->rddiv, cost8x8, 0); @@ -2895,8 +2804,8 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, *returndistortion = bsi.d; *returnyrate = bsi.segment_yrate; *skippable = bsi.txfm_size == TX_4X4 ? 
- vp9_mby_is_skippable_4x4(&x->e_mbd, 0) : - vp9_mby_is_skippable_8x8(&x->e_mbd, 0); + vp9_mby_is_skippable_4x4(&x->e_mbd) : + vp9_mby_is_skippable_8x8(&x->e_mbd); /* save partitions */ mbmi->txfm_size = bsi.txfm_size; @@ -3777,7 +3686,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if ((int)sse < threshold) { - unsigned int q2dc = xd->block[24].dequant[0]; + unsigned int q2dc = xd->block[0].dequant[0]; /* If there is no codeable 2nd order dc or a very small uniform pixel change change */ if ((sse - var < q2dc * q2dc >> 4) || @@ -4697,7 +4606,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (!x->skip) { for (i = 0; i < NB_TXFM_MODES; i++) { if (best_txfm_rd[i] == INT64_MAX) - best_txfm_diff[i] = INT_MIN; + best_txfm_diff[i] = 0; else best_txfm_diff[i] = best_rd - best_txfm_rd[i]; } @@ -4723,22 +4632,28 @@ void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, int rate_y_tokenonly = 0, rate_uv_tokenonly; int dist_y = 0, dist_uv; int y_skip = 0, uv_skip; - int64_t txfm_cache[NB_TXFM_MODES]; + int64_t txfm_cache[NB_TXFM_MODES], err; + int i; - rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, - &dist_y, &y_skip, txfm_cache); + err = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, + &dist_y, &y_skip, txfm_cache); rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, - &dist_uv, &uv_skip); + &dist_uv, &uv_skip); if (cpi->common.mb_no_coeff_skip && y_skip && uv_skip) { *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1); *returndist = dist_y + (dist_uv >> 2); + memset(x->sb32_context[xd->sb_index].txfm_rd_diff, 0, + sizeof(x->sb32_context[xd->sb_index].txfm_rd_diff)); } else { *returnrate = rate_y + rate_uv; if (cpi->common.mb_no_coeff_skip) *returnrate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); *returndist = dist_y + (dist_uv >> 2); + for (i = 0; i < NB_TXFM_MODES; i++) { + 
x->sb32_context[xd->sb_index].txfm_rd_diff[i] = err - txfm_cache[i]; + } } } @@ -4751,22 +4666,28 @@ void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, int rate_y_tokenonly = 0, rate_uv_tokenonly; int dist_y = 0, dist_uv; int y_skip = 0, uv_skip; - int64_t txfm_cache[NB_TXFM_MODES]; + int64_t txfm_cache[NB_TXFM_MODES], err; + int i; - rd_pick_intra_sb64y_mode(cpi, x, &rate_y, &rate_y_tokenonly, - &dist_y, &y_skip, txfm_cache); + err = rd_pick_intra_sb64y_mode(cpi, x, &rate_y, &rate_y_tokenonly, + &dist_y, &y_skip, txfm_cache); rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, - &dist_uv, &uv_skip); + &dist_uv, &uv_skip); if (cpi->common.mb_no_coeff_skip && y_skip && uv_skip) { *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1); *returndist = dist_y + (dist_uv >> 2); + memset(x->sb64_context.txfm_rd_diff, 0, + sizeof(x->sb64_context.txfm_rd_diff)); } else { *returnrate = rate_y + rate_uv; if (cm->mb_no_coeff_skip) *returnrate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); *returndist = dist_y + (dist_uv >> 2); + for (i = 0; i < NB_TXFM_MODES; i++) { + x->sb64_context.txfm_rd_diff[i] = err - txfm_cache[i]; + } } } @@ -4824,6 +4745,7 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, mode8x8[2]= xd->mode_info_context->bmi[8].as_mode.first; mode8x8[3]= xd->mode_info_context->bmi[10].as_mode.first; + mbmi->txfm_size = TX_4X4; error4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4, error16x16); @@ -5402,7 +5324,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (!x->skip) { for (i = 0; i < NB_TXFM_MODES; i++) { if (best_txfm_rd[i] == INT64_MAX) - best_txfm_diff[i] = INT_MIN; + best_txfm_diff[i] = 0; else best_txfm_diff[i] = best_rd - best_txfm_rd[i]; } diff --git a/vp9/encoder/vp9_satd_c.c b/vp9/encoder/vp9_satd_c.c deleted file mode 100644 index 212c2243d..000000000 --- 
a/vp9/encoder/vp9_satd_c.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <stdlib.h> -#include "vpx_ports/mem.h" -#include "./vp9_rtcd.h" - -unsigned int vp9_satd16x16_c(const uint8_t *src_ptr, - int src_stride, - const uint8_t *ref_ptr, - int ref_stride, - unsigned int *psatd) { - int r, c, i; - unsigned int satd = 0; - DECLARE_ALIGNED(16, int16_t, diff_in[256]); - DECLARE_ALIGNED(16, int16_t, diff_out[16]); - int16_t *in; - - for (r = 0; r < 16; r++) { - for (c = 0; c < 16; c++) { - diff_in[r * 16 + c] = src_ptr[c] - ref_ptr[c]; - } - src_ptr += src_stride; - ref_ptr += ref_stride; - } - - in = diff_in; - for (r = 0; r < 16; r += 4) { - for (c = 0; c < 16; c += 4) { - vp9_short_walsh4x4_c(in + c, diff_out, 32); - for (i = 0; i < 16; i++) - satd += abs(diff_out[i]); - } - in += 64; - } - - if (psatd) - *psatd = satd; - - return satd; -} diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c index 710ca7ea0..b125a486e 100644 --- a/vp9/encoder/vp9_segmentation.c +++ b/vp9/encoder/vp9_segmentation.c @@ -255,7 +255,7 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { int t_pred_cost = INT_MAX; int i; - int tile, mb_row, mb_col; + int tile_col, mb_row, mb_col; int temporal_predictor_count[PREDICTION_PROBS][2]; int no_pred_segcounts[MAX_MB_SEGMENTS]; @@ -283,10 +283,8 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { // First of all generate stats regarding how well the last segment map // predicts this one - for (tile = 0; tile < cm->tile_columns; tile++) { - cm->cur_tile_idx = tile; - vp9_get_tile_offsets(cm, 
&cm->cur_tile_mb_col_start, - &cm->cur_tile_mb_col_end); + for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) { + vp9_get_tile_col_offsets(cm, tile_col); mi_ptr = cm->mi + cm->cur_tile_mb_col_start; for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4, mi_ptr += 4 * mis) { mi = mi_ptr; diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 2dedb1a51..17eae463f 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -26,19 +26,19 @@ #ifdef ENTROPY_STATS vp9_coeff_accum context_counters_4x4[BLOCK_TYPES_4X4]; -vp9_coeff_accum hybrid_context_counters_4x4[BLOCK_TYPES_4X4]; +vp9_coeff_accum hybrid_context_counters_4x4[BLOCK_TYPES_4X4_HYBRID]; vp9_coeff_accum context_counters_8x8[BLOCK_TYPES_8X8]; -vp9_coeff_accum hybrid_context_counters_8x8[BLOCK_TYPES_8X8]; +vp9_coeff_accum hybrid_context_counters_8x8[BLOCK_TYPES_8X8_HYBRID]; vp9_coeff_accum context_counters_16x16[BLOCK_TYPES_16X16]; -vp9_coeff_accum hybrid_context_counters_16x16[BLOCK_TYPES_16X16]; +vp9_coeff_accum hybrid_context_counters_16x16[BLOCK_TYPES_16X16_HYBRID]; vp9_coeff_accum context_counters_32x32[BLOCK_TYPES_32X32]; extern vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES_4X4]; -extern vp9_coeff_stats hybrid_tree_update_hist_4x4[BLOCK_TYPES_4X4]; +extern vp9_coeff_stats hybrid_tree_update_hist_4x4[BLOCK_TYPES_4X4_HYBRID]; extern vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES_8X8]; -extern vp9_coeff_stats hybrid_tree_update_hist_8x8[BLOCK_TYPES_8X8]; +extern vp9_coeff_stats hybrid_tree_update_hist_8x8[BLOCK_TYPES_8X8_HYBRID]; extern vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES_16X16]; -extern vp9_coeff_stats hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16]; +extern vp9_coeff_stats hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16_HYBRID]; extern vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES_32X32]; #endif /* ENTROPY_STATS */ @@ -100,12 +100,6 @@ static void fill_value_tokens() { vp9_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE; } -#if 
CONFIG_NEWCOEFCONTEXT -#define PT pn -#else -#define PT pt -#endif - static void tokenize_b(VP9_COMP *cpi, MACROBLOCKD *xd, const int ib, @@ -114,22 +108,19 @@ static void tokenize_b(VP9_COMP *cpi, TX_SIZE tx_size, int dry_run) { int pt; /* near block/prev token context index */ - int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0; + int c = 0; + int recent_energy = 0; const BLOCKD * const b = xd->block + ib; const int eob = b->eob; /* one beyond last nonzero coeff */ TOKENEXTRA *t = *tp; /* store tokens starting here */ int16_t *qcoeff_ptr = b->qcoeff; int seg_eob; const int segment_id = xd->mode_info_context->mbmi.segment_id; - const int *bands, *scan; + const int *scan; vp9_coeff_count *counts; vp9_coeff_probs *probs; const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? get_tx_type(xd, b) : DCT_DCT; -#if CONFIG_NEWCOEFCONTEXT - const int *neighbors; - int pn; -#endif ENTROPY_CONTEXT *const a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above[tx_size][ib]; @@ -147,7 +138,6 @@ static void tokenize_b(VP9_COMP *cpi, default: case TX_4X4: seg_eob = 16; - bands = vp9_coef_bands_4x4; scan = vp9_default_zig_zag1d_4x4; if (tx_type != DCT_DCT) { counts = cpi->hybrid_coef_counts_4x4; @@ -163,19 +153,12 @@ static void tokenize_b(VP9_COMP *cpi, } break; case TX_8X8: - if (type == PLANE_TYPE_Y2) { - seg_eob = 4; - bands = vp9_coef_bands_4x4; - scan = vp9_default_zig_zag1d_4x4; - } else { #if CONFIG_CNVCONTEXT - a_ec = (a[0] + a[1]) != 0; - l_ec = (l[0] + l[1]) != 0; + a_ec = (a[0] + a[1]) != 0; + l_ec = (l[0] + l[1]) != 0; #endif - seg_eob = 64; - bands = vp9_coef_bands_8x8; - scan = vp9_default_zig_zag1d_8x8; - } + seg_eob = 64; + scan = vp9_default_zig_zag1d_8x8; if (tx_type != DCT_DCT) { counts = cpi->hybrid_coef_counts_8x8; probs = cpi->common.fc.hybrid_coef_probs_8x8; @@ -195,7 +178,6 @@ static void tokenize_b(VP9_COMP *cpi, } #endif seg_eob = 256; - bands = vp9_coef_bands_16x16; scan = vp9_default_zig_zag1d_16x16; if (tx_type != DCT_DCT) { counts = 
cpi->hybrid_coef_counts_16x16; @@ -219,7 +201,6 @@ static void tokenize_b(VP9_COMP *cpi, l_ec = l_ec != 0; #endif seg_eob = 1024; - bands = vp9_coef_bands_32x32; scan = vp9_default_zig_zag1d_32x32; counts = cpi->coef_counts_32x32; probs = cpi->common.fc.coef_probs_32x32; @@ -228,16 +209,12 @@ static void tokenize_b(VP9_COMP *cpi, } VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); -#if CONFIG_NEWCOEFCONTEXT - neighbors = vp9_get_coef_neighbors_handle(scan); - pn = pt; -#endif if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) seg_eob = 0; do { - const int band = bands[c]; + const int band = get_coef_band(c); int token; if (c < eob) { @@ -252,30 +229,23 @@ static void tokenize_b(VP9_COMP *cpi, } t->Token = token; - t->context_tree = probs[type][band][PT]; - t->skip_eob_node = (pt == 0) && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) || - (band > 1 && type == PLANE_TYPE_Y_NO_DC)); + t->context_tree = probs[type][band][pt]; + t->skip_eob_node = (pt == 0) && (band > 0); assert(vp9_coef_encodings[t->Token].Len - t->skip_eob_node > 0); if (!dry_run) { - ++counts[type][band][PT][token]; + ++counts[type][band][pt][token]; } - pt = vp9_prev_token_class[token]; -#if CONFIG_NEWCOEFCONTEXT - if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(bands[c + 1])) - pn = vp9_get_coef_neighbor_context( - qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]); - else - pn = pt; -#endif + + pt = vp9_get_coef_context(&recent_energy, token); ++t; } while (c < eob && ++c < seg_eob); *tp = t; - a_ec = l_ec = (c > !type); /* 0 <-> all coeff data is zero */ + a_ec = l_ec = (c > 0); /* 0 <-> all coeff data is zero */ a[0] = a_ec; l[0] = l_ec; - if (tx_size == TX_8X8 && type != PLANE_TYPE_Y2) { + if (tx_size == TX_8X8) { a[1] = a_ec; l[1] = l_ec; } else if (tx_size == TX_16X16) { @@ -294,18 +264,13 @@ static void tokenize_b(VP9_COMP *cpi, } } -int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd, int has_2nd_order) { +int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd) { int skip = 1; int i = 0; - if 
(has_2nd_order) { - for (i = 0; i < 16; i++) - skip &= (xd->block[i].eob < 2); - skip &= (!xd->block[24].eob); - } else { - for (i = 0; i < 16; i++) - skip &= (!xd->block[i].eob); - } + for (i = 0; i < 16; i++) + skip &= (!xd->block[i].eob); + return skip; } @@ -318,23 +283,18 @@ int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd) { return skip; } -static int mb_is_skippable_4x4(MACROBLOCKD *xd, int has_2nd_order) { - return (vp9_mby_is_skippable_4x4(xd, has_2nd_order) & +static int mb_is_skippable_4x4(MACROBLOCKD *xd) { + return (vp9_mby_is_skippable_4x4(xd) & vp9_mbuv_is_skippable_4x4(xd)); } -int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_2nd_order) { +int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd) { int skip = 1; int i = 0; - if (has_2nd_order) { - for (i = 0; i < 16; i += 4) - skip &= (xd->block[i].eob < 2); - skip &= (!xd->block[24].eob); - } else { - for (i = 0; i < 16; i += 4) - skip &= (!xd->block[i].eob); - } + for (i = 0; i < 16; i += 4) + skip &= (!xd->block[i].eob); + return skip; } @@ -342,13 +302,13 @@ int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd) { return (!xd->block[16].eob) & (!xd->block[20].eob); } -static int mb_is_skippable_8x8(MACROBLOCKD *xd, int has_2nd_order) { - return (vp9_mby_is_skippable_8x8(xd, has_2nd_order) & +static int mb_is_skippable_8x8(MACROBLOCKD *xd) { + return (vp9_mby_is_skippable_8x8(xd) & vp9_mbuv_is_skippable_8x8(xd)); } -static int mb_is_skippable_8x8_4x4uv(MACROBLOCKD *xd, int has_2nd_order) { - return (vp9_mby_is_skippable_8x8(xd, has_2nd_order) & +static int mb_is_skippable_8x8_4x4uv(MACROBLOCKD *xd) { + return (vp9_mby_is_skippable_8x8(xd) & vp9_mbuv_is_skippable_4x4(xd)); } @@ -384,10 +344,6 @@ void vp9_tokenize_sb(VP9_COMP *cpi, VP9_COMMON * const cm = &cpi->common; MB_MODE_INFO * const mbmi = &xd->mode_info_context->mbmi; TOKENEXTRA *t_backup = *t; - ENTROPY_CONTEXT *A[2] = { (ENTROPY_CONTEXT *) (xd->above_context + 0), - (ENTROPY_CONTEXT *) (xd->above_context + 1), }; - ENTROPY_CONTEXT *L[2] = { 
(ENTROPY_CONTEXT *) (xd->left_context + 0), - (ENTROPY_CONTEXT *) (xd->left_context + 1), }; const int mb_skip_context = vp9_get_pred_context(cm, xd, PRED_MBSKIP); const int segment_id = mbmi->segment_id; const int skip_inc = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); @@ -418,7 +374,6 @@ void vp9_tokenize_sb(VP9_COMP *cpi, tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run); } - A[0][8] = L[0][8] = A[1][8] = L[1][8] = 0; if (dry_run) *t = t_backup; } @@ -427,8 +382,6 @@ void vp9_tokenize_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { - PLANE_TYPE plane_type; - int has_2nd_order; int b; int tx_size = xd->mode_info_context->mbmi.txfm_size; int mb_skip_context = vp9_get_pred_context(&cpi->common, xd, PRED_MBSKIP); @@ -445,8 +398,6 @@ void vp9_tokenize_mb(VP9_COMP *cpi, } else skip_inc = 0; - has_2nd_order = get_2nd_order_usage(xd); - switch (tx_size) { case TX_16X16: @@ -456,15 +407,15 @@ void vp9_tokenize_mb(VP9_COMP *cpi, if (xd->mode_info_context->mbmi.mode == I8X8_PRED || xd->mode_info_context->mbmi.mode == SPLITMV) xd->mode_info_context->mbmi.mb_skip_coeff = - mb_is_skippable_8x8_4x4uv(xd, 0); + mb_is_skippable_8x8_4x4uv(xd); else xd->mode_info_context->mbmi.mb_skip_coeff = - mb_is_skippable_8x8(xd, has_2nd_order); + mb_is_skippable_8x8(xd); break; default: xd->mode_info_context->mbmi.mb_skip_coeff = - mb_is_skippable_4x4(xd, has_2nd_order); + mb_is_skippable_4x4(xd); break; } @@ -485,15 +436,6 @@ void vp9_tokenize_mb(VP9_COMP *cpi, if (!dry_run) cpi->skip_false_count[mb_skip_context] += skip_inc; - if (has_2nd_order) { - tokenize_b(cpi, xd, 24, t, PLANE_TYPE_Y2, tx_size, dry_run); - plane_type = PLANE_TYPE_Y_NO_DC; - } else { - xd->above_context->y2 = 0; - xd->left_context->y2 = 0; - plane_type = PLANE_TYPE_Y_WITH_DC; - } - if (tx_size == TX_16X16) { tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run); for (b = 16; b < 24; b += 4) { @@ -501,7 +443,7 @@ void vp9_tokenize_mb(VP9_COMP *cpi, } } else if (tx_size 
== TX_8X8) { for (b = 0; b < 16; b += 4) { - tokenize_b(cpi, xd, b, t, plane_type, TX_8X8, dry_run); + tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run); } if (xd->mode_info_context->mbmi.mode == I8X8_PRED || xd->mode_info_context->mbmi.mode == SPLITMV) { @@ -514,11 +456,10 @@ void vp9_tokenize_mb(VP9_COMP *cpi, } } } else { - for (b = 0; b < 24; b++) { - if (b >= 16) - plane_type = PLANE_TYPE_UV; - tokenize_b(cpi, xd, b, t, plane_type, TX_4X4, dry_run); - } + for (b = 0; b < 16; b++) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run); + for (b = 16; b < 24; b++) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run); } if (dry_run) *t = t_backup; @@ -664,32 +605,33 @@ void print_context_counters() { /* print counts */ print_counter(f, context_counters_4x4, BLOCK_TYPES_4X4, "vp9_default_coef_counts_4x4[BLOCK_TYPES_4X4]"); - print_counter(f, hybrid_context_counters_4x4, BLOCK_TYPES_4X4, - "vp9_default_hybrid_coef_counts_4x4[BLOCK_TYPES_4X4]"); + print_counter(f, hybrid_context_counters_4x4, BLOCK_TYPES_4X4_HYBRID, + "vp9_default_hybrid_coef_counts_4x4[BLOCK_TYPES_4X4_HYBRID]"); print_counter(f, context_counters_8x8, BLOCK_TYPES_8X8, "vp9_default_coef_counts_8x8[BLOCK_TYPES_8X8]"); - print_counter(f, hybrid_context_counters_8x8, BLOCK_TYPES_8X8, - "vp9_default_hybrid_coef_counts_8x8[BLOCK_TYPES_8X8]"); + print_counter(f, hybrid_context_counters_8x8, BLOCK_TYPES_8X8_HYBRID, + "vp9_default_hybrid_coef_counts_8x8[BLOCK_TYPES_8X8_HYBRID]"); print_counter(f, context_counters_16x16, BLOCK_TYPES_16X16, "vp9_default_coef_counts_16x16[BLOCK_TYPES_16X16]"); - print_counter(f, hybrid_context_counters_16x16, BLOCK_TYPES_16X16, - "vp9_default_hybrid_coef_counts_16x16[BLOCK_TYPES_16X16]"); + print_counter(f, hybrid_context_counters_16x16, BLOCK_TYPES_16X16_HYBRID, + "vp9_default_hybrid_coef_counts_16x16" + "[BLOCK_TYPES_16X16_HYBRID]"); print_counter(f, context_counters_32x32, BLOCK_TYPES_32X32, 
"vp9_default_coef_counts_32x32[BLOCK_TYPES_32X32]"); /* print coefficient probabilities */ print_probs(f, context_counters_4x4, BLOCK_TYPES_4X4, "default_coef_probs_4x4[BLOCK_TYPES_4X4]"); - print_probs(f, hybrid_context_counters_4x4, BLOCK_TYPES_4X4, - "default_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4]"); + print_probs(f, hybrid_context_counters_4x4, BLOCK_TYPES_4X4_HYBRID, + "default_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4_HYBRID]"); print_probs(f, context_counters_8x8, BLOCK_TYPES_8X8, "default_coef_probs_8x8[BLOCK_TYPES_8X8]"); - print_probs(f, hybrid_context_counters_8x8, BLOCK_TYPES_8X8, - "default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]"); + print_probs(f, hybrid_context_counters_8x8, BLOCK_TYPES_8X8_HYBRID, + "default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8_HYBRID]"); print_probs(f, context_counters_16x16, BLOCK_TYPES_16X16, "default_coef_probs_16x16[BLOCK_TYPES_16X16]"); - print_probs(f, hybrid_context_counters_16x16, BLOCK_TYPES_16X16, - "default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]"); + print_probs(f, hybrid_context_counters_16x16, BLOCK_TYPES_16X16_HYBRID, + "default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16_HYBRID]"); print_probs(f, context_counters_32x32, BLOCK_TYPES_32X32, "default_coef_probs_32x32[BLOCK_TYPES_32X32]"); @@ -722,7 +664,6 @@ static INLINE void stuff_b(VP9_COMP *cpi, TX_SIZE tx_size, int dry_run) { const BLOCKD * const b = xd->block + ib; - const int *bands; vp9_coeff_count *counts; vp9_coeff_probs *probs; int pt, band; @@ -742,7 +683,6 @@ static INLINE void stuff_b(VP9_COMP *cpi, switch (tx_size) { default: case TX_4X4: - bands = vp9_coef_bands_4x4; if (tx_type != DCT_DCT) { counts = cpi->hybrid_coef_counts_4x4; probs = cpi->common.fc.hybrid_coef_probs_4x4; @@ -753,12 +693,9 @@ static INLINE void stuff_b(VP9_COMP *cpi, break; case TX_8X8: #if CONFIG_CNVCONTEXT - if (type != PLANE_TYPE_Y2) { - a_ec = (a[0] + a[1]) != 0; - l_ec = (l[0] + l[1]) != 0; - } + a_ec = (a[0] + a[1]) != 0; + l_ec = (l[0] + l[1]) != 0; #endif - bands = 
vp9_coef_bands_8x8; if (tx_type != DCT_DCT) { counts = cpi->hybrid_coef_counts_8x8; probs = cpi->common.fc.hybrid_coef_probs_8x8; @@ -777,7 +714,6 @@ static INLINE void stuff_b(VP9_COMP *cpi, l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; } #endif - bands = vp9_coef_bands_16x16; if (tx_type != DCT_DCT) { counts = cpi->hybrid_coef_counts_16x16; probs = cpi->common.fc.hybrid_coef_probs_16x16; @@ -795,7 +731,6 @@ static INLINE void stuff_b(VP9_COMP *cpi, a_ec = a_ec != 0; l_ec = l_ec != 0; #endif - bands = vp9_coef_bands_32x32; counts = cpi->coef_counts_32x32; probs = cpi->common.fc.coef_probs_32x32; break; @@ -803,14 +738,14 @@ static INLINE void stuff_b(VP9_COMP *cpi, VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); - band = bands[(type == PLANE_TYPE_Y_NO_DC) ? 1 : 0]; + band = get_coef_band(0); t->Token = DCT_EOB_TOKEN; t->context_tree = probs[type][band][pt]; t->skip_eob_node = 0; ++t; *tp = t; *a = *l = 0; - if (tx_size == TX_8X8 && type != PLANE_TYPE_Y2) { + if (tx_size == TX_8X8) { a[1] = 0; l[1] = 0; } else if (tx_size == TX_16X16) { @@ -835,26 +770,12 @@ static INLINE void stuff_b(VP9_COMP *cpi, static void stuff_mb_8x8(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { - PLANE_TYPE plane_type; int b; - int has_2nd_order = get_2nd_order_usage(xd); - - if (has_2nd_order) { - stuff_b(cpi, xd, 24, t, PLANE_TYPE_Y2, TX_8X8, dry_run); - plane_type = PLANE_TYPE_Y_NO_DC; - } else { -#if CONFIG_CNVCONTEXT - xd->above_context->y2 = 0; - xd->left_context->y2 = 0; -#endif - plane_type = PLANE_TYPE_Y_WITH_DC; - } - for (b = 0; b < 24; b += 4) { - if (b >= 16) - plane_type = PLANE_TYPE_UV; - stuff_b(cpi, xd, b, t, plane_type, TX_8X8, dry_run); - } + for (b = 0; b < 16; b += 4) + stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run); + for (b = 16; b < 24; b += 4) + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run); } static void stuff_mb_16x16(VP9_COMP *cpi, MACROBLOCKD *xd, @@ -865,56 +786,26 @@ static void stuff_mb_16x16(VP9_COMP *cpi, MACROBLOCKD *xd, 
for (b = 16; b < 24; b += 4) { stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run); } -#if CONFIG_CNVCONTEXT - xd->above_context->y2 = 0; - xd->left_context->y2 = 0; -#endif } static void stuff_mb_4x4(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { int b; - PLANE_TYPE plane_type; - int has_2nd_order = get_2nd_order_usage(xd); - if (has_2nd_order) { - stuff_b(cpi, xd, 24, t, PLANE_TYPE_Y2, TX_4X4, dry_run); - plane_type = PLANE_TYPE_Y_NO_DC; - } else { - xd->above_context->y2 = 0; - xd->left_context->y2 = 0; - plane_type = PLANE_TYPE_Y_WITH_DC; - } - - for (b = 0; b < 24; b++) { - if (b >= 16) - plane_type = PLANE_TYPE_UV; - stuff_b(cpi, xd, b, t, plane_type, TX_4X4, dry_run); - } + for (b = 0; b < 16; b++) + stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run); + for (b = 16; b < 24; b++) + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run); } static void stuff_mb_8x8_4x4uv(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { - PLANE_TYPE plane_type; int b; - int has_2nd_order = get_2nd_order_usage(xd); - if (has_2nd_order) { - stuff_b(cpi, xd, 24, t, PLANE_TYPE_Y2, TX_8X8, dry_run); - plane_type = PLANE_TYPE_Y_NO_DC; - } else { - xd->above_context->y2 = 0; - xd->left_context->y2 = 0; - plane_type = PLANE_TYPE_Y_WITH_DC; - } - - for (b = 0; b < 16; b += 4) { - stuff_b(cpi, xd, b, t, plane_type, TX_8X8, dry_run); - } - - for (b = 16; b < 24; b++) { + for (b = 0; b < 16; b += 4) + stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run); + for (b = 16; b < 24; b++) stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run); - } } void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index 3eeb8fa5a..01aa7fb79 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -31,9 +31,9 @@ typedef struct { typedef int64_t vp9_coeff_accum[COEF_BANDS][PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -extern int 
vp9_mby_is_skippable_4x4(MACROBLOCKD *xd, int has_y2_block); +extern int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd); extern int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd); -extern int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_y2_block); +extern int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd); extern int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd); extern int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd); extern int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd); @@ -61,9 +61,9 @@ extern vp9_coeff_accum context_counters_8x8[BLOCK_TYPES_8X8]; extern vp9_coeff_accum context_counters_16x16[BLOCK_TYPES_16X16]; extern vp9_coeff_accum context_counters_32x32[BLOCK_TYPES_32X32]; -extern vp9_coeff_accum hybrid_context_counters_4x4[BLOCK_TYPES_4X4]; -extern vp9_coeff_accum hybrid_context_counters_8x8[BLOCK_TYPES_8X8]; -extern vp9_coeff_accum hybrid_context_counters_16x16[BLOCK_TYPES_16X16]; +extern vp9_coeff_accum hybrid_context_counters_4x4[BLOCK_TYPES_4X4_HYBRID]; +extern vp9_coeff_accum hybrid_context_counters_8x8[BLOCK_TYPES_8X8_HYBRID]; +extern vp9_coeff_accum hybrid_context_counters_16x16[BLOCK_TYPES_16X16_HYBRID]; #endif extern const int *vp9_dct_value_cost_ptr; diff --git a/vp9/encoder/x86/vp9_encodeopt.asm b/vp9/encoder/x86/vp9_encodeopt.asm index 5d9f7769d..90c793d4f 100644 --- a/vp9/encoder/x86/vp9_encodeopt.asm +++ b/vp9/encoder/x86/vp9_encodeopt.asm @@ -125,7 +125,7 @@ sym(vp9_block_error_mmx): ret -;int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc); +;int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr); global sym(vp9_mbblock_error_mmx_impl) PRIVATE sym(vp9_mbblock_error_mmx_impl): push rbp @@ -142,10 +142,6 @@ sym(vp9_mbblock_error_mmx_impl): mov rdi, arg(1) ;dcoef_ptr pxor mm2, mm2 - movd mm1, dword ptr arg(2) ;dc - por mm1, mm2 - - pcmpeqw mm1, mm7 mov rcx, 16 .mberror_loop_mmx: @@ -160,7 +156,6 @@ sym(vp9_mbblock_error_mmx_impl): pmaddwd mm5, mm5 psubw mm3, mm4 - pand mm3, mm1 pmaddwd mm3, mm3 paddd mm2, mm5 @@ -202,28 
+197,24 @@ sym(vp9_mbblock_error_mmx_impl): ret -;int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); +;int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr); global sym(vp9_mbblock_error_xmm_impl) PRIVATE sym(vp9_mbblock_error_xmm_impl): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 3 - SAVE_XMM 6 + SAVE_XMM 5 push rsi push rdi ; end prolog mov rsi, arg(0) ;coeff_ptr - pxor xmm6, xmm6 + pxor xmm5, xmm5 mov rdi, arg(1) ;dcoef_ptr pxor xmm4, xmm4 - movd xmm5, dword ptr arg(2) ;dc - por xmm5, xmm4 - - pcmpeqw xmm5, xmm6 mov rcx, 16 .mberror_loop: @@ -238,7 +229,6 @@ sym(vp9_mbblock_error_xmm_impl): pmaddwd xmm2, xmm2 psubw xmm0, xmm1 - pand xmm0, xmm5 pmaddwd xmm0, xmm0 add rsi, 32 @@ -252,9 +242,9 @@ sym(vp9_mbblock_error_xmm_impl): jnz .mberror_loop movdqa xmm0, xmm4 - punpckldq xmm0, xmm6 + punpckldq xmm0, xmm5 - punpckhdq xmm4, xmm6 + punpckhdq xmm4, xmm5 paddd xmm0, xmm4 movdqa xmm1, xmm0 diff --git a/vp9/encoder/x86/vp9_x86_csystemdependent.c b/vp9/encoder/x86/vp9_x86_csystemdependent.c index 3beef53a2..2bf32c569 100644 --- a/vp9/encoder/x86/vp9_x86_csystemdependent.c +++ b/vp9/encoder/x86/vp9_x86_csystemdependent.c @@ -23,11 +23,11 @@ void vp9_short_fdct8x4_mmx(short *input, short *output, int pitch) { vp9_short_fdct4x4_mmx(input + 4, output + 16, pitch); } -int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc); -int vp9_mbblock_error_mmx(MACROBLOCK *mb, int dc) { +int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr); +int vp9_mbblock_error_mmx(MACROBLOCK *mb) { short *coeff_ptr = mb->block[0].coeff; short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff; - return vp9_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr, dc); + return vp9_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr); } int vp9_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); @@ -51,11 +51,11 @@ void vp9_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) { #endif #if HAVE_SSE2 -int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int 
dc); -int vp9_mbblock_error_xmm(MACROBLOCK *mb, int dc) { +int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr); +int vp9_mbblock_error_xmm(MACROBLOCK *mb) { short *coeff_ptr = mb->block[0].coeff; short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff; - return vp9_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr, dc); + return vp9_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr); } int vp9_mbuverror_xmm_impl(short *s_ptr, short *d_ptr); diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 80320c44a..2653954d0 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -27,6 +27,7 @@ struct vp8_extracfg { unsigned int Sharpness; unsigned int static_thresh; unsigned int tile_columns; + unsigned int tile_rows; unsigned int arnr_max_frames; /* alt_ref Noise Reduction Max Frame Count */ unsigned int arnr_strength; /* alt_ref Noise Reduction Strength */ unsigned int arnr_type; /* alt_ref filter type */ @@ -34,9 +35,7 @@ struct vp8_extracfg { vp8e_tuning tuning; unsigned int cq_level; /* constrained quality level */ unsigned int rc_max_intra_bitrate_pct; -#if CONFIG_LOSSLESS unsigned int lossless; -#endif }; struct extraconfig_map { @@ -55,6 +54,7 @@ static const struct extraconfig_map extracfg_map[] = { 0, /* Sharpness */ 0, /* static_thresh */ 0, /* tile_columns */ + 0, /* tile_rows */ 0, /* arnr_max_frames */ 3, /* arnr_strength */ 3, /* arnr_type*/ @@ -62,9 +62,7 @@ static const struct extraconfig_map extracfg_map[] = { 0, /* tuning*/ 10, /* cq_level */ 0, /* rc_max_intra_bitrate_pct */ -#if CONFIG_LOSSLESS 0, /* lossless */ -#endif } } }; @@ -136,13 +134,11 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_HI(cfg, rc_max_quantizer, 63); RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer); -#if CONFIG_LOSSLESS RANGE_CHECK_BOOL(vp8_cfg, lossless); if (vp8_cfg->lossless) { RANGE_CHECK_HI(cfg, rc_max_quantizer, 0); RANGE_CHECK_HI(cfg, rc_min_quantizer, 0); } -#endif RANGE_CHECK_HI(cfg, g_threads, 64); RANGE_CHECK_HI(cfg, 
g_lag_in_frames, MAX_LAG_BUFFERS); @@ -172,6 +168,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_HI(vp8_cfg, noise_sensitivity, 6); RANGE_CHECK(vp8_cfg, tile_columns, 0, 6); + RANGE_CHECK(vp8_cfg, tile_rows, 0, 2); RANGE_CHECK_HI(vp8_cfg, Sharpness, 7); RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15); RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6); @@ -309,10 +306,9 @@ static vpx_codec_err_t set_vp8e_config(VP9_CONFIG *oxcf, oxcf->tuning = vp8_cfg.tuning; oxcf->tile_columns = vp8_cfg.tile_columns; + oxcf->tile_rows = vp8_cfg.tile_rows; -#if CONFIG_LOSSLESS oxcf->lossless = vp8_cfg.lossless; -#endif oxcf->error_resilient_mode = cfg.g_error_resilient; oxcf->frame_parallel_decoding_mode = cfg.g_frame_parallel_decoding; @@ -416,6 +412,7 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx, MAP(VP8E_SET_SHARPNESS, xcfg.Sharpness); MAP(VP8E_SET_STATIC_THRESHOLD, xcfg.static_thresh); MAP(VP9E_SET_TILE_COLUMNS, xcfg.tile_columns); + MAP(VP9E_SET_TILE_ROWS, xcfg.tile_rows); MAP(VP8E_SET_ARNR_MAXFRAMES, xcfg.arnr_max_frames); MAP(VP8E_SET_ARNR_STRENGTH, xcfg.arnr_strength); @@ -423,9 +420,7 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx, MAP(VP8E_SET_TUNING, xcfg.tuning); MAP(VP8E_SET_CQ_LEVEL, xcfg.cq_level); MAP(VP8E_SET_MAX_INTRA_BITRATE_PCT, xcfg.rc_max_intra_bitrate_pct); -#if CONFIG_LOSSLESS MAP(VP9E_SET_LOSSLESS, xcfg.lossless); -#endif } res = validate_config(ctx, &ctx->cfg, &xcfg); @@ -1006,6 +1001,7 @@ static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] = { {VP8E_SET_SHARPNESS, set_param}, {VP8E_SET_STATIC_THRESHOLD, set_param}, {VP9E_SET_TILE_COLUMNS, set_param}, + {VP9E_SET_TILE_ROWS, set_param}, {VP8E_GET_LAST_QUANTIZER, get_param}, {VP8E_GET_LAST_QUANTIZER_64, get_param}, {VP8E_SET_ARNR_MAXFRAMES, set_param}, @@ -1014,9 +1010,7 @@ static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] = { {VP8E_SET_TUNING, set_param}, {VP8E_SET_CQ_LEVEL, set_param}, {VP8E_SET_MAX_INTRA_BITRATE_PCT, set_param}, -#if CONFIG_LOSSLESS 
{VP9E_SET_LOSSLESS, set_param}, -#endif { -1, NULL}, }; diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index b41873540..6e57e67ef 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -65,7 +65,6 @@ VP9_CX_SRCS-yes += encoder/vp9_quantize.c VP9_CX_SRCS-yes += encoder/vp9_ratectrl.c VP9_CX_SRCS-yes += encoder/vp9_rdopt.c VP9_CX_SRCS-yes += encoder/vp9_sad_c.c -VP9_CX_SRCS-yes += encoder/vp9_satd_c.c VP9_CX_SRCS-yes += encoder/vp9_segmentation.c VP9_CX_SRCS-yes += encoder/vp9_segmentation.h VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h index d483c47cc..0b910b99d 100644 --- a/vpx/vp8cx.h +++ b/vpx/vp8cx.h @@ -188,7 +188,8 @@ enum vp8e_enc_control_id { /* TODO(jkoleszar): Move to vp9cx.h */ VP9E_SET_LOSSLESS, - VP9E_SET_TILE_COLUMNS + VP9E_SET_TILE_COLUMNS, + VP9E_SET_TILE_ROWS, }; /*!\brief vpx 1-D scaling mode @@ -300,6 +301,7 @@ VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, int) /* vp8e_tuning */ VPX_CTRL_USE_TYPE(VP8E_SET_CQ_LEVEL, unsigned int) VPX_CTRL_USE_TYPE(VP9E_SET_TILE_COLUMNS, int) +VPX_CTRL_USE_TYPE(VP9E_SET_TILE_ROWS, int) VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *) VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *) @@ -301,6 +301,7 @@ struct detect_buffer { struct input_state { char *fn; FILE *file; + off_t length; y4m_input y4m; struct detect_buffer detect; enum video_file_type file_type; @@ -1084,6 +1085,8 @@ static const arg_def_t token_parts = ARG_DEF(NULL, "token-parts", 1, "Number of token partitions to use, log2"); static const arg_def_t tile_cols = ARG_DEF(NULL, "tile-columns", 1, "Number of tile columns to use, log2"); +static const arg_def_t tile_rows = ARG_DEF(NULL, "tile-rows", 1, + "Number of tile rows to use, log2"); static const arg_def_t auto_altref = ARG_DEF(NULL, "auto-alt-ref", 1, "Enable automatic alt reference frames"); static const arg_def_t arnr_maxframes = ARG_DEF(NULL, "arnr-maxframes", 1, @@ -1103,9 +1106,7 @@ static const arg_def_t cq_level = ARG_DEF(NULL, "cq-level", 1, "Constrained 
Quality Level"); static const arg_def_t max_intra_rate_pct = ARG_DEF(NULL, "max-intra-rate", 1, "Max I-frame bitrate (pct)"); -#if CONFIG_LOSSLESS static const arg_def_t lossless = ARG_DEF(NULL, "lossless", 1, "Lossless mode"); -#endif #if CONFIG_VP8_ENCODER static const arg_def_t *vp8_args[] = { @@ -1127,22 +1128,17 @@ static const int vp8_arg_ctrl_map[] = { #if CONFIG_VP9_ENCODER static const arg_def_t *vp9_args[] = { &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh, - &tile_cols, &arnr_maxframes, &arnr_strength, &arnr_type, - &tune_ssim, &cq_level, &max_intra_rate_pct, -#if CONFIG_LOSSLESS - &lossless, -#endif + &tile_cols, &tile_rows, &arnr_maxframes, &arnr_strength, &arnr_type, + &tune_ssim, &cq_level, &max_intra_rate_pct, &lossless, NULL }; static const int vp9_arg_ctrl_map[] = { VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF, VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD, - VP9E_SET_TILE_COLUMNS, + VP9E_SET_TILE_COLUMNS, VP9E_SET_TILE_ROWS, VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE, VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT, -#if CONFIG_LOSSLESS VP9E_SET_LOSSLESS, -#endif 0 }; #endif @@ -1742,6 +1738,14 @@ void open_input_file(struct input_state *input) { if (!input->file) fatal("Failed to open input file"); + if (!fseeko(input->file, 0, SEEK_END)) { + /* Input file is seekable. Figure out how long it is, so we can get + * progress info. + */ + input->length = ftello(input->file); + rewind(input->file); + } + /* For RAW input sources, these bytes will applied on the first frame * in read_frame(). 
*/ @@ -2265,9 +2269,6 @@ static void get_cx_data(struct stream_state *stream, if (!(pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT)) { stream->frames_out++; } - if (!global->quiet) - fprintf(stderr, " %6luF", - (unsigned long)pkt->data.frame.sz); update_rate_histogram(&stream->rate_hist, cfg, pkt); if (stream->config.write_webm) { @@ -2311,9 +2312,6 @@ static void get_cx_data(struct stream_state *stream, break; case VPX_CODEC_STATS_PKT: stream->frames_out++; - if (!global->quiet) - fprintf(stderr, " %6luS", - (unsigned long)pkt->data.twopass_stats.sz); stats_write(&stream->stats, pkt->data.twopass_stats.buf, pkt->data.twopass_stats.sz); @@ -2327,8 +2325,6 @@ static void get_cx_data(struct stream_state *stream, stream->psnr_sse_total += pkt->data.psnr.sse[0]; stream->psnr_samples_total += pkt->data.psnr.samples[0]; for (i = 0; i < 4; i++) { - if (!global->quiet) - fprintf(stderr, "%.3f ", pkt->data.psnr.psnr[i]); stream->psnr_totals[i] += pkt->data.psnr.psnr[i]; } stream->psnr_count++; @@ -2361,7 +2357,7 @@ static void show_psnr(struct stream_state *stream) { } -float usec_to_fps(uint64_t usec, unsigned int frames) { +static float usec_to_fps(uint64_t usec, unsigned int frames) { return (float)(usec > 0 ? frames * 1000000.0 / (float)usec : 0); } @@ -2386,6 +2382,24 @@ static void test_decode(struct stream_state *stream) { } } + +static void print_time(const char *label, int64_t etl) { + int hours, mins, secs; + + if (etl >= 0) { + hours = etl / 3600; + etl -= hours * 3600; + mins = etl / 60; + etl -= mins * 60; + secs = etl; + + fprintf(stderr, "[%3s %2d:%02d:%02d] ", + label, hours, mins, secs); + } else { + fprintf(stderr, "[%3s unknown] ", label); + } +} + int main(int argc, const char **argv_) { int pass; vpx_image_t raw; @@ -2443,6 +2457,9 @@ int main(int argc, const char **argv_) { for (pass = global.pass ? 
global.pass - 1 : 0; pass < global.passes; pass++) { int frames_in = 0; + int64_t estimated_time_left = -1; + int64_t average_rate = -1; + off_t lagged_count = 0; open_input_file(&input); @@ -2518,18 +2535,23 @@ int main(int argc, const char **argv_) { frames_in++; if (!global.quiet) { + float fps = usec_to_fps(cx_time, frames_in); + fprintf(stderr, "\rPass %d/%d ", pass + 1, global.passes); + if (stream_cnt == 1) fprintf(stderr, - "\rPass %d/%d frame %4d/%-4d %7"PRId64"B \033[K", - pass + 1, global.passes, frames_in, - streams->frames_out, (int64_t)streams->nbytes); + "frame %4d/%-4d %7"PRId64"B ", + frames_in, streams->frames_out, (int64_t)streams->nbytes); else - fprintf(stderr, - "\rPass %d/%d frame %4d %7lu %s (%.2f fps)\033[K", - pass + 1, global.passes, frames_in, - cx_time > 9999999 ? cx_time / 1000 : cx_time, - cx_time > 9999999 ? "ms" : "us", - usec_to_fps(cx_time, frames_in)); + fprintf(stderr, "frame %4d ", frames_in); + + fprintf(stderr, "%7lu %s %.2f %s ", + cx_time > 9999999 ? cx_time / 1000 : cx_time, + cx_time > 9999999 ? "ms" : "us", + fps >= 1.0 ? fps : 1000.0 / fps, + fps >= 1.0 ? "fps" : "ms/f"); + print_time("ETA", estimated_time_left); + fprintf(stderr, "\033[K"); } } else @@ -2548,6 +2570,32 @@ int main(int argc, const char **argv_) { got_data = 0; FOREACH_STREAM(get_cx_data(stream, &global, &got_data)); + if (!got_data && input.length && !streams->frames_out) { + lagged_count = global.limit ? frames_in : ftello(input.file); + } else if (got_data && input.length) { + int64_t remaining; + int64_t rate; + + if (global.limit) { + int frame_in_lagged = (frames_in - lagged_count) * 1000; + + rate = cx_time ? frame_in_lagged * (int64_t)1000000 / cx_time : 0; + remaining = 1000 * (global.limit - frames_in + lagged_count); + } else { + off_t input_pos = ftello(input.file); + off_t input_pos_lagged = input_pos - lagged_count; + int64_t limit = input.length; + + rate = cx_time ? 
input_pos_lagged * (int64_t)1000000 / cx_time : 0; + remaining = limit - input_pos + lagged_count; + } + + average_rate = (average_rate <= 0) + ? rate + : (average_rate * 7 + rate) / 8; + estimated_time_left = average_rate ? remaining / average_rate : -1; + } + if (got_data && global.test_decode) FOREACH_STREAM(test_decode(stream)); } |