diff options
Diffstat (limited to 'vp9')
32 files changed, 420 insertions, 372 deletions
diff --git a/vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm new file mode 100644 index 000000000..869ee5f3f --- /dev/null +++ b/vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm @@ -0,0 +1,68 @@ +; +; Copyright (c) 2013 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license and patent +; grant that can be found in the LICENSE file in the root of the source +; tree. All contributing project authors may be found in the AUTHORS +; file in the root of the source tree. +; + + + EXPORT |vp9_short_idct4x4_1_add_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +;void vp9_short_idct4x4_1_add_neon(int16_t *input, uint8_t *dest, +; int dest_stride) +; +; r0 int16_t *input +; r1 uint8_t *dest +; r2 int dest_stride + +|vp9_short_idct4x4_1_add_neon| PROC + ldrsh r0, [r0] + + ; generate cospi_16_64 = 11585 + mov r12, #0x2d00 + add r12, #0x41 + + ; out = dct_const_round_shift(input[0] * cospi_16_64) + mul r0, r0, r12 ; input[0] * cospi_16_64 + add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1)) + asr r0, r0, #14 ; >> DCT_CONST_BITS + + ; out = dct_const_round_shift(out * cospi_16_64) + mul r0, r0, r12 ; out * cospi_16_64 + mov r12, r1 ; save dest + add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1)) + asr r0, r0, #14 ; >> DCT_CONST_BITS + + ; a1 = ROUND_POWER_OF_TWO(out, 4) + add r0, r0, #8 ; + (1 <<((4) - 1)) + asr r0, r0, #4 ; >> 4 + + vdup.s16 q0, r0 ; duplicate a1 + + vld1.32 {d2[0]}, [r1], r2 + vld1.32 {d2[1]}, [r1], r2 + vld1.32 {d4[0]}, [r1], r2 + vld1.32 {d4[1]}, [r1] + + vaddw.u8 q8, q0, d2 ; dest[x] + a1 + vaddw.u8 q9, q0, d4 + + vqmovun.s16 d6, q8 ; clip_pixel + vqmovun.s16 d7, q9 + + vst1.32 {d6[0]}, [r12], r2 + vst1.32 {d6[1]}, [r12], r2 + vst1.32 {d7[0]}, [r12], r2 + vst1.32 {d7[1]}, [r12] + + bx lr + ENDP ; |vp9_short_idct4x4_1_add_neon| + + END diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 
dfd152646..9a42ad9f1 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -89,9 +89,9 @@ static INLINE int is_inter_mode(MB_PREDICTION_MODE mode) { return mode >= NEARESTMV && mode <= NEWMV; } -#define VP9_INTRA_MODES (TM_PRED + 1) +#define INTRA_MODES (TM_PRED + 1) -#define VP9_INTER_MODES (1 + NEWMV - NEARESTMV) +#define INTER_MODES (1 + NEWMV - NEARESTMV) static INLINE int inter_mode_offset(MB_PREDICTION_MODE mode) { return (mode - NEARESTMV); diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c index 1d9684992..1e6cd4404 100644 --- a/vp9/common/vp9_convolve.c +++ b/vp9/common/vp9_convolve.c @@ -49,7 +49,7 @@ static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride, for (k = 0; k < taps; ++k) sum += src[src_x + k] * filter_x[k]; - dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, VP9_FILTER_BITS)); + dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); /* Move to the next source pixel */ x_q4 += x_step_q4; @@ -91,7 +91,7 @@ static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, sum += src[src_x + k] * filter_x[k]; dst[x] = ROUND_POWER_OF_TWO(dst[x] + - clip_pixel(ROUND_POWER_OF_TWO(sum, VP9_FILTER_BITS)), 1); + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); /* Move to the next source pixel */ x_q4 += x_step_q4; @@ -133,7 +133,7 @@ static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride, sum += src[(src_y + k) * src_stride] * filter_y[k]; dst[y * dst_stride] = - clip_pixel(ROUND_POWER_OF_TWO(sum, VP9_FILTER_BITS)); + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); /* Move to the next source pixel */ y_q4 += y_step_q4; @@ -175,7 +175,7 @@ static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, sum += src[(src_y + k) * src_stride] * filter_y[k]; dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + - clip_pixel(ROUND_POWER_OF_TWO(sum, VP9_FILTER_BITS)), 1); + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); /* Move to the next source pixel */ y_q4 
+= y_step_q4; diff --git a/vp9/common/vp9_convolve.h b/vp9/common/vp9_convolve.h index 9522b78bc..13220e97e 100644 --- a/vp9/common/vp9_convolve.h +++ b/vp9/common/vp9_convolve.h @@ -13,7 +13,7 @@ #include "./vpx_config.h" #include "vpx/vpx_integer.h" -#define VP9_FILTER_BITS 7 +#define FILTER_BITS 7 typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index 84d090c31..a75d1a9a4 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -14,8 +14,8 @@ #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_seg_common.h" -const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES] - [VP9_INTRA_MODES - 1] = { +const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES] + [INTRA_MODES - 1] = { { 144, 11, 54, 157, 195, 130, 46, 58, 108 } /* y = dc */, { 118, 15, 123, 148, 131, 101, 44, 93, 131 } /* y = v */, { 113, 12, 23, 188, 226, 142, 26, 32, 125 } /* y = h */, @@ -29,15 +29,15 @@ const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES] }; static const vp9_prob default_if_y_probs[BLOCK_SIZE_GROUPS] - [VP9_INTRA_MODES - 1] = { + [INTRA_MODES - 1] = { { 65, 32, 18, 144, 162, 194, 41, 51, 98 } /* block_size < 8x8 */, { 132, 68, 18, 165, 217, 196, 45, 40, 78 } /* block_size < 16x16 */, { 173, 80, 19, 176, 240, 193, 64, 35, 46 } /* block_size < 32x32 */, { 221, 135, 38, 194, 248, 121, 96, 85, 29 } /* block_size >= 32x32 */ }; -static const vp9_prob default_if_uv_probs[VP9_INTRA_MODES] - [VP9_INTRA_MODES - 1] = { +static const vp9_prob default_if_uv_probs[INTRA_MODES] + [INTRA_MODES - 1] = { { 120, 7, 76, 176, 208, 126, 28, 54, 103 } /* y = dc */, { 48, 12, 154, 155, 139, 90, 34, 117, 119 } /* y = v */, { 67, 6, 25, 204, 243, 158, 13, 21, 96 } /* y = h */, @@ -98,9 +98,9 @@ static const vp9_prob default_partition_probs[NUM_FRAME_TYPES] } }; -const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES] - [VP9_INTRA_MODES] - [VP9_INTRA_MODES - 1] = { 
+const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES] + [INTRA_MODES] + [INTRA_MODES - 1] = { { /* above = dc */ { 137, 30, 42, 148, 151, 207, 70, 52, 91 } /* left = dc */, { 92, 45, 102, 136, 116, 180, 74, 90, 100 } /* left = v */, @@ -215,7 +215,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES] }; static const vp9_prob default_inter_mode_probs[INTER_MODE_CONTEXTS] - [VP9_INTER_MODES - 1] = { + [INTER_MODES - 1] = { {2, 173, 34}, // 0 = both zero mv {7, 145, 85}, // 1 = one zero mv + one a predicted mv {7, 166, 63}, // 2 = two predicted mvs @@ -226,7 +226,7 @@ static const vp9_prob default_inter_mode_probs[INTER_MODE_CONTEXTS] }; /* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */ -const vp9_tree_index vp9_intra_mode_tree[VP9_INTRA_MODES * 2 - 2] = { +const vp9_tree_index vp9_intra_mode_tree[INTRA_MODES * 2 - 2] = { -DC_PRED, 2, /* 0 = DC_NODE */ -TM_PRED, 4, /* 1 = TM_NODE */ -V_PRED, 6, /* 2 = V_NODE */ @@ -250,8 +250,8 @@ const vp9_tree_index vp9_partition_tree[6] = { -PARTITION_VERT, -PARTITION_SPLIT }; -struct vp9_token vp9_intra_mode_encodings[VP9_INTRA_MODES]; -struct vp9_token vp9_inter_mode_encodings[VP9_INTER_MODES]; +struct vp9_token vp9_intra_mode_encodings[INTRA_MODES]; +struct vp9_token vp9_inter_mode_encodings[INTER_MODES]; struct vp9_token vp9_partition_encodings[PARTITION_TYPES]; @@ -317,8 +317,8 @@ static const vp9_prob default_mbskip_probs[MBSKIP_CONTEXTS] = { 192, 128, 64 }; -static const vp9_prob default_switchable_interp_prob[VP9_SWITCHABLE_FILTERS+1] - [VP9_SWITCHABLE_FILTERS-1] = { +static const vp9_prob default_switchable_interp_prob[SWITCHABLE_FILTERS+1] + [SWITCHABLE_FILTERS-1] = { { 235, 162, }, { 36, 255, }, { 34, 3, }, @@ -338,11 +338,11 @@ void vp9_init_mbmode_probs(VP9_COMMON *cm) { vp9_copy(cm->fc.mbskip_probs, default_mbskip_probs); } -const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = { +const vp9_tree_index vp9_switchable_interp_tree[SWITCHABLE_FILTERS*2-2] = { -EIGHTTAP, 2, 
-EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP }; -struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS]; +struct vp9_token vp9_switchable_interp_encodings[SWITCHABLE_FILTERS]; void vp9_entropy_mode_init() { vp9_tokens_from_tree(vp9_intra_mode_encodings, vp9_intra_mode_tree); @@ -400,17 +400,17 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) { counts->single_ref[i][j]); for (i = 0; i < INTER_MODE_CONTEXTS; i++) - update_mode_probs(VP9_INTER_MODES, vp9_inter_mode_tree, + update_mode_probs(INTER_MODES, vp9_inter_mode_tree, counts->inter_mode[i], pre_fc->inter_mode_probs[i], fc->inter_mode_probs[i], NEARESTMV); for (i = 0; i < BLOCK_SIZE_GROUPS; i++) - update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree, + update_mode_probs(INTRA_MODES, vp9_intra_mode_tree, counts->y_mode[i], pre_fc->y_mode_prob[i], fc->y_mode_prob[i], 0); - for (i = 0; i < VP9_INTRA_MODES; ++i) - update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree, + for (i = 0; i < INTRA_MODES; ++i) + update_mode_probs(INTRA_MODES, vp9_intra_mode_tree, counts->uv_mode[i], pre_fc->uv_mode_prob[i], fc->uv_mode_prob[i], 0); @@ -421,8 +421,8 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) { fc->partition_prob[INTER_FRAME][i], 0); if (cm->mcomp_filter_type == SWITCHABLE) { - for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) - update_mode_probs(VP9_SWITCHABLE_FILTERS, vp9_switchable_interp_tree, + for (i = 0; i <= SWITCHABLE_FILTERS; i++) + update_mode_probs(SWITCHABLE_FILTERS, vp9_switchable_interp_tree, counts->switchable_interp[i], pre_fc->switchable_interp_prob[i], fc->switchable_interp_prob[i], 0); diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h index fced2cdfe..2f8085df6 100644 --- a/vp9/common/vp9_entropymode.h +++ b/vp9/common/vp9_entropymode.h @@ -16,8 +16,8 @@ #define SUBMVREF_COUNT 5 #define TX_SIZE_CONTEXTS 2 -#define VP9_MODE_UPDATE_PROB 252 -#define VP9_SWITCHABLE_FILTERS 3 // number of switchable filters +#define MODE_UPDATE_PROB 252 +#define SWITCHABLE_FILTERS 3 // number of 
switchable filters // #define MODE_STATS @@ -35,24 +35,24 @@ struct tx_counts { unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2]; }; -extern const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1]; -extern const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES] - [VP9_INTRA_MODES - 1]; +extern const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; +extern const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES] + [INTRA_MODES - 1]; extern const vp9_tree_index vp9_intra_mode_tree[]; extern const vp9_tree_index vp9_inter_mode_tree[]; -extern struct vp9_token vp9_intra_mode_encodings[VP9_INTRA_MODES]; -extern struct vp9_token vp9_inter_mode_encodings[VP9_INTER_MODES]; +extern struct vp9_token vp9_intra_mode_encodings[INTRA_MODES]; +extern struct vp9_token vp9_inter_mode_encodings[INTER_MODES]; // probability models for partition information extern const vp9_tree_index vp9_partition_tree[]; extern struct vp9_token vp9_partition_encodings[PARTITION_TYPES]; extern const vp9_tree_index vp9_switchable_interp_tree - [2 * (VP9_SWITCHABLE_FILTERS - 1)]; + [2 * (SWITCHABLE_FILTERS - 1)]; -extern struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS]; +extern struct vp9_token vp9_switchable_interp_encodings[SWITCHABLE_FILTERS]; void vp9_entropy_mode_init(); diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h index 85a1f3aa0..92581da2c 100644 --- a/vp9/common/vp9_entropymv.h +++ b/vp9/common/vp9_entropymv.h @@ -24,7 +24,7 @@ void vp9_init_mv_probs(struct VP9Common *cm); void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp); int vp9_use_mv_hp(const MV *ref); -#define VP9_NMV_UPDATE_PROB 252 +#define NMV_UPDATE_PROB 252 /* Symbols for coding which components are zero jointly */ #define MV_JOINTS 4 diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h index 33a97ccb3..7b1ffaeda 100644 --- a/vp9/common/vp9_filter.h +++ b/vp9/common/vp9_filter.h @@ -27,7 +27,7 @@ extern const int16_t 
vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][SUBPEL_TAPS]; // The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear // filter kernel as a 2 tap filter. -#define VP9_BILINEAR_FILTERS_2TAP(x) \ +#define BILINEAR_FILTERS_2TAP(x) \ (vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1) #endif // VP9_COMMON_VP9_FILTER_H_ diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index d40f04f95..a669cc5e7 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -38,14 +38,14 @@ #define NUM_FRAME_CONTEXTS (1 << NUM_FRAME_CONTEXTS_LOG2) typedef struct frame_contexts { - vp9_prob y_mode_prob[BLOCK_SIZE_GROUPS][VP9_INTRA_MODES - 1]; - vp9_prob uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1]; + vp9_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1]; + vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; vp9_prob partition_prob[NUM_FRAME_TYPES][NUM_PARTITION_CONTEXTS] [PARTITION_TYPES - 1]; vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES]; - vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1] - [VP9_SWITCHABLE_FILTERS - 1]; - vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1]; + vp9_prob switchable_interp_prob[SWITCHABLE_FILTERS + 1] + [SWITCHABLE_FILTERS - 1]; + vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1]; vp9_prob intra_inter_prob[INTRA_INTER_CONTEXTS]; vp9_prob comp_inter_prob[COMP_INTER_CONTEXTS]; vp9_prob single_ref_prob[REF_CONTEXTS][2]; @@ -56,15 +56,15 @@ typedef struct frame_contexts { } FRAME_CONTEXT; typedef struct { - unsigned int y_mode[BLOCK_SIZE_GROUPS][VP9_INTRA_MODES]; - unsigned int uv_mode[VP9_INTRA_MODES][VP9_INTRA_MODES]; + unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES]; + unsigned int uv_mode[INTRA_MODES][INTRA_MODES]; unsigned int partition[NUM_PARTITION_CONTEXTS][PARTITION_TYPES]; vp9_coeff_count_model coef[TX_SIZES][BLOCK_TYPES]; unsigned int eob_branch[TX_SIZES][BLOCK_TYPES][REF_TYPES] [COEF_BANDS][PREV_COEF_CONTEXTS]; - unsigned int 
switchable_interp[VP9_SWITCHABLE_FILTERS + 1] - [VP9_SWITCHABLE_FILTERS]; - unsigned int inter_mode[INTER_MODE_CONTEXTS][VP9_INTER_MODES]; + unsigned int switchable_interp[SWITCHABLE_FILTERS + 1] + [SWITCHABLE_FILTERS]; + unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES]; unsigned int intra_inter[INTRA_INTER_CONTEXTS][2]; unsigned int comp_inter[COMP_INTER_CONTEXTS][2]; unsigned int single_ref[REF_CONTEXTS][2][2]; diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c index 58e0e1d67..859c99ed5 100644 --- a/vp9/common/vp9_postproc.c +++ b/vp9/common/vp9_postproc.c @@ -53,7 +53,7 @@ static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] = { { RGB_TO_YUV(0xCC33FF) }, /* Magenta */ }; -static const unsigned char B_PREDICTION_MODE_colors[VP9_INTRA_MODES][3] = { +static const unsigned char B_PREDICTION_MODE_colors[INTRA_MODES][3] = { { RGB_TO_YUV(0x6633ff) }, /* Purple */ { RGB_TO_YUV(0xcc33ff) }, /* Magenta */ { RGB_TO_YUV(0xff33cc) }, /* Pink */ diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c index b00f58392..97ccb1376 100644 --- a/vp9/common/vp9_pred_common.c +++ b/vp9/common/vp9_pred_common.c @@ -31,25 +31,25 @@ unsigned char vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd) { const int left_mv_pred = is_inter_mode(left_mbmi->mode); const int left_interp = left_in_image && left_mv_pred ? left_mbmi->interp_filter - : VP9_SWITCHABLE_FILTERS; + : SWITCHABLE_FILTERS; // above const int above_mv_pred = is_inter_mode(above_mbmi->mode); const int above_interp = above_in_image && above_mv_pred ? 
above_mbmi->interp_filter - : VP9_SWITCHABLE_FILTERS; + : SWITCHABLE_FILTERS; if (left_interp == above_interp) return left_interp; - else if (left_interp == VP9_SWITCHABLE_FILTERS && - above_interp != VP9_SWITCHABLE_FILTERS) + else if (left_interp == SWITCHABLE_FILTERS && + above_interp != SWITCHABLE_FILTERS) return above_interp; - else if (left_interp != VP9_SWITCHABLE_FILTERS && - above_interp == VP9_SWITCHABLE_FILTERS) + else if (left_interp != SWITCHABLE_FILTERS && + above_interp == SWITCHABLE_FILTERS) return left_interp; else - return VP9_SWITCHABLE_FILTERS; + return SWITCHABLE_FILTERS; } // Returns a context number for the given MB prediction signal unsigned char vp9_get_pred_context_intra_inter(const MACROBLOCKD *xd) { diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index f1d855695..4a451b909 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -285,7 +285,7 @@ intra_pred_allsizes(dc) typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left); -static intra_pred_fn pred[VP9_INTRA_MODES][4]; +static intra_pred_fn pred[INTRA_MODES][4]; static intra_pred_fn dc_pred[2][2][4]; static void init_intra_pred_fn_ptrs(void) { diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index d075443ed..30c1b26d0 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -295,7 +295,7 @@ specialize vp9_convolve8_avg_vert ssse3 neon # dct # prototype void vp9_short_idct4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_short_idct4x4_1_add sse2 +specialize vp9_short_idct4x4_1_add sse2 neon prototype void vp9_short_idct4x4_add "int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_short_idct4x4_add sse2 neon @@ -701,7 +701,7 @@ prototype void vp9_quantize_b "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_b specialize vp9_quantize_b $ssse3_x86_64 prototype void vp9_quantize_b_32x32 "int16_t *coeff_ptr, intptr_t 
n_coeffs, int skip_block, int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr, int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan" -specialize vp9_quantize_b_32x32 $ssse3_x86_64 +specialize vp9_quantize_b_32x32 # # Structured Similarity (SSIM) diff --git a/vp9/common/vp9_scale.c b/vp9/common/vp9_scale.c index 0b8dc23ea..989206c60 100644 --- a/vp9/common/vp9_scale.c +++ b/vp9/common/vp9_scale.c @@ -13,11 +13,11 @@ #include "vp9/common/vp9_scale.h" static INLINE int scaled_x(int val, const struct scale_factors *scale) { - return val * scale->x_scale_fp >> VP9_REF_SCALE_SHIFT; + return val * scale->x_scale_fp >> REF_SCALE_SHIFT; } static INLINE int scaled_y(int val, const struct scale_factors *scale) { - return val * scale->y_scale_fp >> VP9_REF_SCALE_SHIFT; + return val * scale->y_scale_fp >> REF_SCALE_SHIFT; } static int unscaled_value(int val, const struct scale_factors *scale) { @@ -58,7 +58,7 @@ static int get_fixed_point_scale_factor(int other_size, int this_size) { // and use fixed point scaling factors in decoding and encoding routines. // Hardware implementations can calculate scale factor in device driver // and use multiplication and shifting on hardware instead of division. 
- return (other_size << VP9_REF_SCALE_SHIFT) / this_size; + return (other_size << REF_SCALE_SHIFT) / this_size; } static int check_scale_factors(int other_w, int other_h, @@ -73,8 +73,8 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale, int other_w, int other_h, int this_w, int this_h) { if (!check_scale_factors(other_w, other_h, this_w, this_h)) { - scale->x_scale_fp = VP9_REF_INVALID_SCALE; - scale->y_scale_fp = VP9_REF_INVALID_SCALE; + scale->x_scale_fp = REF_INVALID_SCALE; + scale->y_scale_fp = REF_INVALID_SCALE; return; } diff --git a/vp9/common/vp9_scale.h b/vp9/common/vp9_scale.h index 827ae9bce..7a720d035 100644 --- a/vp9/common/vp9_scale.h +++ b/vp9/common/vp9_scale.h @@ -14,9 +14,9 @@ #include "vp9/common/vp9_mv.h" #include "vp9/common/vp9_convolve.h" -#define VP9_REF_SCALE_SHIFT 14 -#define VP9_REF_NO_SCALE (1 << VP9_REF_SCALE_SHIFT) -#define VP9_REF_INVALID_SCALE -1 +#define REF_SCALE_SHIFT 14 +#define REF_NO_SCALE (1 << REF_SCALE_SHIFT) +#define REF_INVALID_SCALE -1 struct scale_factors { int x_scale_fp; // horizontal fixed point scale factor @@ -39,13 +39,13 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale, int this_w, int this_h); static int vp9_is_valid_scale(const struct scale_factors *sf) { - return sf->x_scale_fp != VP9_REF_INVALID_SCALE && - sf->y_scale_fp != VP9_REF_INVALID_SCALE; + return sf->x_scale_fp != REF_INVALID_SCALE && + sf->y_scale_fp != REF_INVALID_SCALE; } static int vp9_is_scaled(const struct scale_factors *sf) { - return sf->x_scale_fp != VP9_REF_NO_SCALE || - sf->y_scale_fp != VP9_REF_NO_SCALE; + return sf->x_scale_fp != REF_NO_SCALE || + sf->y_scale_fp != REF_NO_SCALE; } #endif // VP9_COMMON_VP9_SCALE_H_ diff --git a/vp9/common/vp9_subpelvar.h b/vp9/common/vp9_subpelvar.h index 78d42359b..fe75481f6 100644 --- a/vp9/common/vp9_subpelvar.h +++ b/vp9/common/vp9_subpelvar.h @@ -81,7 +81,7 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, for (j = 0; j < output_width; 
j++) { output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] + (int)src_ptr[pixel_step] * vp9_filter[1], - VP9_FILTER_BITS); + FILTER_BITS); src_ptr++; } @@ -133,7 +133,7 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, for (j = 0; j < output_width; j++) { output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] + (int)src_ptr[pixel_step] * vp9_filter[1], - VP9_FILTER_BITS); + FILTER_BITS); src_ptr++; } diff --git a/vp9/decoder/vp9_dboolhuff.c b/vp9/decoder/vp9_dboolhuff.c index 31b1ae2b0..06acec4db 100644 --- a/vp9/decoder/vp9_dboolhuff.c +++ b/vp9/decoder/vp9_dboolhuff.c @@ -16,7 +16,7 @@ // This is meant to be a large, positive constant that can still be efficiently // loaded as an immediate (on platforms like ARM, for example). // Even relatively modest values like 100 would work fine. -#define VP9_LOTS_OF_BITS 0x40000000 +#define LOTS_OF_BITS 0x40000000 int vp9_reader_init(vp9_reader *r, const uint8_t *buffer, size_t size) { @@ -41,13 +41,13 @@ void vp9_reader_fill(vp9_reader *r) { const uint8_t *buffer = r->buffer; VP9_BD_VALUE value = r->value; int count = r->count; - int shift = VP9_BD_VALUE_SIZE - 8 - (count + 8); + int shift = BD_VALUE_SIZE - 8 - (count + 8); int loop_end = 0; const int bits_left = (int)((buffer_end - buffer)*CHAR_BIT); const int x = shift + CHAR_BIT - bits_left; if (x >= 0) { - count += VP9_LOTS_OF_BITS; + count += LOTS_OF_BITS; loop_end = x; } @@ -66,7 +66,7 @@ void vp9_reader_fill(vp9_reader *r) { const uint8_t *vp9_reader_find_end(vp9_reader *r) { // Find the end of the coded buffer - while (r->count > CHAR_BIT && r->count < VP9_BD_VALUE_SIZE) { + while (r->count > CHAR_BIT && r->count < BD_VALUE_SIZE) { r->count -= CHAR_BIT; r->buffer--; } @@ -83,10 +83,10 @@ int vp9_reader_has_error(vp9_reader *r) { // // When reading a byte from the user's buffer, count is filled with 8 and // one byte is filled into the value buffer. 
When we reach the end of the - // data, count is additionally filled with VP9_LOTS_OF_BITS. So when - // count == VP9_LOTS_OF_BITS - 1, the user's data has been exhausted. + // data, count is additionally filled with LOTS_OF_BITS. So when + // count == LOTS_OF_BITS - 1, the user's data has been exhausted. // // 1 if we have tried to decode bits after the end of stream was encountered. // 0 No error. - return r->count > VP9_BD_VALUE_SIZE && r->count < VP9_LOTS_OF_BITS; + return r->count > BD_VALUE_SIZE && r->count < LOTS_OF_BITS; } diff --git a/vp9/decoder/vp9_dboolhuff.h b/vp9/decoder/vp9_dboolhuff.h index c46dd73a3..c86451649 100644 --- a/vp9/decoder/vp9_dboolhuff.h +++ b/vp9/decoder/vp9_dboolhuff.h @@ -20,7 +20,7 @@ typedef size_t VP9_BD_VALUE; -#define VP9_BD_VALUE_SIZE ((int)sizeof(VP9_BD_VALUE)*CHAR_BIT) +#define BD_VALUE_SIZE ((int)sizeof(VP9_BD_VALUE)*CHAR_BIT) typedef struct { const uint8_t *buffer_end; @@ -52,7 +52,7 @@ static int vp9_read(vp9_reader *br, int probability) { value = br->value; count = br->count; - bigsplit = (VP9_BD_VALUE)split << (VP9_BD_VALUE_SIZE - 8); + bigsplit = (VP9_BD_VALUE)split << (BD_VALUE_SIZE - 8); range = split; diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 747877d80..d1c59c364 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -251,7 +251,7 @@ static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref, } static void update_mv(vp9_reader *r, vp9_prob *p) { - if (vp9_read(r, VP9_NMV_UPDATE_PROB)) + if (vp9_read(r, NMV_UPDATE_PROB)) *p = (vp9_read_literal(r, 7) << 1) | 1; } @@ -345,17 +345,17 @@ static void read_ref_frames(VP9D_COMP *pbi, vp9_reader *r, static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vp9_reader *r) { int i, j; - for (j = 0; j < VP9_SWITCHABLE_FILTERS + 1; ++j) - for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i) - if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + for (j = 0; j < SWITCHABLE_FILTERS + 1; ++j) + for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i) 
+ if (vp9_read(r, MODE_UPDATE_PROB)) vp9_diff_update_prob(r, &fc->switchable_interp_prob[j][i]); } static void read_inter_mode_probs(FRAME_CONTEXT *fc, vp9_reader *r) { int i, j; for (i = 0; i < INTER_MODE_CONTEXTS; ++i) - for (j = 0; j < VP9_INTER_MODES - 1; ++j) - if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + for (j = 0; j < INTER_MODES - 1; ++j) + if (vp9_read(r, MODE_UPDATE_PROB)) vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]); } @@ -551,22 +551,14 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, switch (mbmi->mode) { case NEARMV: mv0->as_int = nearby.as_int; - clamp_mv2(&mv0->as_mv, xd); - - if (is_compound) { + if (is_compound) mv1->as_int = nearby_second.as_int; - clamp_mv2(&mv1->as_mv, xd); - } break; case NEARESTMV: mv0->as_int = nearest.as_int; - clamp_mv2(&mv0->as_mv, xd); - - if (is_compound) { + if (is_compound) mv1->as_int = nearest_second.as_int; - clamp_mv2(&mv1->as_mv, xd); - } break; case ZEROMV: @@ -615,20 +607,20 @@ static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) { if (cm->comp_pred_mode == HYBRID_PREDICTION) for (i = 0; i < COMP_INTER_CONTEXTS; i++) - if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + if (vp9_read(r, MODE_UPDATE_PROB)) vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]); if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) for (i = 0; i < REF_CONTEXTS; i++) { - if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + if (vp9_read(r, MODE_UPDATE_PROB)) vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][0]); - if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + if (vp9_read(r, MODE_UPDATE_PROB)) vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][1]); } if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) for (i = 0; i < REF_CONTEXTS; i++) - if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + if (vp9_read(r, MODE_UPDATE_PROB)) vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]); } @@ -639,7 +631,7 @@ void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r) { // TODO(jkoleszar): does this clear more than MBSKIP_CONTEXTS? Maybe remove. 
// vpx_memset(cm->fc.mbskip_probs, 0, sizeof(cm->fc.mbskip_probs)); for (k = 0; k < MBSKIP_CONTEXTS; ++k) - if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + if (vp9_read(r, MODE_UPDATE_PROB)) vp9_diff_update_prob(r, &cm->fc.mbskip_probs[k]); if (cm->frame_type != KEY_FRAME && !cm->intra_only) { @@ -653,19 +645,19 @@ void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r) { read_switchable_interp_probs(&cm->fc, r); for (i = 0; i < INTRA_INTER_CONTEXTS; i++) - if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + if (vp9_read(r, MODE_UPDATE_PROB)) vp9_diff_update_prob(r, &cm->fc.intra_inter_prob[i]); read_comp_pred(cm, r); for (j = 0; j < BLOCK_SIZE_GROUPS; j++) - for (i = 0; i < VP9_INTRA_MODES - 1; ++i) - if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + for (i = 0; i < INTRA_MODES - 1; ++i) + if (vp9_read(r, MODE_UPDATE_PROB)) vp9_diff_update_prob(r, &cm->fc.y_mode_prob[j][i]); for (j = 0; j < NUM_PARTITION_CONTEXTS; ++j) for (i = 0; i < PARTITION_TYPES - 1; ++i) - if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + if (vp9_read(r, MODE_UPDATE_PROB)) vp9_diff_update_prob(r, &cm->fc.partition_prob[INTER_FRAME][j][i]); read_mv_probs(r, nmvc, xd->allow_high_precision_mv); diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 5e9d24edc..fd88b6e6c 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -63,17 +63,17 @@ static void read_tx_probs(struct tx_probs *tx_probs, vp9_reader *r) { for (i = 0; i < TX_SIZE_CONTEXTS; ++i) for (j = 0; j < TX_SIZES - 3; ++j) - if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + if (vp9_read(r, MODE_UPDATE_PROB)) vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]); for (i = 0; i < TX_SIZE_CONTEXTS; ++i) for (j = 0; j < TX_SIZES - 2; ++j) - if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + if (vp9_read(r, MODE_UPDATE_PROB)) vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]); for (i = 0; i < TX_SIZE_CONTEXTS; ++i) for (j = 0; j < TX_SIZES - 1; ++j) - if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + if (vp9_read(r, MODE_UPDATE_PROB)) vp9_diff_update_prob(r, 
&tx_probs->p32x32[i][j]); } @@ -592,6 +592,7 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) { lf_data->frame_buffer = fb; lf_data->cm = pc; lf_data->xd = pbi->mb; + lf_data->stop = 0; lf_data->y_only = 0; } vp9_loop_filter_frame_init(pc, pc->lf.filter_level); @@ -615,6 +616,9 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) { if (num_threads > 1) { LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; + // decoding has completed: finish up the loop filter in this thread. + if (mi_row + MI_BLOCK_SIZE >= pc->cur_tile_mi_row_end) continue; + vp9_worker_sync(&pbi->lf_worker); lf_data->start = lf_start; lf_data->stop = mi_row; @@ -627,13 +631,17 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) { } if (pbi->do_loopfilter_inline) { + int lf_start; if (num_threads > 1) { - // TODO(jzern): since the loop filter is delayed one mb row, this will be - // forced to wait for the last row scheduled in the for loop. + LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; + vp9_worker_sync(&pbi->lf_worker); + lf_start = lf_data->stop; + } else { + lf_start = mi_row - MI_BLOCK_SIZE; } vp9_loop_filter_rows(fb, pc, &pbi->mb, - mi_row - MI_BLOCK_SIZE, pc->mi_rows, 0); + lf_start, pc->mi_rows, 0); } } diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index f6846e7fe..d7c73b665 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -41,9 +41,9 @@ unsigned __int64 Sectionbits[500]; #endif #ifdef ENTROPY_STATS -int intra_mode_stats[VP9_INTRA_MODES] - [VP9_INTRA_MODES] - [VP9_INTRA_MODES]; +int intra_mode_stats[INTRA_MODES] + [INTRA_MODES] + [INTRA_MODES]; vp9_coeff_stats tree_update_hist[TX_SIZES][BLOCK_TYPES]; extern unsigned int active_section; @@ -54,8 +54,8 @@ extern unsigned int active_section; int64_t tx_count_32x32p_stats[TX_SIZE_CONTEXTS][TX_SIZES]; int64_t tx_count_16x16p_stats[TX_SIZE_CONTEXTS][TX_SIZES - 1]; int64_t tx_count_8x8p_stats[TX_SIZE_CONTEXTS][TX_SIZES - 2]; -int64_t 
switchable_interp_stats[VP9_SWITCHABLE_FILTERS+1] - [VP9_SWITCHABLE_FILTERS]; +int64_t switchable_interp_stats[SWITCHABLE_FILTERS+1] + [SWITCHABLE_FILTERS]; void init_tx_count_stats() { vp9_zero(tx_count_32x32p_stats); @@ -88,8 +88,8 @@ static void update_tx_count_stats(VP9_COMMON *cm) { static void update_switchable_interp_stats(VP9_COMMON *cm) { int i, j; - for (i = 0; i < VP9_SWITCHABLE_FILTERS+1; ++i) - for (j = 0; j < VP9_SWITCHABLE_FILTERS; ++j) { + for (i = 0; i < SWITCHABLE_FILTERS+1; ++i) + for (j = 0; j < SWITCHABLE_FILTERS; ++j) { switchable_interp_stats[i][j] += cm->fc.switchable_interp_count[i][j]; } } @@ -141,11 +141,11 @@ void write_switchable_interp_stats() { fclose(fp); printf( - "vp9_default_switchable_filter_count[VP9_SWITCHABLE_FILTERS+1]" - "[VP9_SWITCHABLE_FILTERS] = {\n"); - for (i = 0; i < VP9_SWITCHABLE_FILTERS+1; i++) { + "vp9_default_switchable_filter_count[SWITCHABLE_FILTERS+1]" + "[SWITCHABLE_FILTERS] = {\n"); + for (i = 0; i < SWITCHABLE_FILTERS+1; i++) { printf(" { "); - for (j = 0; j < VP9_SWITCHABLE_FILTERS; j++) { + for (j = 0; j < SWITCHABLE_FILTERS; j++) { printf("%"PRId64", ", switchable_interp_stats[i][j]); } printf("},\n"); @@ -181,7 +181,7 @@ static void update_mode( n--; for (i = 0; i < n; ++i) { - vp9_cond_prob_diff_update(w, &Pcur[i], VP9_MODE_UPDATE_PROB, bct[i]); + vp9_cond_prob_diff_update(w, &Pcur[i], MODE_UPDATE_PROB, bct[i]); } } @@ -189,11 +189,11 @@ static void update_mbintra_mode_probs(VP9_COMP* const cpi, vp9_writer* const bc) { VP9_COMMON *const cm = &cpi->common; int j; - vp9_prob pnew[VP9_INTRA_MODES - 1]; - unsigned int bct[VP9_INTRA_MODES - 1][2]; + vp9_prob pnew[INTRA_MODES - 1]; + unsigned int bct[INTRA_MODES - 1][2]; for (j = 0; j < BLOCK_SIZE_GROUPS; j++) - update_mode(bc, VP9_INTRA_MODES, vp9_intra_mode_tree, pnew, + update_mode(bc, INTRA_MODES, vp9_intra_mode_tree, pnew, cm->fc.y_mode_prob[j], bct, (unsigned int *)cpi->y_mode_count[j]); } @@ -228,7 +228,7 @@ void vp9_update_skip_probs(VP9_COMP *cpi, 
vp9_writer *w) { for (k = 0; k < MBSKIP_CONTEXTS; ++k) vp9_cond_prob_diff_update(w, &cm->fc.mbskip_probs[k], - VP9_MODE_UPDATE_PROB, cm->counts.mbskip[k]); + MODE_UPDATE_PROB, cm->counts.mbskip[k]); } static void write_intra_mode(vp9_writer *bc, int m, const vp9_prob *p) { @@ -238,20 +238,20 @@ static void write_intra_mode(vp9_writer *bc, int m, const vp9_prob *p) { static void update_switchable_interp_probs(VP9_COMP *const cpi, vp9_writer* const bc) { VP9_COMMON *const pc = &cpi->common; - unsigned int branch_ct[VP9_SWITCHABLE_FILTERS + 1] - [VP9_SWITCHABLE_FILTERS - 1][2]; - vp9_prob new_prob[VP9_SWITCHABLE_FILTERS + 1][VP9_SWITCHABLE_FILTERS - 1]; + unsigned int branch_ct[SWITCHABLE_FILTERS + 1] + [SWITCHABLE_FILTERS - 1][2]; + vp9_prob new_prob[SWITCHABLE_FILTERS + 1][SWITCHABLE_FILTERS - 1]; int i, j; - for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) { + for (j = 0; j <= SWITCHABLE_FILTERS; ++j) { vp9_tree_probs_from_distribution( vp9_switchable_interp_tree, new_prob[j], branch_ct[j], pc->counts.switchable_interp[j], 0); } - for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) { - for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i) { + for (j = 0; j <= SWITCHABLE_FILTERS; ++j) { + for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i) { vp9_cond_prob_diff_update(bc, &pc->fc.switchable_interp_prob[j][i], - VP9_MODE_UPDATE_PROB, branch_ct[j][i]); + MODE_UPDATE_PROB, branch_ct[j][i]); } } #ifdef MODE_STATS @@ -264,16 +264,16 @@ static void update_inter_mode_probs(VP9_COMMON *pc, vp9_writer* const bc) { int i, j; for (i = 0; i < INTER_MODE_CONTEXTS; ++i) { - unsigned int branch_ct[VP9_INTER_MODES - 1][2]; - vp9_prob new_prob[VP9_INTER_MODES - 1]; + unsigned int branch_ct[INTER_MODES - 1][2]; + vp9_prob new_prob[INTER_MODES - 1]; vp9_tree_probs_from_distribution(vp9_inter_mode_tree, new_prob, branch_ct, pc->counts.inter_mode[i], NEARESTMV); - for (j = 0; j < VP9_INTER_MODES - 1; ++j) + for (j = 0; j < INTER_MODES - 1; ++j) vp9_cond_prob_diff_update(bc, &pc->fc.inter_mode_probs[i][j], - 
VP9_MODE_UPDATE_PROB, branch_ct[j]); + MODE_UPDATE_PROB, branch_ct[j]); } } @@ -1049,7 +1049,7 @@ static void encode_txfm_probs(VP9_COMP *cpi, vp9_writer *w) { ct_8x8p); for (j = 0; j < TX_SIZES - 3; j++) vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p8x8[i][j], - VP9_MODE_UPDATE_PROB, ct_8x8p[j]); + MODE_UPDATE_PROB, ct_8x8p[j]); } for (i = 0; i < TX_SIZE_CONTEXTS; i++) { @@ -1057,14 +1057,14 @@ static void encode_txfm_probs(VP9_COMP *cpi, vp9_writer *w) { ct_16x16p); for (j = 0; j < TX_SIZES - 2; j++) vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p16x16[i][j], - VP9_MODE_UPDATE_PROB, ct_16x16p[j]); + MODE_UPDATE_PROB, ct_16x16p[j]); } for (i = 0; i < TX_SIZE_CONTEXTS; i++) { tx_counts_to_branch_counts_32x32(cm->counts.tx.p32x32[i], ct_32x32p); for (j = 0; j < TX_SIZES - 1; j++) vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p32x32[i][j], - VP9_MODE_UPDATE_PROB, ct_32x32p[j]); + MODE_UPDATE_PROB, ct_32x32p[j]); } #ifdef MODE_STATS if (!cpi->dummy_packing) @@ -1087,17 +1087,17 @@ static void fix_mcomp_filter_type(VP9_COMP *cpi) { if (cm->mcomp_filter_type == SWITCHABLE) { // Check to see if only one of the filters is actually used - int count[VP9_SWITCHABLE_FILTERS]; + int count[SWITCHABLE_FILTERS]; int i, j, c = 0; - for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { + for (i = 0; i < SWITCHABLE_FILTERS; ++i) { count[i] = 0; - for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) + for (j = 0; j <= SWITCHABLE_FILTERS; ++j) count[i] += cm->counts.switchable_interp[j][i]; c += (count[i] > 0); } if (c == 1) { // Only one filter is used. 
So set the filter at frame level - for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { + for (i = 0; i < SWITCHABLE_FILTERS; ++i) { if (count[i]) { cm->mcomp_filter_type = i; break; @@ -1386,7 +1386,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { for (i = 0; i < INTRA_INTER_CONTEXTS; i++) vp9_cond_prob_diff_update(&header_bc, &fc->intra_inter_prob[i], - VP9_MODE_UPDATE_PROB, + MODE_UPDATE_PROB, cpi->intra_inter_count[i]); if (cm->allow_comp_inter_inter) { @@ -1400,7 +1400,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { if (use_hybrid_pred) for (i = 0; i < COMP_INTER_CONTEXTS; i++) vp9_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i], - VP9_MODE_UPDATE_PROB, + MODE_UPDATE_PROB, cpi->comp_inter_count[i]); } } @@ -1408,10 +1408,10 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) { for (i = 0; i < REF_CONTEXTS; i++) { vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][0], - VP9_MODE_UPDATE_PROB, + MODE_UPDATE_PROB, cpi->single_ref_count[i][0]); vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][1], - VP9_MODE_UPDATE_PROB, + MODE_UPDATE_PROB, cpi->single_ref_count[i][1]); } } @@ -1419,7 +1419,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) for (i = 0; i < REF_CONTEXTS; i++) vp9_cond_prob_diff_update(&header_bc, &fc->comp_ref_prob[i], - VP9_MODE_UPDATE_PROB, + MODE_UPDATE_PROB, cpi->comp_ref_count[i]); update_mbintra_mode_probs(cpi, &header_bc); diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 790b3c22c..9426f44ab 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -48,7 +48,7 @@ typedef struct { int comp_pred_diff; int single_pred_diff; int64_t tx_rd_diff[TX_MODES]; - int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1]; + int64_t best_filter_diff[SWITCHABLE_FILTERS + 1]; // Bit flag for each mode whether it 
has high error in comparison to others. unsigned int modes_with_high_error; @@ -121,9 +121,9 @@ struct macroblock { int mbmode_cost[MB_MODE_COUNT]; unsigned inter_mode_cost[INTER_MODE_CONTEXTS][MB_MODE_COUNT - NEARESTMV]; int intra_uv_mode_cost[2][MB_MODE_COUNT]; - int y_mode_costs[VP9_INTRA_MODES][VP9_INTRA_MODES][VP9_INTRA_MODES]; - int switchable_interp_costs[VP9_SWITCHABLE_FILTERS + 1] - [VP9_SWITCHABLE_FILTERS]; + int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; + int switchable_interp_costs[SWITCHABLE_FILTERS + 1] + [SWITCHABLE_FILTERS]; // These define limits to motion vector components to prevent them // from extending outside the UMV borders diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index f8da500e6..360abad77 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -56,7 +56,7 @@ static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x); * This also avoids the need for divide by zero checks in * vp9_activity_masking(). */ -#define VP9_ACTIVITY_AVG_MIN (64) +#define ACTIVITY_AVG_MIN (64) /* Motion vector component magnitude threshold for defining fast motion. 
*/ #define FAST_MOTION_MV_THRESH (24) @@ -131,8 +131,8 @@ static unsigned int mb_activity_measure(MACROBLOCK *x, int mb_row, int mb_col) { mb_activity = tt_activity_measure(x); } - if (mb_activity < VP9_ACTIVITY_AVG_MIN) - mb_activity = VP9_ACTIVITY_AVG_MIN; + if (mb_activity < ACTIVITY_AVG_MIN) + mb_activity = ACTIVITY_AVG_MIN; return mb_activity; } @@ -182,8 +182,8 @@ static void calc_av_activity(VP9_COMP *cpi, int64_t activity_sum) { cpi->activity_avg = (unsigned int) (activity_sum / cpi->common.MBs); #endif // ACT_MEDIAN - if (cpi->activity_avg < VP9_ACTIVITY_AVG_MIN) - cpi->activity_avg = VP9_ACTIVITY_AVG_MIN; + if (cpi->activity_avg < ACTIVITY_AVG_MIN) + cpi->activity_avg = ACTIVITY_AVG_MIN; // Experimental code: return fixed value normalized for several clips if (ALT_ACT_MEASURE) @@ -448,7 +448,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, cpi->rd_comp_pred_diff[COMP_PREDICTION_ONLY] += ctx->comp_pred_diff; cpi->rd_comp_pred_diff[HYBRID_PREDICTION] += ctx->hybrid_pred_diff; - for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) + for (i = 0; i <= SWITCHABLE_FILTERS; i++) cpi->rd_filter_diff[i] += ctx->best_filter_diff[i]; } } @@ -2414,15 +2414,15 @@ void vp9_encode_frame(VP9_COMP *cpi) { cpi->rd_filter_threshes[frame_type][1] > cpi->rd_filter_threshes[frame_type][2] && cpi->rd_filter_threshes[frame_type][1] > - cpi->rd_filter_threshes[frame_type][VP9_SWITCHABLE_FILTERS]) { + cpi->rd_filter_threshes[frame_type][SWITCHABLE_FILTERS]) { filter_type = EIGHTTAP_SMOOTH; } else if (cpi->rd_filter_threshes[frame_type][2] > cpi->rd_filter_threshes[frame_type][0] && cpi->rd_filter_threshes[frame_type][2] > - cpi->rd_filter_threshes[frame_type][VP9_SWITCHABLE_FILTERS]) { + cpi->rd_filter_threshes[frame_type][SWITCHABLE_FILTERS]) { filter_type = EIGHTTAP_SHARP; } else if (cpi->rd_filter_threshes[frame_type][0] > - cpi->rd_filter_threshes[frame_type][VP9_SWITCHABLE_FILTERS]) { + cpi->rd_filter_threshes[frame_type][SWITCHABLE_FILTERS]) { filter_type = 
EIGHTTAP; } else { filter_type = SWITCHABLE; @@ -2445,7 +2445,7 @@ void vp9_encode_frame(VP9_COMP *cpi) { cpi->rd_prediction_type_threshes[frame_type][i] >>= 1; } - for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) { + for (i = 0; i <= SWITCHABLE_FILTERS; i++) { const int64_t diff = cpi->rd_filter_diff[i] / cpi->common.MBs; cpi->rd_filter_threshes[frame_type][i] = (cpi->rd_filter_threshes[frame_type][i] + diff) / 2; diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c index 2b7cb0b5b..1203c00ab 100644 --- a/vp9/encoder/vp9_encodemv.c +++ b/vp9/encoder/vp9_encodemv.c @@ -236,22 +236,22 @@ void vp9_write_nmv_probs(VP9_COMP* const cpi, int usehp, vp9_writer* const bc) { for (j = 0; j < MV_JOINTS - 1; ++j) update_mv(bc, branch_ct_joint[j], &mvc->joints[j], prob.joints[j], - VP9_NMV_UPDATE_PROB); + NMV_UPDATE_PROB); for (i = 0; i < 2; ++i) { update_mv(bc, branch_ct_sign[i], &mvc->comps[i].sign, - prob.comps[i].sign, VP9_NMV_UPDATE_PROB); + prob.comps[i].sign, NMV_UPDATE_PROB); for (j = 0; j < MV_CLASSES - 1; ++j) update_mv(bc, branch_ct_classes[i][j], &mvc->comps[i].classes[j], - prob.comps[i].classes[j], VP9_NMV_UPDATE_PROB); + prob.comps[i].classes[j], NMV_UPDATE_PROB); for (j = 0; j < CLASS0_SIZE - 1; ++j) update_mv(bc, branch_ct_class0[i][j], &mvc->comps[i].class0[j], - prob.comps[i].class0[j], VP9_NMV_UPDATE_PROB); + prob.comps[i].class0[j], NMV_UPDATE_PROB); for (j = 0; j < MV_OFFSET_BITS; ++j) update_mv(bc, branch_ct_bits[i][j], &mvc->comps[i].bits[j], - prob.comps[i].bits[j], VP9_NMV_UPDATE_PROB); + prob.comps[i].bits[j], NMV_UPDATE_PROB); } for (i = 0; i < 2; ++i) { @@ -260,20 +260,20 @@ void vp9_write_nmv_probs(VP9_COMP* const cpi, int usehp, vp9_writer* const bc) { for (k = 0; k < 3; ++k) update_mv(bc, branch_ct_class0_fp[i][j][k], &mvc->comps[i].class0_fp[j][k], - prob.comps[i].class0_fp[j][k], VP9_NMV_UPDATE_PROB); + prob.comps[i].class0_fp[j][k], NMV_UPDATE_PROB); } for (j = 0; j < 3; ++j) update_mv(bc, branch_ct_fp[i][j], &mvc->comps[i].fp[j], 
- prob.comps[i].fp[j], VP9_NMV_UPDATE_PROB); + prob.comps[i].fp[j], NMV_UPDATE_PROB); } if (usehp) { for (i = 0; i < 2; ++i) { update_mv(bc, branch_ct_class0_hp[i], &mvc->comps[i].class0_hp, - prob.comps[i].class0_hp, VP9_NMV_UPDATE_PROB); + prob.comps[i].class0_hp, NMV_UPDATE_PROB); update_mv(bc, branch_ct_hp[i], &mvc->comps[i].hp, - prob.comps[i].hp, VP9_NMV_UPDATE_PROB); + prob.comps[i].hp, NMV_UPDATE_PROB); } } } diff --git a/vp9/encoder/vp9_modecosts.c b/vp9/encoder/vp9_modecosts.c index 993aba767..5b2365308 100644 --- a/vp9/encoder/vp9_modecosts.c +++ b/vp9/encoder/vp9_modecosts.c @@ -20,8 +20,8 @@ void vp9_init_mode_costs(VP9_COMP *c) { const vp9_tree_p KT = vp9_intra_mode_tree; int i, j; - for (i = 0; i < VP9_INTRA_MODES; i++) { - for (j = 0; j < VP9_INTRA_MODES; j++) { + for (i = 0; i < INTRA_MODES; i++) { + for (j = 0; j < INTRA_MODES; j++) { vp9_cost_tokens((int *)c->mb.y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j], KT); } @@ -31,12 +31,12 @@ void vp9_init_mode_costs(VP9_COMP *c) { vp9_cost_tokens(c->mb.mbmode_cost, x->fc.y_mode_prob[1], vp9_intra_mode_tree); vp9_cost_tokens(c->mb.intra_uv_mode_cost[1], - x->fc.uv_mode_prob[VP9_INTRA_MODES - 1], vp9_intra_mode_tree); + x->fc.uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree); vp9_cost_tokens(c->mb.intra_uv_mode_cost[0], - vp9_kf_uv_mode_prob[VP9_INTRA_MODES - 1], + vp9_kf_uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree); - for (i = 0; i <= VP9_SWITCHABLE_FILTERS; ++i) + for (i = 0; i <= SWITCHABLE_FILTERS; ++i) vp9_cost_tokens((int *)c->mb.switchable_interp_costs[i], x->fc.switchable_interp_prob[i], vp9_switchable_interp_tree); diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 7e44ebd04..34bd43ef1 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -98,9 +98,9 @@ FILE *keyfile; #ifdef ENTROPY_STATS -extern int intra_mode_stats[VP9_INTRA_MODES] - [VP9_INTRA_MODES] - [VP9_INTRA_MODES]; +extern int intra_mode_stats[INTRA_MODES] + [INTRA_MODES] + [INTRA_MODES]; 
#endif #ifdef MODE_STATS @@ -444,9 +444,9 @@ static void configure_static_seg_features(VP9_COMP *cpi) { void vp9_update_mode_context_stats(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; int i, j; - unsigned int (*inter_mode_counts)[VP9_INTER_MODES - 1][2] = + unsigned int (*inter_mode_counts)[INTER_MODES - 1][2] = cm->fc.inter_mode_counts; - int64_t (*mv_ref_stats)[VP9_INTER_MODES - 1][2] = cpi->mv_ref_stats; + int64_t (*mv_ref_stats)[INTER_MODES - 1][2] = cpi->mv_ref_stats; FILE *f; // Read the past stats counters @@ -460,7 +460,7 @@ void vp9_update_mode_context_stats(VP9_COMP *cpi) { // Add in the values for this frame for (i = 0; i < INTER_MODE_CONTEXTS; i++) { - for (j = 0; j < VP9_INTER_MODES - 1; j++) { + for (j = 0; j < INTER_MODES - 1; j++) { mv_ref_stats[i][j][0] += (int64_t)inter_mode_counts[i][j][0]; mv_ref_stats[i][j][1] += (int64_t)inter_mode_counts[i][j][1]; } @@ -479,12 +479,12 @@ void print_mode_context(VP9_COMP *cpi) { fprintf(f, "#include \"vp9_entropy.h\"\n"); fprintf( f, - "const int inter_mode_probs[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1] ="); + "const int inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1] ="); fprintf(f, "{\n"); for (j = 0; j < INTER_MODE_CONTEXTS; j++) { fprintf(f, " {/* %d */ ", j); fprintf(f, " "); - for (i = 0; i < VP9_INTER_MODES - 1; i++) { + for (i = 0; i < INTER_MODES - 1; i++) { int this_prob; int64_t count = cpi->mv_ref_stats[j][i][0] + cpi->mv_ref_stats[j][i][1]; if (count) @@ -735,7 +735,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->mode_search_skip_flags = 0; sf->disable_split_var_thresh = 0; sf->disable_filter_search_var_thresh = 0; - sf->last_chroma_intra_mode = TM_PRED; + sf->intra_y_mode_mask = ALL_INTRA_MODES; + sf->intra_uv_mode_mask = ALL_INTRA_MODES; sf->use_rd_breakout = 0; sf->skip_encode_sb = 0; sf->use_uv_intra_rd_estimate = 0; @@ -765,7 +766,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->static_segmentation = 0; #endif sf->use_avoid_tested_higherror = 1; - sf->adaptive_rd_thresh 
= 1; + sf->adaptive_rd_thresh = MIN((speed + 1), 4); if (speed == 1) { sf->comp_inter_joint_search_thresh = BLOCK_SIZES; @@ -798,6 +799,9 @@ void vp9_set_speed_features(VP9_COMP *cpi) { // the main framework of partition search type. sf->disable_split_var_thresh = 0; sf->disable_filter_search_var_thresh = 16; + + sf->intra_y_mode_mask = INTRA_DC_TM_H_V; + sf->intra_uv_mode_mask = INTRA_DC_TM_H_V; } if (speed == 2) { sf->adjust_thresholds_by_speed = 1; @@ -819,7 +823,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { FLAG_SKIP_COMP_REFMISMATCH | FLAG_SKIP_INTRA_LOWVAR | FLAG_EARLY_TERMINATE; - sf->last_chroma_intra_mode = DC_PRED; + sf->intra_y_mode_mask = INTRA_DC_TM; + sf->intra_uv_mode_mask = INTRA_DC_TM; sf->use_uv_intra_rd_estimate = 1; sf->use_rd_breakout = 1; sf->skip_encode_sb = 1; @@ -859,6 +864,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->subpel_iters_per_step = 1; sf->disable_split_var_thresh = 64; sf->disable_filter_search_var_thresh = 64; + sf->intra_y_mode_mask = INTRA_DC_ONLY; + sf->intra_uv_mode_mask = INTRA_DC_ONLY; } if (speed == 4) { sf->comp_inter_joint_search_thresh = BLOCK_SIZES; @@ -1395,7 +1402,7 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) { } VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { - int i; + int i, j; volatile union { VP9_COMP *cpi; VP9_PTR ptr; @@ -1597,9 +1604,10 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { vp9_set_speed_features(cpi); - // Set starting values of RD threshold multipliers (128 = *1) - for (i = 0; i < MAX_MODES; i++) - cpi->rd_thresh_mult[i] = 128; + // Default rd threshold factors for mode selection + for (i = 0; i < BLOCK_SIZES; ++i) + for (j = 0; j < MAX_MODES; ++j) + cpi->rd_thresh_freq_fact[i][j] = 32; #define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SVFHH, SVFHV, SVFHHV, \ SDX3F, SDX8F, SDX4DF)\ @@ -1800,18 +1808,18 @@ void vp9_remove_compressor(VP9_PTR *ptr) { fprintf(fmode, "\n#include \"vp9_entropymode.h\"\n\n"); fprintf(fmode, "const unsigned int vp9_kf_default_bmode_counts "); - 
fprintf(fmode, "[VP9_INTRA_MODES][VP9_INTRA_MODES]" - "[VP9_INTRA_MODES] =\n{\n"); + fprintf(fmode, "[INTRA_MODES][INTRA_MODES]" + "[INTRA_MODES] =\n{\n"); - for (i = 0; i < VP9_INTRA_MODES; i++) { + for (i = 0; i < INTRA_MODES; i++) { fprintf(fmode, " { // Above Mode : %d\n", i); - for (j = 0; j < VP9_INTRA_MODES; j++) { + for (j = 0; j < INTRA_MODES; j++) { fprintf(fmode, " {"); - for (k = 0; k < VP9_INTRA_MODES; k++) { + for (k = 0; k < INTRA_MODES; k++) { if (!intra_mode_stats[i][j][k]) fprintf(fmode, " %5d, ", 1); else @@ -2629,8 +2637,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Set various flags etc to special state if it is a key frame if (cm->frame_type == KEY_FRAME) { - int i; - // Reset the loop filter deltas and segmentation map setup_features(cm); @@ -2643,10 +2649,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // The alternate reference frame cannot be active for a key frame cpi->source_alt_ref_active = 0; - // Reset the RD threshold multipliers to default of * 1 (128) - for (i = 0; i < MAX_MODES; i++) - cpi->rd_thresh_mult[i] = 128; - cm->error_resilient_mode = (cpi->oxcf.error_resilient_mode != 0); cm->frame_parallel_decoding_mode = (cpi->oxcf.frame_parallel_decoding_mode != 0); diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index c7b35a8c6..653615949 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -79,15 +79,15 @@ typedef struct { vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES]; - vp9_prob y_mode_prob[4][VP9_INTRA_MODES - 1]; - vp9_prob uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1]; + vp9_prob y_mode_prob[4][INTRA_MODES - 1]; + vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; vp9_prob partition_prob[2][NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1]; - vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1] - [VP9_SWITCHABLE_FILTERS - 1]; + vp9_prob switchable_interp_prob[SWITCHABLE_FILTERS + 1] + [SWITCHABLE_FILTERS - 1]; - int 
inter_mode_counts[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1][2]; - vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1]; + int inter_mode_counts[INTER_MODE_CONTEXTS][INTER_MODES - 1][2]; + vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1]; struct tx_probs tx_probs; vp9_prob mbskip_probs[MBSKIP_CONTEXTS]; @@ -238,6 +238,11 @@ typedef enum { // Other methods to come } SUBPEL_SEARCH_METHODS; +#define ALL_INTRA_MODES 0x3FF +#define INTRA_DC_ONLY 0x01 +#define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED)) +#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED)) + typedef struct { int RD; SEARCH_METHODS search_method; @@ -288,7 +293,8 @@ typedef struct { // A source variance threshold below which filter search is disabled // Choose a very large value (UINT_MAX) to use 8-tap always unsigned int disable_filter_search_var_thresh; - MB_PREDICTION_MODE last_chroma_intra_mode; + int intra_y_mode_mask; + int intra_uv_mode_mask; int use_rd_breakout; int use_uv_intra_rd_estimate; int use_fast_lpf_pick; @@ -375,8 +381,6 @@ typedef struct VP9_COMP { int ref_frame_mask; int set_ref_frame_mask; - int rd_thresh_mult[MAX_MODES]; - int rd_baseline_thresh[BLOCK_SIZES][MAX_MODES]; int rd_threshes[BLOCK_SIZES][MAX_MODES]; int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; @@ -392,9 +396,9 @@ typedef struct VP9_COMP { // FIXME(rbultje) can this overflow? 
int rd_tx_select_threshes[4][TX_MODES]; - int64_t rd_filter_diff[VP9_SWITCHABLE_FILTERS + 1]; - int64_t rd_filter_threshes[4][VP9_SWITCHABLE_FILTERS + 1]; - int64_t rd_filter_cache[VP9_SWITCHABLE_FILTERS + 1]; + int64_t rd_filter_diff[SWITCHABLE_FILTERS + 1]; + int64_t rd_filter_threshes[4][SWITCHABLE_FILTERS + 1]; + int64_t rd_filter_cache[SWITCHABLE_FILTERS + 1]; int RDMULT; int RDDIV; @@ -469,8 +473,8 @@ typedef struct VP9_COMP { int cq_target_quality; - int y_mode_count[4][VP9_INTRA_MODES]; - int y_uv_mode_count[VP9_INTRA_MODES][VP9_INTRA_MODES]; + int y_mode_count[4][INTRA_MODES]; + int y_uv_mode_count[INTRA_MODES][INTRA_MODES]; unsigned int partition_count[NUM_PARTITION_CONTEXTS][PARTITION_TYPES]; nmv_context_counts NMVcount; @@ -635,8 +639,8 @@ typedef struct VP9_COMP { int dummy_packing; /* flag to indicate if packing is dummy */ - unsigned int switchable_interp_count[VP9_SWITCHABLE_FILTERS + 1] - [VP9_SWITCHABLE_FILTERS]; + unsigned int switchable_interp_count[SWITCHABLE_FILTERS + 1] + [SWITCHABLE_FILTERS]; unsigned int txfm_stepdown_count[TX_SIZES]; @@ -657,7 +661,7 @@ typedef struct VP9_COMP { #endif #ifdef ENTROPY_STATS - int64_t mv_ref_stats[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1][2]; + int64_t mv_ref_stats[INTER_MODE_CONTEXTS][INTER_MODES - 1][2]; #endif } VP9_COMP; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 08b0c454a..74282aafe 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -104,9 +104,8 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { static int rd_thresh_block_size_factor[BLOCK_SIZES] = {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32}; -#define BASE_RD_THRESH_FREQ_FACT 16 -#define MAX_RD_THRESH_FREQ_FACT 32 -#define MAX_RD_THRESH_FREQ_INC 1 +#define MAX_RD_THRESH_FACT 64 +#define RD_THRESH_INC 1 static void fill_token_costs(vp9_coeff_cost *c, vp9_coeff_probs_model (*p)[BLOCK_TYPES]) { @@ -212,12 +211,6 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { } else { 
cpi->rd_threshes[bsize][i] = INT_MAX; } - cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i]; - - if (cpi->sf.adaptive_rd_thresh) - cpi->rd_thresh_freq_fact[bsize][i] = MAX_RD_THRESH_FREQ_FACT; - else - cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT; } } } else { @@ -236,12 +229,6 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { } else { cpi->rd_threshes[bsize][i] = INT_MAX; } - cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i]; - - if (cpi->sf.adaptive_rd_thresh) - cpi->rd_thresh_freq_fact[bsize][i] = MAX_RD_THRESH_FREQ_FACT; - else - cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT; } } } @@ -1043,6 +1030,10 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, for (mode = DC_PRED; mode <= TM_PRED; ++mode) { int64_t this_rd; int ratey = 0; + + if (!(cpi->sf.intra_y_mode_mask & (1 << mode))) + continue; + // Only do the oblique modes if the best so far is // one of the neighboring directional modes if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { @@ -1228,6 +1219,9 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t local_tx_cache[TX_MODES]; const int mis = xd->mode_info_stride; + if (!(cpi->sf.intra_y_mode_mask & (1 << mode))) + continue; + if (cpi->common.frame_type == KEY_FRAME) { const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis); const MB_PREDICTION_MODE L = xd->left_available ? @@ -1325,10 +1319,14 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, int this_rate_tokenonly, this_rate, s; int64_t this_distortion, this_sse; - MB_PREDICTION_MODE last_mode = bsize <= BLOCK_8X8 ? - TM_PRED : cpi->sf.last_chroma_intra_mode; + // int mode_mask = (bsize <= BLOCK_8X8) + // ? 
ALL_INTRA_MODES : cpi->sf.intra_uv_mode_mask; + + for (mode = DC_PRED; mode <= TM_PRED; mode++) { + // if (!(mode_mask & (1 << mode))) + if (!(cpi->sf.intra_uv_mode_mask & (1 << mode))) + continue; - for (mode = DC_PRED; mode <= last_mode; mode++) { x->e_mbd.mode_info_context->mbmi.uv_mode = mode; super_block_uvrd(&cpi->common, x, &this_rate_tokenonly, &this_distortion, &s, &this_sse, bsize, best_rd); @@ -1599,7 +1597,7 @@ typedef struct { int64_t sse; int segment_yrate; MB_PREDICTION_MODE modes[4]; - SEG_RDSTAT rdstat[4][VP9_INTER_MODES]; + SEG_RDSTAT rdstat[4][INTER_MODES]; int mvthresh; } BEST_SEG_INFO; @@ -1962,7 +1960,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, if (best_rd == INT64_MAX) { int iy, midx; for (iy = i + 1; iy < 4; ++iy) - for (midx = 0; midx < VP9_INTER_MODES; ++midx) + for (midx = 0; midx < INTER_MODES; ++midx) bsi->rdstat[iy][midx].brdcost = INT64_MAX; bsi->segment_rd = INT64_MAX; return; @@ -1986,7 +1984,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, if (this_segment_rd > bsi->segment_rd) { int iy, midx; for (iy = i + 1; iy < 4; ++iy) - for (midx = 0; midx < VP9_INTER_MODES; ++midx) + for (midx = 0; midx < INTER_MODES; ++midx) bsi->rdstat[iy][midx].brdcost = INT64_MAX; bsi->segment_rd = INT64_MAX; return; @@ -2189,7 +2187,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int_mv *second_ref_mv, int64_t comp_pred_diff[NB_PREDICTION_TYPES], int64_t tx_size_diff[TX_MODES], - int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1]) { + int64_t best_filter_diff[SWITCHABLE_FILTERS + 1]) { MACROBLOCKD *const xd = &x->e_mbd; // Take a snapshot of the coding context so it can be @@ -2212,7 +2210,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, // doesn't actually work this way memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff)); memcpy(ctx->best_filter_diff, best_filter_diff, - sizeof(*best_filter_diff) * (VP9_SWITCHABLE_FILTERS + 1)); + 
sizeof(*best_filter_diff) * (SWITCHABLE_FILTERS + 1)); } static void setup_pred_block(const MACROBLOCKD *xd, @@ -2259,10 +2257,10 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, scale[frame_type].x_offset_q4 = ROUND_POWER_OF_TWO(mi_col * MI_SIZE * scale[frame_type].x_scale_fp, - VP9_REF_SCALE_SHIFT) & 0xf; + REF_SCALE_SHIFT) & 0xf; scale[frame_type].y_offset_q4 = ROUND_POWER_OF_TWO(mi_row * MI_SIZE * scale[frame_type].y_scale_fp, - VP9_REF_SCALE_SHIFT) & 0xf; + REF_SCALE_SHIFT) & 0xf; // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this // use the UV scaling factors. @@ -2747,8 +2745,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int tmp_rate_sum = 0; int64_t tmp_dist_sum = 0; - cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX; - for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { + cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX; + for (i = 0; i < SWITCHABLE_FILTERS; ++i) { int j; int64_t rs_rd; mbmi->interp_filter = i; @@ -2759,8 +2757,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (i > 0 && intpel_mv) { cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum); - cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = - MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], + cpi->rd_filter_cache[SWITCHABLE_FILTERS] = + MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], cpi->rd_filter_cache[i] + rs_rd); rd = cpi->rd_filter_cache[i]; if (cm->mcomp_filter_type == SWITCHABLE) @@ -2787,8 +2785,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum); cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum); - cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = - MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], + cpi->rd_filter_cache[SWITCHABLE_FILTERS] = + MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], cpi->rd_filter_cache[i] + rs_rd); rd = cpi->rd_filter_cache[i]; if (cm->mcomp_filter_type 
== SWITCHABLE) @@ -3080,8 +3078,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_tx_diff[TX_MODES]; int64_t best_pred_diff[NB_PREDICTION_TYPES]; int64_t best_pred_rd[NB_PREDICTION_TYPES]; - int64_t best_filter_rd[VP9_SWITCHABLE_FILTERS + 1]; - int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1]; + int64_t best_filter_rd[SWITCHABLE_FILTERS + 1]; + int64_t best_filter_diff[SWITCHABLE_FILTERS + 1]; MB_MODE_INFO best_mbmode = { 0 }; int j; int mode_index, best_mode_index = 0; @@ -3132,7 +3130,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, best_pred_rd[i] = INT64_MAX; for (i = 0; i < TX_MODES; i++) best_tx_rd[i] = INT64_MAX; - for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) + for (i = 0; i <= SWITCHABLE_FILTERS; i++) best_filter_rd[i] = INT64_MAX; for (i = 0; i < TX_SIZES; i++) rate_uv_intra[i] = INT_MAX; @@ -3216,7 +3214,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Test best rd so far against threshold for trying this mode. 
if ((best_rd < ((cpi->rd_threshes[bsize][mode_index] * - cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 4)) || + cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 5)) || cpi->rd_threshes[bsize][mode_index] == INT_MAX) continue; @@ -3452,7 +3450,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, union b_mode_info tmp_best_bmodes[16]; MB_MODE_INFO tmp_best_mbmode; PARTITION_INFO tmp_best_partition; - BEST_SEG_INFO bsi[VP9_SWITCHABLE_FILTERS]; + BEST_SEG_INFO bsi[SWITCHABLE_FILTERS]; int pred_exists = 0; int uv_skippable; if (is_comp_pred) { @@ -3472,7 +3470,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, cpi->rd_threshes[bsize][THR_NEWG] : this_rd_thresh; xd->mode_info_context->mbmi.txfm_size = TX_4X4; - cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX; + cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX; if (cm->mcomp_filter_type != BILINEAR) { tmp_best_filter = EIGHTTAP; if (x->source_variance < @@ -3481,7 +3479,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, vp9_zero(cpi->rd_filter_cache); } else { for (switchable_filter_index = 0; - switchable_filter_index < VP9_SWITCHABLE_FILTERS; + switchable_filter_index < SWITCHABLE_FILTERS; ++switchable_filter_index) { int newbest, rs; int64_t rs_rd; @@ -3503,8 +3501,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, cpi->rd_filter_cache[switchable_filter_index] = tmp_rd; rs = get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); - cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = - MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], + cpi->rd_filter_cache[SWITCHABLE_FILTERS] = + MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd); if (cm->mcomp_filter_type == SWITCHABLE) tmp_rd += rs_rd; @@ -3721,7 +3719,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (!disable_skip && ref_frame == INTRA_FRAME) { for (i = 0; i < NB_PREDICTION_TYPES; ++i) best_pred_rd[i] = MIN(best_pred_rd[i], this_rd); - for (i 
= 0; i <= VP9_SWITCHABLE_FILTERS; i++) + for (i = 0; i <= SWITCHABLE_FILTERS; i++) best_filter_rd[i] = MIN(best_filter_rd[i], this_rd); } @@ -3777,29 +3775,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } } -#if 0 - // Testing this mode gave rise to an improvement in best error score. - // Lower threshold a bit for next time - cpi->rd_thresh_mult[mode_index] = - (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? - cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; - cpi->rd_threshes[mode_index] = - (cpi->rd_baseline_thresh[mode_index] >> 7) - * cpi->rd_thresh_mult[mode_index]; -#endif - } else { - // If the mode did not help improve the best error case then - // raise the threshold for testing that mode next time around. -#if 0 - cpi->rd_thresh_mult[mode_index] += 4; - - if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) - cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; - - cpi->rd_threshes[mode_index] = - (cpi->rd_baseline_thresh[mode_index] >> 7) - * cpi->rd_thresh_mult[mode_index]; -#endif } /* keep record of best compound/single-only prediction */ @@ -3832,8 +3807,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME && cm->mcomp_filter_type != BILINEAR) { int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ? - VP9_SWITCHABLE_FILTERS : cm->mcomp_filter_type]; - for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) { + SWITCHABLE_FILTERS : cm->mcomp_filter_type]; + for (i = 0; i <= SWITCHABLE_FILTERS; i++) { int64_t adj_rd; // In cases of poor prediction, filter_cache[] can contain really big // values, which actually are bigger than this_rd itself. 
This can @@ -3942,33 +3917,19 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (cpi->sf.adaptive_rd_thresh) { for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { if (mode_index == best_mode_index) { - cpi->rd_thresh_freq_fact[bsize][mode_index] = BASE_RD_THRESH_FREQ_FACT; + cpi->rd_thresh_freq_fact[bsize][mode_index] -= + (cpi->rd_thresh_freq_fact[bsize][mode_index] >> 3); } else { - cpi->rd_thresh_freq_fact[bsize][mode_index] += MAX_RD_THRESH_FREQ_INC; + cpi->rd_thresh_freq_fact[bsize][mode_index] += RD_THRESH_INC; if (cpi->rd_thresh_freq_fact[bsize][mode_index] > - (cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FREQ_FACT)) { + (cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FACT)) { cpi->rd_thresh_freq_fact[bsize][mode_index] = - cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FREQ_FACT; + cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FACT; } } } } - // TODO(rbultje) integrate with RD trd_thresh_freq_facthresholding -#if 0 - // Reduce the activation RD thresholds for the best choice mode - if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && - (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) { - int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2); - - cpi->rd_thresh_mult[best_mode_index] = - (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ? 
- cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT; - cpi->rd_threshes[best_mode_index] = - (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index]; - } -#endif - // macroblock modes *mbmi = best_mbmode; x->skip |= best_skip2; @@ -4003,14 +3964,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } if (!x->skip) { - for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) { + for (i = 0; i <= SWITCHABLE_FILTERS; i++) { if (best_filter_rd[i] == INT64_MAX) best_filter_diff[i] = 0; else best_filter_diff[i] = best_rd - best_filter_rd[i]; } if (cm->mcomp_filter_type == SWITCHABLE) - assert(best_filter_diff[VP9_SWITCHABLE_FILTERS] == 0); + assert(best_filter_diff[SWITCHABLE_FILTERS] == 0); } else { vpx_memset(best_filter_diff, 0, sizeof(best_filter_diff)); } diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c index 325925cbd..155ba8a3e 100644 --- a/vp9/encoder/vp9_variance_c.c +++ b/vp9/encoder/vp9_variance_c.c @@ -50,8 +50,8 @@ unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr, uint8_t temp2[68 * 64]; const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 64, hfilter); @@ -73,8 +73,8 @@ unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 64, hfilter); @@ -107,8 +107,8 @@ unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, uint8_t temp2[68 
* 64]; const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 32, hfilter); @@ -130,8 +130,8 @@ unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); // compound pred buffer const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 32, hfilter); @@ -164,8 +164,8 @@ unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, uint8_t temp2[36 * 32]; const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 32, hfilter); @@ -187,8 +187,8 @@ unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); // compound pred buffer const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 32, hfilter); @@ -221,8 +221,8 @@ unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr, uint8_t temp2[36 * 32]; const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = 
BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 16, hfilter); @@ -244,8 +244,8 @@ unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); // compound pred buffer const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 16, hfilter); @@ -442,8 +442,8 @@ unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr, const int16_t *hfilter, *vfilter; uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); // First filter 1d Horizontal var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, @@ -468,8 +468,8 @@ unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); // compound pred buffer uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); // First filter 1d Horizontal var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, @@ -492,8 +492,8 @@ unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr, uint8_t temp2[20 * 16]; const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 8, hfilter); 
@@ -515,8 +515,8 @@ unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); // compound pred buffer const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 8, hfilter); @@ -536,8 +536,8 @@ unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, uint8_t temp2[20 * 16]; const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 16, hfilter); @@ -559,8 +559,8 @@ unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); // compound pred buffer const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 16, hfilter); @@ -581,8 +581,8 @@ unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, uint8_t temp2[68 * 64]; const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 64, hfilter); @@ -604,8 +604,8 @@ unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer const int16_t *hfilter, *vfilter; - 
hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 64, hfilter); @@ -625,8 +625,8 @@ unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, uint8_t temp2[36 * 32]; const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 32, hfilter); @@ -648,8 +648,8 @@ unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); // compound pred buffer const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 32, hfilter); @@ -789,8 +789,8 @@ unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr, uint8_t temp2[20 * 16]; const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 16, hfilter); @@ -812,8 +812,8 @@ unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); // compound pred buffer const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 16, hfilter); @@ -833,8 +833,8 @@ unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, uint8_t temp2[20 * 16]; const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 8, hfilter); @@ -856,8 +856,8 @@ unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); // compound pred buffer const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 8, hfilter); @@ -877,8 +877,8 @@ unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr, uint8_t temp2[20 * 16]; const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 8, hfilter); @@ -900,8 +900,8 @@ unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 8, hfilter); @@ -923,8 +923,8 @@ unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr, uint8_t temp2[20 * 16]; const int16_t 
*hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 4, hfilter); @@ -946,8 +946,8 @@ unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer const int16_t *hfilter, *vfilter; - hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = BILINEAR_FILTERS_2TAP(xoffset); + vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 4, hfilter); diff --git a/vp9/encoder/x86/vp9_subpel_variance.asm b/vp9/encoder/x86/vp9_subpel_variance.asm index 19e2feb57..533456b77 100644 --- a/vp9/encoder/x86/vp9_subpel_variance.asm +++ b/vp9/encoder/x86/vp9_subpel_variance.asm @@ -270,8 +270,13 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \ %if mmsize == 16 movhps m2, [srcq+src_strideq*2] %else ; mmsize == 8 +%if %1 == 4 + movh m1, [srcq+src_strideq*2] + punpckldq m2, m1 +%else punpckldq m2, [srcq+src_strideq*2] %endif +%endif movh m1, [dstq] %if mmsize == 16 movlhps m0, m2 @@ -542,9 +547,16 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \ movhps m2, [srcq+src_strideq] movhps m3, [srcq+src_strideq+1] %else +%if %1 == 4 + movh m1, [srcq+src_strideq] + punpckldq m2, m1 + movh m1, [srcq+src_strideq+1] + punpckldq m3, m1 +%else punpckldq m2, [srcq+src_strideq] punpckldq m3, [srcq+src_strideq+1] %endif +%endif pavgb m2, m3 %if mmsize == 16 movlhps m0, m2 diff --git a/vp9/encoder/x86/vp9_variance_impl_mmx.asm b/vp9/encoder/x86/vp9_variance_impl_mmx.asm index d3dbefed8..3501cf1fd 100644 --- a/vp9/encoder/x86/vp9_variance_impl_mmx.asm +++ b/vp9/encoder/x86/vp9_variance_impl_mmx.asm @@ -342,8 +342,8 @@ 
sym(vp9_get4x4var_mmx): movsxd rdx, dword ptr arg(3) ;[recon_stride] ; Row 1 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm1, [rbx] ; Copy eight bytes to mm1 + movd mm0, [rax] ; Copy 4 bytes to mm0 + movd mm1, [rbx] ; Copy 4 bytes to mm1 punpcklbw mm0, mm6 ; unpack to higher prrcision punpcklbw mm1, mm6 psubsw mm0, mm1 ; A-B (low order) to MM0 @@ -351,12 +351,12 @@ sym(vp9_get4x4var_mmx): pmaddwd mm0, mm0 ; square and accumulate add rbx,rdx ; Inc pointer into ref data add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 + movd mm1, [rbx] ; Copy 4 bytes to mm1 paddd mm7, mm0 ; accumulate in mm7 ; Row 2 - movq mm0, [rax] ; Copy eight bytes to mm0 + movd mm0, [rax] ; Copy 4 bytes to mm0 punpcklbw mm0, mm6 ; unpack to higher prrcision punpcklbw mm1, mm6 psubsw mm0, mm1 ; A-B (low order) to MM0 @@ -365,11 +365,11 @@ sym(vp9_get4x4var_mmx): pmaddwd mm0, mm0 ; square and accumulate add rbx,rdx ; Inc pointer into ref data add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 + movd mm1, [rbx] ; Copy 4 bytes to mm1 paddd mm7, mm0 ; accumulate in mm7 ; Row 3 - movq mm0, [rax] ; Copy eight bytes to mm0 + movd mm0, [rax] ; Copy 4 bytes to mm0 punpcklbw mm0, mm6 ; unpack to higher prrcision punpcklbw mm1, mm6 psubsw mm0, mm1 ; A-B (low order) to MM0 @@ -378,11 +378,11 @@ sym(vp9_get4x4var_mmx): pmaddwd mm0, mm0 ; square and accumulate add rbx,rdx ; Inc pointer into ref data add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 + movd mm1, [rbx] ; Copy 4 bytes to mm1 paddd mm7, mm0 ; accumulate in mm7 ; Row 4 - movq mm0, [rax] ; Copy eight bytes to mm0 + movd mm0, [rax] ; Copy 4 bytes to mm0 punpcklbw mm0, mm6 ; unpack to higher prrcision punpcklbw mm1, mm6 diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index c6f398101..d5692efb1 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -96,6 +96,7 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += 
common/arm/neon/vp9_convolve8_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_avg_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_dc_only_idct_add_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct4x4_1_add_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct4x4_add_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct8x8_add_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct16x16_add_neon$(ASM) |