diff options
Diffstat (limited to 'vp9')
26 files changed, 431 insertions, 303 deletions
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.asm b/vp9/common/arm/neon/vp9_reconintra_neon.asm index ba1aebf31..baa943b9c 100644 --- a/vp9/common/arm/neon/vp9_reconintra_neon.asm +++ b/vp9/common/arm/neon/vp9_reconintra_neon.asm @@ -347,60 +347,62 @@ loop_h ldrb r12, [r12] vdup.u8 d0, r12 + ; preload 8 left + vld1.8 d30, [r3] + ; Load above 8 pixels vld1.64 {d2}, [r2] + vmovl.u8 q10, d30 + ; Compute above - ytop_left vsubl.u8 q3, d2, d0 ; Load left row by row and compute left + (above - ytop_left) ; 1st row and 2nd row - ldrb r12, [r3], #1 - ldrb r2, [r3], #1 - vdup.u16 q1, r12 - vdup.u16 q2, r2 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqshrun.s16 d0, q1, #0 - vqshrun.s16 d1, q2, #0 - vst1.64 {d0}, [r0], r1 - vst1.64 {d1}, [r0], r1 + vdup.16 q0, d20[0] + vdup.16 q1, d20[1] + vadd.s16 q0, q3, q0 + vadd.s16 q1, q3, q1 ; 3rd row and 4th row - ldrb r12, [r3], #1 - ldrb r2, [r3], #1 - vdup.u16 q1, r12 - vdup.u16 q2, r2 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqshrun.s16 d0, q1, #0 - vqshrun.s16 d1, q2, #0 + vdup.16 q8, d20[2] + vdup.16 q9, d20[3] + vadd.s16 q8, q3, q8 + vadd.s16 q9, q3, q9 + + vqshrun.s16 d0, q0, #0 + vqshrun.s16 d1, q1, #0 + vqshrun.s16 d2, q8, #0 + vqshrun.s16 d3, q9, #0 + vst1.64 {d0}, [r0], r1 vst1.64 {d1}, [r0], r1 + vst1.64 {d2}, [r0], r1 + vst1.64 {d3}, [r0], r1 ; 5th row and 6th row - ldrb r12, [r3], #1 - ldrb r2, [r3], #1 - vdup.u16 q1, r12 - vdup.u16 q2, r2 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqshrun.s16 d0, q1, #0 - vqshrun.s16 d1, q2, #0 - vst1.64 {d0}, [r0], r1 - vst1.64 {d1}, [r0], r1 + vdup.16 q0, d21[0] + vdup.16 q1, d21[1] + vadd.s16 q0, q3, q0 + vadd.s16 q1, q3, q1 + + ; 7th row and 8th row + vdup.16 q8, d21[2] + vdup.16 q9, d21[3] + vadd.s16 q8, q3, q8 + vadd.s16 q9, q3, q9 + + vqshrun.s16 d0, q0, #0 + vqshrun.s16 d1, q1, #0 + vqshrun.s16 d2, q8, #0 + vqshrun.s16 d3, q9, #0 - ; 7rd row and 8th row - ldrb r12, [r3], #1 - ldrb r2, [r3], #1 - vdup.u16 q1, r12 - vdup.u16 q2, r2 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqshrun.s16 d0, q1, #0 - vqshrun.s16 d1, q2, #0 vst1.64 {d0}, [r0], r1 vst1.64 {d1}, [r0], r1 + vst1.64 {d2}, [r0], r1 + vst1.64 {d3}, [r0], r1 + bx lr ENDP ; |vp9_tm_predictor_8x8_neon| diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index ad35c7ae0..9e16d8f78 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -135,7 +135,7 @@ typedef struct { // Flags used for prediction status of various bit-stream signals unsigned char seg_id_predicted; - INTERPOLATION_TYPE interp_filter; + INTERP_FILTER interp_filter; BLOCK_SIZE sb_type; } MB_MODE_INFO; @@ -252,7 +252,7 @@ typedef struct macroblockd { /* Inverse transform function pointers. */ void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob); - struct subpix_fn_table subpix; + const interp_kernel *interp_kernel; int corrupted; diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c index 6edf7eaca..b105a57bc 100644 --- a/vp9/common/vp9_convolve.c +++ b/vp9/common/vp9_convolve.c @@ -20,7 +20,7 @@ static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, - const subpel_kernel *x_filters, + const interp_kernel *x_filters, int x0_q4, int x_step_q4, int w, int h) { int x, y; src -= SUBPEL_TAPS / 2 - 1; @@ -42,7 +42,7 @@ static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, - const subpel_kernel *x_filters, + const interp_kernel *x_filters, int x0_q4, int x_step_q4, int w, int h) { int x, y; src -= SUBPEL_TAPS / 2 - 1; @@ -65,7 +65,7 @@ static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride, static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, - const subpel_kernel *y_filters, + const interp_kernel *y_filters, int y0_q4, int y_step_q4, int w, int h) { int x, y; src -= src_stride * (SUBPEL_TAPS / 2 - 1); @@ -88,7 +88,7 @@ static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride, static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, - const subpel_kernel *y_filters, + const interp_kernel *y_filters, int y0_q4, int y_step_q4, int w, int h) { int x, y; src -= src_stride * (SUBPEL_TAPS / 2 - 1); @@ -112,9 +112,9 @@ static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride, static void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, - const subpel_kernel *const x_filters, + const interp_kernel *const x_filters, int x0_q4, int x_step_q4, - const subpel_kernel *const y_filters, + const interp_kernel *const y_filters, int y0_q4, int y_step_q4, int w, int h) { // Fixed size intermediate buffer places limits on parameters. @@ -138,14 +138,14 @@ static void convolve(const uint8_t *src, ptrdiff_t src_stride, y_filters, y0_q4, y_step_q4, w, h); } -static const subpel_kernel *get_filter_base(const int16_t *filter) { +static const interp_kernel *get_filter_base(const int16_t *filter) { // NOTE: This assumes that the filter table is 256-byte aligned. // TODO(agrange) Modify to make independent of table alignment. - return (const subpel_kernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); + return (const interp_kernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); } -static int get_filter_offset(const int16_t *f, const subpel_kernel *base) { - return (const subpel_kernel *)(intptr_t)f - base; +static int get_filter_offset(const int16_t *f, const interp_kernel *base) { + return (const interp_kernel *)(intptr_t)f - base; } void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, @@ -153,7 +153,7 @@ void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h) { - const subpel_kernel *const filters_x = get_filter_base(filter_x); + const interp_kernel *const filters_x = get_filter_base(filter_x); const int x0_q4 = get_filter_offset(filter_x, filters_x); convolve_horiz(src, src_stride, dst, dst_stride, filters_x, @@ -165,7 +165,7 @@ void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h) { - const subpel_kernel *const filters_x = get_filter_base(filter_x); + const interp_kernel *const filters_x = get_filter_base(filter_x); const int x0_q4 = get_filter_offset(filter_x, filters_x); convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, @@ -177,7 +177,7 @@ void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h) { - const subpel_kernel *const filters_y = get_filter_base(filter_y); + const interp_kernel *const filters_y = get_filter_base(filter_y); const int y0_q4 = get_filter_offset(filter_y, filters_y); convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4, w, h); @@ -188,7 +188,7 @@ void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h) { - const subpel_kernel *const filters_y = get_filter_base(filter_y); + const interp_kernel *const filters_y = get_filter_base(filter_y); const int y0_q4 = get_filter_offset(filter_y, filters_y); convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4, w, h); @@ -199,10 +199,10 @@ void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h) { - const subpel_kernel *const filters_x = get_filter_base(filter_x); + const interp_kernel *const filters_x = get_filter_base(filter_x); const int x0_q4 = get_filter_offset(filter_x, filters_x); - const subpel_kernel *const filters_y = get_filter_base(filter_y); + const interp_kernel *const filters_y = get_filter_base(filter_y); const int y0_q4 = get_filter_offset(filter_y, filters_y); convolve(src, src_stride, dst, dst_stride, diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index 83281b2ea..77b8de046 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -385,7 +385,7 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) { adapt_probs(vp9_partition_tree, pre_fc->partition_prob[i], counts->partition[i], fc->partition_prob[i]); - if (cm->mcomp_filter_type == SWITCHABLE) { + if (cm->interp_filter == SWITCHABLE) { for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) adapt_probs(vp9_switchable_interp_tree, pre_fc->switchable_interp_prob[i], counts->switchable_interp[i], fc->switchable_interp_prob[i]); diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h index edb03961c..deec3f652 100644 --- a/vp9/common/vp9_entropymode.h +++ b/vp9/common/vp9_entropymode.h @@ -21,8 +21,6 @@ extern "C" { #define SWITCHABLE_FILTERS 3 // number of switchable filters #define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1) -// #define MODE_STATS - struct VP9Common; struct tx_probs { diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c index 79ace147c..9f400e9ad 100644 --- a/vp9/common/vp9_filter.c +++ b/vp9/common/vp9_filter.c @@ -14,7 +14,7 @@ #include "vp9/common/vp9_filter.h" -DECLARE_ALIGNED(256, const subpel_kernel, +DECLARE_ALIGNED(256, const interp_kernel, vp9_bilinear_filters[SUBPEL_SHIFTS]) = { { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 120, 8, 0, 0, 0 }, @@ -35,7 +35,7 @@ DECLARE_ALIGNED(256, const subpel_kernel, }; // Lagrangian interpolation filter -DECLARE_ALIGNED(256, const subpel_kernel, +DECLARE_ALIGNED(256, const interp_kernel, vp9_sub_pel_filters_8[SUBPEL_SHIFTS]) = { { 0, 0, 0, 128, 0, 0, 0, 0}, { 0, 1, -5, 126, 8, -3, 1, 0}, @@ -56,7 +56,7 @@ DECLARE_ALIGNED(256, const subpel_kernel, }; // DCT based filter -DECLARE_ALIGNED(256, const subpel_kernel, +DECLARE_ALIGNED(256, const interp_kernel, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS]) = { {0, 0, 0, 128, 0, 0, 0, 0}, {-1, 3, -7, 127, 8, -3, 1, 0}, @@ -77,7 +77,7 @@ DECLARE_ALIGNED(256, const subpel_kernel, }; // freqmultiplier = 0.5 -DECLARE_ALIGNED(256, const subpel_kernel, +DECLARE_ALIGNED(256, const interp_kernel, vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS]) = { { 0, 0, 0, 128, 0, 0, 0, 0}, {-3, -1, 32, 64, 38, 1, -3, 0}, @@ -98,14 +98,14 @@ DECLARE_ALIGNED(256, const subpel_kernel, }; -static const subpel_kernel* vp9_filter_kernels[4] = { +static const interp_kernel* vp9_filter_kernels[4] = { vp9_sub_pel_filters_8, vp9_sub_pel_filters_8lp, vp9_sub_pel_filters_8s, vp9_bilinear_filters }; -const subpel_kernel *vp9_get_filter_kernel(INTERPOLATION_TYPE type) { - return vp9_filter_kernels[type]; +const interp_kernel *vp9_get_interp_kernel(INTERP_FILTER filter) { + return vp9_filter_kernels[filter]; } diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h index b02aaed9e..b611e304c 100644 --- a/vp9/common/vp9_filter.h +++ b/vp9/common/vp9_filter.h @@ -31,21 +31,16 @@ typedef enum { EIGHTTAP_SHARP = 2, BILINEAR = 3, SWITCHABLE = 4 /* should be the last one */ -} INTERPOLATION_TYPE; +} INTERP_FILTER; -typedef int16_t subpel_kernel[SUBPEL_TAPS]; +typedef int16_t interp_kernel[SUBPEL_TAPS]; -struct subpix_fn_table { - const subpel_kernel *filter_x; - const subpel_kernel *filter_y; -}; +const interp_kernel *vp9_get_interp_kernel(INTERP_FILTER filter); -const subpel_kernel *vp9_get_filter_kernel(INTERPOLATION_TYPE type); - -extern const subpel_kernel vp9_bilinear_filters[SUBPEL_SHIFTS]; -extern const subpel_kernel vp9_sub_pel_filters_8[SUBPEL_SHIFTS]; -extern const subpel_kernel vp9_sub_pel_filters_8s[SUBPEL_SHIFTS]; -extern const subpel_kernel vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS]; +extern const interp_kernel vp9_bilinear_filters[SUBPEL_SHIFTS]; +extern const interp_kernel vp9_sub_pel_filters_8[SUBPEL_SHIFTS]; +extern const interp_kernel vp9_sub_pel_filters_8s[SUBPEL_SHIFTS]; +extern const interp_kernel vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS]; // The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear // filter kernel as a 2 tap filter. diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index 39fa7b1bb..894134a0f 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -184,7 +184,7 @@ typedef struct VP9Common { // Persistent mb segment id map used in prediction. unsigned char *last_frame_seg_map; - INTERPOLATION_TYPE mcomp_filter_type; + INTERP_FILTER interp_filter; loop_filter_info_n lf_info; diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index b5a9248c3..d554cc0ed 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -69,13 +69,11 @@ static void inter_predictor(const uint8_t *src, int src_stride, const int subpel_y, const struct scale_factors *sf, int w, int h, int ref, - const struct subpix_fn_table *subpix, + const interp_kernel *kernel, int xs, int ys) { sf->predict[subpel_x != 0][subpel_y != 0][ref]( src, src_stride, dst, dst_stride, - subpix->filter_x[subpel_x], xs, - subpix->filter_y[subpel_y], ys, - w, h); + kernel[subpel_x], xs, kernel[subpel_y], ys, w, h); } void vp9_build_inter_predictor(const uint8_t *src, int src_stride, @@ -83,7 +81,7 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride, const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref, - const struct subpix_fn_table *subpix, + const interp_kernel *kernel, enum mv_precision precision, int x, int y) { const int is_q4 = precision == MV_PRECISION_Q4; @@ -96,7 +94,7 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride, src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, - sf, w, h, ref, subpix, sf->x_step_q4, sf->y_step_q4); + sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4); } static INLINE int round_mv_comp_q4(int value) { @@ -198,7 +196,8 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, + (scaled_mv.col >> SUBPEL_BITS); inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, &xd->subpix, xs, ys); + subpel_x, subpel_y, sf, w, h, ref, xd->interp_kernel, + xs, ys); } } @@ -367,7 +366,7 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, } inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, &xd->subpix, xs, ys); + subpel_y, sf, w, h, ref, xd->interp_kernel, xs, ys); } } diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index 58f4b417e..10e16580e 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -18,7 +18,6 @@ extern "C" { #endif -struct subpix_fn_table; void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize); @@ -36,7 +35,7 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride, const MV *mv_q3, const struct scale_factors *sf, int w, int h, int do_avg, - const struct subpix_fn_table *subpix, + const interp_kernel *kernel, enum mv_precision precision, int x, int y); diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index e6f1dfddf..1d9be5322 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -421,8 +421,7 @@ static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd, if (has_second_ref(mbmi)) set_ref(cm, xd, 1, mi_row, mi_col); - xd->subpix.filter_x = xd->subpix.filter_y = - vp9_get_filter_kernel(mbmi->interp_filter); + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); // Prediction vp9_dec_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); @@ -655,14 +654,13 @@ static void setup_quantization(VP9_COMMON *const cm, MACROBLOCKD *const xd, xd->itxm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add; } -static INTERPOLATION_TYPE read_interp_filter_type( - struct vp9_read_bit_buffer *rb) { - const INTERPOLATION_TYPE literal_to_type[] = { EIGHTTAP_SMOOTH, - EIGHTTAP, - EIGHTTAP_SHARP, - BILINEAR }; +static INTERP_FILTER read_interp_filter(struct vp9_read_bit_buffer *rb) { + const INTERP_FILTER literal_to_filter[] = { EIGHTTAP_SMOOTH, + EIGHTTAP, + EIGHTTAP_SHARP, + BILINEAR }; return vp9_rb_read_bit(rb) ? SWITCHABLE - : literal_to_type[vp9_rb_read_literal(rb, 2)]; + : literal_to_filter[vp9_rb_read_literal(rb, 2)]; } static void read_frame_size(struct vp9_read_bit_buffer *rb, @@ -1186,7 +1184,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, setup_frame_size_with_refs(pbi, rb); cm->allow_high_precision_mv = vp9_rb_read_bit(rb); - cm->mcomp_filter_type = read_interp_filter_type(rb); + cm->interp_filter = read_interp_filter(rb); for (i = 0; i < REFS_PER_FRAME; ++i) { RefBuffer *const ref_buf = &cm->frame_refs[i]; @@ -1256,7 +1254,7 @@ static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data, read_inter_mode_probs(fc, &r); - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) read_switchable_interp_probs(fc, &r); for (i = 0; i < INTRA_INTER_CONTEXTS; i++) diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 2eb99ea15..d7cd635bd 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -313,7 +313,7 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, } -static INLINE INTERPOLATION_TYPE read_switchable_filter_type( +static INLINE INTERP_FILTER read_switchable_interp_filter( VP9_COMMON *const cm, MACROBLOCKD *const xd, vp9_reader *r) { const int ctx = vp9_get_pred_context_switchable_interp(xd); const int type = vp9_read_tree(r, vp9_switchable_interp_tree, @@ -459,9 +459,9 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm, } } - mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE) - ? read_switchable_filter_type(cm, xd, r) - : cm->mcomp_filter_type; + mbmi->interp_filter = (cm->interp_filter == SWITCHABLE) + ? read_switchable_interp_filter(cm, xd, r) + : cm->interp_filter; if (bsize < BLOCK_8X8) { const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2 diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 2ab4c7907..8db517a9a 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -135,11 +135,6 @@ static void update_switchable_interp_probs(VP9_COMP *cpi, vp9_writer *w) { prob_diff_update(vp9_switchable_interp_tree, cm->fc.switchable_interp_prob[j], cm->counts.switchable_interp[j], SWITCHABLE_FILTERS, w); - -#ifdef MODE_STATS - if (!cpi->dummy_packing) - update_switchable_interp_stats(cm); -#endif } static void pack_mb_tokens(vp9_writer* const w, @@ -330,13 +325,13 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) { } } - if (cm->mcomp_filter_type == SWITCHABLE) { + if (cm->interp_filter == SWITCHABLE) { const int ctx = vp9_get_pred_context_switchable_interp(xd); vp9_write_token(bc, vp9_switchable_interp_tree, cm->fc.switchable_interp_prob[ctx], &switchable_interp_encodings[mi->interp_filter]); } else { - assert(mi->interp_filter == cm->mcomp_filter_type); + assert(mi->interp_filter == cm->interp_filter); } if (bsize < BLOCK_8X8) { @@ -912,24 +907,20 @@ static void encode_txfm_probs(VP9_COMP *cpi, vp9_writer *w) { vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p32x32[i][j], ct_32x32p[j]); } -#ifdef MODE_STATS - if (!cpi->dummy_packing) - update_tx_count_stats(cm); -#endif } } -static void write_interp_filter_type(INTERPOLATION_TYPE type, - struct vp9_write_bit_buffer *wb) { - const int type_to_literal[] = { 1, 0, 2, 3 }; +static void write_interp_filter(INTERP_FILTER filter, + struct vp9_write_bit_buffer *wb) { + const int filter_to_literal[] = { 1, 0, 2, 3 }; - vp9_wb_write_bit(wb, type == SWITCHABLE); - if (type != SWITCHABLE) - vp9_wb_write_literal(wb, type_to_literal[type], 2); + vp9_wb_write_bit(wb, filter == SWITCHABLE); + if (filter != SWITCHABLE) + vp9_wb_write_literal(wb, filter_to_literal[filter], 2); } -static void fix_mcomp_filter_type(VP9_COMMON *cm) { - if (cm->mcomp_filter_type == SWITCHABLE) { +static void fix_interp_filter(VP9_COMMON *cm) { + if (cm->interp_filter == SWITCHABLE) { // Check to see if only one of the filters is actually used int count[SWITCHABLE_FILTERS]; int i, j, c = 0; @@ -943,7 +934,7 @@ static void fix_mcomp_filter_type(VP9_COMMON *cm) { // Only one filter is used. So set the filter at frame level for (i = 0; i < SWITCHABLE_FILTERS; ++i) { if (count[i]) { - cm->mcomp_filter_type = i; + cm->interp_filter = i; break; } } @@ -1171,8 +1162,8 @@ static void write_uncompressed_header(VP9_COMP *cpi, vp9_wb_write_bit(wb, cm->allow_high_precision_mv); - fix_mcomp_filter_type(cm); - write_interp_filter_type(cm->mcomp_filter_type, wb); + fix_interp_filter(cm); + write_interp_filter(cm->interp_filter, wb); } } @@ -1223,7 +1214,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { vp9_zero(cm->counts.inter_mode); - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) update_switchable_interp_probs(cpi, &header_bc); for (i = 0; i < INTRA_INTER_CONTEXTS; i++) diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 29e68b1d7..716ad6127 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -62,7 +62,7 @@ typedef struct { // motion vector cache for adaptive motion search control in partition // search loop int_mv pred_mv[MAX_REF_FRAMES]; - int pred_filter_type; + INTERP_FILTER pred_interp_filter; // Bit flag for each mode whether it has high error in comparison to others. unsigned int modes_with_high_error; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 9d02c8f95..317ac9815 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -505,7 +505,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, vp9_update_mv_count(cpi, x, best_mv); } - if (cm->mcomp_filter_type == SWITCHABLE && is_inter_mode(mbmi->mode)) { + if (cm->interp_filter == SWITCHABLE && is_inter_mode(mbmi->mode)) { const int ctx = vp9_get_pred_context_switchable_interp(xd); ++cm->counts.switchable_interp[ctx][mbmi->interp_filter]; } @@ -1788,9 +1788,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, *get_sb_index(x, subsize) = i; if (cpi->sf.adaptive_motion_search) load_pred_mv(x, get_block_context(x, bsize)); - if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 && + if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) - get_block_context(x, subsize)->pred_filter_type = + get_block_context(x, subsize)->pred_interp_filter = get_block_context(x, bsize)->mic.mbmi.interp_filter; rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, subsize, &this_rate, &this_dist, i != 3, best_rd - sum_rd); @@ -1839,9 +1839,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, *get_sb_index(x, subsize) = 0; if (cpi->sf.adaptive_motion_search) load_pred_mv(x, get_block_context(x, bsize)); - if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 && + if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) - get_block_context(x, subsize)->pred_filter_type = + get_block_context(x, subsize)->pred_interp_filter = get_block_context(x, bsize)->mic.mbmi.interp_filter; rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, get_block_context(x, subsize), best_rd); @@ -1854,9 +1854,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, *get_sb_index(x, subsize) = 1; if (cpi->sf.adaptive_motion_search) load_pred_mv(x, get_block_context(x, bsize)); - if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 && + if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) - get_block_context(x, subsize)->pred_filter_type = + get_block_context(x, subsize)->pred_interp_filter = get_block_context(x, bsize)->mic.mbmi.interp_filter; rd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rate, &this_dist, subsize, get_block_context(x, subsize), @@ -1892,9 +1892,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, *get_sb_index(x, subsize) = 0; if (cpi->sf.adaptive_motion_search) load_pred_mv(x, get_block_context(x, bsize)); - if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 && + if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) - get_block_context(x, subsize)->pred_filter_type = + get_block_context(x, subsize)->pred_interp_filter = get_block_context(x, bsize)->mic.mbmi.interp_filter; rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, get_block_context(x, subsize), best_rd); @@ -1906,9 +1906,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, *get_sb_index(x, subsize) = 1; if (cpi->sf.adaptive_motion_search) load_pred_mv(x, get_block_context(x, bsize)); - if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 && + if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) - get_block_context(x, subsize)->pred_filter_type = + get_block_context(x, subsize)->pred_interp_filter = get_block_context(x, bsize)->mic.mbmi.interp_filter; rd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rate, &this_dist, subsize, get_block_context(x, subsize), @@ -2048,7 +2048,7 @@ static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile, for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) for (x->mb_index = 0; x->mb_index < 4; ++x->mb_index) for (x->b_index = 0; x->b_index < 16 / num_4x4_blk; ++x->b_index) - get_block_context(x, i)->pred_filter_type = SWITCHABLE; + get_block_context(x, i)->pred_interp_filter = SWITCHABLE; } vp9_zero(cpi->mb.pred_mv); @@ -2460,7 +2460,7 @@ void vp9_encode_frame(VP9_COMP *cpi) { if (cpi->sf.RD) { int i; REFERENCE_MODE reference_mode; - INTERPOLATION_TYPE filter_type; + INTERP_FILTER interp_filter; /* * This code does a single RD pass over the whole frame assuming * either compound, single or hybrid prediction as per whatever has @@ -2496,14 +2496,14 @@ void vp9_encode_frame(VP9_COMP *cpi) { filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP] && filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP_SHARP] && filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[SWITCHABLE - 1]) { - filter_type = EIGHTTAP_SMOOTH; + interp_filter = EIGHTTAP_SMOOTH; } else if (filter_thresh[EIGHTTAP_SHARP] > filter_thresh[EIGHTTAP] && filter_thresh[EIGHTTAP_SHARP] > filter_thresh[SWITCHABLE - 1]) { - filter_type = EIGHTTAP_SHARP; + interp_filter = EIGHTTAP_SHARP; } else if (filter_thresh[EIGHTTAP] > filter_thresh[SWITCHABLE - 1]) { - filter_type = EIGHTTAP; + interp_filter = EIGHTTAP; } else { - filter_type = SWITCHABLE; + interp_filter = SWITCHABLE; } cpi->mb.e_mbd.lossless = cpi->oxcf.lossless; @@ -2511,7 +2511,7 @@ void vp9_encode_frame(VP9_COMP *cpi) { /* transform size selection (4x4, 8x8, 16x16 or select-per-mb) */ select_tx_mode(cpi); cm->reference_mode = reference_mode; - cm->mcomp_filter_type = filter_type; + cm->interp_filter = interp_filter; encode_frame_internal(cpi); for (i = 0; i < REFERENCE_MODES; ++i) { diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 528f250a6..5ea75c314 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -25,8 +25,7 @@ #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_tokenize.h" -void vp9_setup_interp_filters(MACROBLOCKD *xd, - INTERPOLATION_TYPE mcomp_filter_type, +void vp9_setup_interp_filters(MACROBLOCKD *xd, INTERP_FILTER filter, VP9_COMMON *cm) { if (xd->mi_8x8 && xd->mi_8x8[0]) { MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; @@ -38,11 +37,9 @@ void vp9_setup_interp_filters(MACROBLOCKD *xd, set_ref_ptrs(cm, xd, -1, -1); } - xd->subpix.filter_x = xd->subpix.filter_y = - vp9_get_filter_kernel(mcomp_filter_type == SWITCHABLE ? - EIGHTTAP : mcomp_filter_type); - - assert(((intptr_t)xd->subpix.filter_x & 0xff) == 0); + xd->interp_kernel = vp9_get_interp_kernel(filter == SWITCHABLE ? EIGHTTAP + : filter); + assert(((intptr_t)xd->interp_kernel & 0xff) == 0); } void vp9_subtract_block_c(int rows, int cols, diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index 6a06f0ae4..c728efd49 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -48,8 +48,7 @@ void vp9_encode_intra_block_y(MACROBLOCK *x, BLOCK_SIZE bsize); void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize); int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred); -void vp9_setup_interp_filters(MACROBLOCKD *xd, - INTERPOLATION_TYPE mcomp_filter_type, +void vp9_setup_interp_filters(MACROBLOCKD *xd, INTERP_FILTER filter, VP9_COMMON *cm); #ifdef __cplusplus } // extern "C" diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 395ce2008..28b343ce7 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -447,6 +447,16 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, } } +static BLOCK_SIZE get_bsize(const VP9_COMMON *cm, int mb_row, int mb_col) { + if (2 * mb_col + 1 < cm->mi_cols) { + return 2 * mb_row + 1 < cm->mi_rows ? BLOCK_16X16 + : BLOCK_16X8; + } else { + return 2 * mb_row + 1 < cm->mi_rows ? BLOCK_8X16 + : BLOCK_8X8; + } +} + void vp9_first_pass(VP9_COMP *cpi) { int mb_row, mb_col; MACROBLOCK *const x = &cpi->mb; @@ -542,6 +552,7 @@ void vp9_first_pass(VP9_COMP *cpi) { int this_error; int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); double error_weight = 1.0; + const BLOCK_SIZE bsize = get_bsize(cm, mb_row, mb_col); vp9_clear_system_state(); // __asm emms; @@ -549,30 +560,15 @@ void vp9_first_pass(VP9_COMP *cpi) { xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset; xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset; xd->left_available = (mb_col != 0); - - if (mb_col * 2 + 1 < cm->mi_cols) { - if (mb_row * 2 + 1 < cm->mi_rows) { - xd->mi_8x8[0]->mbmi.sb_type = BLOCK_16X16; - } else { - xd->mi_8x8[0]->mbmi.sb_type = BLOCK_16X8; - } - } else { - if (mb_row * 2 + 1 < cm->mi_rows) { - xd->mi_8x8[0]->mbmi.sb_type = BLOCK_8X16; - } else { - xd->mi_8x8[0]->mbmi.sb_type = BLOCK_8X8; - } - } + xd->mi_8x8[0]->mbmi.sb_type = bsize; xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME; set_mi_row_col(xd, &tile, - mb_row << 1, - num_8x8_blocks_high_lookup[xd->mi_8x8[0]->mbmi.sb_type], - mb_col << 1, - num_8x8_blocks_wide_lookup[xd->mi_8x8[0]->mbmi.sb_type], + mb_row << 1, num_8x8_blocks_high_lookup[bsize], + mb_col << 1, num_8x8_blocks_wide_lookup[bsize], cm->mi_rows, cm->mi_cols); if (cpi->oxcf.aq_mode == VARIANCE_AQ) { - int energy = vp9_block_energy(cpi, x, xd->mi_8x8[0]->mbmi.sb_type); + int energy = vp9_block_energy(cpi, x, bsize); error_weight = vp9_vaq_inv_q_ratio(energy); } @@ -692,9 +688,8 @@ void vp9_first_pass(VP9_COMP *cpi) { xd->mi_8x8[0]->mbmi.tx_size = TX_4X4; xd->mi_8x8[0]->mbmi.ref_frame[0] = LAST_FRAME; xd->mi_8x8[0]->mbmi.ref_frame[1] = NONE; - vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, - xd->mi_8x8[0]->mbmi.sb_type); - vp9_encode_sby(x, xd->mi_8x8[0]->mbmi.sb_type); + vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize); + vp9_encode_sby(x, bsize); sum_mvr += mv.as_mv.row; sum_mvr_abs += abs(mv.as_mv.row); sum_mvc += mv.as_mv.col; diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 17df04695..1852d1ea0 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -93,14 +93,6 @@ FILE *kf_list; FILE *keyfile; #endif - -#ifdef MODE_STATS -extern void init_tx_count_stats(); -extern void write_tx_count_stats(); -extern void init_switchable_interp_stats(); -extern void write_switchable_interp_stats(); -#endif - #ifdef SPEEDSTATS unsigned int frames_at_speed[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; @@ -601,7 +593,7 @@ static void set_good_speed_feature(VP9_COMMON *cm, sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 1; + sf->adaptive_pred_interp_filter = 1; sf->auto_mv_step_size = 1; sf->adaptive_rd_thresh = 2; sf->recode_loop = 2; @@ -627,7 +619,7 @@ static void set_good_speed_feature(VP9_COMMON *cm, FLAG_SKIP_INTRA_LOWVAR; sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 2; + sf->adaptive_pred_interp_filter = 2; sf->reference_masking = 1; sf->auto_mv_step_size = 1; @@ -664,7 +656,7 @@ static void set_good_speed_feature(VP9_COMMON *cm, sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 2; + sf->adaptive_pred_interp_filter = 2; sf->reference_masking = 1; sf->auto_mv_step_size = 1; @@ -699,7 +691,7 @@ static void set_good_speed_feature(VP9_COMMON *cm, sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 2; + sf->adaptive_pred_interp_filter = 2; sf->reference_masking = 1; sf->auto_mv_step_size = 1; @@ -772,7 +764,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 1; + sf->adaptive_pred_interp_filter = 1; sf->auto_mv_step_size = 1; sf->adaptive_rd_thresh = 2; sf->recode_loop = 2; @@ -798,7 +790,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 2; + sf->adaptive_pred_interp_filter = 2; sf->auto_mv_step_size = 1; sf->reference_masking = 1; @@ -885,7 +877,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->tx_size_search_method = USE_FULL_RD; sf->use_lp32x32fdct = 0; sf->adaptive_motion_search = 0; - sf->adaptive_pred_filter_type = 0; + sf->adaptive_pred_interp_filter = 0; sf->reference_masking = 0; sf->use_one_partition_size_always = 0; sf->less_rectangular_check = 0; @@ -1340,7 +1332,7 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->cq_target_quality = cpi->oxcf.cq_level; - cm->mcomp_filter_type = DEFAULT_INTERP_FILTER; + cm->interp_filter = DEFAULT_INTERP_FILTER; cpi->target_bandwidth = cpi->oxcf.target_bandwidth; @@ -1632,11 +1624,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { init_context_counters(); #endif -#ifdef MODE_STATS - init_tx_count_stats(); - init_switchable_interp_stats(); -#endif - /*Initialize the feed-forward activity masking.*/ cpi->activity_avg = 90 << 12; cpi->key_frame_frequency = cpi->oxcf.key_freq; @@ -1893,13 +1880,6 @@ void vp9_remove_compressor(VP9_PTR *ptr) { vp9_end_second_pass(cpi); } -#ifdef MODE_STATS - if (cpi->pass != 1) { - write_tx_count_stats(); - write_switchable_interp_stats(); - } -#endif - #if CONFIG_INTERNAL_STATS vp9_clear_system_state(); @@ -3114,7 +3094,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, &top_index); if (!frame_is_intra_only(cm)) { - cm->mcomp_filter_type = DEFAULT_INTERP_FILTER; + cm->interp_filter = DEFAULT_INTERP_FILTER; /* TODO: Decide this more intelligently */ set_high_precision_mv(cpi, (q < HIGH_PRECISION_MV_QTHRESH)); } @@ -3872,24 +3852,25 @@ void vp9_set_svc(VP9_PTR comp, int use_svc) { return; } -int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) { +int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *reference) { int i, j; int total = 0; - uint8_t *src = source->y_buffer; - uint8_t *dst = dest->y_buffer; + const uint8_t *src = source->y_buffer; + const uint8_t *ref = reference->y_buffer; // Loop through the Y plane raw and reconstruction data summing // (square differences) for (i = 0; i < source->y_height; i += 16) { for (j = 0; j < source->y_width; j += 16) { unsigned int sse; - total += vp9_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, - &sse); + total += vp9_mse16x16(src + j, source->y_stride, + ref + j, reference->y_stride, &sse); } src += 16 * source->y_stride; - dst += 16 * dest->y_stride; + ref += 16 * reference->y_stride; } return total; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 24ebe9bd4..9cf3f62d6 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -379,7 +379,7 @@ typedef struct { // best for 8x8 mode. If set to 0 we always re check all the filters for // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected. - int adaptive_pred_filter_type; + int adaptive_pred_interp_filter; // Implements various heuristics to skip searching modes // The heuristics selected are based on flags @@ -810,7 +810,8 @@ void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x); void vp9_set_speed_features(VP9_COMP *cpi); -int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest); +int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *reference); void vp9_alloc_compressor_data(VP9_COMP *cpi); diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c index 075698b31..0c0a20f90 100644 --- a/vp9/encoder/vp9_picklpf.c +++ b/vp9/encoder/vp9_picklpf.c @@ -34,38 +34,53 @@ static int get_max_filter_level(VP9_COMP *cpi, int base_qindex) { void vp9_set_alt_lf_level(VP9_COMP *cpi, int filt_val) { } -static void search_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, +static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, + MACROBLOCKD *const xd, VP9_COMMON *const cm, + int filt_level, int partial) { + int filt_err; + + vp9_set_alt_lf_level(cpi, filt_level); + vp9_loop_filter_frame(cm, xd, filt_level, 1, partial); + + filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); + + // Re-instate the unfiltered frame + vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); + + return filt_err; +} + +static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial) { MACROBLOCKD *const xd = &cpi->mb.e_mbd; VP9_COMMON *const cm = &cpi->common; struct loopfilter *const lf = &cm->lf; const int min_filter_level = get_min_filter_level(cpi, cm->base_qindex); const int max_filter_level = get_max_filter_level(cpi, cm->base_qindex); - int best_err = 0; - int filt_err = 0; + int best_err; int filt_best; int filt_direction = 0; // Start the search at the previous frame filter level unless it is now out of // range. int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level); int filter_step = filt_mid < 16 ? 4 : filt_mid / 4; + // Sum squared error at each filter level + int ss_err[MAX_LOOP_FILTER + 1]; + + // Set each entry to -1 + vpx_memset(ss_err, 0xFF, sizeof(ss_err)); // Make a copy of the unfiltered / processed recon buffer vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf); - // Get baseline error score - vp9_set_alt_lf_level(cpi, filt_mid); - vp9_loop_filter_frame(cm, xd, filt_mid, 1, partial); - - best_err = vp9_calc_ss_err(sd, cm->frame_to_show); + best_err = try_filter_frame(sd, cpi, xd, cm, filt_mid, partial); filt_best = filt_mid; - - // Re-instate the unfiltered frame - vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); + ss_err[filt_mid] = best_err; while (filter_step > 0) { const int filt_high = MIN(filt_mid + filter_step, max_filter_level); const int filt_low = MAX(filt_mid - filter_step, min_filter_level); + int filt_err; // Bias against raising loop filter in favor of lowering it. int bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; @@ -79,14 +94,12 @@ static void search_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, if (filt_direction <= 0 && filt_low != filt_mid) { // Get Low filter error score - vp9_set_alt_lf_level(cpi, filt_low); - vp9_loop_filter_frame(cm, xd, filt_low, 1, partial); - - filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); - - // Re-instate the unfiltered frame - vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); - + if (ss_err[filt_low] < 0) { + filt_err = try_filter_frame(sd, cpi, xd, cm, filt_low, partial); + ss_err[filt_low] = filt_err; + } else { + filt_err = ss_err[filt_low]; + } // If value is close to the best so far then bias towards a lower loop // filter value. if ((filt_err - bias) < best_err) { @@ -100,14 +113,12 @@ static void search_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, // Now look at filt_high if (filt_direction >= 0 && filt_high != filt_mid) { - vp9_set_alt_lf_level(cpi, filt_high); - vp9_loop_filter_frame(cm, xd, filt_high, 1, partial); - - filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); - - // Re-instate the unfiltered frame - vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); - + if (ss_err[filt_high] < 0) { + filt_err = try_filter_frame(sd, cpi, xd, cm, filt_high, partial); + ss_err[filt_high] = filt_err; + } else { + filt_err = ss_err[filt_high]; + } // Was it better than the previous best? if (filt_err < (best_err - bias)) { best_err = filt_err; @@ -128,7 +139,8 @@ static void search_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, lf->filter_level = filt_best; } -void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int method) { +void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, + int method) { VP9_COMMON *const cm = &cpi->common; struct loopfilter *const lf = &cm->lf; diff --git a/vp9/encoder/vp9_picklpf.h b/vp9/encoder/vp9_picklpf.h index 12382fa8d..0fc1f88b3 100644 --- a/vp9/encoder/vp9_picklpf.h +++ b/vp9/encoder/vp9_picklpf.h @@ -21,7 +21,7 @@ struct VP9_COMP; void vp9_set_alt_lf_level(struct VP9_COMP *cpi, int filt_val); -void vp9_pick_filter_level(struct yv12_buffer_config *sd, +void vp9_pick_filter_level(const struct yv12_buffer_config *sd, struct VP9_COMP *cpi, int method); #ifdef __cplusplus } // extern "C" diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 5f2f8a819..ebe5f80f2 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -428,7 +428,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, } *out_rate_sum = rate_sum; - *out_dist_sum = dist_sum << 4; + *out_dist_sum = (int64_t)dist_sum << 4; } static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize, @@ -1517,8 +1517,8 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, vp9_build_inter_predictor(pre, pd->pre[ref].stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, - &xd->block_refs[ref]->sf, - width, height, ref, &xd->subpix, MV_PRECISION_Q3, + &xd->block_refs[ref]->sf, width, height, ref, + xd->interp_kernel, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2)); } @@ -2536,7 +2536,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, &frame_mv[refs[!id]].as_mv, &xd->block_refs[!id]->sf, pw, ph, 0, - &xd->subpix, MV_PRECISION_Q3, + xd->interp_kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE); // Compound motion search on first ref frame. @@ -2626,7 +2626,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate_y, int64_t *distortion_y, int *rate_uv, int64_t *distortion_uv, int *mode_excluded, int *disable_skip, - INTERPOLATION_TYPE *best_filter, + INTERP_FILTER *best_filter, int_mv (*mode_mv)[MAX_REF_FRAMES], int mi_row, int mi_col, int_mv single_newmv[MAX_REF_FRAMES], @@ -2769,7 +2769,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) cpi->rd_filter_cache[i] = INT64_MAX; - if (cm->mcomp_filter_type != BILINEAR) { + if (cm->interp_filter != BILINEAR) { *best_filter = EIGHTTAP; if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) { @@ -2792,16 +2792,16 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, cpi->rd_filter_cache[i] = rd; cpi->rd_filter_cache[SWITCHABLE_FILTERS] = MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) rd += rs_rd; cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd); } else { int rate_sum = 0; int64_t dist_sum = 0; - if ((cm->mcomp_filter_type == SWITCHABLE && + if ((cm->interp_filter == SWITCHABLE && (!i || best_needs_copy)) || - (cm->mcomp_filter_type != SWITCHABLE && - (cm->mcomp_filter_type == mbmi->interp_filter || + (cm->interp_filter != SWITCHABLE && + (cm->interp_filter == mbmi->interp_filter || (i == 0 && intpel_mv)))) { restore_dst_buf(xd, orig_dst, orig_dst_stride); } else { @@ -2817,7 +2817,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, cpi->rd_filter_cache[i] = rd; cpi->rd_filter_cache[SWITCHABLE_FILTERS] = MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) rd += rs_rd; cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd); @@ -2838,13 +2838,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (newbest) { best_rd = rd; *best_filter = mbmi->interp_filter; - if (cm->mcomp_filter_type == SWITCHABLE && i && !intpel_mv) + if (cm->interp_filter == SWITCHABLE && i && !intpel_mv) best_needs_copy = !best_needs_copy; } - if ((cm->mcomp_filter_type == SWITCHABLE && newbest) || - (cm->mcomp_filter_type != SWITCHABLE && - cm->mcomp_filter_type == mbmi->interp_filter)) { + if ((cm->interp_filter == SWITCHABLE && newbest) || + (cm->interp_filter != SWITCHABLE && + cm->interp_filter == mbmi->interp_filter)) { pred_exists = 1; } } @@ -2852,10 +2852,10 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } // Set the appropriate filter - mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ? - cm->mcomp_filter_type : *best_filter; + mbmi->interp_filter = cm->interp_filter != SWITCHABLE ? + cm->interp_filter : *best_filter; vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); - rs = cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(x) : 0; + rs = cm->interp_filter == SWITCHABLE ? get_switchable_rate(x) : 0; if (pred_exists) { if (best_needs_copy) { @@ -2884,7 +2884,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) *rate2 += get_switchable_rate(x); if (!is_comp_pred && cpi->enable_encode_breakout) { @@ -3129,7 +3129,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_inter_rd = INT64_MAX; MB_PREDICTION_MODE best_intra_mode = DC_PRED; MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME; - INTERPOLATION_TYPE tmp_best_filter = SWITCHABLE; + INTERP_FILTER tmp_best_filter = SWITCHABLE; int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES]; int64_t dist_uv[TX_SIZES]; int skip_uv[TX_SIZES]; @@ -3282,7 +3282,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Evaluate all sub-pel filters irrespective of whether we can use // them for this frame. - mbmi->interp_filter = cm->mcomp_filter_type; + mbmi->interp_filter = cm->interp_filter; vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); if (comp_pred) { @@ -3573,9 +3573,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, /* keep record of best filter type */ if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME && - cm->mcomp_filter_type != BILINEAR) { - int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ? - SWITCHABLE_FILTERS : cm->mcomp_filter_type]; + cm->interp_filter != BILINEAR) { + int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ? + SWITCHABLE_FILTERS : cm->interp_filter]; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { int64_t adj_rd; @@ -3649,8 +3649,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } - assert((cm->mcomp_filter_type == SWITCHABLE) || - (cm->mcomp_filter_type == best_mbmode.interp_filter) || + assert((cm->interp_filter == SWITCHABLE) || + (cm->interp_filter == best_mbmode.interp_filter) || !is_inter_block(&best_mbmode)); // Updating rd_thresh_freq_fact[] here means that the different @@ -3692,7 +3692,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, else best_filter_diff[i] = best_rd - best_filter_rd[i]; } - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) assert(best_filter_diff[SWITCHABLE_FILTERS] == 0); } else { vp9_zero(best_filter_diff); @@ -3754,7 +3754,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, vp9_prob comp_mode_p; int64_t best_inter_rd = INT64_MAX; MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME; - INTERPOLATION_TYPE tmp_best_filter = SWITCHABLE; + INTERP_FILTER tmp_best_filter = SWITCHABLE; int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES]; int64_t dist_uv[TX_SIZES]; int skip_uv[TX_SIZES]; @@ -3907,7 +3907,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // Evaluate all sub-pel filters irrespective of whether we can use // them for this frame. - mbmi->interp_filter = cm->mcomp_filter_type; + mbmi->interp_filter = cm->interp_filter; vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); if (comp_pred) { @@ -4013,17 +4013,17 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) cpi->rd_filter_cache[i] = INT64_MAX; - if (cm->mcomp_filter_type != BILINEAR) { + if (cm->interp_filter != BILINEAR) { tmp_best_filter = EIGHTTAP; if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) { tmp_best_filter = EIGHTTAP; - } else if (cpi->sf.adaptive_pred_filter_type == 1 && - ctx->pred_filter_type < SWITCHABLE) { - tmp_best_filter = ctx->pred_filter_type; - } else if (cpi->sf.adaptive_pred_filter_type == 2) { - tmp_best_filter = ctx->pred_filter_type < SWITCHABLE ? - ctx->pred_filter_type : 0; + } else if (cpi->sf.adaptive_pred_interp_filter == 1 && + ctx->pred_interp_filter < SWITCHABLE) { + tmp_best_filter = ctx->pred_interp_filter; + } else if (cpi->sf.adaptive_pred_interp_filter == 2) { + tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ? + ctx->pred_interp_filter : 0; } else { for (switchable_filter_index = 0; switchable_filter_index < SWITCHABLE_FILTERS; @@ -4051,7 +4051,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, cpi->rd_filter_cache[SWITCHABLE_FILTERS] = MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd); - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) tmp_rd += rs_rd; cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, tmp_rd); @@ -4061,9 +4061,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, tmp_best_filter = mbmi->interp_filter; tmp_best_rd = tmp_rd; } - if ((newbest && cm->mcomp_filter_type == SWITCHABLE) || - (mbmi->interp_filter == cm->mcomp_filter_type && - cm->mcomp_filter_type != SWITCHABLE)) { + if ((newbest && cm->interp_filter == SWITCHABLE) || + (mbmi->interp_filter == cm->interp_filter && + cm->interp_filter != SWITCHABLE)) { tmp_best_rdu = tmp_rd; tmp_best_rate = rate; tmp_best_ratey = rate_y; @@ -4095,8 +4095,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (tmp_best_rdu == INT64_MAX && pred_exists) continue; - mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ? - tmp_best_filter : cm->mcomp_filter_type); + mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ? + tmp_best_filter : cm->interp_filter); vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); if (!pred_exists) { // Handles the special case when a filter that is not in the @@ -4113,7 +4113,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (tmp_rd == INT64_MAX) continue; } else { - if (cm->mcomp_filter_type == SWITCHABLE) { + if (cm->interp_filter == SWITCHABLE) { int rs = get_switchable_rate(x); tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0); } @@ -4131,7 +4131,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, rate2 += rate; distortion2 += distortion; - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) rate2 += get_switchable_rate(x); if (!mode_excluded) @@ -4299,9 +4299,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, /* keep record of best filter type */ if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME && - cm->mcomp_filter_type != BILINEAR) { - int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ? - SWITCHABLE_FILTERS : cm->mcomp_filter_type]; + cm->interp_filter != BILINEAR) { + int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ? + SWITCHABLE_FILTERS : cm->interp_filter]; int64_t adj_rd; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { if (ref == INT64_MAX) @@ -4372,8 +4372,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, return best_rd; } - assert((cm->mcomp_filter_type == SWITCHABLE) || - (cm->mcomp_filter_type == best_mbmode.interp_filter) || + assert((cm->interp_filter == SWITCHABLE) || + (cm->interp_filter == best_mbmode.interp_filter) || !is_inter_block(&best_mbmode)); // Updating rd_thresh_freq_fact[] here means that the different @@ -4425,7 +4425,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, else best_filter_diff[i] = best_rd - best_filter_rd[i]; } - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) assert(best_filter_diff[SWITCHABLE_FILTERS] == 0); } else { vp9_zero(best_filter_diff); diff --git a/vp9/encoder/vp9_resize.c b/vp9/encoder/vp9_resize.c index f15abc07d..0766b5107 100644 --- a/vp9/encoder/vp9_resize.c +++ b/vp9/encoder/vp9_resize.c @@ -16,7 +16,6 @@ #include <string.h> #include "vp9/common/vp9_common.h" #include "vp9/encoder/vp9_resize.h" -#include "vpx/vpx_integer.h" #define FILTER_BITS 7 @@ -30,8 +29,44 @@ typedef int16_t interp_kernel[INTERP_TAPS]; -// Filters for interpolation - note this also filters integer pels. -const interp_kernel vp9_filteredinterp_filters[(1 << SUBPEL_BITS)] = { +// Filters for interpolation (0.5-band) - note this also filters integer pels. +const interp_kernel vp9_filteredinterp_filters500[(1 << SUBPEL_BITS)] = { + {-3, 0, 35, 64, 35, 0, -3, 0}, + {-3, -1, 34, 64, 36, 1, -3, 0}, + {-3, -1, 32, 64, 38, 1, -3, 0}, + {-2, -2, 31, 63, 39, 2, -3, 0}, + {-2, -2, 29, 63, 41, 2, -3, 0}, + {-2, -2, 28, 63, 42, 3, -4, 0}, + {-2, -3, 27, 63, 43, 4, -4, 0}, + {-2, -3, 25, 62, 45, 5, -4, 0}, + {-2, -3, 24, 62, 46, 5, -4, 0}, + {-2, -3, 23, 61, 47, 6, -4, 0}, + {-2, -3, 21, 60, 49, 7, -4, 0}, + {-1, -4, 20, 60, 50, 8, -4, -1}, + {-1, -4, 19, 59, 51, 9, -4, -1}, + {-1, -4, 17, 58, 52, 10, -4, 0}, + {-1, -4, 16, 57, 53, 12, -4, -1}, + {-1, -4, 15, 56, 54, 13, -4, -1}, + {-1, -4, 14, 55, 55, 14, -4, -1}, + {-1, -4, 13, 54, 56, 15, -4, -1}, + {-1, -4, 12, 53, 57, 16, -4, -1}, + {0, -4, 10, 52, 58, 17, -4, -1}, + {-1, -4, 9, 51, 59, 19, -4, -1}, + {-1, -4, 8, 50, 60, 20, -4, -1}, + {0, -4, 7, 49, 60, 21, -3, -2}, + {0, -4, 6, 47, 61, 23, -3, -2}, + {0, -4, 5, 46, 62, 24, -3, -2}, + {0, -4, 5, 45, 62, 25, -3, -2}, + {0, -4, 4, 43, 63, 27, -3, -2}, + {0, -4, 3, 42, 63, 28, -2, -2}, + {0, -3, 2, 41, 63, 29, -2, -2}, + {0, -3, 2, 39, 63, 31, -2, -2}, + {0, -3, 1, 38, 64, 32, -1, -3}, + {0, -3, 1, 36, 64, 34, -1, -3} +}; + +// Filters for interpolation (0.625-band) - note this also filters integer pels. +const interp_kernel vp9_filteredinterp_filters625[(1 << SUBPEL_BITS)] = { {-1, -8, 33, 80, 33, -8, -1, 0}, {-1, -8, 30, 80, 35, -8, -1, 1}, {-1, -8, 28, 80, 37, -7, -2, 1}, @@ -66,10 +101,132 @@ const interp_kernel vp9_filteredinterp_filters[(1 << SUBPEL_BITS)] = { {1, -1, -8, 35, 80, 30, -8, -1}, }; +// Filters for interpolation (0.75-band) - note this also filters integer pels. +const interp_kernel vp9_filteredinterp_filters750[(1 << SUBPEL_BITS)] = { + {2, -11, 25, 96, 25, -11, 2, 0}, + {2, -11, 22, 96, 28, -11, 2, 0}, + {2, -10, 19, 95, 31, -11, 2, 0}, + {2, -10, 17, 95, 34, -12, 2, 0}, + {2, -9, 14, 94, 37, -12, 2, 0}, + {2, -8, 12, 93, 40, -12, 1, 0}, + {2, -8, 9, 92, 43, -12, 1, 1}, + {2, -7, 7, 91, 46, -12, 1, 0}, + {2, -7, 5, 90, 49, -12, 1, 0}, + {2, -6, 3, 88, 52, -12, 0, 1}, + {2, -5, 1, 86, 55, -12, 0, 1}, + {2, -5, -1, 84, 58, -11, 0, 1}, + {2, -4, -2, 82, 61, -11, -1, 1}, + {2, -4, -4, 80, 64, -10, -1, 1}, + {1, -3, -5, 77, 67, -9, -1, 1}, + {1, -3, -6, 75, 70, -8, -2, 1}, + {1, -2, -7, 72, 72, -7, -2, 1}, + {1, -2, -8, 70, 75, -6, -3, 1}, + {1, -1, -9, 67, 77, -5, -3, 1}, + {1, -1, -10, 64, 80, -4, -4, 2}, + {1, -1, -11, 61, 82, -2, -4, 2}, + {1, 0, -11, 58, 84, -1, -5, 2}, + {1, 0, -12, 55, 86, 1, -5, 2}, + {1, 0, -12, 52, 88, 3, -6, 2}, + {0, 1, -12, 49, 90, 5, -7, 2}, + {0, 1, -12, 46, 91, 7, -7, 2}, + {1, 1, -12, 43, 92, 9, -8, 2}, + {0, 1, -12, 40, 93, 12, -8, 2}, + {0, 2, -12, 37, 94, 14, -9, 2}, + {0, 2, -12, 34, 95, 17, -10, 2}, + {0, 2, -11, 31, 95, 19, -10, 2}, + {0, 2, -11, 28, 96, 22, -11, 2} +}; + +// Filters for interpolation (0.875-band) - note this also filters integer pels. +const interp_kernel vp9_filteredinterp_filters875[(1 << SUBPEL_BITS)] = { + {3, -8, 13, 112, 13, -8, 3, 0}, + {3, -7, 10, 112, 17, -9, 3, -1}, + {2, -6, 7, 111, 21, -9, 3, -1}, + {2, -5, 4, 111, 24, -10, 3, -1}, + {2, -4, 1, 110, 28, -11, 3, -1}, + {1, -3, -1, 108, 32, -12, 4, -1}, + {1, -2, -3, 106, 36, -13, 4, -1}, + {1, -1, -6, 105, 40, -14, 4, -1}, + {1, -1, -7, 102, 44, -14, 4, -1}, + {1, 0, -9, 100, 48, -15, 4, -1}, + {1, 1, -11, 97, 53, -16, 4, -1}, + {0, 1, -12, 95, 57, -16, 4, -1}, + {0, 2, -13, 91, 61, -16, 4, -1}, + {0, 2, -14, 88, 65, -16, 4, -1}, + {0, 3, -15, 84, 69, -17, 4, 0}, + {0, 3, -16, 81, 73, -16, 3, 0}, + {0, 3, -16, 77, 77, -16, 3, 0}, + {0, 3, -16, 73, 81, -16, 3, 0}, + {0, 4, -17, 69, 84, -15, 3, 0}, + {-1, 4, -16, 65, 88, -14, 2, 0}, + {-1, 4, -16, 61, 91, -13, 2, 0}, + {-1, 4, -16, 57, 95, -12, 1, 0}, + {-1, 4, -16, 53, 97, -11, 1, 1}, + {-1, 4, -15, 48, 100, -9, 0, 1}, + {-1, 4, -14, 44, 102, -7, -1, 1}, + {-1, 4, -14, 40, 105, -6, -1, 1}, + {-1, 4, -13, 36, 106, -3, -2, 1}, + {-1, 4, -12, 32, 108, -1, -3, 1}, + {-1, 3, -11, 28, 110, 1, -4, 2}, + {-1, 3, -10, 24, 111, 4, -5, 2}, + {-1, 3, -9, 21, 111, 7, -6, 2}, + {-1, 3, -9, 17, 112, 10, -7, 3} +}; + +// Filters for interpolation (full-band) - no filtering for integer pixels +const interp_kernel vp9_filteredinterp_filters1000[(1 << SUBPEL_BITS)] = { + {0, 0, 0, 128, 0, 0, 0, 0}, + {0, 1, -3, 128, 3, -1, 0, 0}, + {-1, 2, -6, 127, 7, -2, 1, 0}, + {-1, 3, -9, 126, 12, -4, 1, 0}, + {-1, 4, -12, 125, 16, -5, 1, 0}, + {-1, 4, -14, 123, 20, -6, 2, 0}, + {-1, 5, -15, 120, 25, -8, 2, 0}, + {-1, 5, -17, 118, 30, -9, 3, -1}, + {-1, 6, -18, 114, 35, -10, 3, -1}, + {-1, 6, -19, 111, 41, -12, 3, -1}, + {-1, 6, -20, 107, 46, -13, 4, -1}, + {-1, 6, -21, 103, 52, -14, 4, -1}, + {-1, 6, -21, 99, 57, -16, 5, -1}, + {-1, 6, -21, 94, 63, -17, 5, -1}, + {-1, 6, -20, 89, 68, -18, 5, -1}, + {-1, 6, -20, 84, 73, -19, 6, -1}, + {-1, 6, -20, 79, 79, -20, 6, -1}, + {-1, 6, -19, 73, 84, -20, 6, -1}, + {-1, 5, -18, 68, 89, -20, 6, -1}, + {-1, 5, -17, 63, 94, -21, 6, -1}, + {-1, 5, -16, 57, 99, -21, 6, -1}, + {-1, 4, -14, 52, 103, -21, 6, -1}, + {-1, 4, -13, 46, 107, -20, 6, -1}, + {-1, 3, -12, 41, 111, -19, 6, -1}, + {-1, 3, -10, 35, 114, -18, 6, -1}, + {-1, 3, -9, 30, 118, -17, 5, -1}, + {0, 2, -8, 25, 120, -15, 5, -1}, + {0, 2, -6, 20, 123, -14, 4, -1}, + {0, 1, -5, 16, 125, -12, 4, -1}, + {0, 1, -4, 12, 126, -9, 3, -1}, + {0, 1, -2, 7, 127, -6, 2, -1}, + {0, 0, -1, 3, 128, -3, 1, 0} +}; + // Filters for factor of 2 downsampling. static const int16_t vp9_down2_symeven_half_filter[] = {56, 12, -3, -1}; static const int16_t vp9_down2_symodd_half_filter[] = {64, 35, 0, -3}; +static const interp_kernel *choose_interp_filter(int inlength, int outlength) { + int outlength16 = outlength * 16; + if (outlength16 >= inlength * 16) + return vp9_filteredinterp_filters1000; + else if (outlength16 >= inlength * 13) + return vp9_filteredinterp_filters875; + else if (outlength16 >= inlength * 11) + return vp9_filteredinterp_filters750; + else if (outlength16 >= inlength * 9) + return vp9_filteredinterp_filters625; + else + return vp9_filteredinterp_filters500; +} + static void interpolate(const uint8_t *const input, int inlength, uint8_t *output, int outlength) { const int64_t delta = (((uint64_t)inlength << 32) + outlength / 2) / @@ -81,6 +238,9 @@ static void interpolate(const uint8_t *const input, int inlength, int x, x1, x2, sum, k, int_pel, sub_pel; int64_t y; + const interp_kernel *interp_filters = + choose_interp_filter(inlength, outlength); + x = 0; y = offset; while ((y >> INTERP_PRECISION_BITS) < (INTERP_TAPS / 2 - 1)) { @@ -101,7 +261,7 @@ static void interpolate(const uint8_t *const input, int inlength, const int16_t *filter; int_pel = y >> INTERP_PRECISION_BITS; sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; - filter = vp9_filteredinterp_filters[sub_pel]; + filter = interp_filters[sub_pel]; sum = 0; for (k = 0; k < INTERP_TAPS; ++k) { const int pk = int_pel - INTERP_TAPS / 2 + 1 + k; @@ -116,7 +276,7 @@ static void interpolate(const uint8_t *const input, int inlength, const int16_t *filter; int_pel = y >> INTERP_PRECISION_BITS; sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; - filter = vp9_filteredinterp_filters[sub_pel]; + filter = interp_filters[sub_pel]; sum = 0; for (k = 0; k < INTERP_TAPS; ++k) sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ? @@ -129,7 +289,7 @@ static void interpolate(const uint8_t *const input, int inlength, const int16_t *filter; int_pel = y >> INTERP_PRECISION_BITS; sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; - filter = vp9_filteredinterp_filters[sub_pel]; + filter = interp_filters[sub_pel]; sum = 0; for (k = 0; k < INTERP_TAPS; ++k) sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k]; @@ -140,7 +300,7 @@ static void interpolate(const uint8_t *const input, int inlength, const int16_t *filter; int_pel = y >> INTERP_PRECISION_BITS; sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; - filter = vp9_filteredinterp_filters[sub_pel]; + filter = interp_filters[sub_pel]; sum = 0; for (k = 0; k < INTERP_TAPS; ++k) sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >= diff --git a/vp9/encoder/vp9_resize.h b/vp9/encoder/vp9_resize.h index c67595a3f..1818cd47e 100644 --- a/vp9/encoder/vp9_resize.h +++ b/vp9/encoder/vp9_resize.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -12,6 +12,7 @@ #define VP9_ENCODER_VP9_RESIZE_H_ #include <stdio.h> +#include "vpx/vpx_integer.h" void vp9_resize_plane(const uint8_t *const input, int height, diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index c2eea0aaa..c9a424648 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -60,7 +60,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, 16, 16, which_mv, - &xd->subpix, MV_PRECISION_Q3, x, y); + xd->interp_kernel, MV_PRECISION_Q3, x, y); vp9_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_size, @@ -68,7 +68,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, uv_block_size, uv_block_size, which_mv, - &xd->subpix, mv_precision_uv, x, y); + xd->interp_kernel, mv_precision_uv, x, y); vp9_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_size, @@ -76,7 +76,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, uv_block_size, uv_block_size, which_mv, - &xd->subpix, mv_precision_uv, x, y); + xd->interp_kernel, mv_precision_uv, x, y); } void vp9_temporal_filter_apply_c(uint8_t *frame1, |