summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--vp9/common/vp9_alloccommon.c62
-rw-r--r--vp9/common/vp9_alloccommon.h3
-rw-r--r--vp9/common/vp9_reconintra.c7
-rw-r--r--vp9/decoder/vp9_decodeframe.c3
-rw-r--r--vp9/encoder/vp9_avg.c6
-rw-r--r--vp9/encoder/vp9_bitstream.c8
-rw-r--r--vp9/encoder/vp9_encodeframe.c197
-rw-r--r--vp9/encoder/vp9_encodemb.c12
-rw-r--r--vp9/encoder/vp9_encoder.c138
-rw-r--r--vp9/encoder/vp9_encoder.h19
-rw-r--r--vp9/encoder/vp9_firstpass.c61
-rw-r--r--vp9/encoder/vp9_mbgraph.c2
-rw-r--r--vp9/encoder/vp9_mcomp.c146
-rw-r--r--vp9/encoder/vp9_mcomp.h7
-rw-r--r--vp9/encoder/vp9_pickmode.c9
-rw-r--r--vp9/encoder/vp9_quantize.c49
-rw-r--r--vp9/encoder/vp9_quantize.h6
-rw-r--r--vp9/encoder/vp9_rd.c11
-rw-r--r--vp9/encoder/vp9_rd.h4
-rw-r--r--vp9/encoder/vp9_rdopt.c20
-rw-r--r--vp9/encoder/vp9_svc_layercontext.c4
-rw-r--r--vp9/encoder/x86/vp9_avg_intrin_sse2.c15
-rw-r--r--vp9/vp9_dx_iface.c3
23 files changed, 424 insertions, 368 deletions
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 8b04d1b43..e209788c3 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -141,68 +141,6 @@ int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) {
return 1;
}
-static void init_frame_bufs(VP9_COMMON *cm) {
- BufferPool *const pool = cm->buffer_pool;
- int i;
-
- cm->new_fb_idx = FRAME_BUFFERS - 1;
- pool->frame_bufs[cm->new_fb_idx].ref_count = 1;
-
- for (i = 0; i < REF_FRAMES; ++i) {
- cm->ref_frame_map[i] = i;
- pool->frame_bufs[i].ref_count = 1;
- }
-}
-
-int vp9_alloc_ref_frame_buffers(VP9_COMMON *cm, int width, int height) {
- int i;
- const int ss_x = cm->subsampling_x;
- const int ss_y = cm->subsampling_y;
-
- vp9_free_ref_frame_buffers(cm);
-
- for (i = 0; i < FRAME_BUFFERS; ++i) {
- BufferPool *const pool = cm->buffer_pool;
- pool->frame_bufs[i].ref_count = 0;
- if (vp9_alloc_frame_buffer(&pool->frame_bufs[i].buf, width, height,
- ss_x, ss_y,
-#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif
- VP9_ENC_BORDER_IN_PIXELS,
- cm->byte_alignment) < 0)
- goto fail;
- if (pool->frame_bufs[i].mvs == NULL) {
- pool->frame_bufs[i].mvs =
- (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
- sizeof(*pool->frame_bufs[i].mvs));
- if (pool->frame_bufs[i].mvs == NULL)
- goto fail;
-
- pool->frame_bufs[i].mi_rows = cm->mi_rows;
- pool->frame_bufs[i].mi_cols = cm->mi_cols;
- }
- }
-
- init_frame_bufs(cm);
-
-#if CONFIG_VP9_POSTPROC
- if (vp9_alloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
-#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif
- VP9_ENC_BORDER_IN_PIXELS,
- cm->byte_alignment) < 0)
- goto fail;
-#endif
-
- return 0;
-
- fail:
- vp9_free_ref_frame_buffers(cm);
- return 1;
-}
-
void vp9_remove_common(VP9_COMMON *cm) {
vp9_free_ref_frame_buffers(cm);
vp9_free_context_buffers(cm);
diff --git a/vp9/common/vp9_alloccommon.h b/vp9/common/vp9_alloccommon.h
index 09da74e49..d82397fa3 100644
--- a/vp9/common/vp9_alloccommon.h
+++ b/vp9/common/vp9_alloccommon.h
@@ -12,6 +12,8 @@
#ifndef VP9_COMMON_VP9_ALLOCCOMMON_H_
#define VP9_COMMON_VP9_ALLOCCOMMON_H_
+#define INVALID_IDX -1 // Invalid buffer index.
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -24,7 +26,6 @@ int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height);
void vp9_init_context_buffers(struct VP9Common *cm);
void vp9_free_context_buffers(struct VP9Common *cm);
-int vp9_alloc_ref_frame_buffers(struct VP9Common *cm, int width, int height);
void vp9_free_ref_frame_buffers(struct VP9Common *cm);
int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height);
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index e614e6da8..1668b99ce 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -12,6 +12,7 @@
#include "./vp9_rtcd.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/vpx_once.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_onyxc_int.h"
@@ -579,7 +580,7 @@ static intra_high_pred_fn pred_high[INTRA_MODES][4];
static intra_high_pred_fn dc_pred_high[2][2][4];
#endif // CONFIG_VP9_HIGHBITDEPTH
-void vp9_init_intra_predictors() {
+static void vp9_init_intra_predictors_internal(void) {
#define INIT_ALL_SIZES(p, type) \
p[TX_4X4] = vp9_##type##_predictor_4x4; \
p[TX_8X8] = vp9_##type##_predictor_8x8; \
@@ -894,3 +895,7 @@ void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in,
build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size,
have_top, have_left, have_right, x, y, plane);
}
+
+void vp9_init_intra_predictors() {
+ once(vp9_init_intra_predictors_internal);
+}
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index d345a0578..c183cf38e 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -934,7 +934,6 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
winterface->sync(&pbi->lf_worker);
vp9_loop_filter_data_reset(lf_data, get_frame_new_buffer(cm), cm,
pbi->mb.plane);
- vp9_loop_filter_frame_init(cm, cm->lf.filter_level);
}
assert(tile_rows <= 4);
@@ -1362,7 +1361,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
pbi->refresh_frame_flags = (1 << REF_FRAMES) - 1;
for (i = 0; i < REFS_PER_FRAME; ++i) {
- cm->frame_refs[i].idx = -1;
+ cm->frame_refs[i].idx = INVALID_IDX;
cm->frame_refs[i].buf = NULL;
}
diff --git a/vp9/encoder/vp9_avg.c b/vp9/encoder/vp9_avg.c
index 50c8bca0b..90d113c32 100644
--- a/vp9/encoder/vp9_avg.c
+++ b/vp9/encoder/vp9_avg.c
@@ -32,12 +32,13 @@ unsigned int vp9_avg_4x4_c(const uint8_t *s, int p) {
void vp9_int_pro_row_c(int16_t *hbuf, uint8_t const *ref,
const int ref_stride, const int height) {
int idx;
+ const int norm_factor = MAX(8, height >> 1);
for (idx = 0; idx < 16; ++idx) {
int i;
hbuf[idx] = 0;
for (i = 0; i < height; ++i)
hbuf[idx] += ref[i * ref_stride];
- hbuf[idx] /= 32;
+ hbuf[idx] /= norm_factor;
++ref;
}
}
@@ -45,9 +46,10 @@ void vp9_int_pro_row_c(int16_t *hbuf, uint8_t const *ref,
int16_t vp9_int_pro_col_c(uint8_t const *ref, const int width) {
int idx;
int16_t sum = 0;
+ const int norm_factor = MAX(8, width >> 1);
for (idx = 0; idx < width; ++idx)
sum += ref[idx];
- return sum / 32;
+ return sum / norm_factor;
}
int vp9_vector_var_c(int16_t const *ref, int16_t const *src,
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 3a70364ae..b24fe2950 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -989,8 +989,6 @@ static void write_frame_size_with_refs(VP9_COMP *cpi,
MV_REFERENCE_FRAME ref_frame;
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, ref_frame);
- found = cm->width == cfg->y_crop_width &&
- cm->height == cfg->y_crop_height;
// Set "found" to 0 for temporal svc and for spatial svc key frame
if (cpi->use_svc &&
@@ -1003,6 +1001,9 @@ static void write_frame_size_with_refs(VP9_COMP *cpi,
cpi->svc.layer_context[0].frames_from_key_frame <
cpi->svc.number_temporal_layers + 1))) {
found = 0;
+ } else if (cfg != NULL) {
+ found = cm->width == cfg->y_crop_width &&
+ cm->height == cfg->y_crop_height;
}
vp9_wb_write_bit(wb, found);
if (found) {
@@ -1114,7 +1115,8 @@ static void write_uncompressed_header(VP9_COMP *cpi,
MV_REFERENCE_FRAME ref_frame;
vp9_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- vp9_wb_write_literal(wb, get_ref_frame_idx(cpi, ref_frame),
+ assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX);
+ vp9_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame),
REF_FRAMES_LOG2);
vp9_wb_write_bit(wb, cm->ref_frame_sign_bias[ref_frame]);
}
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 63ca2d34d..a86981a71 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -144,12 +144,14 @@ static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
const struct buf_2d *ref,
int mi_row, int mi_col,
BLOCK_SIZE bs) {
+ unsigned int sse, var;
+ uint8_t *last_y;
const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME);
- const uint8_t* last_y = &last->y_buffer[mi_row * MI_SIZE * last->y_stride +
- mi_col * MI_SIZE];
- unsigned int sse;
- const unsigned int var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
- last_y, last->y_stride, &sse);
+
+ assert(last != NULL);
+ last_y =
+ &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];
+ var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);
return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}
@@ -518,156 +520,8 @@ void vp9_set_vbp_thresholds(VP9_COMP *cpi, int q) {
#define GLOBAL_MOTION 1
#endif
-#if GLOBAL_MOTION
-static int vector_match(int16_t *ref, int16_t *src, int bwl) {
- int best_sad = INT_MAX;
- int this_sad;
- int d;
- int center, offset = 0;
- int bw = 4 << bwl; // redundant variable, to be changed in the experiments.
- for (d = 0; d <= bw; d += 16) {
- this_sad = vp9_vector_var(&ref[d], src, bwl);
- if (this_sad < best_sad) {
- best_sad = this_sad;
- offset = d;
- }
- }
- center = offset;
-
- for (d = -8; d <= 8; d += 16) {
- int this_pos = offset + d;
- // check limit
- if (this_pos < 0 || this_pos > bw)
- continue;
- this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
- if (this_sad < best_sad) {
- best_sad = this_sad;
- center = this_pos;
- }
- }
- offset = center;
-
- for (d = -4; d <= 4; d += 8) {
- int this_pos = offset + d;
- // check limit
- if (this_pos < 0 || this_pos > bw)
- continue;
- this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
- if (this_sad < best_sad) {
- best_sad = this_sad;
- center = this_pos;
- }
- }
- offset = center;
-
- for (d = -2; d <= 2; d += 4) {
- int this_pos = offset + d;
- // check limit
- if (this_pos < 0 || this_pos > bw)
- continue;
- this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
- if (this_sad < best_sad) {
- best_sad = this_sad;
- center = this_pos;
- }
- }
- offset = center;
-
- for (d = -1; d <= 1; d += 2) {
- int this_pos = offset + d;
- // check limit
- if (this_pos < 0 || this_pos > bw)
- continue;
- this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
- if (this_sad < best_sad) {
- best_sad = this_sad;
- center = this_pos;
- }
- }
-
- return (center - (bw >> 1));
-}
-
-static const MV search_pos[9] = {
- {-1, -1}, {-1, 0}, {-1, 1}, {0, -1}, {0, 0}, {0, 1},
- {1, -1}, {1, 0}, {1, 1},
-};
-
-static void motion_estimation(VP9_COMP *cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize) {
- MACROBLOCKD *xd = &x->e_mbd;
- DECLARE_ALIGNED(16, int16_t, hbuf[128]);
- DECLARE_ALIGNED(16, int16_t, vbuf[128]);
- DECLARE_ALIGNED(16, int16_t, src_hbuf[64]);
- DECLARE_ALIGNED(16, int16_t, src_vbuf[64]);
- int idx;
- const int bw = 4 << b_width_log2_lookup[bsize];
- const int bh = 4 << b_height_log2_lookup[bsize];
- const int search_width = bw << 1;
- const int search_height = bh << 1;
- const int src_stride = x->plane[0].src.stride;
- const int ref_stride = xd->plane[0].pre[0].stride;
- uint8_t const *ref_buf, *src_buf;
- MV *tmp_mv = &xd->mi[0].src_mi->mbmi.mv[0].as_mv;
- int best_sad;
- MV this_mv;
-
- // Set up prediction 1-D reference set
- ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
- for (idx = 0; idx < search_width; idx += 16) {
- vp9_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
- ref_buf += 16;
- }
-
- ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
- for (idx = 0; idx < search_height; ++idx) {
- vbuf[idx] = vp9_int_pro_col(ref_buf, bw);
- ref_buf += ref_stride;
- }
-
- // Set up src 1-D reference set
- for (idx = 0; idx < bw; idx += 16) {
- src_buf = x->plane[0].src.buf + idx;
- vp9_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
- }
-
- src_buf = x->plane[0].src.buf;
- for (idx = 0; idx < bh; ++idx) {
- src_vbuf[idx] = vp9_int_pro_col(src_buf, bw);
- src_buf += src_stride;
- }
-
- // Find the best match per 1-D search
- tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]);
- tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]);
-
- best_sad = INT_MAX;
- this_mv = *tmp_mv;
- for (idx = 0; idx < 9; ++idx) {
- int this_sad;
- src_buf = x->plane[0].src.buf;
- ref_buf = xd->plane[0].pre[0].buf +
- (search_pos[idx].row + this_mv.row) * ref_stride +
- (search_pos[idx].col + this_mv.col);
-
- this_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride,
- ref_buf, ref_stride);
- if (this_sad < best_sad) {
- best_sad = this_sad;
- tmp_mv->row = search_pos[idx].row + this_mv.row;
- tmp_mv->col = search_pos[idx].col + this_mv.col;
- }
- }
-
- tmp_mv->row *= 8;
- tmp_mv->col *= 8;
-
- x->pred_mv[LAST_FRAME] = *tmp_mv;
-}
-#endif
-
// This function chooses partitioning based on the variance between source and
-// reconstructed last, where variance is computed for downs-sampled inputs.
+// reconstructed last, where variance is computed for down-sampled inputs.
static void choose_partitioning(VP9_COMP *cpi,
const TileInfo *const tile,
MACROBLOCK *x,
@@ -682,7 +536,6 @@ static void choose_partitioning(VP9_COMP *cpi,
int sp;
int dp;
int pixels_wide = 64, pixels_high = 64;
- const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
// Always use 4x4 partition for key frame.
const int is_key_frame = (cm->frame_type == KEY_FRAME);
@@ -709,7 +562,13 @@ static void choose_partitioning(VP9_COMP *cpi,
if (!is_key_frame) {
MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
- unsigned int var = 0, sse;
+ unsigned int uv_sad;
+#if GLOBAL_MOTION
+ unsigned int y_sad;
+ BLOCK_SIZE bsize;
+#endif
+ const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
+ assert(yv12 != NULL);
vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
&cm->frame_refs[LAST_FRAME - 1].sf);
mbmi->ref_frame[0] = LAST_FRAME;
@@ -719,7 +578,16 @@ static void choose_partitioning(VP9_COMP *cpi,
mbmi->interp_filter = BILINEAR;
#if GLOBAL_MOTION
- motion_estimation(cpi, x, BLOCK_64X64);
+ if (mi_row + 4 < cm->mi_rows && mi_col + 4 < cm->mi_cols)
+ bsize = BLOCK_64X64;
+ else if (mi_row + 4 < cm->mi_rows && mi_col + 4 >= cm->mi_cols)
+ bsize = BLOCK_32X64;
+ else if (mi_row + 4 >= cm->mi_rows && mi_col + 4 < cm->mi_cols)
+ bsize = BLOCK_64X32;
+ else
+ bsize = BLOCK_32X32;
+
+ y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize);
#endif
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
@@ -728,10 +596,14 @@ static void choose_partitioning(VP9_COMP *cpi,
struct macroblock_plane *p = &x->plane[i];
struct macroblockd_plane *pd = &xd->plane[i];
const BLOCK_SIZE bs = get_plane_block_size(BLOCK_64X64, pd);
- var += cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
- pd->dst.buf, pd->dst.stride, &sse);
- if (sse > 2048)
- x->color_sensitivity[i - 1] = 1;
+ uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
+ pd->dst.buf, pd->dst.stride);
+
+#if GLOBAL_MOTION
+ x->color_sensitivity[i - 1] = uv_sad * 4 > y_sad;
+#else
+ x->color_sensitivity[i - 1] = (uv_sad > 512);
+#endif
}
d = xd->plane[0].dst.buf;
@@ -3897,7 +3769,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
// Special case: set prev_mi to NULL when the previous mode info
// context cannot be used.
cm->prev_mi = cm->use_prev_frame_mvs ?
- cm->prev_mip + cm->mi_stride + 1 : NULL;
+ cm->prev_mip + cm->mi_stride + 1 : NULL;
x->quant_fp = cpi->sf.use_quant_fp;
vp9_zero(x->skip_txfm);
@@ -4169,6 +4041,7 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td,
for (ref = 0; ref < 1 + is_compound; ++ref) {
YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi,
mbmi->ref_frame[ref]);
+ assert(cfg != NULL);
vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
&xd->block_refs[ref]->sf);
}
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 70b804e31..65e299793 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -476,19 +476,19 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
break;
case TX_16X16:
vp9_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
- vp9_highbd_quantize_dc(coeff, x->skip_block, p->round,
+ vp9_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_8X8:
vp9_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
- vp9_highbd_quantize_dc(coeff, x->skip_block, p->round,
+ vp9_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_4X4:
x->fwd_txm4x4(src_diff, coeff, diff_stride);
- vp9_highbd_quantize_dc(coeff, x->skip_block, p->round,
+ vp9_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
@@ -508,19 +508,19 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
break;
case TX_16X16:
vp9_fdct16x16_1(src_diff, coeff, diff_stride);
- vp9_quantize_dc(coeff, x->skip_block, p->round,
+ vp9_quantize_dc(coeff, 256, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_8X8:
vp9_fdct8x8_1(src_diff, coeff, diff_stride);
- vp9_quantize_dc(coeff, x->skip_block, p->round,
+ vp9_quantize_dc(coeff, 64, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_4X4:
x->fwd_txm4x4(src_diff, coeff, diff_stride);
- vp9_quantize_dc(coeff, x->skip_block, p->round,
+ vp9_quantize_dc(coeff, 16, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index bd7e0b64c..249c2363d 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -483,6 +483,7 @@ static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate lag buffers");
+ // TODO(agrange) Check if ARF is enabled and skip allocation if not.
if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer,
oxcf->width, oxcf->height,
cm->subsampling_x, cm->subsampling_y,
@@ -495,13 +496,6 @@ static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
"Failed to allocate altref buffer");
}
-static void alloc_ref_frame_buffers(VP9_COMP *cpi) {
- VP9_COMMON *const cm = &cpi->common;
- if (vp9_alloc_ref_frame_buffers(cm, cm->width, cm->height))
- vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate frame buffers");
-}
-
static void alloc_util_frame_buffers(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
if (vp9_realloc_frame_buffer(&cpi->last_frame_uf,
@@ -2483,6 +2477,21 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
vp9_extend_frame_inner_borders(cm->frame_to_show);
}
+static INLINE void alloc_frame_mvs(const VP9_COMMON *cm,
+ int buffer_idx) {
+ RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
+ if (new_fb_ptr->mvs == NULL ||
+ new_fb_ptr->mi_rows < cm->mi_rows ||
+ new_fb_ptr->mi_cols < cm->mi_cols) {
+ vpx_free(new_fb_ptr->mvs);
+ new_fb_ptr->mvs =
+ (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
+ sizeof(*new_fb_ptr->mvs));
+ new_fb_ptr->mi_rows = cm->mi_rows;
+ new_fb_ptr->mi_cols = cm->mi_cols;
+ }
+}
+
void vp9_scale_references(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
MV_REFERENCE_FRAME ref_frame;
@@ -2491,13 +2500,19 @@ void vp9_scale_references(VP9_COMP *cpi) {
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
// Need to convert from VP9_REFFRAME to index into ref_mask (subtract 1).
if (cpi->ref_frame_flags & ref_mask[ref_frame - 1]) {
- const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
- BufferPool *const pool = cm->buffer_pool;
- const YV12_BUFFER_CONFIG *const ref = &pool->frame_bufs[idx].buf;
+ BufferPool *const pool = cm->buffer_pool;
+ const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi,
+ ref_frame);
+
+ if (ref == NULL) {
+ cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
+ continue;
+ }
#if CONFIG_VP9_HIGHBITDEPTH
if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
const int new_fb = get_free_fb(cm);
+ RefCntBuffer *const new_fb_ptr = &pool->frame_bufs[new_fb];
cm->cur_frame = &pool->frame_bufs[new_fb];
vp9_realloc_frame_buffer(&pool->frame_bufs[new_fb].buf,
cm->width, cm->height,
@@ -2505,35 +2520,28 @@ void vp9_scale_references(VP9_COMP *cpi) {
cm->use_highbitdepth,
VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
NULL, NULL, NULL);
- scale_and_extend_frame(ref, &pool->frame_bufs[new_fb].buf,
- (int)cm->bit_depth);
+ scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth);
#else
if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
const int new_fb = get_free_fb(cm);
- vp9_realloc_frame_buffer(&pool->frame_bufs[new_fb].buf,
+ RefCntBuffer *const new_fb_ptr = &pool->frame_bufs[new_fb];
+ vp9_realloc_frame_buffer(&new_fb_ptr->buf,
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
NULL, NULL, NULL);
- scale_and_extend_frame(ref, &pool->frame_bufs[new_fb].buf);
+ scale_and_extend_frame(ref, &new_fb_ptr->buf);
#endif // CONFIG_VP9_HIGHBITDEPTH
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
- if (pool->frame_bufs[new_fb].mvs == NULL ||
- pool->frame_bufs[new_fb].mi_rows < cm->mi_rows ||
- pool->frame_bufs[new_fb].mi_cols < cm->mi_cols) {
- vpx_free(pool->frame_bufs[new_fb].mvs);
- pool->frame_bufs[new_fb].mvs =
- (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
- sizeof(*pool->frame_bufs[new_fb].mvs));
- pool->frame_bufs[new_fb].mi_rows = cm->mi_rows;
- pool->frame_bufs[new_fb].mi_cols = cm->mi_cols;
- }
+
+ alloc_frame_mvs(cm, new_fb);
} else {
- cpi->scaled_ref_idx[ref_frame - 1] = idx;
- ++pool->frame_bufs[idx].ref_count;
+ const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
+ cpi->scaled_ref_idx[ref_frame - 1] = buf_idx;
+ ++pool->frame_bufs[buf_idx].ref_count;
}
} else {
- cpi->scaled_ref_idx[ref_frame - 1] = INVALID_REF_BUFFER_IDX;
+ cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
}
}
}
@@ -2543,11 +2551,11 @@ static void release_scaled_references(VP9_COMP *cpi) {
int i;
for (i = 0; i < MAX_REF_FRAMES; ++i) {
const int idx = cpi->scaled_ref_idx[i];
- RefCntBuffer *const buf = idx != INVALID_REF_BUFFER_IDX ?
+ RefCntBuffer *const buf = idx != INVALID_IDX ?
&cm->buffer_pool->frame_bufs[idx] : NULL;
if (buf != NULL) {
--buf->ref_count;
- cpi->scaled_ref_idx[i] = INVALID_REF_BUFFER_IDX;
+ cpi->scaled_ref_idx[i] = INVALID_IDX;
}
}
}
@@ -2751,6 +2759,8 @@ void set_frame_size(VP9_COMP *cpi) {
vp9_set_target_rate(cpi);
}
+ alloc_frame_mvs(cm, cm->new_fb_idx);
+
// Reset the frame pointers to the current frame size.
vp9_realloc_frame_buffer(get_frame_new_buffer(cm),
cm->width, cm->height,
@@ -2765,24 +2775,30 @@ void set_frame_size(VP9_COMP *cpi) {
init_motion_estimation(cpi);
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
- YV12_BUFFER_CONFIG *const buf = &cm->buffer_pool->frame_bufs[idx].buf;
RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1];
- ref_buf->buf = buf;
- ref_buf->idx = idx;
+ const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
+
+ ref_buf->idx = buf_idx;
+
+ if (buf_idx != INVALID_IDX) {
+ YV12_BUFFER_CONFIG *const buf = &cm->buffer_pool->frame_bufs[buf_idx].buf;
+ ref_buf->buf = buf;
#if CONFIG_VP9_HIGHBITDEPTH
- vp9_setup_scale_factors_for_frame(&ref_buf->sf,
- buf->y_crop_width, buf->y_crop_height,
- cm->width, cm->height,
- (buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
- 1 : 0);
+ vp9_setup_scale_factors_for_frame(&ref_buf->sf,
+ buf->y_crop_width, buf->y_crop_height,
+ cm->width, cm->height,
+ (buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
+ 1 : 0);
#else
- vp9_setup_scale_factors_for_frame(&ref_buf->sf,
- buf->y_crop_width, buf->y_crop_height,
- cm->width, cm->height);
+ vp9_setup_scale_factors_for_frame(&ref_buf->sf,
+ buf->y_crop_width, buf->y_crop_height,
+ cm->width, cm->height);
#endif // CONFIG_VP9_HIGHBITDEPTH
- if (vp9_is_scaled(&ref_buf->sf))
- vp9_extend_frame_borders(buf);
+ if (vp9_is_scaled(&ref_buf->sf))
+ vp9_extend_frame_borders(buf);
+ } else {
+ ref_buf->buf = NULL;
+ }
}
set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
@@ -3448,6 +3464,16 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size,
vp9_twopass_postencode_update(cpi);
}
+static void init_ref_frame_bufs(VP9_COMMON *cm) {
+ int i;
+ BufferPool *const pool = cm->buffer_pool;
+ cm->new_fb_idx = INVALID_IDX;
+ for (i = 0; i < REF_FRAMES; ++i) {
+ cm->ref_frame_map[i] = INVALID_IDX;
+ pool->frame_bufs[i].ref_count = 0;
+ }
+}
+
static void check_initial_width(VP9_COMP *cpi,
#if CONFIG_VP9_HIGHBITDEPTH
int use_highbitdepth,
@@ -3468,7 +3494,7 @@ static void check_initial_width(VP9_COMP *cpi,
#endif
alloc_raw_frame_buffers(cpi);
- alloc_ref_frame_buffers(cpi);
+ init_ref_frame_bufs(cm);
alloc_util_frame_buffers(cpi);
init_motion_estimation(cpi); // TODO(agrange) This can be removed.
@@ -3793,8 +3819,14 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
// Find a free buffer for the new frame, releasing the reference previously
// held.
- pool->frame_bufs[cm->new_fb_idx].ref_count--;
+ if (cm->new_fb_idx != INVALID_IDX) {
+ --pool->frame_bufs[cm->new_fb_idx].ref_count;
+ }
cm->new_fb_idx = get_free_fb(cm);
+
+ if (cm->new_fb_idx == INVALID_IDX)
+ return -1;
+
cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
if (!cpi->use_svc && cpi->multi_arf_allowed) {
@@ -3821,7 +3853,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
}
for (i = 0; i < MAX_REF_FRAMES; ++i)
- cpi->scaled_ref_idx[i] = INVALID_REF_BUFFER_IDX;
+ cpi->scaled_ref_idx[i] = INVALID_IDX;
if (oxcf->pass == 1 &&
(!cpi->use_svc || is_two_pass_svc(cpi))) {
@@ -3907,8 +3939,18 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
PSNR_STATS psnr2;
double frame_ssim2 = 0, weight = 0;
#if CONFIG_VP9_POSTPROC
- // TODO(agrange) Add resizing of post-proc buffer in here when the
- // encoder is changed to use on-demand buffer allocation.
+ if (vp9_alloc_frame_buffer(&cm->post_proc_buffer,
+ recon->y_crop_width, recon->y_crop_height,
+ cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ VP9_ENC_BORDER_IN_PIXELS,
+ cm->byte_alignment) < 0) {
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate post processing buffer");
+ }
+
vp9_deblock(cm->frame_to_show, &cm->post_proc_buffer,
cm->lf.filter_level * 10 / 6);
#endif
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 69edfded1..c3679ca0b 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -17,6 +17,7 @@
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx/vp8cx.h"
+#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_ppflags.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_thread_common.h"
@@ -47,7 +48,6 @@ extern "C" {
#endif
#define DEFAULT_GF_INTERVAL 10
-#define INVALID_REF_BUFFER_IDX -1 // Marks an invalid reference buffer id.
typedef struct {
int nmvjointcost[MV_JOINTS];
@@ -517,8 +517,8 @@ static INLINE int frame_is_kf_gf_arf(const VP9_COMP *cpi) {
(cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref);
}
-static INLINE int get_ref_frame_idx(const VP9_COMP *cpi,
- MV_REFERENCE_FRAME ref_frame) {
+static INLINE int get_ref_frame_map_idx(const VP9_COMP *cpi,
+ MV_REFERENCE_FRAME ref_frame) {
if (ref_frame == LAST_FRAME) {
return cpi->lst_fb_idx;
} else if (ref_frame == GOLDEN_FRAME) {
@@ -528,12 +528,19 @@ static INLINE int get_ref_frame_idx(const VP9_COMP *cpi,
}
}
+static INLINE int get_ref_frame_buf_idx(const VP9_COMP *const cpi,
+ int ref_frame) {
+ const VP9_COMMON *const cm = &cpi->common;
+ const int map_idx = get_ref_frame_map_idx(cpi, ref_frame);
+ return (map_idx != INVALID_IDX) ? cm->ref_frame_map[map_idx] : INVALID_IDX;
+}
+
static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
VP9_COMMON *const cm = &cpi->common;
- BufferPool *const pool = cm->buffer_pool;
- return &pool->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]]
- .buf;
+ const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
+ return
+ buf_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[buf_idx].buf : NULL;
}
static INLINE int get_token_alloc(int mb_rows, int mb_cols) {
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index b813e7de0..6c8bbdb6d 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -66,12 +66,6 @@
unsigned int arf_count = 0;
#endif
-static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) {
- YV12_BUFFER_CONFIG temp = *a;
- *a = *b;
- *b = temp;
-}
-
// Resets the first pass file to the given position using a relative seek from
// the current position.
static void reset_fpf_position(TWO_PASS *p,
@@ -465,12 +459,6 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
int i;
int recon_yoffset, recon_uvoffset;
- YV12_BUFFER_CONFIG *const lst_yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
- YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
- YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm);
- int recon_y_stride = lst_yv12->y_stride;
- int recon_uv_stride = lst_yv12->uv_stride;
- int uv_mb_height = 16 >> (lst_yv12->y_height > lst_yv12->uv_height);
int64_t intra_error = 0;
int64_t coded_error = 0;
int64_t sr_coded_error = 0;
@@ -488,11 +476,22 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
MV lastmv = {0, 0};
TWO_PASS *twopass = &cpi->twopass;
const MV zero_mv = {0, 0};
+ int recon_y_stride, recon_uv_stride, uv_mb_height;
+
+ YV12_BUFFER_CONFIG *const lst_yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
+ YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+ YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm);
const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
+
LAYER_CONTEXT *const lc = is_two_pass_svc(cpi) ?
&cpi->svc.layer_context[cpi->svc.spatial_layer_id] : NULL;
double intra_factor;
double brightness_factor;
+ BufferPool *const pool = cm->buffer_pool;
+
+ // First pass code requires valid last and new frame buffers.
+ assert(new_yv12 != NULL);
+ assert((lc != NULL) || frame_is_intra_only(cm) || (lst_yv12 != NULL));
#if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) {
@@ -537,21 +536,14 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
}
if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
- BufferPool *const pool = cm->buffer_pool;
- const int ref_idx =
- cm->ref_frame_map[get_ref_frame_idx(cpi, GOLDEN_FRAME)];
- const int scaled_idx = cpi->scaled_ref_idx[GOLDEN_FRAME - 1];
-
- gld_yv12 = (scaled_idx != ref_idx) ? &pool->frame_bufs[scaled_idx].buf :
- get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+ gld_yv12 = vp9_get_scaled_ref_frame(cpi, GOLDEN_FRAME);
+ if (gld_yv12 == NULL) {
+ gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+ }
} else {
gld_yv12 = NULL;
}
- recon_y_stride = new_yv12->y_stride;
- recon_uv_stride = new_yv12->uv_stride;
- uv_mb_height = 16 >> (new_yv12->y_height > new_yv12->uv_height);
-
set_ref_ptrs(cm, xd,
(cpi->ref_frame_flags & VP9_LAST_FLAG) ? LAST_FRAME: NONE,
(cpi->ref_frame_flags & VP9_GOLD_FLAG) ? GOLDEN_FRAME : NONE);
@@ -563,9 +555,12 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
vp9_setup_src_planes(x, cpi->Source, 0, 0);
- vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL);
vp9_setup_dst_planes(xd->plane, new_yv12, 0, 0);
+ if (!frame_is_intra_only(cm)) {
+ vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL);
+ }
+
xd->mi = cm->mi;
xd->mi[0].src_mi = &xd->mi[0];
@@ -585,6 +580,10 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
// Tiling is ignored in the first pass.
vp9_tile_init(&tile, cm, 0, 0);
+ recon_y_stride = new_yv12->y_stride;
+ recon_uv_stride = new_yv12->uv_stride;
+ uv_mb_height = 16 >> (new_yv12->y_height > new_yv12->uv_height);
+
for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
MV best_ref_mv = {0, 0};
@@ -1020,7 +1019,8 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
((twopass->this_frame_stats.intra_error /
DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) {
if (gld_yv12 != NULL) {
- vp8_yv12_copy_frame(lst_yv12, gld_yv12);
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
+ cm->ref_frame_map[cpi->lst_fb_idx]);
}
twopass->sr_update_lag = 1;
} else {
@@ -1032,14 +1032,17 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
if (lc != NULL) {
vp9_update_reference_frames(cpi);
} else {
- // Swap frame pointers so last frame refers to the frame we just compressed.
- swap_yv12(lst_yv12, new_yv12);
+ // The frame we just compressed now becomes the last frame.
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
+ cm->new_fb_idx);
}
// Special case for the first frame. Copy into the GF buffer as a second
// reference.
- if (cm->current_video_frame == 0 && gld_yv12 != NULL && lc == NULL) {
- vp8_yv12_copy_frame(lst_yv12, gld_yv12);
+ if (cm->current_video_frame == 0 && cpi->gld_fb_idx != INVALID_IDX &&
+ lc == NULL) {
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
+ cm->ref_frame_map[cpi->lst_fb_idx]);
}
// Use this to see what the first pass reconstruction looks like.
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 18a8c72c4..b3a8df924 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -376,6 +376,8 @@ void vp9_update_mbgraph_stats(VP9_COMP *cpi) {
int i, n_frames = vp9_lookahead_depth(cpi->lookahead);
YV12_BUFFER_CONFIG *golden_ref = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+ assert(golden_ref != NULL);
+
// we need to look ahead beyond where the ARF transitions into
// being a GF - so exit if we don't look ahead beyond that
if (n_frames <= cpi->rc.frames_till_gf_update_due)
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 159e0fc0c..c49a8bef3 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1714,6 +1714,152 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
return bestsad;
}
+static int vector_match(int16_t *ref, int16_t *src, int bwl) {
+ int best_sad = INT_MAX;
+ int this_sad;
+ int d;
+ int center, offset = 0;
+  int bw = 4 << bwl;  // Redundant variable; to be revisited in experiments.
+ for (d = 0; d <= bw; d += 16) {
+ this_sad = vp9_vector_var(&ref[d], src, bwl);
+ if (this_sad < best_sad) {
+ best_sad = this_sad;
+ offset = d;
+ }
+ }
+ center = offset;
+
+ for (d = -8; d <= 8; d += 16) {
+ int this_pos = offset + d;
+    // Skip candidate offsets outside the valid range [0, bw].
+ if (this_pos < 0 || this_pos > bw)
+ continue;
+ this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
+ if (this_sad < best_sad) {
+ best_sad = this_sad;
+ center = this_pos;
+ }
+ }
+ offset = center;
+
+ for (d = -4; d <= 4; d += 8) {
+ int this_pos = offset + d;
+    // Skip candidate offsets outside the valid range [0, bw].
+ if (this_pos < 0 || this_pos > bw)
+ continue;
+ this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
+ if (this_sad < best_sad) {
+ best_sad = this_sad;
+ center = this_pos;
+ }
+ }
+ offset = center;
+
+ for (d = -2; d <= 2; d += 4) {
+ int this_pos = offset + d;
+    // Skip candidate offsets outside the valid range [0, bw].
+ if (this_pos < 0 || this_pos > bw)
+ continue;
+ this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
+ if (this_sad < best_sad) {
+ best_sad = this_sad;
+ center = this_pos;
+ }
+ }
+ offset = center;
+
+ for (d = -1; d <= 1; d += 2) {
+ int this_pos = offset + d;
+    // Skip candidate offsets outside the valid range [0, bw].
+ if (this_pos < 0 || this_pos > bw)
+ continue;
+ this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
+ if (this_sad < best_sad) {
+ best_sad = this_sad;
+ center = this_pos;
+ }
+ }
+
+ return (center - (bw >> 1));
+}
+
+static const MV search_pos[9] = {
+ {-1, -1}, {-1, 0}, {-1, 1}, {0, -1}, {0, 0}, {0, 1},
+ {1, -1}, {1, 0}, {1, 1},
+};
+
+unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ DECLARE_ALIGNED(16, int16_t, hbuf[128]);
+ DECLARE_ALIGNED(16, int16_t, vbuf[128]);
+ DECLARE_ALIGNED(16, int16_t, src_hbuf[64]);
+ DECLARE_ALIGNED(16, int16_t, src_vbuf[64]);
+ int idx;
+ const int bw = 4 << b_width_log2_lookup[bsize];
+ const int bh = 4 << b_height_log2_lookup[bsize];
+ const int search_width = bw << 1;
+ const int search_height = bh << 1;
+ const int src_stride = x->plane[0].src.stride;
+ const int ref_stride = xd->plane[0].pre[0].stride;
+ uint8_t const *ref_buf, *src_buf;
+ MV *tmp_mv = &xd->mi[0].src_mi->mbmi.mv[0].as_mv;
+ int best_sad;
+ MV this_mv;
+
+ // Set up prediction 1-D reference set
+ ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
+ for (idx = 0; idx < search_width; idx += 16) {
+ vp9_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
+ ref_buf += 16;
+ }
+
+ ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
+ for (idx = 0; idx < search_height; ++idx) {
+ vbuf[idx] = vp9_int_pro_col(ref_buf, bw);
+ ref_buf += ref_stride;
+ }
+
+ // Set up src 1-D reference set
+ for (idx = 0; idx < bw; idx += 16) {
+ src_buf = x->plane[0].src.buf + idx;
+ vp9_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
+ }
+
+ src_buf = x->plane[0].src.buf;
+ for (idx = 0; idx < bh; ++idx) {
+ src_vbuf[idx] = vp9_int_pro_col(src_buf, bw);
+ src_buf += src_stride;
+ }
+
+ // Find the best match per 1-D search
+ tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]);
+ tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]);
+
+ best_sad = INT_MAX;
+ this_mv = *tmp_mv;
+ for (idx = 0; idx < 9; ++idx) {
+ int this_sad;
+ src_buf = x->plane[0].src.buf;
+ ref_buf = xd->plane[0].pre[0].buf +
+ (search_pos[idx].row + this_mv.row) * ref_stride +
+ (search_pos[idx].col + this_mv.col);
+
+ this_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride,
+ ref_buf, ref_stride);
+ if (this_sad < best_sad) {
+ best_sad = this_sad;
+ tmp_mv->row = search_pos[idx].row + this_mv.row;
+ tmp_mv->col = search_pos[idx].col + this_mv.col;
+ }
+ }
+ tmp_mv->row *= 8;
+ tmp_mv->col *= 8;
+ x->pred_mv[LAST_FRAME] = *tmp_mv;
+
+ return best_sad;
+}
+
/* do_refine: If last step (1-away) of n-step search doesn't pick the center
point as the best match, we will do a final 1-away diamond
refining search */
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index bba48fd6e..dd8a46079 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -72,7 +72,7 @@ int vp9_refining_search_sad(const struct macroblock *x,
const struct vp9_variance_vtable *fn_ptr,
const struct mv *center_mv);
-// Runs sequence of diamond searches in smaller steps for RD
+// Runs sequence of diamond searches in smaller steps for RD.
int vp9_full_pixel_diamond(const struct VP9_COMP *cpi, MACROBLOCK *x,
MV *mvp_full, int step_param,
int sadpb, int further_steps, int do_refine,
@@ -80,6 +80,11 @@ int vp9_full_pixel_diamond(const struct VP9_COMP *cpi, MACROBLOCK *x,
const vp9_variance_fn_ptr_t *fn_ptr,
const MV *ref_mv, MV *dst_mv);
+// Perform integral projection based motion estimation.
+unsigned int vp9_int_pro_motion_estimation(const struct VP9_COMP *cpi,
+ MACROBLOCK *x,
+ BLOCK_SIZE bsize);
+
typedef int (integer_mv_pattern_search_fn) (
const MACROBLOCK *x,
MV *ref_mv,
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 1e88201be..2f9cccb99 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -689,12 +689,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
#endif
for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
+ const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
+
x->pred_mv_sad[ref_frame] = INT_MAX;
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
frame_mv[ZEROMV][ref_frame].as_int = 0;
- if (cpi->ref_frame_flags & flag_list[ref_frame]) {
- const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
+ if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
int_mv *const candidates = mbmi->ref_mvs[ref_frame];
const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
@@ -1076,11 +1077,11 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
ctx->pred_pixel_ready = 0;
for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
+ const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
int_mv dummy_mv[2];
x->pred_mv_sad[ref_frame] = INT_MAX;
- if (cpi->ref_frame_flags & flag_list[ref_frame]) {
- const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
+ if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
int_mv *const candidates = mbmi->ref_mvs[ref_frame];
const struct scale_factors *const sf =
&cm->frame_refs[ref_frame - 1].sf;
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 7143987d4..2523d1ea3 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -19,7 +19,8 @@
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_rd.h"
-void vp9_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
+void vp9_quantize_dc(const tran_low_t *coeff_ptr,
+ int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
@@ -29,6 +30,9 @@ void vp9_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int tmp, eob = -1;
+ vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
if (!skip_block) {
tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
tmp = (tmp * quant) >> 16;
@@ -41,12 +45,16 @@ void vp9_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
}
#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
+void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr,
+ int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
int eob = -1;
+ vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
if (!skip_block) {
const int rc = 0;
const int coeff = coeff_ptr[rc];
@@ -69,15 +77,20 @@ void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
+ const int n_coeffs = 1024;
const int rc = 0;
const int coeff = coeff_ptr[rc];
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int tmp, eob = -1;
+ vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
if (!skip_block) {
- tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+ tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
+ INT16_MIN, INT16_MAX);
tmp = (tmp * quant) >> 15;
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
@@ -96,8 +109,12 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr,
uint16_t *eob_ptr) {
+ const int n_coeffs = 1024;
int eob = -1;
+ vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
if (!skip_block) {
const int rc = 0;
const int coeff = coeff_ptr[rc];
@@ -105,8 +122,8 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
const int64_t tmp =
- (clamp(abs_coeff + round_ptr[rc != 0], INT32_MIN, INT32_MAX) *
- quant) >> 15;
+ (clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
+ INT32_MIN, INT32_MAX) * quant) >> 15;
qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
if (tmp)
@@ -521,21 +538,21 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp9_highbd_quantize_b(BLOCK_OFFSET(p->coeff, block),
- 16, x->skip_block,
- p->zbin, p->round, p->quant, p->quant_shift,
- BLOCK_OFFSET(p->qcoeff, block),
- BLOCK_OFFSET(pd->dqcoeff, block),
- pd->dequant, &p->eobs[block],
- scan, iscan);
+ 16, x->skip_block,
+ p->zbin, p->round, p->quant, p->quant_shift,
+ BLOCK_OFFSET(p->qcoeff, block),
+ BLOCK_OFFSET(pd->dqcoeff, block),
+ pd->dequant, &p->eobs[block],
+ scan, iscan);
return;
}
#endif
vp9_quantize_b(BLOCK_OFFSET(p->coeff, block),
- 16, x->skip_block,
- p->zbin, p->round, p->quant, p->quant_shift,
- BLOCK_OFFSET(p->qcoeff, block),
- BLOCK_OFFSET(pd->dqcoeff, block),
- pd->dequant, &p->eobs[block], scan, iscan);
+ 16, x->skip_block,
+ p->zbin, p->round, p->quant, p->quant_shift,
+ BLOCK_OFFSET(p->qcoeff, block),
+ BLOCK_OFFSET(pd->dqcoeff, block),
+ pd->dequant, &p->eobs[block], scan, iscan);
}
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index de2839f5b..55e546944 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -37,7 +37,8 @@ typedef struct {
DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]);
} QUANTS;
-void vp9_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
+void vp9_quantize_dc(const tran_low_t *coeff_ptr,
+ int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr);
@@ -49,7 +50,8 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
const int16_t *scan, const int16_t *iscan);
#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
+void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr,
+ int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr);
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c
index 0b6d11eca..5cc980cf7 100644
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -532,13 +532,14 @@ int16_t* vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
return base + vp9_raster_block_offset(plane_bsize, raster_block, stride);
}
-const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
- int ref_frame) {
+YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
+ int ref_frame) {
const VP9_COMMON *const cm = &cpi->common;
- const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
- return (scaled_idx != ref_idx) ?
- &cm->buffer_pool->frame_bufs[scaled_idx].buf : NULL;
+ const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
+ return
+ (scaled_idx != ref_idx && scaled_idx != INVALID_IDX) ?
+ &cm->buffer_pool->frame_bufs[scaled_idx].buf : NULL;
}
int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h
index 59a87cf98..b18a81659 100644
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -147,8 +147,8 @@ int vp9_raster_block_offset(BLOCK_SIZE plane_bsize,
int16_t* vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
int raster_block, int16_t *base);
-const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const struct VP9_COMP *cpi,
- int ref_frame);
+YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const struct VP9_COMP *cpi,
+ int ref_frame);
void vp9_init_me_luts();
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 53a5dba19..51397a791 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -328,12 +328,12 @@ static const int16_t band_counts[TX_SIZES][8] = {
{ 1, 2, 3, 4, 11, 256 - 21, 0 },
{ 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
-static INLINE int cost_coeffs(MACROBLOCK *x,
- int plane, int block,
- ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
- TX_SIZE tx_size,
- const int16_t *scan, const int16_t *nb,
- int use_fast_coef_costing) {
+static int cost_coeffs(MACROBLOCK *x,
+ int plane, int block,
+ ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
+ TX_SIZE tx_size,
+ const int16_t *scan, const int16_t *nb,
+ int use_fast_coef_costing) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
const struct macroblock_plane *p = &x->plane[plane];
@@ -1316,8 +1316,8 @@ static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i,
for (idy = 0; idy < num_4x4_blocks_high; ++idy)
for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
- vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
- &mic->bmi[i], sizeof(mic->bmi[i]));
+ vpx_memmove(&mic->bmi[i + idy * 2 + idx],
+ &mic->bmi[i], sizeof(mic->bmi[i]));
return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) +
thismvcost;
@@ -2022,6 +2022,8 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame];
const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
+ assert(yv12 != NULL);
+
// TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
// use the UV scaling factors.
vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
@@ -2912,6 +2914,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
+ assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
setup_buffer_inter(cpi, x, tile_info, ref_frame, bsize, mi_row, mi_col,
frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
}
@@ -4238,4 +4241,3 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
store_coding_context(x, ctx, best_ref_index,
best_pred_diff, best_tx_diff, best_filter_diff, 0);
}
-
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 161b5a24d..bf9cad00a 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -91,8 +91,8 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
if (oxcf->ss_enable_auto_arf[layer])
lc->alt_ref_idx = alt_ref_idx++;
else
- lc->alt_ref_idx = -1;
- lc->gold_ref_idx = -1;
+ lc->alt_ref_idx = INVALID_IDX;
+ lc->gold_ref_idx = INVALID_IDX;
}
lrc->buffer_level = oxcf->starting_buffer_level_ms *
diff --git a/vp9/encoder/x86/vp9_avg_intrin_sse2.c b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
index 482fa3da3..f49949940 100644
--- a/vp9/encoder/x86/vp9_avg_intrin_sse2.c
+++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
@@ -90,8 +90,16 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
s0 = _mm_adds_epu16(s0, t0);
s1 = _mm_adds_epu16(s1, t1);
- s0 = _mm_srai_epi16(s0, 5);
- s1 = _mm_srai_epi16(s1, 5);
+ if (height == 64) {
+ s0 = _mm_srai_epi16(s0, 5);
+ s1 = _mm_srai_epi16(s1, 5);
+ } else if (height == 32) {
+ s0 = _mm_srai_epi16(s0, 4);
+ s1 = _mm_srai_epi16(s1, 4);
+ } else {
+ s0 = _mm_srai_epi16(s0, 3);
+ s1 = _mm_srai_epi16(s1, 3);
+ }
_mm_store_si128((__m128i *)hbuf, s0);
hbuf += 8;
@@ -104,6 +112,7 @@ int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {
__m128i s0 = _mm_sad_epu8(src_line, zero);
__m128i s1;
int i;
+ const int norm_factor = 3 + (width >> 5);
for (i = 16; i < width; i += 16) {
ref += 16;
@@ -115,7 +124,7 @@ int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {
s1 = _mm_srli_si128(s0, 8);
s0 = _mm_adds_epu16(s0, s1);
- return (_mm_extract_epi16(s0, 0)) >> 5;
+ return _mm_extract_epi16(s0, 0) >> norm_factor;
}
int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src,
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 9bb880c7c..9e4c1a5c4 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -278,7 +278,7 @@ static void init_buffer_callbacks(vpx_codec_alg_priv_t *ctx) {
VP9_COMMON *const cm = &frame_worker_data->pbi->common;
BufferPool *const pool = cm->buffer_pool;
- cm->new_fb_idx = -1;
+ cm->new_fb_idx = INVALID_IDX;
cm->byte_alignment = ctx->byte_alignment;
if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) {
@@ -500,7 +500,6 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
check_resync(ctx, frame_worker_data->pbi);
} else {
- const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
VP9Worker *const worker = &ctx->frame_workers[ctx->next_submit_worker_id];
FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
// Copy context from last worker thread to next worker thread.