summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--test/resize_test.cc52
-rw-r--r--vp10/encoder/rdopt.c10
-rw-r--r--vp8/common/threading.h7
-rw-r--r--vp9/encoder/vp9_aq_cyclicrefresh.c1
-rw-r--r--vp9/encoder/vp9_block.h6
-rw-r--r--vp9/encoder/vp9_denoiser.c2
-rw-r--r--vp9/encoder/vp9_encoder.c21
-rw-r--r--vp9/encoder/vp9_mcomp.c33
-rw-r--r--vp9/encoder/vp9_pickmode.c3
-rw-r--r--vp9/encoder/vp9_quantize.c3
-rw-r--r--vp9/encoder/vp9_rd.c4
-rw-r--r--vp9/encoder/vp9_rd.h6
-rw-r--r--vp9/encoder/vp9_rdopt.c35
-rw-r--r--vp9/encoder/x86/vp9_diamond_search_sad_avx.c4
-rw-r--r--vp9/vp9cx.mk2
15 files changed, 139 insertions, 50 deletions
diff --git a/test/resize_test.cc b/test/resize_test.cc
index bc91fe226..c5f05f310 100644
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -94,13 +94,53 @@ unsigned int ScaleForFrameNumber(unsigned int frame, unsigned int val) {
if (frame < 10)
return val;
if (frame < 20)
- return val / 2;
+ return val * 3 / 4;
if (frame < 30)
- return val * 2 / 3;
+ return val / 2;
if (frame < 40)
- return val / 4;
+ return val;
if (frame < 50)
- return val * 7 / 8;
+ return val * 3 / 4;
+ if (frame < 60)
+ return val / 2;
+ if (frame < 70)
+ return val * 3 / 4;
+ if (frame < 80)
+ return val;
+ if (frame < 90)
+ return val * 3 / 4;
+ if (frame < 100)
+ return val / 2;
+ if (frame < 110)
+ return val * 3 / 4;
+ if (frame < 120)
+ return val;
+ if (frame < 130)
+ return val * 3 / 4;
+ if (frame < 140)
+ return val / 2;
+ if (frame < 150)
+ return val * 3 / 4;
+ if (frame < 160)
+ return val;
+ if (frame < 170)
+ return val / 2;
+ if (frame < 180)
+ return val * 3 / 4;
+ if (frame < 190)
+ return val;
+ if (frame < 200)
+ return val * 3 / 4;
+ if (frame < 210)
+ return val / 2;
+ if (frame < 220)
+ return val * 3 / 4;
+ if (frame < 230)
+ return val;
+ if (frame < 240)
+ return val / 2;
+ if (frame < 250)
+ return val * 3 / 4;
return val;
}
@@ -108,7 +148,7 @@ class ResizingVideoSource : public ::libvpx_test::DummyVideoSource {
public:
ResizingVideoSource() {
SetSize(kInitialWidth, kInitialHeight);
- limit_ = 60;
+ limit_ = 300;
}
virtual ~ResizingVideoSource() {}
@@ -347,6 +387,8 @@ class ResizeRealtimeTest : public ::libvpx_test::EncoderTest,
TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
ResizingVideoSource video;
DefaultConfig();
+ // Disable internal resize for this test.
+ cfg_.rc_resize_allowed = 0;
change_bitrate_ = false;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index b1077cb21..c62da964a 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -1073,6 +1073,12 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP10_COMP *cpi, MACROBLOCK *mb,
memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
+ // TODO(any): Add search of the tx_type to improve rd performance at the
+ // expense of speed.
+ mic->mbmi.tx_type = DCT_DCT;
+
+ // Later we can add search of the tx_type to improve results.
+ // For now just set it to DCT_DCT
// Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
@@ -3940,6 +3946,10 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi,
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
filter_cache[i] = INT64_MAX;
+ // TODO(any): Add search of the tx_type to improve rd performance at the
+ // expense of speed.
+ mbmi->tx_type = DCT_DCT;
+
if (cm->interp_filter != BILINEAR) {
tmp_best_filter = EIGHTTAP;
if (x->source_variance < sf->disable_filter_search_var_thresh) {
diff --git a/vp8/common/threading.h b/vp8/common/threading.h
index a433d03a0..c00e517a7 100644
--- a/vp8/common/threading.h
+++ b/vp8/common/threading.h
@@ -12,6 +12,7 @@
#ifndef VP8_COMMON_THREADING_H_
#define VP8_COMMON_THREADING_H_
+#include "./vpx_config.h"
#ifdef __cplusplus
extern "C" {
@@ -20,7 +21,7 @@ extern "C" {
#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD
/* Thread management macros */
-#ifdef _WIN32
+#if defined(_WIN32) && !HAVE_PTHREAD_H
/* Win32 */
#include <process.h>
#include <windows.h>
@@ -77,8 +78,8 @@ extern "C" {
#define ts_key_create(ts_key, destructor) pthread_key_create (&(ts_key), destructor);
#endif
-/* Syncrhronization macros: Win32 and Pthreads */
-#ifdef _WIN32
+/* Synchronization macros: Win32 and Pthreads */
+#if defined(_WIN32) && !HAVE_PTHREAD_H
#define sem_t HANDLE
#define pause(voidpara) __asm PAUSE
#define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateSemaphore(NULL,0,32768,NULL))==NULL)
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index a2e391841..b7cfdf6bf 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -608,4 +608,5 @@ void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) {
memset(cr->consec_zero_mv, 0, cm->mi_rows * cm->mi_cols);
cr->sb_index = 0;
cpi->refresh_golden_frame = 1;
+ cpi->refresh_alt_ref_frame = 1;
}
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 3eaa9deb8..147743e8d 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -65,8 +65,14 @@ struct macroblock {
int skip_optimize;
int q_index;
+ // The equivalent error at the current rdmult of one whole bit (not one
+ // bitcost unit).
int errorperbit;
+ // The equivalent SAD error of one (whole) bit at the current quantizer
+ // for large blocks.
int sadperbit16;
+ // The equivalent SAD error of one (whole) bit at the current quantizer
+ // for sub-8x8 blocks.
int sadperbit4;
int rddiv;
int rdmult;
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c
index 99118f5df..e419cffd8 100644
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -332,7 +332,7 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
struct buf_2d src = mb->plane[0].src;
int is_skin = 0;
- if (bs <= BLOCK_16X16 && denoiser->denoising_level >= kDenLow) {
+ if (bs <= BLOCK_32X32 && denoiser->denoising_level >= kDenLow) {
is_skin = vp9_compute_skin_block(mb->plane[0].src.buf,
mb->plane[1].src.buf,
mb->plane[2].src.buf,
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 8a46738cd..bd9813a77 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1540,6 +1540,10 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
}
update_frame_size(cpi);
+ if ((last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) &&
+ cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+ vp9_cyclic_refresh_reset_resize(cpi);
+
if ((cpi->svc.number_temporal_layers > 1 &&
cpi->oxcf.rc_mode == VPX_CBR) ||
((cpi->svc.number_temporal_layers > 1 ||
@@ -2971,8 +2975,19 @@ void vp9_scale_references(VP9_COMP *cpi) {
}
#endif // CONFIG_VP9_HIGHBITDEPTH
} else {
- const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
- RefCntBuffer *const buf = &pool->frame_bufs[buf_idx];
+ int buf_idx;
+ RefCntBuffer *buf = NULL;
+ if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
+ // Check for release of scaled reference.
+ buf_idx = cpi->scaled_ref_idx[ref_frame - 1];
+ buf = (buf_idx != INVALID_IDX) ? &pool->frame_bufs[buf_idx] : NULL;
+ if (buf != NULL) {
+ --buf->ref_count;
+ cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
+ }
+ }
+ buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
+ buf = &pool->frame_bufs[buf_idx];
buf->buf.y_crop_width = ref->y_crop_width;
buf->buf.y_crop_height = ref->y_crop_height;
cpi->scaled_ref_idx[ref_frame - 1] = buf_idx;
@@ -4141,7 +4156,7 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags,
const int subsampling_x = sd->subsampling_x;
const int subsampling_y = sd->subsampling_y;
#if CONFIG_VP9_HIGHBITDEPTH
- const int use_highbitdepth = sd->flags & YV12_FLAG_HIGHBITDEPTH;
+ const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
#else
check_initial_width(cpi, subsampling_x, subsampling_y);
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 607941cfa..8b7825e7b 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -80,27 +80,29 @@ int vp9_mv_bit_cost(const MV *mv, const MV *ref,
return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
}
-static int mv_err_cost(const MV *mv, const MV *ref,
- const int *mvjcost, int *mvcost[2],
- int error_per_bit) {
+#define PIXEL_TRANSFORM_ERROR_SCALE 4
+static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
+ int *mvcost[2], int error_per_bit) {
if (mvcost) {
- const MV diff = { mv->row - ref->row,
- mv->col - ref->col };
- // TODO(aconverse): See if this shift needs to be tied to
- // VP9_PROB_COST_SHIFT.
- return ROUND_POWER_OF_TWO((unsigned)mv_cost(&diff, mvjcost, mvcost) *
- error_per_bit, 13);
+ const MV diff = {mv->row - ref->row, mv->col - ref->col};
+ // This product sits at a 32-bit ceiling right now and any additional
+ // accuracy in either bit cost or error cost will cause it to overflow.
+ return ROUND_POWER_OF_TWO(
+ (unsigned)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
+ RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT +
+ PIXEL_TRANSFORM_ERROR_SCALE);
}
return 0;
}
static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
- int error_per_bit) {
+ int sad_per_bit) {
const MV diff = { mv->row - ref->row,
mv->col - ref->col };
- // TODO(aconverse): See if this shift needs to be tied to VP9_PROB_COST_SHIFT.
- return ROUND_POWER_OF_TWO((unsigned)mv_cost(&diff, x->nmvjointsadcost,
- x->nmvsadcost) * error_per_bit, 8);
+ return ROUND_POWER_OF_TWO(
+ (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) *
+ sad_per_bit,
+ VP9_PROB_COST_SHIFT);
}
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
@@ -152,12 +154,13 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
* could reduce the area.
*/
-/* estimated cost of a motion vector (r,c) */
+/* Estimated (square) error cost of a motion vector (r,c). The 14 scale comes
+ * from the same math as in mv_err_cost(). */
#define MVC(r, c) \
(mvcost ? \
((unsigned)(mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
- error_per_bit + 4096) >> 13 : 0)
+ error_per_bit + 8192) >> 14 : 0)
// convert motion vector component to offset for sv[a]f calc
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 829066c9f..d861f8096 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -949,7 +949,8 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
// TODO(jingning): Skip is signalled per prediciton block not per tx block.
rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), is_skippable);
} else {
- unsigned int var, sse;
+ unsigned int var = 0;
+ unsigned int sse = 0;
model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &rate, &dist, &var, &sse,
plane, plane);
}
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 980a49f0a..91f877ed7 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -342,8 +342,7 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) {
x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
x->q_index = qindex;
- x->errorperbit = rdmult >> 6;
- x->errorperbit += (x->errorperbit == 0);
+ set_error_per_bit(x, rdmult);
vp9_initialize_me_consts(cpi, x, x->q_index);
}
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c
index a8a939ee4..fc32d1911 100644
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -41,7 +41,6 @@
#include "vp9/encoder/vp9_tokenize.h"
#define RD_THRESH_POW 1.25
-#define RD_MULT_EPB_RATIO 64
// Factor to weigh the rate for switchable interp filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1
@@ -279,8 +278,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
rd->RDDIV = RDDIV_BITS; // In bits (to multiply D by 128).
rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
- x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
- x->errorperbit += (x->errorperbit == 0);
+ set_error_per_bit(x, rd->RDMULT);
x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
cm->frame_type != KEY_FRAME) ? 0 : 1;
diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h
index a92b14edf..9b8e2732c 100644
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -24,6 +24,7 @@ extern "C" {
#endif
#define RDDIV_BITS 7
+#define RD_EPB_SHIFT 6
#define RDCOST(RM, DM, R, D) \
(ROUND_POWER_OF_TWO(((int64_t)R) * (RM), VP9_PROB_COST_SHIFT) + (D << DM))
@@ -168,6 +169,11 @@ static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
}
+static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
+ x->errorperbit = rdmult >> RD_EPB_SHIFT;
+ x->errorperbit += (x->errorperbit == 0);
+}
+
void vp9_mv_pred(struct VP9_COMP *cpi, MACROBLOCK *x,
uint8_t *ref_y_buffer, int ref_y_stride,
int ref_frame, BLOCK_SIZE block_size);
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index f00a58ce2..1480ea418 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -3355,24 +3355,25 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
}
if (!disable_skip) {
- vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
+ const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
+ const int skip_cost0 = vp9_cost_bit(skip_prob, 0);
+ const int skip_cost1 = vp9_cost_bit(skip_prob, 1);
+
if (skippable) {
// Back out the coefficient coding costs
rate2 -= (rate_y + rate_uv);
// Cost the skip mb case
- rate2 += vp9_cost_bit(skip_prob, 1);
+ rate2 += skip_cost1;
} else if (ref_frame != INTRA_FRAME && !xd->lossless) {
if (RDCOST(x->rdmult, x->rddiv,
- rate_y + rate_uv + vp9_cost_bit(skip_prob, 0),
- distortion2) <
- RDCOST(x->rdmult, x->rddiv,
- vp9_cost_bit(skip_prob, 1), total_sse)) {
+ rate_y + rate_uv + skip_cost0, distortion2) <
+ RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) {
// Add in the cost of the no skip flag.
- rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
+ rate2 += skip_cost0;
} else {
// FIXME(rbultje) make this work for splitmv also
- rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
+ rate2 += skip_cost1;
distortion2 = total_sse;
assert(total_sse >= 0);
rate2 -= (rate_y + rate_uv);
@@ -3380,7 +3381,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
}
} else {
// Add in the cost of the no skip flag.
- rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
+ rate2 += skip_cost0;
}
// Calculate the final RD estimate for this mode.
@@ -4152,17 +4153,21 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
}
if (!disable_skip) {
+ const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
+ const int skip_cost0 = vp9_cost_bit(skip_prob, 0);
+ const int skip_cost1 = vp9_cost_bit(skip_prob, 1);
+
// Skip is never coded at the segment level for sub8x8 blocks and instead
// always coded in the bitstream at the mode info level.
-
if (ref_frame != INTRA_FRAME && !xd->lossless) {
- if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
- RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
+ if (RDCOST(x->rdmult, x->rddiv,
+ rate_y + rate_uv + skip_cost0, distortion2) <
+ RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) {
// Add in the cost of the no skip flag.
- rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
+ rate2 += skip_cost0;
} else {
// FIXME(rbultje) make this work for splitmv also
- rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
+ rate2 += skip_cost1;
distortion2 = total_sse;
assert(total_sse >= 0);
rate2 -= (rate_y + rate_uv);
@@ -4172,7 +4177,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
}
} else {
// Add in the cost of the no skip flag.
- rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
+ rate2 += skip_cost0;
}
// Calculate the final RD estimate for this mode.
diff --git a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
index b475f8db1..0bc417fc1 100644
--- a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
+++ b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
@@ -47,12 +47,12 @@ static INLINE int mv_cost(const int_mv mv,
}
static int mvsad_err_cost(const MACROBLOCK *x, const int_mv mv, const MV *ref,
- int error_per_bit) {
+ int sad_per_bit) {
const int_mv diff = pack_int_mv(mv.as_mv.row - ref->row,
mv.as_mv.col - ref->col);
return ROUND_POWER_OF_TWO((unsigned)mv_cost(diff, x->nmvjointsadcost,
x->nmvsadcost) *
- error_per_bit, 8);
+ sad_per_bit, VP9_PROB_COST_SHIFT);
}
/*****************************************************************************
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 83a91e870..2930c23dd 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -119,7 +119,9 @@ endif
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.c
+ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_frame_scale_ssse3.c
+endif
ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_denoiser_sse2.c