Diffstat (limited to 'vp8')
-rw-r--r--  vp8/encoder/block.h        1
-rw-r--r--  vp8/encoder/encodeframe.c 82
-rw-r--r--  vp8/encoder/encodeintra.c  2
-rw-r--r--  vp8/encoder/encodemb.c     2
-rw-r--r--  vp8/encoder/ethreading.c  12
-rw-r--r--  vp8/encoder/onyx_if.c      2
-rw-r--r--  vp8/encoder/onyx_int.h     3
7 files changed, 95 insertions(+), 9 deletions(-)
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index e94e54976..90b42c35c 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -81,6 +81,7 @@ typedef struct
int errthresh;
int rddiv;
int rdmult;
+ INT64 activity_sum;
int mvcosts[2][MVvals+1];
int *mvcost[2];
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 2aac20b31..2002735d2 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -375,6 +375,62 @@ void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
}
+/* activity_avg must be positive, or flat regions could get a zero weight
+ * (infinite lambda), which confounds analysis.
+ * This also avoids the need for divide by zero checks in
+ * vp8_activity_masking().
+ */
+#define VP8_ACTIVITY_AVG_MIN (64)
+
+/* This is used as a reference when computing the source variance for the
+ * purposes of activity masking.
+ * Eventually this should be replaced by custom no-reference routines,
+ * which will be faster.
+ */
+static const unsigned char VP8_VAR_OFFS[16]=
+{
+ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
+};
+
+unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
+{
+ unsigned int act;
+ unsigned int sse;
+ int sum;
+ unsigned int a;
+ unsigned int b;
+ unsigned int d;
+ /* TODO: This could also be done over smaller areas (8x8), but that would
+ * require extensive changes elsewhere, as lambda is assumed to be fixed
+ * over an entire MB in most of the code.
+ * Another option is to compute four 8x8 variances, and pick a single
+ * lambda using a non-linear combination (e.g., the smallest, or second
+ * smallest, etc.).
+ */
+ VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
+ x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
+ /* This requires a full 32 bits of precision. */
+ act = (sse<<8) - sum*sum;
+ /* Drop 4 to give us some headroom to work with. */
+ act = (act + 8) >> 4;
+ /* If the region is flat, lower the activity some more. */
+ if (act < 8<<12)
+ act = act < 5<<12 ? act : 5<<12;
+ /* TODO: For non-flat regions, edge regions should receive less masking
+ * than textured regions, but identifying edge regions quickly and
+ * reliably enough is still a subject of experimentation.
+ * This will be most noticeable near edges with a complex shape (e.g.,
+ * text), but the 4x4 transform size should make this less of a problem
+ * than it would be for an 8x8 transform.
+ */
+ /* Apply the masking to the RD multiplier. */
+ a = act + 4*cpi->activity_avg;
+ b = 4*act + cpi->activity_avg;
+ x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
+ return act;
+}
+
+
static
void encode_mb_row(VP8_COMP *cpi,
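The vp8_activity_masking() routine added above admits a simple per-pixel reading: assuming get16x16var() returns the sum of squares (sse) and sum (sum) of the 256 luma differences against the flat 128 reference (which the call here implies), (sse<<8) - sum*sum is 256 times the sum of squared deviations about the block mean, and the >>4 leaves act = 4096 * (per-pixel variance). The n<<12 constants are therefore per-pixel variance thresholds. The standalone sketch below (illustration only, not part of the patch; the baseline rdmult value is made up) shows how the (4*act + avg)/(act + 4*avg) ratio behaves: it equals 1 when act == avg and stays within (1/4, 4), so a macroblock's lambda never strays more than a factor of four from the frame average.

    /* Sketch: behavior of the activity-masking rdmult scale factor.
     * Mirrors the arithmetic in vp8_activity_masking(); example inputs
     * are hypothetical.
     */
    #include <stdio.h>

    typedef long long INT64;

    static unsigned int scale_rdmult(unsigned int rdmult, unsigned int act,
                                     unsigned int avg)
    {
        unsigned int a = act + 4 * avg;
        unsigned int b = 4 * act + avg;
        /* Rounded division; b/a is 1 at act == avg and bounded to (1/4, 4). */
        return (unsigned int)(((INT64)rdmult * b + (a >> 1)) / a);
    }

    int main(void)
    {
        unsigned int avg = 90 << 12;  /* the default activity_avg */
        unsigned int rd = 1000;       /* arbitrary baseline lambda */
        printf("%u\n", scale_rdmult(rd, 5 << 12, avg));   /* flat: ~301  */
        printf("%u\n", scale_rdmult(rd, avg, avg));       /* avg:  1000  */
        printf("%u\n", scale_rdmult(rd, 900 << 12, avg)); /* busy: ~2929 */
        return 0;
    }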
@@ -386,6 +442,7 @@ void encode_mb_row(VP8_COMP *cpi,
int *segment_counts,
int *totalrate)
{
+ INT64 activity_sum = 0;
int i;
int recon_yoffset, recon_uvoffset;
int mb_col;
@@ -437,6 +494,11 @@ void encode_mb_row(VP8_COMP *cpi,
xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
xd->left_available = (mb_col != 0);
+ x->rddiv = cpi->RDDIV;
+ x->rdmult = cpi->RDMULT;
+
+ activity_sum += vp8_activity_masking(cpi, x);
+
// Is segmentation enabled
// MB level adjustment to quantizer
if (xd->segmentation_enabled)
@@ -543,6 +605,7 @@ void encode_mb_row(VP8_COMP *cpi,
// this is to account for the border
xd->mode_info_context++;
x->partition_info++;
+ x->activity_sum += activity_sum;
}
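Distilled, the per-macroblock pattern in encode_mb_row() is now as below (a control-flow sketch, with the mode decision and encode calls elided): the baseline rddiv/rdmult are restored for every macroblock because vp8_activity_masking() scales x->rdmult in place, and the returned activity is summed so the frame average can be updated afterwards.

    /* Per-row loop shape after this patch (sketch; fields as in the patch). */
    INT64 activity_sum = 0;
    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
    {
        x->rddiv = cpi->RDDIV;   /* reset to the frame baseline, since */
        x->rdmult = cpi->RDMULT; /* masking rescales rdmult in place   */
        activity_sum += vp8_activity_masking(cpi, x);
        /* ... pick a mode and encode with the masked rdmult ... */
    }
    x->activity_sum += activity_sum; /* row total feeds the frame average */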
@@ -659,8 +722,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
vp8_setup_block_ptrs(x);
- x->rddiv = cpi->RDDIV;
- x->rdmult = cpi->RDMULT;
+ x->activity_sum = 0;
#if 0
// Experimental rd code
@@ -715,11 +777,12 @@ void vp8_encode_frame(VP8_COMP *cpi)
else
{
#if CONFIG_MULTITHREAD
+ int i;
+
vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count);
for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
{
- int i;
cpi->current_mb_col_main = -1;
for (i = 0; i < cpi->encoding_thread_count; i++)
@@ -797,6 +860,11 @@ void vp8_encode_frame(VP8_COMP *cpi)
totalrate += cpi->mb_row_ei[i].totalrate;
}
+ for (i = 0; i < cpi->encoding_thread_count; i++)
+ {
+ x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
+ }
+
#endif
}
@@ -932,6 +1000,14 @@ void vp8_encode_frame(VP8_COMP *cpi)
cpi->last_frame_distortion = cpi->frame_distortion;
#endif
+ /* Update the average activity for the next frame.
+ * This is feed-forward for now; it could also be saved in two-pass, or
+ * done during lookahead when that is eventually added.
+ */
+ cpi->activity_avg = (unsigned int)(x->activity_sum/cpi->common.MBs);
+ if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
+ cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
+
}
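Taken together, the frame-level bookkeeping reduces to the sketch below: per-row sums (and, in the multithreaded path, the per-thread MACROBLOCK copies) are folded into x->activity_sum, and the mean over cpi->common.MBs seeds the next frame, floored at VP8_ACTIVITY_AVG_MIN so that a flat frame cannot push lambda toward infinity.

    /* End-of-frame feed-forward update (sketch of the lines added above). */
    #if CONFIG_MULTITHREAD
    for (i = 0; i < cpi->encoding_thread_count; i++)
        x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
    #endif
    cpi->activity_avg = (unsigned int)(x->activity_sum / cpi->common.MBs);
    if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
        cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;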
void vp8_setup_block_ptrs(MACROBLOCK *x)
{
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 1c72b90f1..0f327cec0 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -105,7 +105,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
#if !(CONFIG_REALTIME_ONLY)
#if 1
- if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
+ if (x->optimize)
vp8_optimize_mby(x, rtcd);
#endif
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index e9753ac48..f7faaa14a 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -635,7 +635,7 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
vp8_quantize_mb(x);
#if !(CONFIG_REALTIME_ONLY)
- if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
+ if (x->optimize)
vp8_optimize_mb(x, rtcd);
#endif
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index 962e74174..3646375ed 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -61,6 +61,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
volatile int *last_row_current_mb_col;
+ INT64 activity_sum = 0;
if (ithread > 0)
last_row_current_mb_col = &cpi->mb_row_ei[ithread-1].current_mb_col;
@@ -111,6 +112,11 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
xd->left_available = (mb_col != 0);
+ x->rddiv = cpi->RDDIV;
+ x->rdmult = cpi->RDMULT;
+
+ activity_sum += vp8_activity_masking(cpi, x);
+
// Is segmentation enabled
// MB level adjustment to quantizer
if (xd->segmentation_enabled)
@@ -197,6 +203,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
// this is to account for the border
xd->mode_info_context++;
x->partition_info++;
+ x->activity_sum += activity_sum;
x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
@@ -240,8 +247,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
z->sadperbit16 = x->sadperbit16;
z->sadperbit4 = x->sadperbit4;
z->errthresh = x->errthresh;
- z->rddiv = x->rddiv;
- z->rdmult = x->rdmult;
/*
z->mv_col_min = x->mv_col_min;
@@ -392,8 +397,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
vp8_setup_block_ptrs(mb);
- mb->rddiv = cpi->RDDIV;
- mb->rdmult = cpi->RDMULT;
+ mb->activity_sum = 0;
mbd->left_context = &cm->left_context;
mb->mvc = cm->fc.mvc;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index ea72de2de..46571156b 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2205,6 +2205,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
init_context_counters();
#endif
+ /* Initialize the feed-forward activity masking. */
+ cpi->activity_avg = 90<<12;
cpi->frames_since_key = 8; // Give a sensible default for the first frame.
cpi->key_frame_frequency = cpi->oxcf.key_freq;
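With act scaled so that one unit of per-pixel variance equals 1<<12 (the reading derived after vp8_activity_masking() above), the constants decode as follows; this is an interpretation, not something the patch states:

    /* 90 << 12 = 368640 : seed average = per-pixel variance ~90 (std ~9.5) */
    /*  5 << 12 =  20480 : flat-region clamp = per-pixel variance 5         */
    /* VP8_ACTIVITY_AVG_MIN = 64 : variance ~0.016, i.e. essentially flat   */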
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 0eaba0017..1471c1dd0 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -321,6 +321,7 @@ typedef struct
int mvcostmultiplier;
int subseqblockweight;
int errthresh;
+ unsigned int activity_avg;
int RDMULT;
int RDDIV ;
@@ -676,6 +677,8 @@ void vp8_encode_frame(VP8_COMP *cpi);
void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size);
+unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x);
+
int rd_cost_intra_mb(MACROBLOCKD *x);
void vp8_tokenize_mb(VP8_COMP *, MACROBLOCKD *, TOKENEXTRA **);