summaryrefslogtreecommitdiff
path: root/vp8/encoder/encodeframe.c
diff options
context:
space:
mode:
authorAttila Nagy <attilanagy@google.com>2012-02-09 12:37:03 +0200
committerYunqing Wang <yunqingwang@google.com>2012-02-29 12:13:37 -0500
commit52cf4dcaea10f97d25d8a3585704a1e47b384751 (patch)
tree6fb5acff2e02f2e153e206284f5075ebce548674 /vp8/encoder/encodeframe.c
parentce328b855f951e5a9fd0d9d92df09d65ea0d8be9 (diff)
downloadlibvpx-52cf4dcaea10f97d25d8a3585704a1e47b384751.tar
libvpx-52cf4dcaea10f97d25d8a3585704a1e47b384751.tar.gz
libvpx-52cf4dcaea10f97d25d8a3585704a1e47b384751.tar.bz2
libvpx-52cf4dcaea10f97d25d8a3585704a1e47b384751.zip
Packing bitstream on-the-fly with delayed context updates
Produce the token partitions on-the-fly, while processing each MB. Context is updated at the beginning of each frame based on the previoud frame's counters. Optimally encoder outputs partitions in separate buffers. For frame based output, partitions are concatenated internally. Limitations: - enabled just in combination with realtime-only mode - number of encoding threads has to be equal or less than the number of token partitions. For this reason, by default the encoder will do 8 token partitions. - vpxenc supports partition output (-P) just in combination with IVF output format (--ivf) Performance: - Realtime encoder can be up to 13% faster (ARM) depending on the number of threads and bitrate settings. Constant gain over the 5-16 speed range. - Token buffer reduced from one frame to 8 MBs Quality: - quality is affected by the delayed context updates. This again dependents on input material, speed and bitrate settings. For VC style input the loss seen is up to 0.2dB. If error-resilient=2 mode is used than the effect of this change is negligible. Example: ./configure --enable-realtime-only --enable-onthefly-bitpacking ./vpxenc --rt --end-usage=1 --fps=30000/1000 -w 640 -h 480 --target-bitrate=1000 --token-parts=3 --static-thresh=2000 --ivf -P -t 4 -o strm.ivf tanya_640x480.yuv Change-Id: I127295cb85b835fc287e1c0201a67e378d025d76
Diffstat (limited to 'vp8/encoder/encodeframe.c')
-rw-r--r--vp8/encoder/encodeframe.c135
1 files changed, 93 insertions, 42 deletions
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 989956507..21757f8f0 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -28,6 +28,9 @@
#include <limits.h>
#include "vp8/common/invtrans.h"
#include "vpx_ports/vpx_timer.h"
+#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
+#include "bitstream.h"
+#endif
extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
extern void vp8_calc_ref_frame_costs(int *ref_frame_cost,
@@ -373,10 +376,17 @@ void encode_mb_row(VP8_COMP *cpi,
int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
int map_index = (mb_row * cpi->common.mb_cols);
+#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
+ const int num_part = (1 << cm->multi_token_partition);
+ TOKENEXTRA * tp_start = cpi->tok;
+ vp8_writer *w;
+#endif
+
#if CONFIG_MULTITHREAD
const int nsync = cpi->mt_sync_range;
- const int rightmost_col = cm->mb_cols - 1;
+ const int rightmost_col = cm->mb_cols + nsync;
volatile const int *last_row_current_mb_col;
+ volatile int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
@@ -384,6 +394,13 @@ void encode_mb_row(VP8_COMP *cpi,
last_row_current_mb_col = &rightmost_col;
#endif
+#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
+ if(num_part > 1)
+ w= &cpi->bc[1 + (mb_row % num_part)];
+ else
+ w = &cpi->bc[1];
+#endif
+
// reset above block coeffs
xd->above_context = cm->above_context;
@@ -411,6 +428,10 @@ void encode_mb_row(VP8_COMP *cpi,
// for each macroblock col in image
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
+
+#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
+ *tp = cpi->tok;
+#endif
// Distance of Mb to the left & right edges, specified in
// 1/8th pel units as they are always compared to values
// that are in 1/8th pel units
@@ -435,12 +456,13 @@ void encode_mb_row(VP8_COMP *cpi,
vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
#if CONFIG_MULTITHREAD
- if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
+ if (cpi->b_multi_threaded != 0)
{
+ *current_mb_col = mb_col - 1; // set previous MB done
+
if ((mb_col & (nsync - 1)) == 0)
{
- while (mb_col > (*last_row_current_mb_col - nsync)
- && (*last_row_current_mb_col) != (cm->mb_cols - 1))
+ while (mb_col > (*last_row_current_mb_col - nsync))
{
x86_pause_hint();
thread_sleep(0);
@@ -495,13 +517,13 @@ void encode_mb_row(VP8_COMP *cpi,
#endif
- // Count of last ref frame 0,0 useage
+ // Count of last ref frame 0,0 usage
if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
cpi->inter_zz_count ++;
// Special case code for cyclic refresh
// If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode
- // during vp8cx_encode_inter_macroblock()) back into the global sgmentation map
+ // during vp8cx_encode_inter_macroblock()) back into the global segmentation map
if ((cpi->current_layer == 0) &&
(cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled))
{
@@ -525,7 +547,14 @@ void encode_mb_row(VP8_COMP *cpi,
cpi->tplist[mb_row].stop = *tp;
- // Increment pointer into gf useage flags structure.
+#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
+ /* pack tokens for this MB */
+ {
+ int tok_count = *tp - tp_start;
+ pack_tokens(w, tp_start, tok_count);
+ }
+#endif
+ // Increment pointer into gf usage flags structure.
x->gf_active_ptr++;
// Increment the activity mask pointers.
@@ -539,39 +568,29 @@ void encode_mb_row(VP8_COMP *cpi,
recon_yoffset += 16;
recon_uvoffset += 8;
- // Keep track of segment useage
+ // Keep track of segment usage
segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
// skip to next mb
xd->mode_info_context++;
x->partition_info++;
-
xd->above_context++;
-#if CONFIG_MULTITHREAD
- if (cpi->b_multi_threaded != 0)
- {
- cpi->mt_current_mb_col[mb_row] = mb_col;
- }
-#endif
}
//extend the recon for intra prediction
- vp8_extend_mb_row(
- &cm->yv12_fb[dst_fb_idx],
- xd->dst.y_buffer + 16,
- xd->dst.u_buffer + 8,
- xd->dst.v_buffer + 8);
+ vp8_extend_mb_row( &cm->yv12_fb[dst_fb_idx],
+ xd->dst.y_buffer + 16,
+ xd->dst.u_buffer + 8,
+ xd->dst.v_buffer + 8);
+
+#if CONFIG_MULTITHREAD
+ if (cpi->b_multi_threaded != 0)
+ *current_mb_col = rightmost_col;
+#endif
// this is to account for the border
xd->mode_info_context++;
x->partition_info++;
-
-#if CONFIG_MULTITHREAD
- if ((cpi->b_multi_threaded != 0) && (mb_row == cm->mb_rows - 1))
- {
- sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */
- }
-#endif
}
void init_encode_frame_mb_context(VP8_COMP *cpi)
@@ -599,7 +618,7 @@ void init_encode_frame_mb_context(VP8_COMP *cpi)
if (cm->frame_type == KEY_FRAME)
vp8_init_mbmode_probs(cm);
- // Copy data over into macro block data sturctures.
+ // Copy data over into macro block data structures.
x->src = * cpi->Source;
xd->pre = cm->yv12_fb[cm->lst_fb_idx];
xd->dst = cm->yv12_fb[cm->new_fb_idx];
@@ -656,10 +675,13 @@ void vp8_encode_frame(VP8_COMP *cpi)
MACROBLOCK *const x = & cpi->mb;
VP8_COMMON *const cm = & cpi->common;
MACROBLOCKD *const xd = & x->e_mbd;
-
TOKENEXTRA *tp = cpi->tok;
int segment_counts[MAX_MB_SEGMENTS];
int totalrate;
+#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
+ BOOL_CODER * bc = &cpi->bc[1]; // bc[0] is for control partition
+ const int num_part = (1 << cm->multi_token_partition);
+#endif
vpx_memset(segment_counts, 0, sizeof(segment_counts));
totalrate = 0;
@@ -694,6 +716,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
cpi->prediction_error = 0;
cpi->intra_error = 0;
cpi->skip_true_count = 0;
+ cpi->tok_count = 0;
#if 0
// Experimental code
@@ -704,6 +727,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
xd->mode_info_context = cm->mi;
vp8_zero(cpi->MVcount);
+
vp8_zero(cpi->coef_counts);
vp8cx_frame_init_quantizer(cpi);
@@ -722,9 +746,22 @@ void vp8_encode_frame(VP8_COMP *cpi)
build_activity_map(cpi);
}
- // re-initencode frame context.
+ // re-init encode frame context.
init_encode_frame_mb_context(cpi);
+#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
+ {
+ int i;
+ for(i = 0; i < num_part; i++)
+ {
+ vp8_start_encode(&bc[i], cpi->partition_d[i + 1],
+ cpi->partition_d_end[i + 1]);
+ bc[i].error = &cm->error;
+ }
+ }
+
+#endif
+
{
struct vpx_usec_timer emr_timer;
vpx_usec_timer_start(&emr_timer);
@@ -748,7 +785,11 @@ void vp8_encode_frame(VP8_COMP *cpi)
{
vp8_zero(cm->left_context)
+#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
+ tp = cpi->tok;
+#else
tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);
+#endif
encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
@@ -761,12 +802,14 @@ void vp8_encode_frame(VP8_COMP *cpi)
x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;
x->gf_active_ptr += cm->mb_cols * cpi->encoding_thread_count;
+ if(mb_row == cm->mb_rows - 1)
+ {
+ sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */
+ }
}
sem_wait(&cpi->h_event_end_encoding); /* wait for other threads to finish */
- cpi->tok_count = 0;
-
for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
{
cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
@@ -799,9 +842,12 @@ void vp8_encode_frame(VP8_COMP *cpi)
// for each macroblock row in image
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
{
-
vp8_zero(cm->left_context)
+#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
+ tp = cpi->tok;
+#endif
+
encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
// adjust to the next row of mbs
@@ -811,16 +857,25 @@ void vp8_encode_frame(VP8_COMP *cpi)
}
cpi->tok_count = tp - cpi->tok;
+ }
+#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
+ {
+ int i;
+ for(i = 0; i < num_part; i++)
+ {
+ vp8_stop_encode(&bc[i]);
+ cpi->partition_sz[i+1] = bc[i].pos;
+ }
}
+#endif
vpx_usec_timer_mark(&emr_timer);
cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);
-
}
- // Work out the segment probabilites if segmentation is enabled
+ // Work out the segment probabilities if segmentation is enabled
if (xd->segmentation_enabled)
{
int tot_count;
@@ -908,20 +963,16 @@ void vp8_encode_frame(VP8_COMP *cpi)
}
#endif
- // Adjust the projected reference frame useage probability numbers to reflect
- // what we have just seen. This may be usefull when we make multiple itterations
+#if ! CONFIG_REALTIME_ONLY
+ // Adjust the projected reference frame usage probability numbers to reflect
+ // what we have just seen. This may be useful when we make multiple iterations
// of the recode loop rather than continuing to use values from the previous frame.
if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) ||
(!cm->refresh_alt_ref_frame && !cm->refresh_golden_frame)))
{
vp8_convert_rfct_to_prob(cpi);
}
-
-#if 0
- // Keep record of the total distortion this time around for future use
- cpi->last_frame_distortion = cpi->frame_distortion;
#endif
-
}
void vp8_setup_block_ptrs(MACROBLOCK *x)
{