diff options
author | Attila Nagy <attilanagy@google.com> | 2012-02-09 12:37:03 +0200 |
---|---|---|
committer | Yunqing Wang <yunqingwang@google.com> | 2012-02-29 12:13:37 -0500 |
commit | 52cf4dcaea10f97d25d8a3585704a1e47b384751 (patch) | |
tree | 6fb5acff2e02f2e153e206284f5075ebce548674 /vp8/encoder/ethreading.c | |
parent | ce328b855f951e5a9fd0d9d92df09d65ea0d8be9 (diff) | |
download | libvpx-52cf4dcaea10f97d25d8a3585704a1e47b384751.tar libvpx-52cf4dcaea10f97d25d8a3585704a1e47b384751.tar.gz libvpx-52cf4dcaea10f97d25d8a3585704a1e47b384751.tar.bz2 libvpx-52cf4dcaea10f97d25d8a3585704a1e47b384751.zip |
Packing bitstream on-the-fly with delayed context updates
Produce the token partitions on-the-fly, while processing each MB.
Context is updated at the beginning of each frame based on the
previous frame's counters. Optionally, the encoder outputs partitions in
separate buffers. For frame based output, partitions are concatenated
internally.
Limitations:
- enabled just in combination with realtime-only mode
- number of encoding threads has to be less than or equal to the
number of token partitions. For this reason, by default the encoder
will do 8 token partitions.
- vpxenc supports partition output (-P) just in combination with
IVF output format (--ivf)
Performance:
- Realtime encoder can be up to 13% faster (ARM) depending on the number
of threads and bitrate settings. Constant gain over the 5-16 speed
range.
- Token buffer reduced from one frame to 8 MBs
Quality:
- quality is affected by the delayed context updates. This again
depends on input material, speed and bitrate settings. For VC
style input the loss seen is up to 0.2dB. If error-resilient=2
mode is used then the effect of this change is negligible.
Example:
./configure --enable-realtime-only --enable-onthefly-bitpacking
./vpxenc --rt --end-usage=1 --fps=30000/1000 -w 640 -h 480
--target-bitrate=1000 --token-parts=3 --static-thresh=2000
--ivf -P -t 4 -o strm.ivf tanya_640x480.yuv
Change-Id: I127295cb85b835fc287e1c0201a67e378d025d76
Diffstat (limited to 'vp8/encoder/ethreading.c')
-rw-r--r-- | vp8/encoder/ethreading.c | 61 |
1 files changed, 40 insertions, 21 deletions
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index 2874e7845..b549a7dca 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -13,6 +13,8 @@ #include "vp8/common/common.h" #include "vp8/common/extend.h" +#include "bitstream.h" + #if CONFIG_MULTITHREAD extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, @@ -74,6 +76,10 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) MACROBLOCK *x = &mbri->mb; MACROBLOCKD *xd = &x->e_mbd; TOKENEXTRA *tp ; +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + TOKENEXTRA *tp_start = cpi->tok + (1 + ithread) * (16 * 24); + const int num_part = (1 << cm->multi_token_partition); +#endif int *segment_counts = mbri->segment_counts; int *totalrate = &mbri->totalrate; @@ -91,9 +97,15 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; int map_index = (mb_row * cm->mb_cols); - volatile int *last_row_current_mb_col; + volatile const int *last_row_current_mb_col; + volatile int *current_mb_col = &cpi->mt_current_mb_col[mb_row]; +#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) + vp8_writer *w = &cpi->bc[1 + (mb_row % num_part)]; +#else tp = cpi->tok + (mb_row * (cm->mb_cols * 16 * 24)); + cpi->tplist[mb_row].start = tp; +#endif last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1]; @@ -107,25 +119,27 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) recon_yoffset = (mb_row * recon_y_stride * 16); recon_uvoffset = (mb_row * recon_uv_stride * 8); - cpi->tplist[mb_row].start = tp; - - //printf("Thread mb_row = %d\n", mb_row); - // Set the mb activity pointer to the start of the row. 
x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; // for each macroblock col in image for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { + *current_mb_col = mb_col - 1; + if ((mb_col & (nsync - 1)) == 0) { - while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != cm->mb_cols - 1) + while (mb_col > (*last_row_current_mb_col - nsync)) { x86_pause_hint(); thread_sleep(0); } } +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + tp = tp_start; +#endif + // Distance of Mb to the various image edges. // These specified to 8th pel as they are always compared to values that are in 1/8th pel units xd->mb_to_left_edge = -((mb_col * 16) << 3); @@ -154,7 +168,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) vp8_activity_masking(cpi, x); // Is segmentation enabled - // MB level adjutment to quantizer + // MB level adjustment to quantizer if (xd->segmentation_enabled) { // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) @@ -196,13 +210,13 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) #endif - // Count of last ref frame 0,0 useage + // Count of last ref frame 0,0 usage if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) cpi->inter_zz_count++; // Special case code for cyclic refresh // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode - // during vp8cx_encode_inter_macroblock()) back into the global sgmentation map + // during vp8cx_encode_inter_macroblock()) back into the global segmentation map if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled) { const MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; @@ -223,9 +237,17 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) } } - cpi->tplist[mb_row].stop = tp; - // Increment pointer into gf useage flags structure. 
+#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + /* pack tokens for this MB */ + { + int tok_count = tp - tp_start; + pack_tokens(w, tp_start, tok_count); + } +#else + cpi->tplist[mb_row].stop = tp; +#endif + // Increment pointer into gf usage flags structure. x->gf_active_ptr++; // Increment the activity mask pointers. @@ -239,23 +261,21 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) recon_yoffset += 16; recon_uvoffset += 8; - // Keep track of segment useage + // Keep track of segment usage segment_counts[xd->mode_info_context->mbmi.segment_id]++; // skip to next mb xd->mode_info_context++; x->partition_info++; xd->above_context++; - - cpi->mt_current_mb_col[mb_row] = mb_col; } - //extend the recon for intra prediction - vp8_extend_mb_row( - &cm->yv12_fb[dst_fb_idx], - xd->dst.y_buffer + 16, - xd->dst.u_buffer + 8, - xd->dst.v_buffer + 8); + vp8_extend_mb_row( &cm->yv12_fb[dst_fb_idx], + xd->dst.y_buffer + 16, + xd->dst.u_buffer + 8, + xd->dst.v_buffer + 8); + + *current_mb_col = mb_col + nsync; // this is to account for the border xd->mode_info_context++; @@ -271,7 +291,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) if (mb_row == cm->mb_rows - 1) { - //SetEvent(cpi->h_event_main); sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */ } } |