summaryrefslogtreecommitdiff
path: root/vp8/encoder/ethreading.c
diff options
context:
space:
mode:
authorAttila Nagy <attilanagy@google.com>2012-02-09 12:37:03 +0200
committerYunqing Wang <yunqingwang@google.com>2012-02-29 12:13:37 -0500
commit52cf4dcaea10f97d25d8a3585704a1e47b384751 (patch)
tree6fb5acff2e02f2e153e206284f5075ebce548674 /vp8/encoder/ethreading.c
parentce328b855f951e5a9fd0d9d92df09d65ea0d8be9 (diff)
downloadlibvpx-52cf4dcaea10f97d25d8a3585704a1e47b384751.tar
libvpx-52cf4dcaea10f97d25d8a3585704a1e47b384751.tar.gz
libvpx-52cf4dcaea10f97d25d8a3585704a1e47b384751.tar.bz2
libvpx-52cf4dcaea10f97d25d8a3585704a1e47b384751.zip
Packing bitstream on-the-fly with delayed context updates
Produce the token partitions on-the-fly, while processing each MB. Context is updated at the beginning of each frame based on the previous frame's counters. Optionally, the encoder outputs partitions in separate buffers. For frame-based output, partitions are concatenated internally. Limitations: - enabled only in combination with realtime-only mode - the number of encoding threads has to be less than or equal to the number of token partitions. For this reason, by default the encoder will do 8 token partitions. - vpxenc supports partition output (-P) only in combination with the IVF output format (--ivf) Performance: - Realtime encoder can be up to 13% faster (ARM) depending on the number of threads and bitrate settings. Constant gain over the 5-16 speed range. - Token buffer reduced from one frame to 8 MBs Quality: - quality is affected by the delayed context updates. This again depends on input material, speed and bitrate settings. For VC style input the loss seen is up to 0.2dB. If error-resilient=2 mode is used then the effect of this change is negligible. Example: ./configure --enable-realtime-only --enable-onthefly-bitpacking ./vpxenc --rt --end-usage=1 --fps=30000/1000 -w 640 -h 480 --target-bitrate=1000 --token-parts=3 --static-thresh=2000 --ivf -P -t 4 -o strm.ivf tanya_640x480.yuv Change-Id: I127295cb85b835fc287e1c0201a67e378d025d76
Diffstat (limited to 'vp8/encoder/ethreading.c')
-rw-r--r--vp8/encoder/ethreading.c61
1 files changed, 40 insertions, 21 deletions
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index 2874e7845..b549a7dca 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -13,6 +13,8 @@
#include "vp8/common/common.h"
#include "vp8/common/extend.h"
+#include "bitstream.h"
+
#if CONFIG_MULTITHREAD
extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
@@ -74,6 +76,10 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
MACROBLOCK *x = &mbri->mb;
MACROBLOCKD *xd = &x->e_mbd;
TOKENEXTRA *tp ;
+#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
+ TOKENEXTRA *tp_start = cpi->tok + (1 + ithread) * (16 * 24);
+ const int num_part = (1 << cm->multi_token_partition);
+#endif
int *segment_counts = mbri->segment_counts;
int *totalrate = &mbri->totalrate;
@@ -91,9 +97,15 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
int map_index = (mb_row * cm->mb_cols);
- volatile int *last_row_current_mb_col;
+ volatile const int *last_row_current_mb_col;
+ volatile int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
+#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
+ vp8_writer *w = &cpi->bc[1 + (mb_row % num_part)];
+#else
tp = cpi->tok + (mb_row * (cm->mb_cols * 16 * 24));
+ cpi->tplist[mb_row].start = tp;
+#endif
last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
@@ -107,25 +119,27 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
recon_yoffset = (mb_row * recon_y_stride * 16);
recon_uvoffset = (mb_row * recon_uv_stride * 8);
- cpi->tplist[mb_row].start = tp;
-
- //printf("Thread mb_row = %d\n", mb_row);
-
// Set the mb activity pointer to the start of the row.
x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
// for each macroblock col in image
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
+ *current_mb_col = mb_col - 1;
+
if ((mb_col & (nsync - 1)) == 0)
{
- while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != cm->mb_cols - 1)
+ while (mb_col > (*last_row_current_mb_col - nsync))
{
x86_pause_hint();
thread_sleep(0);
}
}
+#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
+ tp = tp_start;
+#endif
+
// Distance of Mb to the various image edges.
// These specified to 8th pel as they are always compared to values that are in 1/8th pel units
xd->mb_to_left_edge = -((mb_col * 16) << 3);
@@ -154,7 +168,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
vp8_activity_masking(cpi, x);
// Is segmentation enabled
- // MB level adjutment to quantizer
+ // MB level adjustment to quantizer
if (xd->segmentation_enabled)
{
// Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
@@ -196,13 +210,13 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
#endif
- // Count of last ref frame 0,0 useage
+ // Count of last ref frame 0,0 usage
if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
cpi->inter_zz_count++;
// Special case code for cyclic refresh
// If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode
- // during vp8cx_encode_inter_macroblock()) back into the global sgmentation map
+ // during vp8cx_encode_inter_macroblock()) back into the global segmentation map
if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
{
const MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
@@ -223,9 +237,17 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
}
}
- cpi->tplist[mb_row].stop = tp;
- // Increment pointer into gf useage flags structure.
+#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
+ /* pack tokens for this MB */
+ {
+ int tok_count = tp - tp_start;
+ pack_tokens(w, tp_start, tok_count);
+ }
+#else
+ cpi->tplist[mb_row].stop = tp;
+#endif
+ // Increment pointer into gf usage flags structure.
x->gf_active_ptr++;
// Increment the activity mask pointers.
@@ -239,23 +261,21 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
recon_yoffset += 16;
recon_uvoffset += 8;
- // Keep track of segment useage
+ // Keep track of segment usage
segment_counts[xd->mode_info_context->mbmi.segment_id]++;
// skip to next mb
xd->mode_info_context++;
x->partition_info++;
xd->above_context++;
-
- cpi->mt_current_mb_col[mb_row] = mb_col;
}
- //extend the recon for intra prediction
- vp8_extend_mb_row(
- &cm->yv12_fb[dst_fb_idx],
- xd->dst.y_buffer + 16,
- xd->dst.u_buffer + 8,
- xd->dst.v_buffer + 8);
+ vp8_extend_mb_row( &cm->yv12_fb[dst_fb_idx],
+ xd->dst.y_buffer + 16,
+ xd->dst.u_buffer + 8,
+ xd->dst.v_buffer + 8);
+
+ *current_mb_col = mb_col + nsync;
// this is to account for the border
xd->mode_info_context++;
@@ -271,7 +291,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
if (mb_row == cm->mb_rows - 1)
{
- //SetEvent(cpi->h_event_main);
sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */
}
}