diff options
author | Yunqing Wang <yunqingwang@google.com> | 2014-12-02 15:47:41 -0800 |
---|---|---|
committer | Yunqing Wang <yunqingwang@google.com> | 2014-12-04 11:21:34 -0800 |
commit | eba9c762a1710e29967207a1df092ae0eca40313 (patch) | |
tree | 4afa8eb11f745a3ef28404fb9d7d7b5974422cad /vp9/encoder/vp9_encodeframe.c | |
parent | bf758b6afa7f006f3c8efdd445c82b6d6617b1c3 (diff) | |
download | libvpx-eba9c762a1710e29967207a1df092ae0eca40313.tar libvpx-eba9c762a1710e29967207a1df092ae0eca40313.tar.gz libvpx-eba9c762a1710e29967207a1df092ae0eca40313.tar.bz2 libvpx-eba9c762a1710e29967207a1df092ae0eca40313.zip |
vp9_ethread: the tile-based multi-threaded encoder
Currently, VP9 supports column-tile encoding, which allows a frame
to be encoded in multiple column tiles independently. The number of
column tiles is set by the encoder option "--tile-columns". This
provides a way to encode a frame in parallel.
Based on the previous set of patches, this patch implements the tile-
based multi-threaded encoder. Each thread processes one or more
tiles.
Usage:
For HD clips:
--tile-columns=2 --threads=1/2/3/4
When using 4 threads, tests showed that the encoder achieved a
2.3X - 2.5X speedup at good-quality speed 3, and a 2X speedup at
realtime speed 5.
Change-Id: Ied987f8f2618b1283a8643ad255e88341733c9d4
Diffstat (limited to 'vp9/encoder/vp9_encodeframe.c')
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 59 |
1 files changed, 34 insertions, 25 deletions
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index a5adcbb7a..7317c6362 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -36,6 +36,7 @@ #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemv.h" +#include "vp9/encoder/vp9_ethread.h" #include "vp9/encoder/vp9_extend.h" #include "vp9/encoder/vp9_pickmode.h" #include "vp9/encoder/vp9_rd.h" @@ -3422,7 +3423,7 @@ static int get_skip_encode_frame(const VP9_COMMON *cm, ThreadData *const td) { cm->show_frame; } -static void init_tile_data(VP9_COMP *cpi) { +void vp9_init_tile_data(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; @@ -3460,36 +3461,40 @@ static void init_tile_data(VP9_COMP *cpi) { } } +void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, + int tile_row, int tile_col) { + VP9_COMMON *const cm = &cpi->common; + const int tile_cols = 1 << cm->log2_tile_cols; + TileDataEnc *this_tile = + &cpi->tile_data[tile_row * tile_cols + tile_col]; + const TileInfo * const tile_info = &this_tile->tile_info; + TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col]; + int mi_row; + + for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end; + mi_row += MI_BLOCK_SIZE) { + if (cpi->sf.use_nonrd_pick_mode) + encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok); + else + encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok); + } + cpi->tok_count[tile_row][tile_col] = + (unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]); + assert(tok - cpi->tile_tok[tile_row][tile_col] <= + allocated_tokens(*tile_info)); +} + static void encode_tiles(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; int tile_col, tile_row; - init_tile_data(cpi); + vp9_init_tile_data(cpi); - for (tile_row = 0; tile_row < tile_rows; 
++tile_row) { - for (tile_col = 0; tile_col < tile_cols; ++tile_col) { - const TileInfo * const tile_info = - &cpi->tile_data[tile_row * tile_cols + tile_col].tile_info; - TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col]; - int mi_row; - TileDataEnc *this_tile = - &cpi->tile_data[tile_row * tile_cols + tile_col]; - - for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end; - mi_row += MI_BLOCK_SIZE) { - if (cpi->sf.use_nonrd_pick_mode) - encode_nonrd_sb_row(cpi, &cpi->td, this_tile, mi_row, &tok); - else - encode_rd_sb_row(cpi, &cpi->td, this_tile, mi_row, &tok); - } - cpi->tok_count[tile_row][tile_col] = - (unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]); - assert(tok - cpi->tile_tok[tile_row][tile_col] <= - allocated_tokens(*tile_info)); - } - } + for (tile_row = 0; tile_row < tile_rows; ++tile_row) + for (tile_col = 0; tile_col < tile_cols; ++tile_col) + vp9_encode_tile(cpi, &cpi->td, tile_row, tile_col); } #if CONFIG_FP_MB_STATS @@ -3596,7 +3601,11 @@ static void encode_frame_internal(VP9_COMP *cpi) { } #endif - encode_tiles(cpi); + // If allowed, encoding tiles in parallel with one thread handling one tile. + if (MIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1) + vp9_encode_tiles_mt(cpi); + else + encode_tiles(cpi); vpx_usec_timer_mark(&emr_timer); cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); |