summaryrefslogtreecommitdiff
path: root/vp9/encoder/vp9_encodeframe.c
diff options
context:
space:
mode:
authorYunqing Wang <yunqingwang@google.com>2014-12-02 15:47:41 -0800
committerYunqing Wang <yunqingwang@google.com>2014-12-04 11:21:34 -0800
commiteba9c762a1710e29967207a1df092ae0eca40313 (patch)
tree4afa8eb11f745a3ef28404fb9d7d7b5974422cad /vp9/encoder/vp9_encodeframe.c
parentbf758b6afa7f006f3c8efdd445c82b6d6617b1c3 (diff)
downloadlibvpx-eba9c762a1710e29967207a1df092ae0eca40313.tar
libvpx-eba9c762a1710e29967207a1df092ae0eca40313.tar.gz
libvpx-eba9c762a1710e29967207a1df092ae0eca40313.tar.bz2
libvpx-eba9c762a1710e29967207a1df092ae0eca40313.zip
vp9_ethread: the tile-based multi-threaded encoder
Currently, VP9 supports column-tile encoding, which allows a frame to be encoded in multiple column tiles independently. The number of column tiles are set by encoder option "--tile-columns". This provides a way to encode a frame in parallel. Based on previous set of patches, this patch implemented the tile- based multi-threaded encoder. Each thread processes one or more tiles. Usage: For HD clips: --tile-columns=2 --threads=1/2/3/4 While using 4 threads, tests showed that the encoder achieved 2.3X - 2.5X speedup at good-quality speed 3, and 2X speedup at realtime speed 5. Change-Id: Ied987f8f2618b1283a8643ad255e88341733c9d4
Diffstat (limited to 'vp9/encoder/vp9_encodeframe.c')
-rw-r--r--vp9/encoder/vp9_encodeframe.c59
1 files changed, 34 insertions, 25 deletions
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index a5adcbb7a..7317c6362 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -36,6 +36,7 @@
#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
+#include "vp9/encoder/vp9_ethread.h"
#include "vp9/encoder/vp9_extend.h"
#include "vp9/encoder/vp9_pickmode.h"
#include "vp9/encoder/vp9_rd.h"
@@ -3422,7 +3423,7 @@ static int get_skip_encode_frame(const VP9_COMMON *cm, ThreadData *const td) {
cm->show_frame;
}
-static void init_tile_data(VP9_COMP *cpi) {
+void vp9_init_tile_data(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
@@ -3460,36 +3461,40 @@ static void init_tile_data(VP9_COMP *cpi) {
}
}
+void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td,
+ int tile_row, int tile_col) {
+ VP9_COMMON *const cm = &cpi->common;
+ const int tile_cols = 1 << cm->log2_tile_cols;
+ TileDataEnc *this_tile =
+ &cpi->tile_data[tile_row * tile_cols + tile_col];
+ const TileInfo * const tile_info = &this_tile->tile_info;
+ TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
+ int mi_row;
+
+ for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
+ mi_row += MI_BLOCK_SIZE) {
+ if (cpi->sf.use_nonrd_pick_mode)
+ encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok);
+ else
+ encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
+ }
+ cpi->tok_count[tile_row][tile_col] =
+ (unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]);
+ assert(tok - cpi->tile_tok[tile_row][tile_col] <=
+ allocated_tokens(*tile_info));
+}
+
static void encode_tiles(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
int tile_col, tile_row;
- init_tile_data(cpi);
+ vp9_init_tile_data(cpi);
- for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
- for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
- const TileInfo * const tile_info =
- &cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
- TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
- int mi_row;
- TileDataEnc *this_tile =
- &cpi->tile_data[tile_row * tile_cols + tile_col];
-
- for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
- mi_row += MI_BLOCK_SIZE) {
- if (cpi->sf.use_nonrd_pick_mode)
- encode_nonrd_sb_row(cpi, &cpi->td, this_tile, mi_row, &tok);
- else
- encode_rd_sb_row(cpi, &cpi->td, this_tile, mi_row, &tok);
- }
- cpi->tok_count[tile_row][tile_col] =
- (unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]);
- assert(tok - cpi->tile_tok[tile_row][tile_col] <=
- allocated_tokens(*tile_info));
- }
- }
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row)
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col)
+ vp9_encode_tile(cpi, &cpi->td, tile_row, tile_col);
}
#if CONFIG_FP_MB_STATS
@@ -3596,7 +3601,11 @@ static void encode_frame_internal(VP9_COMP *cpi) {
}
#endif
- encode_tiles(cpi);
+ // If allowed, encoding tiles in parallel with one thread handling one tile.
+ if (MIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1)
+ vp9_encode_tiles_mt(cpi);
+ else
+ encode_tiles(cpi);
vpx_usec_timer_mark(&emr_timer);
cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);