diff options
Diffstat (limited to 'vp9/decoder')
-rw-r--r-- | vp9/decoder/vp9_decodeframe.c | 56 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodeframe.h | 4 | ||||
-rw-r--r-- | vp9/decoder/vp9_decoder.c | 16 | ||||
-rw-r--r-- | vp9/decoder/vp9_decoder.h | 16 | ||||
-rw-r--r-- | vp9/decoder/vp9_dthread.c | 21 | ||||
-rw-r--r-- | vp9/decoder/vp9_dthread.h | 4 |
6 files changed, 66 insertions, 51 deletions
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 5a2e6f881..9b63961f0 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -671,7 +671,7 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm, setup_display_size(cm, rb); } -static void decode_tile(VP9D_COMP *pbi, const TileInfo *const tile, +static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile, vp9_reader *r) { const int num_threads = pbi->oxcf.max_threads; VP9_COMMON *const cm = &pbi->common; @@ -776,7 +776,7 @@ typedef struct TileBuffer { int col; // only used with multi-threaded decoding } TileBuffer; -static const uint8_t *decode_tiles(VP9D_COMP *pbi, +static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end) { VP9_COMMON *const cm = &pbi->common; @@ -865,7 +865,7 @@ static int compare_tile_buffers(const void *a, const void *b) { } } -static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, +static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end) { VP9_COMMON *const cm = &pbi->common; @@ -882,12 +882,16 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, assert(tile_rows == 1); (void)tile_rows; - if (num_workers > pbi->num_tile_workers) { + // TODO(jzern): See if we can remove the restriction of passing in max + // threads to the decoder. + if (pbi->num_tile_workers == 0) { + const int num_threads = pbi->oxcf.max_threads & ~1; int i; + // TODO(jzern): Allocate one less worker, as in the current code we only + // use num_threads - 1 workers. CHECK_MEM_ERROR(cm, pbi->tile_workers, - vpx_realloc(pbi->tile_workers, - num_workers * sizeof(*pbi->tile_workers))); - for (i = pbi->num_tile_workers; i < num_workers; ++i) { + vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); + for (i = 0; i < num_threads; ++i) { VP9Worker *const worker = &pbi->tile_workers[i]; ++pbi->num_tile_workers; @@ -895,7 +899,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, CHECK_MEM_ERROR(cm, worker->data1, vpx_memalign(32, sizeof(TileWorkerData))); CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo))); - if (i < num_workers - 1 && !vp9_worker_reset(worker)) { + if (i < num_threads - 1 && !vp9_worker_reset(worker)) { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Tile decoder thread creation failed"); } @@ -903,7 +907,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, } // Reset tile decoding hook - for (n = 0; n < pbi->num_tile_workers; ++n) { + for (n = 0; n < num_workers; ++n) { pbi->tile_workers[n].hook = (VP9WorkerHook)tile_worker_hook; } @@ -1005,12 +1009,13 @@ static void error_handler(void *data) { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); } -#define RESERVED \ - if (vp9_rb_read_bit(rb)) \ - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, \ - "Reserved bit must be unset") +static BITSTREAM_PROFILE read_profile(struct vp9_read_bit_buffer *rb) { + int profile = vp9_rb_read_bit(rb); + profile |= vp9_rb_read_bit(rb) << 1; + return (BITSTREAM_PROFILE) profile; +} -static size_t read_uncompressed_header(VP9D_COMP *pbi, +static size_t read_uncompressed_header(VP9Decoder *pbi, struct vp9_read_bit_buffer *rb) { VP9_COMMON *const cm = &pbi->common; size_t sz; @@ -1022,8 +1027,10 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid frame marker"); - cm->version = vp9_rb_read_bit(rb); - RESERVED; + cm->profile = read_profile(rb); + if (cm->profile >= MAX_PROFILES) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Unsupported bitstream profile"); cm->show_existing_frame = vp9_rb_read_bit(rb); if (cm->show_existing_frame) { @@ -1048,11 +1055,12 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, if (cm->frame_type == KEY_FRAME) { check_sync_code(cm, rb); - + if (cm->profile > PROFILE_1) + cm->bit_depth = vp9_rb_read_bit(rb) ? BITS_12 : BITS_10; cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3); if (cm->color_space != SRGB) { vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range - if (cm->version == 1) { + if (cm->profile >= PROFILE_1) { cm->subsampling_x = vp9_rb_read_bit(rb); cm->subsampling_y = vp9_rb_read_bit(rb); vp9_rb_read_bit(rb); // has extra plane @@ -1060,7 +1068,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, cm->subsampling_y = cm->subsampling_x = 1; } } else { - if (cm->version == 1) { + if (cm->profile >= PROFILE_1) { cm->subsampling_y = cm->subsampling_x = 0; vp9_rb_read_bit(rb); // has extra plane } else { @@ -1147,7 +1155,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, return sz; } -static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data, +static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data, size_t partition_size) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; @@ -1247,7 +1255,7 @@ static void debug_check_frame_counts(const VP9_COMMON *const cm) { } #endif // NDEBUG -int vp9_decode_frame(VP9D_COMP *pbi, +int vp9_decode_frame(VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end, const uint8_t **p_data_end) { VP9_COMMON *const cm = &pbi->common; @@ -1288,11 +1296,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, } init_macroblockd(cm, &pbi->mb); - - if (cm->coding_use_prev_mi) - set_prev_mi(cm); - else - cm->prev_mi = NULL; + cm->prev_mi = get_prev_mi(cm); setup_plane_dequants(cm, xd, cm->base_qindex); vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y); diff --git a/vp9/decoder/vp9_decodeframe.h b/vp9/decoder/vp9_decodeframe.h index e474db495..8a19dafc5 100644 --- a/vp9/decoder/vp9_decodeframe.h +++ b/vp9/decoder/vp9_decodeframe.h @@ -17,11 +17,11 @@ extern "C" { #endif struct VP9Common; -struct VP9Decompressor; +struct VP9Decoder; void vp9_init_dequantizer(struct VP9Common *cm); -int vp9_decode_frame(struct VP9Decompressor *pbi, +int vp9_decode_frame(struct VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end, const uint8_t **p_data_end); diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index fb3666cbe..fd74478e9 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -110,8 +110,8 @@ void vp9_initialize_dec() { } } -VP9D_COMP *vp9_decoder_create(const VP9D_CONFIG *oxcf) { - VP9D_COMP *const pbi = vpx_memalign(32, sizeof(*pbi)); +VP9Decoder *vp9_decoder_create(const VP9D_CONFIG *oxcf) { + VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi)); VP9_COMMON *const cm = pbi ? &pbi->common : NULL; if (!cm) @@ -152,7 +152,7 @@ VP9D_COMP *vp9_decoder_create(const VP9D_CONFIG *oxcf) { return pbi; } -void vp9_decoder_remove(VP9D_COMP *pbi) { +void vp9_decoder_remove(VP9Decoder *pbi) { VP9_COMMON *const cm = &pbi->common; int i; @@ -182,7 +182,7 @@ static int equal_dimensions(const YV12_BUFFER_CONFIG *a, a->uv_height == b->uv_height && a->uv_width == b->uv_width; } -vpx_codec_err_t vp9_copy_reference_dec(VP9D_COMP *pbi, +vpx_codec_err_t vp9_copy_reference_dec(VP9Decoder *pbi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { VP9_COMMON *cm = &pbi->common; @@ -252,7 +252,7 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, } -int vp9_get_reference_dec(VP9D_COMP *pbi, int index, YV12_BUFFER_CONFIG **fb) { +int vp9_get_reference_dec(VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb) { VP9_COMMON *cm = &pbi->common; if (index < 0 || index >= REF_FRAMES) @@ -263,7 +263,7 @@ int vp9_get_reference_dec(VP9D_COMP *pbi, int index, YV12_BUFFER_CONFIG **fb) { } /* If any buffer updating is signaled it should be done here. */ -static void swap_frame_buffers(VP9D_COMP *pbi) { +static void swap_frame_buffers(VP9Decoder *pbi) { int ref_index = 0, mask; VP9_COMMON *const cm = &pbi->common; @@ -287,7 +287,7 @@ static void swap_frame_buffers(VP9D_COMP *pbi) { cm->frame_refs[ref_index].idx = INT_MAX; } -int vp9_receive_compressed_data(VP9D_COMP *pbi, +int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size, const uint8_t **psource, int64_t time_stamp) { VP9_COMMON *const cm = &pbi->common; @@ -403,7 +403,7 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi, return retcode; } -int vp9_get_raw_frame(VP9D_COMP *pbi, YV12_BUFFER_CONFIG *sd, +int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp9_ppflags_t *flags) { int ret = -1; diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h index 2e8bebdae..c9dc25191 100644 --- a/vp9/decoder/vp9_decoder.h +++ b/vp9/decoder/vp9_decoder.h @@ -35,7 +35,7 @@ typedef struct { int inv_tile_order; } VP9D_CONFIG; -typedef struct VP9Decompressor { +typedef struct VP9Decoder { DECLARE_ALIGNED(16, MACROBLOCKD, mb); DECLARE_ALIGNED(16, VP9_COMMON, common); @@ -59,20 +59,20 @@ typedef struct VP9Decompressor { int num_tile_workers; VP9LfSync lf_row_sync; -} VP9D_COMP; +} VP9Decoder; void vp9_initialize_dec(); -int vp9_receive_compressed_data(struct VP9Decompressor *pbi, +int vp9_receive_compressed_data(struct VP9Decoder *pbi, size_t size, const uint8_t **dest, int64_t time_stamp); -int vp9_get_raw_frame(struct VP9Decompressor *pbi, +int vp9_get_raw_frame(struct VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp9_ppflags_t *flags); -vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decompressor *pbi, +vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decoder *pbi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); @@ -80,13 +80,13 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); -int vp9_get_reference_dec(struct VP9Decompressor *pbi, +int vp9_get_reference_dec(struct VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb); -struct VP9Decompressor *vp9_decoder_create(const VP9D_CONFIG *oxcf); +struct VP9Decoder *vp9_decoder_create(const VP9D_CONFIG *oxcf); -void vp9_decoder_remove(struct VP9Decompressor *pbi); +void vp9_decoder_remove(struct VP9Decoder *pbi); #ifdef __cplusplus } // extern "C" diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c index 163936021..9b124c9d9 100644 --- a/vp9/decoder/vp9_dthread.c +++ b/vp9/decoder/vp9_dthread.c @@ -132,13 +132,15 @@ static int loop_filter_row_worker(void *arg1, void *arg2) { // VP9 decoder: Implement multi-threaded loopfilter that uses the tile // threads. -void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, +void vp9_loop_filter_frame_mt(VP9Decoder *pbi, VP9_COMMON *cm, MACROBLOCKD *xd, int frame_filter_level, int y_only, int partial_frame) { // Number of superblock rows and cols const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; + const int tile_cols = 1 << cm->log2_tile_cols; + const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols); int i; // Allocate memory used in thread synchronization. @@ -168,7 +170,16 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows); // Set up loopfilter thread data. - for (i = 0; i < pbi->num_tile_workers; ++i) { + // The decoder is using num_workers instead of pbi->num_tile_workers + // because it has been observed that using more threads on the + // loopfilter, than there are tile columns in the frame will hurt + // performance on Android. This is because the system will only + // schedule the tile decode workers on cores equal to the number + // of tile columns. Then if the decoder tries to use more threads for the + // loopfilter, it will hurt performance because of contention. If the + // multithreading code changes in the future then the number of workers + // used by the loopfilter should be revisited. + for (i = 0; i < num_workers; ++i) { VP9Worker *const worker = &pbi->tile_workers[i]; TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; LFWorkerData *const lf_data = &tile_data->lfdata; @@ -184,10 +195,10 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, lf_data->y_only = y_only; // always do all planes in decoder lf_data->lf_sync = &pbi->lf_row_sync; - lf_data->num_lf_workers = pbi->num_tile_workers; + lf_data->num_lf_workers = num_workers; // Start loopfiltering - if (i == pbi->num_tile_workers - 1) { + if (i == num_workers - 1) { vp9_worker_execute(worker); } else { vp9_worker_launch(worker); @@ -195,7 +206,7 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, } // Wait till all rows are finished - for (i = 0; i < pbi->num_tile_workers; ++i) { + for (i = 0; i < num_workers; ++i) { vp9_worker_sync(&pbi->tile_workers[i]); } } diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h index 2f65e1e30..005bd7bbd 100644 --- a/vp9/decoder/vp9_dthread.h +++ b/vp9/decoder/vp9_dthread.h @@ -18,7 +18,7 @@ struct macroblockd; struct VP9Common; -struct VP9Decompressor; +struct VP9Decoder; typedef struct TileWorkerData { struct VP9Common *cm; @@ -50,7 +50,7 @@ void vp9_loop_filter_alloc(struct VP9Common *cm, struct VP9LfSyncData *lf_sync, void vp9_loop_filter_dealloc(struct VP9LfSyncData *lf_sync, int rows); // Multi-threaded loopfilter that uses the tile threads. -void vp9_loop_filter_frame_mt(struct VP9Decompressor *pbi, +void vp9_loop_filter_frame_mt(struct VP9Decoder *pbi, struct VP9Common *cm, struct macroblockd *xd, int frame_filter_level, |