summaryrefslogtreecommitdiff
path: root/vp9/decoder
diff options
context:
space:
mode:
Diffstat (limited to 'vp9/decoder')
-rw-r--r--vp9/decoder/vp9_decodeframe.c56
-rw-r--r--vp9/decoder/vp9_decodeframe.h4
-rw-r--r--vp9/decoder/vp9_decoder.c16
-rw-r--r--vp9/decoder/vp9_decoder.h16
-rw-r--r--vp9/decoder/vp9_dthread.c21
-rw-r--r--vp9/decoder/vp9_dthread.h4
6 files changed, 66 insertions, 51 deletions
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 5a2e6f881..9b63961f0 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -671,7 +671,7 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm,
setup_display_size(cm, rb);
}
-static void decode_tile(VP9D_COMP *pbi, const TileInfo *const tile,
+static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile,
vp9_reader *r) {
const int num_threads = pbi->oxcf.max_threads;
VP9_COMMON *const cm = &pbi->common;
@@ -776,7 +776,7 @@ typedef struct TileBuffer {
int col; // only used with multi-threaded decoding
} TileBuffer;
-static const uint8_t *decode_tiles(VP9D_COMP *pbi,
+static const uint8_t *decode_tiles(VP9Decoder *pbi,
const uint8_t *data,
const uint8_t *data_end) {
VP9_COMMON *const cm = &pbi->common;
@@ -865,7 +865,7 @@ static int compare_tile_buffers(const void *a, const void *b) {
}
}
-static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi,
+static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
const uint8_t *data,
const uint8_t *data_end) {
VP9_COMMON *const cm = &pbi->common;
@@ -882,12 +882,16 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi,
assert(tile_rows == 1);
(void)tile_rows;
- if (num_workers > pbi->num_tile_workers) {
+ // TODO(jzern): See if we can remove the restriction of passing in max
+ // threads to the decoder.
+ if (pbi->num_tile_workers == 0) {
+ const int num_threads = pbi->oxcf.max_threads & ~1;
int i;
+ // TODO(jzern): Allocate one less worker, as in the current code we only
+ // use num_threads - 1 workers.
CHECK_MEM_ERROR(cm, pbi->tile_workers,
- vpx_realloc(pbi->tile_workers,
- num_workers * sizeof(*pbi->tile_workers)));
- for (i = pbi->num_tile_workers; i < num_workers; ++i) {
+ vpx_malloc(num_threads * sizeof(*pbi->tile_workers)));
+ for (i = 0; i < num_threads; ++i) {
VP9Worker *const worker = &pbi->tile_workers[i];
++pbi->num_tile_workers;
@@ -895,7 +899,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi,
CHECK_MEM_ERROR(cm, worker->data1,
vpx_memalign(32, sizeof(TileWorkerData)));
CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo)));
- if (i < num_workers - 1 && !vp9_worker_reset(worker)) {
+ if (i < num_threads - 1 && !vp9_worker_reset(worker)) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"Tile decoder thread creation failed");
}
@@ -903,7 +907,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi,
}
// Reset tile decoding hook
- for (n = 0; n < pbi->num_tile_workers; ++n) {
+ for (n = 0; n < num_workers; ++n) {
pbi->tile_workers[n].hook = (VP9WorkerHook)tile_worker_hook;
}
@@ -1005,12 +1009,13 @@ static void error_handler(void *data) {
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet");
}
-#define RESERVED \
- if (vp9_rb_read_bit(rb)) \
- vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, \
- "Reserved bit must be unset")
+static BITSTREAM_PROFILE read_profile(struct vp9_read_bit_buffer *rb) {
+ int profile = vp9_rb_read_bit(rb);
+ profile |= vp9_rb_read_bit(rb) << 1;
+ return (BITSTREAM_PROFILE) profile;
+}
-static size_t read_uncompressed_header(VP9D_COMP *pbi,
+static size_t read_uncompressed_header(VP9Decoder *pbi,
struct vp9_read_bit_buffer *rb) {
VP9_COMMON *const cm = &pbi->common;
size_t sz;
@@ -1022,8 +1027,10 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Invalid frame marker");
- cm->version = vp9_rb_read_bit(rb);
- RESERVED;
+ cm->profile = read_profile(rb);
+ if (cm->profile >= MAX_PROFILES)
+ vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+ "Unsupported bitstream profile");
cm->show_existing_frame = vp9_rb_read_bit(rb);
if (cm->show_existing_frame) {
@@ -1048,11 +1055,12 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
if (cm->frame_type == KEY_FRAME) {
check_sync_code(cm, rb);
-
+ if (cm->profile > PROFILE_1)
+ cm->bit_depth = vp9_rb_read_bit(rb) ? BITS_12 : BITS_10;
cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3);
if (cm->color_space != SRGB) {
vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range
- if (cm->version == 1) {
+ if (cm->profile >= PROFILE_1) {
cm->subsampling_x = vp9_rb_read_bit(rb);
cm->subsampling_y = vp9_rb_read_bit(rb);
vp9_rb_read_bit(rb); // has extra plane
@@ -1060,7 +1068,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
cm->subsampling_y = cm->subsampling_x = 1;
}
} else {
- if (cm->version == 1) {
+ if (cm->profile >= PROFILE_1) {
cm->subsampling_y = cm->subsampling_x = 0;
vp9_rb_read_bit(rb); // has extra plane
} else {
@@ -1147,7 +1155,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
return sz;
}
-static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data,
+static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data,
size_t partition_size) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
@@ -1247,7 +1255,7 @@ static void debug_check_frame_counts(const VP9_COMMON *const cm) {
}
#endif // NDEBUG
-int vp9_decode_frame(VP9D_COMP *pbi,
+int vp9_decode_frame(VP9Decoder *pbi,
const uint8_t *data, const uint8_t *data_end,
const uint8_t **p_data_end) {
VP9_COMMON *const cm = &pbi->common;
@@ -1288,11 +1296,7 @@ int vp9_decode_frame(VP9D_COMP *pbi,
}
init_macroblockd(cm, &pbi->mb);
-
- if (cm->coding_use_prev_mi)
- set_prev_mi(cm);
- else
- cm->prev_mi = NULL;
+ cm->prev_mi = get_prev_mi(cm);
setup_plane_dequants(cm, xd, cm->base_qindex);
vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
diff --git a/vp9/decoder/vp9_decodeframe.h b/vp9/decoder/vp9_decodeframe.h
index e474db495..8a19dafc5 100644
--- a/vp9/decoder/vp9_decodeframe.h
+++ b/vp9/decoder/vp9_decodeframe.h
@@ -17,11 +17,11 @@ extern "C" {
#endif
struct VP9Common;
-struct VP9Decompressor;
+struct VP9Decoder;
void vp9_init_dequantizer(struct VP9Common *cm);
-int vp9_decode_frame(struct VP9Decompressor *pbi,
+int vp9_decode_frame(struct VP9Decoder *pbi,
const uint8_t *data, const uint8_t *data_end,
const uint8_t **p_data_end);
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index fb3666cbe..fd74478e9 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -110,8 +110,8 @@ void vp9_initialize_dec() {
}
}
-VP9D_COMP *vp9_decoder_create(const VP9D_CONFIG *oxcf) {
- VP9D_COMP *const pbi = vpx_memalign(32, sizeof(*pbi));
+VP9Decoder *vp9_decoder_create(const VP9D_CONFIG *oxcf) {
+ VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi));
VP9_COMMON *const cm = pbi ? &pbi->common : NULL;
if (!cm)
@@ -152,7 +152,7 @@ VP9D_COMP *vp9_decoder_create(const VP9D_CONFIG *oxcf) {
return pbi;
}
-void vp9_decoder_remove(VP9D_COMP *pbi) {
+void vp9_decoder_remove(VP9Decoder *pbi) {
VP9_COMMON *const cm = &pbi->common;
int i;
@@ -182,7 +182,7 @@ static int equal_dimensions(const YV12_BUFFER_CONFIG *a,
a->uv_height == b->uv_height && a->uv_width == b->uv_width;
}
-vpx_codec_err_t vp9_copy_reference_dec(VP9D_COMP *pbi,
+vpx_codec_err_t vp9_copy_reference_dec(VP9Decoder *pbi,
VP9_REFFRAME ref_frame_flag,
YV12_BUFFER_CONFIG *sd) {
VP9_COMMON *cm = &pbi->common;
@@ -252,7 +252,7 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,
}
-int vp9_get_reference_dec(VP9D_COMP *pbi, int index, YV12_BUFFER_CONFIG **fb) {
+int vp9_get_reference_dec(VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb) {
VP9_COMMON *cm = &pbi->common;
if (index < 0 || index >= REF_FRAMES)
@@ -263,7 +263,7 @@ int vp9_get_reference_dec(VP9D_COMP *pbi, int index, YV12_BUFFER_CONFIG **fb) {
}
/* If any buffer updating is signaled it should be done here. */
-static void swap_frame_buffers(VP9D_COMP *pbi) {
+static void swap_frame_buffers(VP9Decoder *pbi) {
int ref_index = 0, mask;
VP9_COMMON *const cm = &pbi->common;
@@ -287,7 +287,7 @@ static void swap_frame_buffers(VP9D_COMP *pbi) {
cm->frame_refs[ref_index].idx = INT_MAX;
}
-int vp9_receive_compressed_data(VP9D_COMP *pbi,
+int vp9_receive_compressed_data(VP9Decoder *pbi,
size_t size, const uint8_t **psource,
int64_t time_stamp) {
VP9_COMMON *const cm = &pbi->common;
@@ -403,7 +403,7 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi,
return retcode;
}
-int vp9_get_raw_frame(VP9D_COMP *pbi, YV12_BUFFER_CONFIG *sd,
+int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd,
int64_t *time_stamp, int64_t *time_end_stamp,
vp9_ppflags_t *flags) {
int ret = -1;
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index 2e8bebdae..c9dc25191 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -35,7 +35,7 @@ typedef struct {
int inv_tile_order;
} VP9D_CONFIG;
-typedef struct VP9Decompressor {
+typedef struct VP9Decoder {
DECLARE_ALIGNED(16, MACROBLOCKD, mb);
DECLARE_ALIGNED(16, VP9_COMMON, common);
@@ -59,20 +59,20 @@ typedef struct VP9Decompressor {
int num_tile_workers;
VP9LfSync lf_row_sync;
-} VP9D_COMP;
+} VP9Decoder;
void vp9_initialize_dec();
-int vp9_receive_compressed_data(struct VP9Decompressor *pbi,
+int vp9_receive_compressed_data(struct VP9Decoder *pbi,
size_t size, const uint8_t **dest,
int64_t time_stamp);
-int vp9_get_raw_frame(struct VP9Decompressor *pbi,
+int vp9_get_raw_frame(struct VP9Decoder *pbi,
YV12_BUFFER_CONFIG *sd,
int64_t *time_stamp, int64_t *time_end_stamp,
vp9_ppflags_t *flags);
-vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decompressor *pbi,
+vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decoder *pbi,
VP9_REFFRAME ref_frame_flag,
YV12_BUFFER_CONFIG *sd);
@@ -80,13 +80,13 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,
VP9_REFFRAME ref_frame_flag,
YV12_BUFFER_CONFIG *sd);
-int vp9_get_reference_dec(struct VP9Decompressor *pbi,
+int vp9_get_reference_dec(struct VP9Decoder *pbi,
int index, YV12_BUFFER_CONFIG **fb);
-struct VP9Decompressor *vp9_decoder_create(const VP9D_CONFIG *oxcf);
+struct VP9Decoder *vp9_decoder_create(const VP9D_CONFIG *oxcf);
-void vp9_decoder_remove(struct VP9Decompressor *pbi);
+void vp9_decoder_remove(struct VP9Decoder *pbi);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c
index 163936021..9b124c9d9 100644
--- a/vp9/decoder/vp9_dthread.c
+++ b/vp9/decoder/vp9_dthread.c
@@ -132,13 +132,15 @@ static int loop_filter_row_worker(void *arg1, void *arg2) {
// VP9 decoder: Implement multi-threaded loopfilter that uses the tile
// threads.
-void vp9_loop_filter_frame_mt(VP9D_COMP *pbi,
+void vp9_loop_filter_frame_mt(VP9Decoder *pbi,
VP9_COMMON *cm,
MACROBLOCKD *xd,
int frame_filter_level,
int y_only, int partial_frame) {
// Number of superblock rows and cols
const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
+ const int tile_cols = 1 << cm->log2_tile_cols;
+ const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols);
int i;
// Allocate memory used in thread synchronization.
@@ -168,7 +170,16 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi,
sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows);
// Set up loopfilter thread data.
- for (i = 0; i < pbi->num_tile_workers; ++i) {
+ // The decoder is using num_workers instead of pbi->num_tile_workers
+ // because it has been observed that using more threads on the
+ // loopfilter, than there are tile columns in the frame will hurt
+ // performance on Android. This is because the system will only
+ // schedule the tile decode workers on cores equal to the number
+ // of tile columns. Then if the decoder tries to use more threads for the
+ // loopfilter, it will hurt performance because of contention. If the
+ // multithreading code changes in the future then the number of workers
+ // used by the loopfilter should be revisited.
+ for (i = 0; i < num_workers; ++i) {
VP9Worker *const worker = &pbi->tile_workers[i];
TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
LFWorkerData *const lf_data = &tile_data->lfdata;
@@ -184,10 +195,10 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi,
lf_data->y_only = y_only; // always do all planes in decoder
lf_data->lf_sync = &pbi->lf_row_sync;
- lf_data->num_lf_workers = pbi->num_tile_workers;
+ lf_data->num_lf_workers = num_workers;
// Start loopfiltering
- if (i == pbi->num_tile_workers - 1) {
+ if (i == num_workers - 1) {
vp9_worker_execute(worker);
} else {
vp9_worker_launch(worker);
@@ -195,7 +206,7 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi,
}
// Wait till all rows are finished
- for (i = 0; i < pbi->num_tile_workers; ++i) {
+ for (i = 0; i < num_workers; ++i) {
vp9_worker_sync(&pbi->tile_workers[i]);
}
}
diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h
index 2f65e1e30..005bd7bbd 100644
--- a/vp9/decoder/vp9_dthread.h
+++ b/vp9/decoder/vp9_dthread.h
@@ -18,7 +18,7 @@
struct macroblockd;
struct VP9Common;
-struct VP9Decompressor;
+struct VP9Decoder;
typedef struct TileWorkerData {
struct VP9Common *cm;
@@ -50,7 +50,7 @@ void vp9_loop_filter_alloc(struct VP9Common *cm, struct VP9LfSyncData *lf_sync,
void vp9_loop_filter_dealloc(struct VP9LfSyncData *lf_sync, int rows);
// Multi-threaded loopfilter that uses the tile threads.
-void vp9_loop_filter_frame_mt(struct VP9Decompressor *pbi,
+void vp9_loop_filter_frame_mt(struct VP9Decoder *pbi,
struct VP9Common *cm,
struct macroblockd *xd,
int frame_filter_level,