summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- vp9/common/vp9_alloccommon.c | 12
-rw-r--r-- vp9/common/vp9_blockd.h | 2
-rw-r--r-- vp9/common/vp9_entropymode.c | 9
-rw-r--r-- vp9/common/vp9_loopfilter.c | 40
-rw-r--r-- vp9/common/vp9_loopfilter.h | 36
-rw-r--r-- vp9/common/vp9_onyxc_int.h | 28
-rw-r--r-- vp9/common/vp9_pred_common.h | 2
-rw-r--r-- vp9/decoder/vp9_decodeframe.c | 28
-rw-r--r-- vp9/decoder/vp9_decodemv.c | 4
-rw-r--r-- vp9/decoder/vp9_dthread.c | 259
-rw-r--r-- vp9/decoder/vp9_dthread.h | 60
-rw-r--r-- vp9/decoder/vp9_onyxd_if.c | 50
-rw-r--r-- vp9/decoder/vp9_onyxd_int.h | 3
-rw-r--r-- vp9/decoder/vp9_thread.c | 110
-rw-r--r-- vp9/decoder/vp9_thread.h | 119
-rw-r--r-- vp9/encoder/vp9_bitstream.c | 4
-rw-r--r-- vp9/encoder/vp9_encodeframe.c | 57
-rw-r--r-- vp9/encoder/vp9_onyx_if.c | 46
-rw-r--r-- vp9/encoder/vp9_onyx_int.h | 8
-rw-r--r-- vp9/encoder/vp9_pickmode.c | 4
-rw-r--r-- vp9/encoder/vp9_rdopt.c | 7
-rw-r--r-- vp9/encoder/vp9_tokenize.c | 4
-rw-r--r-- vp9/vp9dx.mk | 2
23 files changed, 628 insertions, 266 deletions
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index ada7c6c03..e033fbb99 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -34,7 +34,7 @@ void vp9_free_frame_buffers(VP9_COMMON *cm) {
int i;
for (i = 0; i < FRAME_BUFFERS; i++)
- vp9_free_frame_buffer(&cm->yv12_fb[i]);
+ vp9_free_frame_buffer(&cm->frame_bufs[i].buf);
vp9_free_frame_buffer(&cm->post_proc_buffer);
@@ -140,18 +140,18 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
vp9_free_frame_buffers(cm);
for (i = 0; i < FRAME_BUFFERS; i++) {
- cm->fb_idx_ref_cnt[i] = 0;
- if (vp9_alloc_frame_buffer(&cm->yv12_fb[i], width, height, ss_x, ss_y,
- VP9_ENC_BORDER_IN_PIXELS) < 0)
+ cm->frame_bufs[i].ref_count = 0;
+ if (vp9_alloc_frame_buffer(&cm->frame_bufs[i].buf, width, height,
+ ss_x, ss_y, VP9_ENC_BORDER_IN_PIXELS) < 0)
goto fail;
}
cm->new_fb_idx = FRAME_BUFFERS - 1;
- cm->fb_idx_ref_cnt[cm->new_fb_idx] = 1;
+ cm->frame_bufs[cm->new_fb_idx].ref_count = 1;
for (i = 0; i < REF_FRAMES; i++) {
cm->ref_frame_map[i] = i;
- cm->fb_idx_ref_cnt[i] = 1;
+ cm->frame_bufs[i].ref_count = 1;
}
if (vp9_alloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 9e16d8f78..49e336aa4 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -30,7 +30,7 @@ extern "C" {
#endif
#define BLOCK_SIZE_GROUPS 4
-#define MBSKIP_CONTEXTS 3
+#define SKIP_CONTEXTS 3
#define INTER_MODE_CONTEXTS 7
/* Segment Feature Masks */
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index 77b8de046..6def3c869 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -303,7 +303,7 @@ void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p,
ct_8x8p[0][1] = tx_count_8x8p[TX_8X8];
}
-static const vp9_prob default_mbskip_probs[MBSKIP_CONTEXTS] = {
+static const vp9_prob default_skip_probs[SKIP_CONTEXTS] = {
192, 128, 64
};
@@ -325,7 +325,7 @@ void vp9_init_mbmode_probs(VP9_COMMON *cm) {
vp9_copy(cm->fc.comp_ref_prob, default_comp_ref_p);
vp9_copy(cm->fc.single_ref_prob, default_single_ref_p);
cm->fc.tx_probs = default_tx_probs;
- vp9_copy(cm->fc.mbskip_probs, default_mbskip_probs);
+ vp9_copy(cm->fc.skip_probs, default_skip_probs);
vp9_copy(cm->fc.inter_mode_probs, default_inter_mode_probs);
}
@@ -415,9 +415,8 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
}
}
- for (i = 0; i < MBSKIP_CONTEXTS; ++i)
- fc->mbskip_probs[i] = adapt_prob(pre_fc->mbskip_probs[i],
- counts->mbskip[i]);
+ for (i = 0; i < SKIP_CONTEXTS; ++i)
+ fc->skip_probs[i] = adapt_prob(pre_fc->skip_probs[i], counts->skip[i]);
}
static void set_default_lf_deltas(struct loopfilter *lf) {
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 2266e0ec2..dd304c909 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -16,26 +16,6 @@
#include "vp9/common/vp9_seg_common.h"
-// This structure holds bit masks for all 8x8 blocks in a 64x64 region.
-// Each 1 bit represents a position in which we want to apply the loop filter.
-// Left_ entries refer to whether we apply a filter on the border to the
-// left of the block. Above_ entries refer to whether or not to apply a
-// filter on the above border. Int_ entries refer to whether or not to
-// apply borders on the 4x4 edges within the 8x8 block that each bit
-// represents.
-// Since each transform is accompanied by a potentially different type of
-// loop filter there is a different entry in the array for each transform size.
-typedef struct {
- uint64_t left_y[TX_SIZES];
- uint64_t above_y[TX_SIZES];
- uint64_t int_4x4_y;
- uint16_t left_uv[TX_SIZES];
- uint16_t above_uv[TX_SIZES];
- uint16_t int_4x4_uv;
- uint8_t lfl_y[64];
- uint8_t lfl_uv[16];
-} LOOP_FILTER_MASK;
-
// 64 bit masks for left transform size. Each 1 represents a position where
// we should apply a loop filter across the left border of an 8x8 block
// boundary.
@@ -638,9 +618,9 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n,
// This function sets up the bit masks for the entire 64x64 region represented
// by mi_row, mi_col.
// TODO(JBB): This function only works for yv12.
-static void setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
- MODE_INFO **mi_8x8, const int mode_info_stride,
- LOOP_FILTER_MASK *lfm) {
+void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
+ MODE_INFO **mi_8x8, const int mode_info_stride,
+ LOOP_FILTER_MASK *lfm) {
int idx_32, idx_16, idx_8;
const loop_filter_info_n *const lfi_n = &cm->lf_info;
MODE_INFO **mip = mi_8x8;
@@ -1069,10 +1049,10 @@ static void filter_block_plane_non420(VP9_COMMON *cm,
}
#endif
-static void filter_block_plane(VP9_COMMON *const cm,
- struct macroblockd_plane *const plane,
- int mi_row,
- LOOP_FILTER_MASK *lfm) {
+void vp9_filter_block_plane(VP9_COMMON *const cm,
+ struct macroblockd_plane *const plane,
+ int mi_row,
+ LOOP_FILTER_MASK *lfm) {
struct buf_2d *const dst = &plane->dst;
uint8_t* const dst0 = dst->buf;
int r, c;
@@ -1244,14 +1224,14 @@ void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
#if CONFIG_NON420
if (use_420)
#endif
- setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mode_info_stride,
- &lfm);
+ vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col,
+ cm->mode_info_stride, &lfm);
for (plane = 0; plane < num_planes; ++plane) {
#if CONFIG_NON420
if (use_420)
#endif
- filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
+ vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
#if CONFIG_NON420
else
filter_block_plane_non420(cm, &xd->plane[plane], mi_8x8 + mi_col,
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index 43373f4b1..668e898cf 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -60,9 +60,42 @@ typedef struct {
uint8_t lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS];
} loop_filter_info_n;
+// This structure holds bit masks for all 8x8 blocks in a 64x64 region.
+// Each 1 bit represents a position in which we want to apply the loop filter.
+// Left_ entries refer to whether we apply a filter on the border to the
+// left of the block. Above_ entries refer to whether or not to apply a
+// filter on the above border. Int_ entries refer to whether or not to
+// apply a filter on the 4x4 edges within the 8x8 block that each bit
+// represents.
+// Since each transform is accompanied by a potentially different type of
+// loop filter there is a different entry in the array for each transform size.
+typedef struct {
+ uint64_t left_y[TX_SIZES];
+ uint64_t above_y[TX_SIZES];
+ uint64_t int_4x4_y;
+ uint16_t left_uv[TX_SIZES];
+ uint16_t above_uv[TX_SIZES];
+ uint16_t int_4x4_uv;
+ uint8_t lfl_y[64];
+ uint8_t lfl_uv[16];
+} LOOP_FILTER_MASK;
+
/* assorted loopfilter functions which get used elsewhere */
struct VP9Common;
struct macroblockd;
+struct VP9LfSyncData;
+
+// This function sets up the bit masks for the entire 64x64 region represented
+// by mi_row, mi_col.
+void vp9_setup_mask(struct VP9Common *const cm,
+ const int mi_row, const int mi_col,
+ MODE_INFO **mi_8x8, const int mode_info_stride,
+ LOOP_FILTER_MASK *lfm);
+
+void vp9_filter_block_plane(struct VP9Common *const cm,
+ struct macroblockd_plane *const plane,
+ int mi_row,
+ LOOP_FILTER_MASK *lfm);
void vp9_loop_filter_init(struct VP9Common *cm);
@@ -90,6 +123,9 @@ typedef struct LoopFilterWorkerData {
int start;
int stop;
int y_only;
+
+ struct VP9LfSyncData *lf_sync;
+ int num_lf_workers;
} LFWorkerData;
// Operates on the rows described by LFWorkerData passed as 'arg1'.
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 894134a0f..d92a25b12 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -60,7 +60,7 @@ typedef struct frame_contexts {
vp9_prob single_ref_prob[REF_CONTEXTS][2];
vp9_prob comp_ref_prob[REF_CONTEXTS];
struct tx_probs tx_probs;
- vp9_prob mbskip_probs[MBSKIP_CONTEXTS];
+ vp9_prob skip_probs[SKIP_CONTEXTS];
nmv_context nmvc;
} FRAME_CONTEXT;
@@ -79,7 +79,7 @@ typedef struct {
unsigned int single_ref[REF_CONTEXTS][2][2];
unsigned int comp_ref[REF_CONTEXTS][2];
struct tx_counts tx;
- unsigned int mbskip[MBSKIP_CONTEXTS][2];
+ unsigned int skip[SKIP_CONTEXTS][2];
nmv_context_counts mv;
} FRAME_COUNTS;
@@ -91,6 +91,12 @@ typedef enum {
REFERENCE_MODES = 3,
} REFERENCE_MODE;
+
+typedef struct {
+ int ref_count;
+ YV12_BUFFER_CONFIG buf;
+} RefCntBuffer;
+
typedef struct VP9Common {
struct vpx_internal_error_info error;
@@ -117,8 +123,8 @@ typedef struct VP9Common {
YV12_BUFFER_CONFIG *frame_to_show;
- YV12_BUFFER_CONFIG yv12_fb[FRAME_BUFFERS];
- int fb_idx_ref_cnt[FRAME_BUFFERS]; /* reference counts */
+ RefCntBuffer frame_bufs[FRAME_BUFFERS];
+
int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */
// TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and
@@ -220,29 +226,29 @@ typedef struct VP9Common {
} VP9_COMMON;
static YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) {
- return &cm->yv12_fb[cm->new_fb_idx];
+ return &cm->frame_bufs[cm->new_fb_idx].buf;
}
static int get_free_fb(VP9_COMMON *cm) {
int i;
for (i = 0; i < FRAME_BUFFERS; i++)
- if (cm->fb_idx_ref_cnt[i] == 0)
+ if (cm->frame_bufs[i].ref_count == 0)
break;
assert(i < FRAME_BUFFERS);
- cm->fb_idx_ref_cnt[i] = 1;
+ cm->frame_bufs[i].ref_count = 1;
return i;
}
-static void ref_cnt_fb(int *buf, int *idx, int new_idx) {
+static void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
const int ref_index = *idx;
- if (ref_index >= 0 && buf[ref_index] > 0)
- buf[ref_index]--;
+ if (ref_index >= 0 && bufs[ref_index].ref_count > 0)
+ bufs[ref_index].ref_count--;
*idx = new_idx;
- buf[new_idx]++;
+ bufs[new_idx].ref_count++;
}
static int mi_cols_aligned_to_sb(int n_mis) {
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
index 26edf7810..0acee32f8 100644
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -54,7 +54,7 @@ static INLINE int vp9_get_skip_context(const MACROBLOCKD *xd) {
static INLINE vp9_prob vp9_get_skip_prob(const VP9_COMMON *cm,
const MACROBLOCKD *xd) {
- return cm->fc.mbskip_probs[vp9_get_skip_context(xd)];
+ return cm->fc.skip_probs[vp9_get_skip_context(xd)];
}
int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd);
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 1d9be5322..be8176a3f 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -33,18 +33,12 @@
#include "vp9/decoder/vp9_detokenize.h"
#include "vp9/decoder/vp9_decodemv.h"
#include "vp9/decoder/vp9_dsubexp.h"
+#include "vp9/decoder/vp9_dthread.h"
#include "vp9/decoder/vp9_onyxd_int.h"
#include "vp9/decoder/vp9_read_bit_buffer.h"
#include "vp9/decoder/vp9_reader.h"
#include "vp9/decoder/vp9_thread.h"
-typedef struct TileWorkerData {
- VP9_COMMON *cm;
- vp9_reader bit_reader;
- DECLARE_ALIGNED(16, MACROBLOCKD, xd);
- DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
-} TileWorkerData;
-
static int read_be32(const uint8_t *p) {
return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
}
@@ -982,7 +976,6 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) {
++pbi->num_tile_workers;
vp9_worker_init(worker);
- worker->hook = (VP9WorkerHook)tile_worker_hook;
CHECK_MEM_ERROR(cm, worker->data1,
vpx_memalign(32, sizeof(TileWorkerData)));
CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo)));
@@ -993,6 +986,11 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) {
}
}
+ // Reset tile decoding hook
+ for (n = 0; n < pbi->num_tile_workers; ++n) {
+ pbi->tile_workers[n].hook = (VP9WorkerHook)tile_worker_hook;
+ }
+
// Note: this memset assumes above_context[0], [1] and [2]
// are allocated as part of the same buffer.
vpx_memset(pbi->above_context[0], 0,
@@ -1117,7 +1115,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
if (cm->show_existing_frame) {
// Show an existing frame directly.
int frame_to_show = cm->ref_frame_map[vp9_rb_read_literal(rb, 3)];
- ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->new_fb_idx, frame_to_show);
+ ref_cnt_fb(cm->frame_bufs, &cm->new_fb_idx, frame_to_show);
pbi->refresh_frame_flags = 0;
cm->lf.filter_level = 0;
cm->show_frame = 1;
@@ -1177,7 +1175,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
const int ref = vp9_rb_read_literal(rb, REF_FRAMES_LOG2);
const int idx = cm->ref_frame_map[ref];
cm->frame_refs[i].idx = idx;
- cm->frame_refs[i].buf = &cm->yv12_fb[idx];
+ cm->frame_refs[i].buf = &cm->frame_bufs[idx].buf;
cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb);
}
@@ -1245,8 +1243,8 @@ static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data,
read_tx_mode_probs(&fc->tx_probs, &r);
read_coef_probs(fc, cm->tx_mode, &r);
- for (k = 0; k < MBSKIP_CONTEXTS; ++k)
- vp9_diff_update_prob(&r, &fc->mbskip_probs[k]);
+ for (k = 0; k < SKIP_CONTEXTS; ++k)
+ vp9_diff_update_prob(&r, &fc->skip_probs[k]);
if (!frame_is_intra_only(cm)) {
nmv_context *const nmvc = &fc->nmvc;
@@ -1321,8 +1319,7 @@ static void debug_check_frame_counts(const VP9_COMMON *const cm) {
assert(!memcmp(cm->counts.comp_ref, zero_counts.comp_ref,
sizeof(cm->counts.comp_ref)));
assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx)));
- assert(!memcmp(cm->counts.mbskip, zero_counts.mbskip,
- sizeof(cm->counts.mbskip)));
+ assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip)));
assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv)));
}
#endif // NDEBUG
@@ -1393,9 +1390,6 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
*p_data_end = decode_tiles(pbi, data + first_partition_size);
}
- cm->last_width = cm->width;
- cm->last_height = cm->height;
-
new_fb->corrupted |= xd->corrupted;
if (!pbi->decoded_key_frame) {
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index d7cd635bd..e671f0dba 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -152,9 +152,9 @@ static int read_skip_coeff(VP9_COMMON *cm, const MACROBLOCKD *xd,
return 1;
} else {
const int ctx = vp9_get_skip_context(xd);
- const int skip = vp9_read(r, cm->fc.mbskip_probs[ctx]);
+ const int skip = vp9_read(r, cm->fc.skip_probs[ctx]);
if (!cm->frame_parallel_decoding_mode)
- ++cm->counts.mbskip[ctx][skip];
+ ++cm->counts.skip[ctx][skip];
return skip;
}
}
diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c
new file mode 100644
index 000000000..280e351ae
--- /dev/null
+++ b/vp9/decoder/vp9_dthread.c
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+#include "vp9/common/vp9_reconinter.h"
+#include "vp9/decoder/vp9_dthread.h"
+#include "vp9/decoder/vp9_onyxd_int.h"
+#include "vpx_mem/vpx_mem.h"
+
+#if CONFIG_MULTITHREAD
+static INLINE void mutex_lock(pthread_mutex_t *const mutex) {
+ const int kMaxTryLocks = 4000;
+ int locked = 0;
+ int i;
+
+ for (i = 0; i < kMaxTryLocks; ++i) {
+ if (!pthread_mutex_trylock(mutex)) {
+ locked = 1;
+ break;
+ }
+ }
+
+ if (!locked)
+ pthread_mutex_lock(mutex);
+}
+#endif // CONFIG_MULTITHREAD
+
+static INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) {
+#if CONFIG_MULTITHREAD
+ const int nsync = lf_sync->sync_range;
+
+ if (r && !(c & (nsync - 1))) {
+ mutex_lock(&lf_sync->mutex_[r - 1]);
+
+ while (c > lf_sync->cur_sb_col[r - 1] - nsync) {
+ pthread_cond_wait(&lf_sync->cond_[r - 1],
+ &lf_sync->mutex_[r - 1]);
+ }
+ pthread_mutex_unlock(&lf_sync->mutex_[r - 1]);
+ }
+#else
+ (void)lf_sync;
+ (void)r;
+ (void)c;
+#endif // CONFIG_MULTITHREAD
+}
+
+static INLINE void sync_write(VP9LfSync *const lf_sync, int r, int c,
+ const int sb_cols) {
+#if CONFIG_MULTITHREAD
+ const int nsync = lf_sync->sync_range;
+ int cur;
+ // Only signal when enough SBs have been filtered for the next row to run.
+ int sig = 1;
+
+ if (c < sb_cols - 1) {
+ cur = c;
+ if (c % nsync)
+ sig = 0;
+ } else {
+ cur = sb_cols + nsync;
+ }
+
+ if (sig) {
+ mutex_lock(&lf_sync->mutex_[r]);
+
+ lf_sync->cur_sb_col[r] = cur;
+
+ pthread_cond_signal(&lf_sync->cond_[r]);
+ pthread_mutex_unlock(&lf_sync->mutex_[r]);
+ }
+#else
+ (void)lf_sync;
+ (void)r;
+ (void)c;
+ (void)sb_cols;
+#endif // CONFIG_MULTITHREAD
+}
+
+// Implement row loopfiltering for each thread.
+static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer,
+ VP9_COMMON *const cm, MACROBLOCKD *const xd,
+ int start, int stop, int y_only,
+ VP9LfSync *const lf_sync, int num_lf_workers) {
+ const int num_planes = y_only ? 1 : MAX_MB_PLANE;
+ int r, c; // SB row and col
+ LOOP_FILTER_MASK lfm;
+ const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
+
+ for (r = start; r < stop; r += num_lf_workers) {
+ const int mi_row = r << MI_BLOCK_SIZE_LOG2;
+ MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mode_info_stride;
+
+ for (c = 0; c < sb_cols; ++c) {
+ const int mi_col = c << MI_BLOCK_SIZE_LOG2;
+ int plane;
+
+ sync_read(lf_sync, r, c);
+
+ setup_dst_planes(xd, frame_buffer, mi_row, mi_col);
+ vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mode_info_stride,
+ &lfm);
+
+ for (plane = 0; plane < num_planes; ++plane) {
+ vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
+ }
+
+ sync_write(lf_sync, r, c, sb_cols);
+ }
+ }
+}
+
+// Row-based multi-threaded loopfilter hook
+static int loop_filter_row_worker(void *arg1, void *arg2) {
+ TileWorkerData *const tile_data = (TileWorkerData*)arg1;
+ LFWorkerData *const lf_data = &tile_data->lfdata;
+
+ loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, &lf_data->xd,
+ lf_data->start, lf_data->stop, lf_data->y_only,
+ lf_data->lf_sync, lf_data->num_lf_workers);
+ return 1;
+}
+
+// VP9 decoder: Implement multi-threaded loopfilter that uses the tile
+// threads.
+void vp9_loop_filter_frame_mt(VP9D_COMP *pbi,
+ VP9_COMMON *cm,
+ MACROBLOCKD *xd,
+ int frame_filter_level,
+ int y_only, int partial) {
+ // Number of superblock rows
+ const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
+ int i;
+
+ // Allocate memory used in thread synchronization.
+ // This always needs to be done even if frame_filter_level is 0.
+ if (!cm->current_video_frame || cm->last_height != cm->height) {
+ VP9LfSync *const lf_sync = &pbi->lf_row_sync;
+
+ if (cm->last_height != cm->height) {
+ const int aligned_last_height =
+ ALIGN_POWER_OF_TWO(cm->last_height, MI_SIZE_LOG2);
+ const int last_sb_rows =
+ mi_cols_aligned_to_sb(aligned_last_height >> MI_SIZE_LOG2) >>
+ MI_BLOCK_SIZE_LOG2;
+
+ vp9_loop_filter_dealloc(lf_sync, last_sb_rows);
+ }
+
+ vp9_loop_filter_alloc(cm, lf_sync, sb_rows, cm->width);
+ }
+
+ if (!frame_filter_level) return;
+
+ vp9_loop_filter_frame_init(cm, frame_filter_level);
+
+ // Initialize cur_sb_col to -1 for all SB rows.
+ vpx_memset(pbi->lf_row_sync.cur_sb_col, -1,
+ sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows);
+
+ // Set up loopfilter thread data.
+ for (i = 0; i < pbi->num_tile_workers; ++i) {
+ VP9Worker *const worker = &pbi->tile_workers[i];
+ TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
+ LFWorkerData *const lf_data = &tile_data->lfdata;
+
+ worker->hook = (VP9WorkerHook)loop_filter_row_worker;
+
+ // Loopfilter data
+ lf_data->frame_buffer = get_frame_new_buffer(cm);
+ lf_data->cm = cm;
+ lf_data->xd = pbi->mb;
+ lf_data->start = i;
+ lf_data->stop = sb_rows;
+ lf_data->y_only = y_only; // always do all planes in decoder
+
+ lf_data->lf_sync = &pbi->lf_row_sync;
+ lf_data->num_lf_workers = pbi->num_tile_workers;
+
+ // Start loopfiltering
+ if (i == pbi->num_tile_workers - 1) {
+ vp9_worker_execute(worker);
+ } else {
+ vp9_worker_launch(worker);
+ }
+ }
+
+ // Wait till all rows are finished
+ for (i = 0; i < pbi->num_tile_workers; ++i) {
+ vp9_worker_sync(&pbi->tile_workers[i]);
+ }
+}
+
+// Set up nsync by width.
+static int get_sync_range(int width) {
+ // nsync numbers are picked by testing. For example, for 4k
+ // video, using 4 gives best performance.
+ if (width < 640)
+ return 1;
+ else if (width <= 1280)
+ return 2;
+ else if (width <= 4096)
+ return 4;
+ else
+ return 8;
+}
+
+// Allocate memory for lf row synchronization
+void vp9_loop_filter_alloc(VP9_COMMON *cm, VP9LfSync *lf_sync, int rows,
+ int width) {
+#if CONFIG_MULTITHREAD
+ int i;
+
+ CHECK_MEM_ERROR(cm, lf_sync->mutex_,
+ vpx_malloc(sizeof(*lf_sync->mutex_) * rows));
+ CHECK_MEM_ERROR(cm, lf_sync->cond_,
+ vpx_malloc(sizeof(*lf_sync->cond_) * rows));
+
+ for (i = 0; i < rows; ++i) {
+ pthread_mutex_init(&lf_sync->mutex_[i], NULL);
+ pthread_cond_init(&lf_sync->cond_[i], NULL);
+ }
+#endif // CONFIG_MULTITHREAD
+
+ CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
+ vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));
+
+ // Set up nsync.
+ lf_sync->sync_range = get_sync_range(width);
+}
+
+// Deallocate lf synchronization related mutex and data
+void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows) {
+#if CONFIG_MULTITHREAD
+ if (lf_sync != NULL) {
+ int i;
+
+ for (i = 0; i < rows; ++i) {
+ pthread_mutex_destroy(&lf_sync->mutex_[i]);
+ pthread_cond_destroy(&lf_sync->cond_[i]);
+ }
+
+ vpx_free(lf_sync->mutex_);
+ vpx_free(lf_sync->cond_);
+ vpx_free(lf_sync->cur_sb_col);
+ }
+#else
+ (void)rows;
+ if (lf_sync != NULL)
+ vpx_free(lf_sync->cur_sb_col);
+#endif // CONFIG_MULTITHREAD
+}
diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h
new file mode 100644
index 000000000..4478354ba
--- /dev/null
+++ b/vp9/decoder/vp9_dthread.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_DECODER_VP9_DTHREAD_H_
+#define VP9_DECODER_VP9_DTHREAD_H_
+
+#include "./vpx_config.h"
+#include "vp9/common/vp9_loopfilter.h"
+#include "vp9/decoder/vp9_reader.h"
+#include "vp9/decoder/vp9_thread.h"
+
+struct macroblockd;
+struct VP9Common;
+struct VP9Decompressor;
+
+typedef struct TileWorkerData {
+ struct VP9Common *cm;
+ vp9_reader bit_reader;
+ DECLARE_ALIGNED(16, struct macroblockd, xd);
+ DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
+
+ // Row-based parallel loopfilter data
+ LFWorkerData lfdata;
+} TileWorkerData;
+
+// Loopfilter row synchronization
+typedef struct VP9LfSyncData {
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t *mutex_;
+ pthread_cond_t *cond_;
+#endif
+ // Allocate memory to store the loop-filtered superblock index in each row.
+ int *cur_sb_col;
+ // The optimal sync_range for different resolution and platform should be
+ // determined by testing. Currently, it is chosen to be a power-of-2 number.
+ int sync_range;
+} VP9LfSync;
+
+// Allocate memory for loopfilter row synchronization.
+void vp9_loop_filter_alloc(struct VP9Common *cm, struct VP9LfSyncData *lf_sync,
+ int rows, int width);
+
+// Deallocate loopfilter synchronization related mutex and data.
+void vp9_loop_filter_dealloc(struct VP9LfSyncData *lf_sync, int rows);
+
+// Multi-threaded loopfilter that uses the tile threads.
+void vp9_loop_filter_frame_mt(struct VP9Decompressor *pbi,
+ struct VP9Common *cm,
+ struct macroblockd *xd,
+ int frame_filter_level,
+ int y_only, int partial);
+
+#endif // VP9_DECODER_VP9_DTHREAD_H_
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index c6e9205bb..803d536ba 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -27,6 +27,7 @@
#include "vpx_ports/vpx_timer.h"
#include "vp9/decoder/vp9_decodeframe.h"
#include "vp9/decoder/vp9_detokenize.h"
+#include "vp9/decoder/vp9_dthread.h"
#include "./vpx_scale_rtcd.h"
#define WRITE_RECON_BUFFER 0
@@ -177,13 +178,24 @@ void vp9_remove_decompressor(VP9D_PTR ptr) {
vpx_free(worker->data2);
}
vpx_free(pbi->tile_workers);
+
+ if (pbi->num_tile_workers) {
+ VP9_COMMON *const cm = &pbi->common;
+ const int sb_rows =
+ mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
+ VP9LfSync *const lf_sync = &pbi->lf_row_sync;
+
+ vp9_loop_filter_dealloc(lf_sync, sb_rows);
+ }
+
vpx_free(pbi->mi_streams);
vpx_free(pbi->above_context[0]);
vpx_free(pbi->above_seg_context);
vpx_free(pbi);
}
-static int equal_dimensions(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) {
+static int equal_dimensions(const YV12_BUFFER_CONFIG *a,
+ const YV12_BUFFER_CONFIG *b) {
return a->y_height == b->y_height && a->y_width == b->y_width &&
a->uv_height == b->uv_height && a->uv_width == b->uv_width;
}
@@ -200,7 +212,8 @@ vpx_codec_err_t vp9_copy_reference_dec(VP9D_PTR ptr,
* later commit that adds VP9-specific controls for this functionality.
*/
if (ref_frame_flag == VP9_LAST_FLAG) {
- YV12_BUFFER_CONFIG *cfg = &cm->yv12_fb[cm->ref_frame_map[0]];
+ const YV12_BUFFER_CONFIG *const cfg =
+ &cm->frame_bufs[cm->ref_frame_map[0]].buf;
if (!equal_dimensions(cfg, sd))
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"Incorrect buffer dimensions");
@@ -246,13 +259,13 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_PTR ptr, VP9_REFFRAME ref_frame_flag,
// Find an empty frame buffer.
const int free_fb = get_free_fb(cm);
- // Decrease fb_idx_ref_cnt since it will be increased again in
+ // Decrease ref_count since it will be increased again in
// ref_cnt_fb() below.
- cm->fb_idx_ref_cnt[free_fb]--;
+ cm->frame_bufs[free_fb].ref_count--;
// Manage the reference counters and copy image.
- ref_cnt_fb(cm->fb_idx_ref_cnt, ref_fb_ptr, free_fb);
- ref_buf->buf = &cm->yv12_fb[*ref_fb_ptr];
+ ref_cnt_fb(cm->frame_bufs, ref_fb_ptr, free_fb);
+ ref_buf->buf = &cm->frame_bufs[*ref_fb_ptr].buf;
vp8_yv12_copy_frame(sd, ref_buf->buf);
}
@@ -267,7 +280,7 @@ int vp9_get_reference_dec(VP9D_PTR ptr, int index, YV12_BUFFER_CONFIG **fb) {
if (index < 0 || index >= REF_FRAMES)
return -1;
- *fb = &cm->yv12_fb[cm->ref_frame_map[index]];
+ *fb = &cm->frame_bufs[cm->ref_frame_map[index]].buf;
return 0;
}
@@ -278,13 +291,13 @@ static void swap_frame_buffers(VP9D_COMP *pbi) {
for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
if (mask & 1)
- ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->ref_frame_map[ref_index],
+ ref_cnt_fb(cm->frame_bufs, &cm->ref_frame_map[ref_index],
cm->new_fb_idx);
++ref_index;
}
cm->frame_to_show = get_frame_new_buffer(cm);
- cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
+ cm->frame_bufs[cm->new_fb_idx].ref_count--;
// Invalidate these references until the next frame starts.
for (ref_index = 0; ref_index < 3; ref_index++)
@@ -340,8 +353,8 @@ int vp9_receive_compressed_data(VP9D_PTR ptr,
if (cm->frame_refs[0].idx != INT_MAX)
cm->frame_refs[0].buf->corrupted = 1;
- if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
- cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
+ if (cm->frame_bufs[cm->new_fb_idx].ref_count > 0)
+ cm->frame_bufs[cm->new_fb_idx].ref_count--;
return -1;
}
@@ -353,8 +366,8 @@ int vp9_receive_compressed_data(VP9D_PTR ptr,
if (retcode < 0) {
cm->error.error_code = VPX_CODEC_ERROR;
cm->error.setjmp = 0;
- if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
- cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
+ if (cm->frame_bufs[cm->new_fb_idx].ref_count > 0)
+ cm->frame_bufs[cm->new_fb_idx].ref_count--;
return retcode;
}
@@ -370,7 +383,13 @@ int vp9_receive_compressed_data(VP9D_PTR ptr,
#endif
if (!pbi->do_loopfilter_inline) {
- vp9_loop_filter_frame(cm, &pbi->mb, pbi->common.lf.filter_level, 0, 0);
+ // If multiple threads are used to decode tiles, then we use those threads
+ // to do parallel loopfiltering.
+ if (pbi->num_tile_workers) {
+ vp9_loop_filter_frame_mt(pbi, cm, &pbi->mb, cm->lf.filter_level, 0, 0);
+ } else {
+ vp9_loop_filter_frame(cm, &pbi->mb, cm->lf.filter_level, 0, 0);
+ }
}
#if WRITE_RECON_BUFFER == 2
@@ -390,6 +409,9 @@ int vp9_receive_compressed_data(VP9D_PTR ptr,
vp9_clear_system_state();
+ cm->last_width = cm->width;
+ cm->last_height = cm->height;
+
if (!cm->show_existing_frame)
cm->last_show_frame = cm->show_frame;
if (cm->show_frame) {
diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h
index 242c600cc..6c6c23926 100644
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -14,6 +14,7 @@
#include "./vpx_config.h"
#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/decoder/vp9_dthread.h"
#include "vp9/decoder/vp9_onyxd.h"
#include "vp9/decoder/vp9_thread.h"
@@ -49,6 +50,8 @@ typedef struct VP9Decompressor {
VP9Worker *tile_workers;
int num_tile_workers;
+ VP9LfSync lf_row_sync;
+
/* Each tile column has its own MODE_INFO stream. This array indexes them by
tile column index. */
MODE_INFO **mi_streams;
diff --git a/vp9/decoder/vp9_thread.c b/vp9/decoder/vp9_thread.c
index d953e72b3..5d31d3d98 100644
--- a/vp9/decoder/vp9_thread.c
+++ b/vp9/decoder/vp9_thread.c
@@ -24,116 +24,6 @@ extern "C" {
#if CONFIG_MULTITHREAD
-#if defined(_WIN32)
-
-//------------------------------------------------------------------------------
-// simplistic pthread emulation layer
-
-#include <process.h> // NOLINT
-
-// _beginthreadex requires __stdcall
-#define THREADFN unsigned int __stdcall
-#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val)
-
-static int pthread_create(pthread_t* const thread, const void* attr,
- unsigned int (__stdcall *start)(void*), void* arg) {
- (void)attr;
- *thread = (pthread_t)_beginthreadex(NULL, /* void *security */
- 0, /* unsigned stack_size */
- start,
- arg,
- 0, /* unsigned initflag */
- NULL); /* unsigned *thrdaddr */
- if (*thread == NULL) return 1;
- SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL);
- return 0;
-}
-
-static int pthread_join(pthread_t thread, void** value_ptr) {
- (void)value_ptr;
- return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 ||
- CloseHandle(thread) == 0);
-}
-
-// Mutex
-static int pthread_mutex_init(pthread_mutex_t* const mutex, void* mutexattr) {
- (void)mutexattr;
- InitializeCriticalSection(mutex);
- return 0;
-}
-
-static int pthread_mutex_lock(pthread_mutex_t* const mutex) {
- EnterCriticalSection(mutex);
- return 0;
-}
-
-static int pthread_mutex_unlock(pthread_mutex_t* const mutex) {
- LeaveCriticalSection(mutex);
- return 0;
-}
-
-static int pthread_mutex_destroy(pthread_mutex_t* const mutex) {
- DeleteCriticalSection(mutex);
- return 0;
-}
-
-// Condition
-static int pthread_cond_destroy(pthread_cond_t* const condition) {
- int ok = 1;
- ok &= (CloseHandle(condition->waiting_sem_) != 0);
- ok &= (CloseHandle(condition->received_sem_) != 0);
- ok &= (CloseHandle(condition->signal_event_) != 0);
- return !ok;
-}
-
-static int pthread_cond_init(pthread_cond_t* const condition, void* cond_attr) {
- (void)cond_attr;
- condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
- condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
- condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);
- if (condition->waiting_sem_ == NULL ||
- condition->received_sem_ == NULL ||
- condition->signal_event_ == NULL) {
- pthread_cond_destroy(condition);
- return 1;
- }
- return 0;
-}
-
-static int pthread_cond_signal(pthread_cond_t* const condition) {
- int ok = 1;
- if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
- // a thread is waiting in pthread_cond_wait: allow it to be notified
- ok = SetEvent(condition->signal_event_);
- // wait until the event is consumed so the signaler cannot consume
- // the event via its own pthread_cond_wait.
- ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) !=
- WAIT_OBJECT_0);
- }
- return !ok;
-}
-
-static int pthread_cond_wait(pthread_cond_t* const condition,
- pthread_mutex_t* const mutex) {
- int ok;
- // note that there is a consumer available so the signal isn't dropped in
- // pthread_cond_signal
- if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL))
- return 1;
- // now unlock the mutex so pthread_cond_signal may be issued
- pthread_mutex_unlock(mutex);
- ok = (WaitForSingleObject(condition->signal_event_, INFINITE) ==
- WAIT_OBJECT_0);
- ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL);
- pthread_mutex_lock(mutex);
- return !ok;
-}
-
-#else // _WIN32
-# define THREADFN void*
-# define THREAD_RETURN(val) val
-#endif
-
//------------------------------------------------------------------------------
static THREADFN thread_loop(void *ptr) { // thread loop
diff --git a/vp9/decoder/vp9_thread.h b/vp9/decoder/vp9_thread.h
index bc69cfa1f..2f8728dcf 100644
--- a/vp9/decoder/vp9_thread.h
+++ b/vp9/decoder/vp9_thread.h
@@ -26,7 +26,8 @@ extern "C" {
#if CONFIG_MULTITHREAD
#if defined(_WIN32)
-
+#include <errno.h> // NOLINT
+#include <process.h> // NOLINT
#include <windows.h> // NOLINT
typedef HANDLE pthread_t;
typedef CRITICAL_SECTION pthread_mutex_t;
@@ -36,12 +37,120 @@ typedef struct {
HANDLE signal_event_;
} pthread_cond_t;
-#else
-
+//------------------------------------------------------------------------------
+// simplistic pthread emulation layer
+
+// _beginthreadex requires a __stdcall entry point returning unsigned int
+#define THREADFN unsigned int __stdcall
+#define THREAD_RETURN(val) ((unsigned int)((DWORD_PTR)(val)))
+
+static INLINE int pthread_create(pthread_t* const thread, const void* attr,
+ unsigned int (__stdcall *start)(void*),
+ void* arg) {
+ (void)attr;
+ *thread = (pthread_t)_beginthreadex(NULL, /* void *security */
+ 0, /* unsigned stack_size */
+ start,
+ arg,
+ 0, /* unsigned initflag */
+ NULL); /* unsigned *thrdaddr */
+ if (*thread == NULL) return 1;
+ SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL);
+ return 0;
+}
+
+static INLINE int pthread_join(pthread_t thread, void** value_ptr) {
+ (void)value_ptr;
+ return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 ||
+ CloseHandle(thread) == 0);
+}
+
+// Mutex
+static INLINE int pthread_mutex_init(pthread_mutex_t *const mutex,
+ void* mutexattr) {
+ (void)mutexattr;
+ InitializeCriticalSection(mutex);
+ return 0;
+}
+
+static INLINE int pthread_mutex_trylock(pthread_mutex_t *const mutex) {
+ return TryEnterCriticalSection(mutex) ? 0 : EBUSY;
+}
+
+static INLINE int pthread_mutex_lock(pthread_mutex_t *const mutex) {
+ EnterCriticalSection(mutex);
+ return 0;
+}
+
+static INLINE int pthread_mutex_unlock(pthread_mutex_t *const mutex) {
+ LeaveCriticalSection(mutex);
+ return 0;
+}
+
+static INLINE int pthread_mutex_destroy(pthread_mutex_t *const mutex) {
+ DeleteCriticalSection(mutex);
+ return 0;
+}
+
+// Condition
+static INLINE int pthread_cond_destroy(pthread_cond_t *const condition) {
+ int ok = 1;
+ ok &= (CloseHandle(condition->waiting_sem_) != 0);
+ ok &= (CloseHandle(condition->received_sem_) != 0);
+ ok &= (CloseHandle(condition->signal_event_) != 0);
+ return !ok;
+}
+
+static INLINE int pthread_cond_init(pthread_cond_t *const condition,
+ void* cond_attr) {
+ (void)cond_attr;
+ condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
+ condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
+ condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);
+ if (condition->waiting_sem_ == NULL ||
+ condition->received_sem_ == NULL ||
+ condition->signal_event_ == NULL) {
+ pthread_cond_destroy(condition);
+ return 1;
+ }
+ return 0;
+}
+
+static INLINE int pthread_cond_signal(pthread_cond_t *const condition) {
+ int ok = 1;
+ if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
+ // a thread is waiting in pthread_cond_wait: allow it to be notified
+ ok = (SetEvent(condition->signal_event_) != 0);
+ // wait until the event is consumed so the signaler cannot consume
+ // the event via its own pthread_cond_wait (WAIT_OBJECT_0 == success).
+ ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) ==
+ WAIT_OBJECT_0);
+ }
+ return !ok;
+}
+
+static INLINE int pthread_cond_wait(pthread_cond_t *const condition,
+ pthread_mutex_t *const mutex) {
+ int ok;
+ // note that there is a consumer available so the signal isn't dropped in
+ // pthread_cond_signal
+ if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL))
+ return 1;
+ // now unlock the mutex so pthread_cond_signal may be issued
+ pthread_mutex_unlock(mutex);
+ ok = (WaitForSingleObject(condition->signal_event_, INFINITE) ==
+ WAIT_OBJECT_0);
+ ok &= (ReleaseSemaphore(condition->received_sem_, 1, NULL) != 0);
+ pthread_mutex_lock(mutex);
+ return !ok;
+}
+#else // _WIN32
#include <pthread.h> // NOLINT
+# define THREADFN void*
+# define THREAD_RETURN(val) (val)
+#endif
-#endif /* _WIN32 */
-#endif /* CONFIG_MULTITHREAD */
+#endif // CONFIG_MULTITHREAD
// State of the worker thread object
typedef enum {
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 8db517a9a..ede744e7f 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -124,8 +124,8 @@ static int write_skip_coeff(const VP9_COMP *cpi, int segment_id, MODE_INFO *m,
void vp9_update_skip_probs(VP9_COMMON *cm, vp9_writer *w) {
int k;
- for (k = 0; k < MBSKIP_CONTEXTS; ++k)
- vp9_cond_prob_diff_update(w, &cm->fc.mbskip_probs[k], cm->counts.mbskip[k]);
+ for (k = 0; k < SKIP_CONTEXTS; ++k)
+ vp9_cond_prob_diff_update(w, &cm->fc.skip_probs[k], cm->counts.skip[k]);
}
static void update_switchable_interp_probs(VP9_COMP *cpi, vp9_writer *w) {
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index bf60027a5..7b6da6c39 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -2156,7 +2156,7 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
vp9_zero(cm->counts.single_ref);
vp9_zero(cm->counts.comp_ref);
vp9_zero(cm->counts.tx);
- vp9_zero(cm->counts.mbskip);
+ vp9_zero(cm->counts.skip);
// Note: this memset assumes above_context[0], [1] and [2]
// are allocated as part of the same buffer.
@@ -2411,15 +2411,15 @@ static void reset_skip_txfm_size(VP9_COMMON *cm, TX_SIZE txfm_max) {
}
}
-static int get_frame_type(VP9_COMP *cpi) {
+static MV_REFERENCE_FRAME get_frame_type(VP9_COMP *cpi) {
if (frame_is_intra_only(&cpi->common))
- return 0;
+ return INTRA_FRAME;
else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame)
- return 3;
+ return ALTREF_FRAME;
else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
- return 1;
+ return LAST_FRAME;
else
- return 2;
+ return GOLDEN_FRAME;
}
static void select_tx_mode(VP9_COMP *cpi) {
@@ -2734,7 +2734,6 @@ void vp9_encode_frame(VP9_COMP *cpi) {
if (cpi->sf.RD) {
int i;
REFERENCE_MODE reference_mode;
- INTERP_FILTER interp_filter;
/*
* This code does a single RD pass over the whole frame assuming
* either compound, single or hybrid prediction as per whatever has
@@ -2744,7 +2743,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
* that for subsequent frames.
* It does the same analysis for transform size selection also.
*/
- const int frame_type = get_frame_type(cpi);
+ const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
const int64_t *mode_thresh = cpi->rd_prediction_type_threshes[frame_type];
const int64_t *filter_thresh = cpi->rd_filter_threshes[frame_type];
@@ -2762,22 +2761,18 @@ void vp9_encode_frame(VP9_COMP *cpi) {
else
reference_mode = REFERENCE_MODE_SELECT;
- /* filter type selection */
- // FIXME(rbultje) for some odd reason, we often select smooth_filter
- // as default filter for ARF overlay frames. This is a REALLY BAD
- // IDEA so we explicitly disable it here.
- if (frame_type != 3 &&
- filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP] &&
- filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP_SHARP] &&
- filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[SWITCHABLE - 1]) {
- interp_filter = EIGHTTAP_SMOOTH;
- } else if (filter_thresh[EIGHTTAP_SHARP] > filter_thresh[EIGHTTAP] &&
- filter_thresh[EIGHTTAP_SHARP] > filter_thresh[SWITCHABLE - 1]) {
- interp_filter = EIGHTTAP_SHARP;
- } else if (filter_thresh[EIGHTTAP] > filter_thresh[SWITCHABLE - 1]) {
- interp_filter = EIGHTTAP;
- } else {
- interp_filter = SWITCHABLE;
+ if (cm->interp_filter == SWITCHABLE) {
+ if (frame_type != ALTREF_FRAME &&
+ filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP] &&
+ filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP_SHARP] &&
+ filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[SWITCHABLE - 1]) {
+ cm->interp_filter = EIGHTTAP_SMOOTH;
+ } else if (filter_thresh[EIGHTTAP_SHARP] > filter_thresh[EIGHTTAP] &&
+ filter_thresh[EIGHTTAP_SHARP] > filter_thresh[SWITCHABLE - 1]) {
+ cm->interp_filter = EIGHTTAP_SHARP;
+ } else if (filter_thresh[EIGHTTAP] > filter_thresh[SWITCHABLE - 1]) {
+ cm->interp_filter = EIGHTTAP;
+ }
}
cpi->mb.e_mbd.lossless = cpi->oxcf.lossless;
@@ -2785,9 +2780,8 @@ void vp9_encode_frame(VP9_COMP *cpi) {
/* transform size selection (4x4, 8x8, 16x16 or select-per-mb) */
select_tx_mode(cpi);
cm->reference_mode = reference_mode;
- cm->interp_filter = interp_filter;
- if (cpi->oxcf.mode == MODE_REALTIME)
+ if (cpi->sf.super_fast_rtc)
encode_rtc_frame_internal(cpi);
else
encode_frame_internal(cpi);
@@ -2868,7 +2862,12 @@ void vp9_encode_frame(VP9_COMP *cpi) {
}
}
} else {
- encode_rtc_frame_internal(cpi);
+ // Force the usage of the BILINEAR interp_filter.
+ cm->interp_filter = BILINEAR;
+ if (cpi->sf.super_fast_rtc)
+ encode_rtc_frame_internal(cpi);
+ else
+ encode_frame_internal(cpi);
}
}
@@ -2945,7 +2944,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
const int mi_height = num_8x8_blocks_high_lookup[bsize];
x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 &&
(cpi->oxcf.aq_mode != COMPLEXITY_AQ) &&
- cpi->oxcf.mode != MODE_REALTIME;
+ !cpi->sf.super_fast_rtc;
x->skip_optimize = ctx->is_coded;
ctx->is_coded = 1;
x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
@@ -3001,7 +3000,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
} else {
mbmi->skip_coeff = 1;
if (output_enabled)
- cm->counts.mbskip[vp9_get_skip_context(xd)][1]++;
+ cm->counts.skip[vp9_get_skip_context(xd)][1]++;
reset_skip_context(xd, MAX(bsize, BLOCK_8X8));
}
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 2d20f7ed3..a9b0718c8 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -850,6 +850,10 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY;
}
sf->use_fast_lpf_pick = 2;
+ sf->RD = 0;
+ }
+ if (speed >= 6) {
+ sf->super_fast_rtc = 1;
}
}
@@ -908,6 +912,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->use_fast_coef_updates = 0;
sf->using_small_partition_info = 0;
sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set
+ sf->super_fast_rtc = 0;
switch (cpi->oxcf.mode) {
case MODE_BESTQUALITY:
@@ -917,8 +922,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
case MODE_FIRSTPASS:
case MODE_GOODQUALITY:
case MODE_SECONDPASS:
- set_good_speed_feature(cm, sf, speed);
- break;
+ set_good_speed_feature(cm, sf, speed);
break;
case MODE_REALTIME:
set_rt_speed_feature(cm, sf, speed);
@@ -2193,7 +2197,7 @@ int vp9_get_reference_enc(VP9_PTR ptr, int index, YV12_BUFFER_CONFIG **fb) {
if (index < 0 || index >= REF_FRAMES)
return -1;
- *fb = &cm->yv12_fb[cm->ref_frame_map[index]];
+ *fb = &cm->frame_bufs[cm->ref_frame_map[index]].buf;
return 0;
}
@@ -2485,9 +2489,9 @@ static void update_reference_frames(VP9_COMP * const cpi) {
// At this point the new frame has been encoded.
// If any buffer copy / swapping is signaled it should be done here.
if (cm->frame_type == KEY_FRAME) {
- ref_cnt_fb(cm->fb_idx_ref_cnt,
+ ref_cnt_fb(cm->frame_bufs,
&cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
- ref_cnt_fb(cm->fb_idx_ref_cnt,
+ ref_cnt_fb(cm->frame_bufs,
&cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
}
#if CONFIG_MULTIPLE_ARF
@@ -2508,7 +2512,7 @@ static void update_reference_frames(VP9_COMP * const cpi) {
*/
int tmp;
- ref_cnt_fb(cm->fb_idx_ref_cnt,
+ ref_cnt_fb(cm->frame_bufs,
&cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
tmp = cpi->alt_fb_idx;
@@ -2522,18 +2526,18 @@ static void update_reference_frames(VP9_COMP * const cpi) {
arf_idx = cpi->arf_buffer_idx[cpi->sequence_number + 1];
}
#endif
- ref_cnt_fb(cm->fb_idx_ref_cnt,
+ ref_cnt_fb(cm->frame_bufs,
&cm->ref_frame_map[arf_idx], cm->new_fb_idx);
}
if (cpi->refresh_golden_frame) {
- ref_cnt_fb(cm->fb_idx_ref_cnt,
+ ref_cnt_fb(cm->frame_bufs,
&cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
}
}
if (cpi->refresh_last_frame) {
- ref_cnt_fb(cm->fb_idx_ref_cnt,
+ ref_cnt_fb(cm->frame_bufs,
&cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx);
}
}
@@ -2550,10 +2554,7 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
vpx_usec_timer_start(&timer);
- if (cpi->oxcf.mode == MODE_REALTIME)
- lf->filter_level = 4;
- else
- vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.use_fast_lpf_pick);
+ vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.use_fast_lpf_pick);
vpx_usec_timer_mark(&timer);
cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
@@ -2574,20 +2575,20 @@ static void scale_references(VP9_COMP *cpi) {
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
- YV12_BUFFER_CONFIG *ref = &cm->yv12_fb[idx];
+ YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf;
if (ref->y_crop_width != cm->width ||
ref->y_crop_height != cm->height) {
const int new_fb = get_free_fb(cm);
- vp9_realloc_frame_buffer(&cm->yv12_fb[new_fb],
+ vp9_realloc_frame_buffer(&cm->frame_bufs[new_fb].buf,
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
VP9_ENC_BORDER_IN_PIXELS);
- scale_and_extend_frame(ref, &cm->yv12_fb[new_fb]);
+ scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf);
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
} else {
cpi->scaled_ref_idx[ref_frame - 1] = idx;
- cm->fb_idx_ref_cnt[idx]++;
+ cm->frame_bufs[idx].ref_count++;
}
}
}
@@ -2597,7 +2598,7 @@ static void release_scaled_references(VP9_COMP *cpi) {
int i;
for (i = 0; i < 3; i++)
- cm->fb_idx_ref_cnt[cpi->scaled_ref_idx[i]]--;
+ cm->frame_bufs[cpi->scaled_ref_idx[i]].ref_count--;
}
static void full_to_model_count(unsigned int *model_count,
@@ -2742,7 +2743,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
if (cpi->sf.recode_loop != 0) {
vp9_save_coding_context(cpi);
cpi->dummy_packing = 1;
- if (cpi->oxcf.mode != MODE_REALTIME)
+ if (!cpi->sf.super_fast_rtc)
vp9_pack_bitstream(cpi, dest, size);
cpi->rc.projected_frame_size = (*size) << 3;
@@ -3101,7 +3102,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// JBB : This is realtime mode. In real time mode the first frame
// should be larger. Q of 0 is disabled because we force tx size to be
// 16x16...
- if (cpi->oxcf.mode == MODE_REALTIME) {
+ if (cpi->sf.super_fast_rtc) {
if (cpi->common.current_video_frame == 0)
q /= 3;
@@ -3566,7 +3567,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
/* find a free buffer for the new frame, releasing the reference previously
* held.
*/
- cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
+ cm->frame_bufs[cm->new_fb_idx].ref_count--;
cm->new_fb_idx = get_free_fb(cm);
#if CONFIG_MULTIPLE_ARF
@@ -3590,8 +3591,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
- YV12_BUFFER_CONFIG *const buf = &cm->yv12_fb[idx];
-
+ YV12_BUFFER_CONFIG *const buf = &cm->frame_bufs[idx].buf;
RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1];
ref_buf->buf = buf;
ref_buf->idx = idx;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 82b990170..d928312b6 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -416,6 +416,9 @@ typedef struct {
// This feature limits the number of coefficients updates we actually do
// by only looking at counts from 1/2 the bands.
int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced
+
+ // This flag controls the use of the new super fast rtc mode
+ int super_fast_rtc;
} SPEED_FEATURES;
typedef struct VP9_COMP {
@@ -554,7 +557,7 @@ typedef struct VP9_COMP {
int cpu_used;
int pass;
- vp9_prob last_skip_false_probs[3][MBSKIP_CONTEXTS];
+ vp9_prob last_skip_false_probs[3][SKIP_CONTEXTS];
int last_skip_probs_q[3];
int ref_frame_flags;
@@ -730,7 +733,8 @@ static int get_ref_frame_idx(const VP9_COMP *cpi,
static YV12_BUFFER_CONFIG *get_ref_frame_buffer(VP9_COMP *cpi,
MV_REFERENCE_FRAME ref_frame) {
VP9_COMMON *const cm = &cpi->common;
- return &cm->yv12_fb[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]];
+ return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi,
+ ref_frame)]].buf;
}
void vp9_encode_frame(VP9_COMP *cpi);
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 2b9e31f08..bd28ea51e 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -124,8 +124,8 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
stride, 0x7fffffff);
// scale to 1/8 pixel resolution
- tmp_mv->as_mv.row = tmp_mv->as_mv.row << 3;
- tmp_mv->as_mv.col = tmp_mv->as_mv.col << 3;
+ tmp_mv->as_mv.row = tmp_mv->as_mv.row * 8;
+ tmp_mv->as_mv.col = tmp_mv->as_mv.col * 8;
// calculate the bit cost on motion vector
*rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv,
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 4fd4457b7..9cca3bd52 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -296,7 +296,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
fill_token_costs(x->token_costs, cm->fc.coef_probs);
- if (cpi->oxcf.mode != MODE_REALTIME) {
+ if (!cpi->sf.super_fast_rtc) {
for (i = 0; i < PARTITION_CONTEXTS; i++)
vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
vp9_partition_tree);
@@ -443,7 +443,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
if (i == 0)
x->pred_sse[ref] = sse;
- if (cpi->oxcf.mode == MODE_REALTIME) {
+ if (cpi->sf.super_fast_rtc) {
dist_sum += (int)sse;
} else {
int rate;
@@ -2331,7 +2331,7 @@ const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
const VP9_COMMON *const cm = &cpi->common;
const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
- return (scaled_idx != ref_idx) ? &cm->yv12_fb[scaled_idx] : NULL;
+ return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL;
}
static INLINE int get_switchable_rate(const MACROBLOCK *x) {
@@ -2489,7 +2489,6 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[0] = backup_yv12[i];
}
- return;
}
static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index b04e3fe30..8e646f669 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -301,7 +301,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run,
struct tokenize_b_args arg = {cpi, xd, t, mbmi->tx_size, cpi->mb.token_cache};
if (mbmi->skip_coeff) {
if (!dry_run)
- cm->counts.mbskip[ctx][1] += skip_inc;
+ cm->counts.skip[ctx][1] += skip_inc;
reset_skip_context(xd, bsize);
if (dry_run)
*t = t_backup;
@@ -309,7 +309,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run,
}
if (!dry_run) {
- cm->counts.mbskip[ctx][0] += skip_inc;
+ cm->counts.skip[ctx][0] += skip_inc;
foreach_transformed_block(xd, bsize, tokenize_b, &arg);
} else {
foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg);
diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk
index b722200f7..de210f4b7 100644
--- a/vp9/vp9dx.mk
+++ b/vp9/vp9dx.mk
@@ -21,6 +21,8 @@ VP9_DX_SRCS-yes += decoder/vp9_decodemv.c
VP9_DX_SRCS-yes += decoder/vp9_decodeframe.c
VP9_DX_SRCS-yes += decoder/vp9_decodeframe.h
VP9_DX_SRCS-yes += decoder/vp9_detokenize.c
+VP9_DX_SRCS-yes += decoder/vp9_dthread.c
+VP9_DX_SRCS-yes += decoder/vp9_dthread.h
VP9_DX_SRCS-yes += decoder/vp9_reader.h
VP9_DX_SRCS-yes += decoder/vp9_reader.c
VP9_DX_SRCS-yes += decoder/vp9_read_bit_buffer.h