summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: James Zern <jzern@google.com> 2013-07-31 16:15:10 -0700
committer: James Zern <jzern@google.com> 2013-08-05 13:22:04 -0700
commit: a0ffa2794b9d3d831332f3956c2f88f3f3345aab (patch)
tree: 8edcdb7baf24b8b85699b05fec8b4e98ba766224
parent: 183b77d5ab2a4966b71be2e36eb171b44a3f2ae9 (diff)
download: libvpx-a0ffa2794b9d3d831332f3956c2f88f3f3345aab.tar
libvpx-a0ffa2794b9d3d831332f3956c2f88f3f3345aab.tar.gz
libvpx-a0ffa2794b9d3d831332f3956c2f88f3f3345aab.tar.bz2
libvpx-a0ffa2794b9d3d831332f3956c2f88f3f3345aab.zip
vp9/decoder: threaded row-based loop filter
Currently the only threaded option for vp9 decode. Enabled when the decoder config thread count is > 1.

Change-Id: I082959abac9e31aa4a38ed9fd68b94680e57f4df
-rw-r--r-- test/vp9_thread_test.cc | 29
-rw-r--r-- vp9/common/vp9_loopfilter.c | 8
-rw-r--r-- vp9/common/vp9_loopfilter.h | 14
-rw-r--r-- vp9/decoder/vp9_decodframe.c | 31
-rw-r--r-- vp9/decoder/vp9_onyxd_if.c | 12
-rw-r--r-- vp9/decoder/vp9_onyxd_int.h | 3
6 files changed, 93 insertions, 4 deletions
diff --git a/test/vp9_thread_test.cc b/test/vp9_thread_test.cc
index 308ba9438..41d22dd3a 100644
--- a/test/vp9_thread_test.cc
+++ b/test/vp9_thread_test.cc
@@ -11,6 +11,10 @@
#include "vp9/decoder/vp9_thread.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/md5_helper.h"
+#include "test/webm_video_source.h"
namespace {
@@ -77,4 +81,29 @@ TEST_F(VP9WorkerThreadTest, HookFailure) {
EXPECT_FALSE(worker_.had_error);
}
+TEST(VP9DecodeMTTest, MTDecode) {  // decodes a WebM clip with threads = 2 and checks one MD5 over all output images
+ libvpx_test::WebMVideoSource video("vp90-2-03-size-226x226.webm");
+ video.Init();
+
+ vpx_codec_dec_cfg_t cfg = {0};
+ cfg.threads = 2;  // per the commit message, a thread count > 1 enables the threaded loop filter
+ libvpx_test::VP9Decoder decoder(cfg, 0);
+
+ libvpx_test::MD5 md5;  // single accumulator shared by every frame of the clip
+ for (video.Begin(); video.cxdata(); video.Next()) {  // loop ends when cxdata() yields a null/zero value
+ const vpx_codec_err_t res =
+ decoder.DecodeFrame(video.cxdata(), video.frame_size());
+ ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();  // abort the test on the first decode failure
+
+ libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
+ const vpx_image_t *img = NULL;
+
+ // Fold every image the iterator returns for this frame into the running MD5.
+ while ((img = dec_iter.Next())) {
+ md5.Add(img);
+ }
+ }
+ EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc", md5.Get());  // expected digest of all images added above
+}
+
} // namespace
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 5498b1717..c57f0a55d 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -376,3 +376,11 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd,
vp9_loop_filter_rows(cm->frame_to_show, cm, xd,
0, cm->mi_rows, y_only);
}
+
+int vp9_loop_filter_worker(void *arg1, void *arg2) {  // worker hook: arg1 must point to an LFWorkerData; arg2 is ignored
+ LFWorkerData *const lf_data = (LFWorkerData*)arg1;
+ (void)arg2;  // unused; the cast marks the parameter as intentionally ignored
+ vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, &lf_data->xd,
+ lf_data->start, lf_data->stop, lf_data->y_only);  // filters using the xd stored inside lf_data
+ return 1;  // unconditional; NOTE(review): presumably means "success" to the VP9Worker hook caller -- confirm in vp9_thread.h
+}
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index e59cc6485..c6fe112ec 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -64,4 +64,18 @@ void vp9_loop_filter_frame(struct VP9Common *cm,
void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
struct VP9Common *cm, struct macroblockd *xd,
int start, int stop, int y_only);
+
+typedef struct LoopFilterWorkerData {  // argument bundle that vp9_loop_filter_worker() forwards to vp9_loop_filter_rows()
+ const YV12_BUFFER_CONFIG *frame_buffer;  // forwarded as the 'frame_buffer' argument
+ struct VP9Common *cm;  // forwarded as the 'cm' argument
+ struct macroblockd xd; // TODO(jzern): most of this is unnecessary to the
+ // loopfilter. the planes are necessary as their state
+ // is changed during decode.
+ int start;  // forwarded as the 'start' argument
+ int stop;  // forwarded as the 'stop' argument
+ int y_only;  // forwarded as the 'y_only' argument
+} LFWorkerData;
+
+// Operates on the rows described by LFWorkerData passed as 'arg1'.
+int vp9_loop_filter_worker(void *arg1, void *arg2);
#endif // VP9_COMMON_VP9_LOOPFILTER_H_
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index ff7cb8d34..2b6f5a9c6 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -34,6 +34,7 @@
#include "vp9/decoder/vp9_idct_blk.h"
#include "vp9/decoder/vp9_onyxd_int.h"
#include "vp9/decoder/vp9_read_bit_buffer.h"
+#include "vp9/decoder/vp9_thread.h"
#include "vp9/decoder/vp9_treereader.h"
static int read_be32(const uint8_t *p) {
@@ -585,10 +586,18 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi,
}
static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
+ const int num_threads = pbi->oxcf.max_threads;
VP9_COMMON *const pc = &pbi->common;
int mi_row, mi_col;
if (pbi->do_loopfilter_inline) {
+ if (num_threads > 1) {
+ LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
+ lf_data->frame_buffer = &pbi->common.yv12_fb[pbi->common.new_fb_idx];
+ lf_data->cm = pc;
+ lf_data->xd = pbi->mb;
+ lf_data->y_only = 0;
+ }
vp9_loop_filter_frame_init(pc, &pbi->mb, pbi->mb.lf.filter_level);
}
@@ -603,17 +612,33 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
}
if (pbi->do_loopfilter_inline) {
- YV12_BUFFER_CONFIG *const fb =
- &pbi->common.yv12_fb[pbi->common.new_fb_idx];
// delay the loopfilter by 1 macroblock row.
const int lf_start = mi_row - MI_BLOCK_SIZE;
if (lf_start < 0) continue;
- vp9_loop_filter_rows(fb, pc, &pbi->mb, lf_start, mi_row, 0);
+
+ if (num_threads > 1) {
+ LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
+
+ vp9_worker_sync(&pbi->lf_worker);
+ lf_data->start = lf_start;
+ lf_data->stop = mi_row;
+ pbi->lf_worker.hook = vp9_loop_filter_worker;
+ vp9_worker_launch(&pbi->lf_worker);
+ } else {
+ YV12_BUFFER_CONFIG *const fb =
+ &pbi->common.yv12_fb[pbi->common.new_fb_idx];
+ vp9_loop_filter_rows(fb, pc, &pbi->mb, lf_start, mi_row, 0);
+ }
}
}
if (pbi->do_loopfilter_inline) {
YV12_BUFFER_CONFIG *const fb = &pbi->common.yv12_fb[pbi->common.new_fb_idx];
+ if (num_threads > 1) {
+ // TODO(jzern): since the loop filter is delayed one mb row, this will be
+ // forced to wait for the last row scheduled in the for loop.
+ vp9_worker_sync(&pbi->lf_worker);
+ }
vp9_loop_filter_rows(fb, pc, &pbi->mb,
mi_row - MI_BLOCK_SIZE, pc->mi_rows, 0);
}
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index aefb56f9a..5a01dd790 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -141,6 +141,16 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) {
pbi->common.error.setjmp = 0;
pbi->decoded_key_frame = 0;
+ if (pbi->oxcf.max_threads > 1) {
+ vp9_worker_init(&pbi->lf_worker);
+ pbi->lf_worker.data1 = vpx_malloc(sizeof(LFWorkerData));
+ pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
+ if (pbi->lf_worker.data1 == NULL || !vp9_worker_reset(&pbi->lf_worker)) {
+ vp9_remove_decompressor(pbi);
+ return NULL;
+ }
+ }
+
return pbi;
}
@@ -154,6 +164,8 @@ void vp9_remove_decompressor(VP9D_PTR ptr) {
vpx_free(pbi->common.last_frame_seg_map);
vp9_remove_common(&pbi->common);
+ vp9_worker_end(&pbi->lf_worker);
+ vpx_free(pbi->lf_worker.data1);
vpx_free(pbi);
}
diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h
index 607d14c13..a051971a1 100644
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -14,8 +14,8 @@
#include "./vpx_config.h"
#include "vp9/common/vp9_onyxc_int.h"
-
#include "vp9/decoder/vp9_onyxd.h"
+#include "vp9/decoder/vp9_thread.h"
typedef struct VP9Decompressor {
DECLARE_ALIGNED(16, MACROBLOCKD, mb);
@@ -38,6 +38,7 @@ typedef struct VP9Decompressor {
int initial_height;
int do_loopfilter_inline; // apply loopfilter to available rows immediately
+ VP9Worker lf_worker;
} VP9D_COMP;
#endif // VP9_DECODER_VP9_TREEREADER_H_