diff options
58 files changed, 3549 insertions, 3089 deletions
diff --git a/test/vp9_boolcoder_test.cc b/test/vp9_boolcoder_test.cc index bde28a5f3..6405a6cb5 100644 --- a/test/vp9_boolcoder_test.cc +++ b/test/vp9_boolcoder_test.cc @@ -77,7 +77,7 @@ TEST(VP9, TestBitIO) { } else if (bit_method == 3) { bit = bit_rnd(2); } - GTEST_ASSERT_EQ(decode_bool(&br, probas[i]), bit) + GTEST_ASSERT_EQ(vp9_read(&br, probas[i]), bit) << "pos: " << i << " / " << bits_to_test << " bit_method: " << bit_method << " method: " << method; diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c index 48f9be1b3..0628a88b7 100644 --- a/vp9/common/vp9_alloccommon.c +++ b/vp9/common/vp9_alloccommon.c @@ -179,7 +179,6 @@ void vp9_create_common(VP9_COMMON *oci) { vp9_default_bmode_probs(oci->fc.bmode_prob); oci->txfm_mode = ONLY_4X4; - oci->mb_no_coeff_skip = 1; oci->comp_pred_mode = HYBRID_PREDICTION; oci->no_lpf = 0; oci->filter_type = NORMAL_LOOPFILTER; diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c index 9151622d3..6a68f6ee2 100644 --- a/vp9/common/vp9_blockd.c +++ b/vp9/common/vp9_blockd.c @@ -9,6 +9,7 @@ */ +#include "./vpx_config.h" #include "vp9/common/vp9_blockd.h" #include "vpx_mem/vpx_mem.h" @@ -58,6 +59,139 @@ const uint8_t vp9_block2above[TX_SIZE_MAX_MB][24] = { }; #define S(x) x + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT) + +#if CONFIG_SBSEGMENT +const uint8_t vp9_block2left_sb16x32[TX_SIZE_MAX_MB][48] = { + { 0, 0, 0, 0, + 1, 1, 1, 1, + 2, 2, 2, 2, + 3, 3, 3, 3, + S(0), S(0), S(0), S(0), + S(1), S(1), S(1), S(1), + S(2), S(2), S(2), S(2), + S(3), S(3), S(3), S(3), + 4, 4, + 5, 5, + S(4), S(4), + S(5), S(5), + 6, 6, + 7, 7, + S(6), S(6), + S(7), S(7) }, + { 0, 0, 0, 0, + 0, 0, 0, 0, + 2, 2, 2, 2, + 2, 2, 2, 2, + S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), + S(2), S(2), S(2), S(2), + S(2), S(2), S(2), S(2), + 4, 4, + 4, 4, + S(4), S(4), + S(4), S(4), + 6, 6, + 6, 6, + S(6), S(6), + S(6), S(6) }, + { 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0) }, +}; +const uint8_t vp9_block2above_sb16x32[TX_SIZE_MAX_MB][48] = { + { 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 4, 5, + 4, 5, + 4, 5, + 4, 5, + 6, 7, + 6, 7, + 6, 7, + 6, 7 }, + { 0, 0, 0, 0, + 2, 2, 2, 2, + 0, 0, 0, 0, + 2, 2, 2, 2, + 0, 0, 0, 0, + 2, 2, 2, 2, + 0, 0, 0, 0, + 2, 2, 2, 2, + 4, 4, + 4, 4, + 4, 4, + 4, 4, + 6, 6, + 6, 6, + 6, 6, + 6, 6 }, + { 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 }, +}; + +const uint8_t vp9_block2left_sb32x16[TX_SIZE_MAX_MB][48] = { + { 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, + 5, 5, 5, 5, + 6, 6, 6, 6, + 7, 7, 7, 7 }, + { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 4, 4, 4, 4, + 4, 4, 4, 4, + 6, 6, 6, 6, + 6, 6, 6, 6 }, + { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }, +}; +const uint8_t vp9_block2above_sb32x16[TX_SIZE_MAX_MB][48] = { + { 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 4, 5, S(4), S(5), + 4, 5, S(4), S(5), + 6, 7, S(6), S(7), + 6, 7, S(6), S(7) }, + { 0, 0, 0, 0, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + 0, 0, 0, 0, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + 4, 4, 4, 4, + S(4), S(4), S(4), S(4), + 6, 6, 6, 6, + S(6), S(6), S(6), S(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }, +}; +#endif + const uint8_t vp9_block2left_sb[TX_SIZE_MAX_SB][96] = { { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, @@ -177,6 +311,353 @@ const uint8_t vp9_block2above_sb[TX_SIZE_MAX_SB][96] = { #define T(x) x + 2 * (sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT)) #define U(x) x + 3 * (sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT)) + +#if CONFIG_SBSEGMENT +const uint8_t vp9_block2left_sb32x64[TX_SIZE_MAX_SB][192] = { + { 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), + S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), + S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), + T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), + T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), + U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), + U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), + 4, 4, 4, 4, + 5, 5, 5, 5, + S(4), S(4), S(4), S(4), + S(5), S(5), S(5), S(5), + T(4), T(4), T(4), T(4), + T(5), T(5), T(5), T(5), + U(4), U(4), U(4), U(4), + U(5), U(5), U(5), U(5), + 6, 6, 6, 6, + 7, 7, 7, 7, + S(6), S(6), S(6), S(6), + S(7), S(7), S(7), S(7), + T(6), T(6), T(6), T(6), + T(7), T(7), T(7), T(7) }, + { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), + S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), + T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), + U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), + 4, 4, 4, 4, + 4, 4, 4, 4, + S(4), S(4), S(4), S(4), + S(4), S(4), S(4), S(4), + T(4), T(4), T(4), T(4), + T(4), T(4), T(4), T(4), + U(4), U(4), U(4), U(4), + U(4), U(4), U(4), U(4), + 6, 6, 6, 6, + 6, 6, 6, 6, + S(6), S(6), S(6), S(6), + S(6), S(6), S(6), S(6), + T(6), T(6), T(6), T(6), + T(6), T(6), T(6), T(6), + U(6), U(6), U(6), U(6), + U(6), U(6), U(6), U(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + T(4), T(4), T(4), T(4), + T(4), T(4), T(4), T(4), + T(4), T(4), T(4), T(4), + T(4), T(4), T(4), T(4), + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6, + T(6), T(6), T(6), T(6), + T(6), T(6), T(6), T(6), + T(6), T(6), T(6), T(6), + T(6), T(6), T(6), T(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0) }, +}; +const uint8_t vp9_block2above_sb32x64[TX_SIZE_MAX_SB][192] = { + { 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 4, 5, S(4), S(5), + 4, 5, S(4), S(5), + 4, 5, S(4), S(5), + 4, 5, S(4), S(5), + 4, 5, S(4), S(5), + 4, 5, S(4), S(5), + 4, 5, S(4), S(5), + 4, 5, S(4), S(5), + 6, 7, S(6), S(7), + 6, 7, S(6), S(7), + 6, 7, S(6), S(7), + 6, 7, S(6), S(7), + 6, 7, S(6), S(7), + 6, 7, S(6), S(7), + 6, 7, S(6), S(7), + 6, 7, S(6), S(7) }, + { 0, 0, 0, 0, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + 0, 0, 0, 0, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + 0, 0, 0, 0, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + 0, 0, 0, 0, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + 0, 0, 0, 0, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + 0, 0, 0, 0, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + 0, 0, 0, 0, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + 0, 0, 0, 0, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + 4, 4, 4, 4, + S(4), S(4), S(4), S(4), + 4, 4, 4, 4, + S(4), S(4), S(4), S(4), + 4, 4, 4, 4, + S(4), S(4), S(4), S(4), + 4, 4, 4, 4, + S(4), S(4), S(4), S(4), + 6, 6, 6, 6, + S(6), S(6), S(6), S(6), + 6, 6, 6, 6, + S(6), S(6), S(6), S(6), + 6, 6, 6, 6, + S(6), S(6), S(6), S(6), + 6, 6, 6, 6, + S(6), S(6), S(6), S(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6 }, + { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }, +}; + +const uint8_t vp9_block2left_sb64x32[TX_SIZE_MAX_SB][192] = { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), + S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), + S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), + 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, + S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4), + S(5), S(5), S(5), S(5), S(5), S(5), S(5), S(5), + 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, + S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6), + S(7), S(7), S(7), S(7), S(7), S(7), S(7), S(7) }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), + S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4), + S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4), + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6), + S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, +}; +const uint8_t vp9_block2above_sb64x32[TX_SIZE_MAX_SB][192] = { + { 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7) }, + { 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 4, 4, 4, 4, S(4), S(4), S(4), S(4), + T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4), + 4, 4, 4, 4, S(4), S(4), S(4), S(4), + T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4), + 6, 6, 6, 6, S(6), S(6), S(6), S(6), + T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6), + 6, 6, 6, 6, S(6), S(6), S(6), S(6), + T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0) }, +}; +#endif + const uint8_t vp9_block2left_sb64[TX_SIZE_MAX_SB][384] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -282,14 +763,14 @@ const uint8_t vp9_block2left_sb64[TX_SIZE_MAX_SB][384] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, @@ -408,18 +889,18 @@ const uint8_t vp9_block2above_sb64[TX_SIZE_MAX_SB][384] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 3cfdb95eb..b5a3d4530 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -21,9 +21,6 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_enums.h" -#define TRUE 1 -#define FALSE 0 - // #define MODE_STATS #define MB_FEATURE_TREE_PROBS 3 @@ -131,18 +128,15 @@ typedef enum { typedef enum { B_DC_PRED, /* average of above and left pixels */ + B_V_PRED, /* vertical prediction */ + B_H_PRED, /* horizontal prediction */ + B_D45_PRED, + B_D135_PRED, + B_D117_PRED, + B_D153_PRED, + B_D27_PRED, + B_D63_PRED, B_TM_PRED, - - B_VE_PRED, /* vertical prediction */ - B_HE_PRED, /* horizontal prediction */ - - B_LD_PRED, - B_RD_PRED, - - B_VR_PRED, - B_VL_PRED, - B_HD_PRED, - B_HU_PRED, #if CONFIG_NEWBINTRAMODES B_CONTEXT_PRED, #endif @@ -246,13 +240,10 @@ static INLINE int b_height_log2(BLOCK_SIZE_TYPE sb_type) { return mb_height_log2(sb_type) + 2; } -typedef enum { - BLOCK_4X4_LG2 = 0, - BLOCK_8X8_LG2 = 2, - BLOCK_16X16_LG2 = 4, - BLOCK_32X32_LG2 = 6, - BLOCK_64X64_LG2 = 8 -} BLOCK_SIZE_LG2; +static INLINE int partition_plane(BLOCK_SIZE_TYPE sb_type) { + assert(mb_width_log2(sb_type) == mb_height_log2(sb_type)); + return (mb_width_log2(sb_type) - 1); +} typedef struct { MB_PREDICTION_MODE mode, uv_mode; @@ -325,6 +316,17 @@ struct scale_factors { int y_den; int y_offset_q4; int y_step_q4; + + int (*scale_value_x)(int val, const struct scale_factors *scale); + int (*scale_value_y)(int val, const struct scale_factors *scale); + void (*set_scaled_offsets)(struct scale_factors *scale, int row, int col); + int_mv32 (*scale_motion_vector_q3_to_q4)(const int_mv *src_mv, + const struct scale_factors *scale); + int32_t (*scale_motion_vector_component_q4)(int mv_q4, + int num, + int den, + int offset_q4); + #if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT convolve_fn_t predict[2][2][8]; // horiz, vert, weight (0 - 7) #else @@ -435,14 +437,12 @@ typedef struct macroblockd { /* Inverse transform function pointers. */ void (*inv_txm4x4_1)(int16_t *input, int16_t *output, int pitch); void (*inv_txm4x4)(int16_t *input, int16_t *output, int pitch); - void (*itxm_add)(int16_t *input, const int16_t *dq, - uint8_t *pred, uint8_t *output, int pitch, int stride, int eob); + void (*itxm_add)(int16_t *input, const int16_t *dq, uint8_t *dest, + int stride, int eob); void (*itxm_add_y_block)(int16_t *q, const int16_t *dq, - uint8_t *pre, int pre_stride, uint8_t *dst, int stride, - struct macroblockd *xd); + uint8_t *dst, int stride, struct macroblockd *xd); void (*itxm_add_uv_block)(int16_t *q, const int16_t *dq, - uint8_t *pre, int pre_stride, uint8_t *dst, int stride, - uint16_t *eobs); + uint8_t *dst, int stride, uint16_t *eobs); struct subpix_fn_table subpix; @@ -466,15 +466,15 @@ typedef struct macroblockd { static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) { switch (mode) { case DC_PRED: return B_DC_PRED; - case V_PRED: return B_VE_PRED; - case H_PRED: return B_HE_PRED; + case V_PRED: return B_V_PRED; + case H_PRED: return B_H_PRED; case TM_PRED: return B_TM_PRED; - case D45_PRED: return B_LD_PRED; - case D135_PRED: return B_RD_PRED; - case D117_PRED: return B_VR_PRED; - case D153_PRED: return B_HD_PRED; - case D27_PRED: return B_HU_PRED; - case D63_PRED: return B_VL_PRED; + case D45_PRED: return B_D45_PRED; + case D135_PRED: return B_D135_PRED; + case D117_PRED: return B_D117_PRED; + case D153_PRED: return B_D153_PRED; + case D27_PRED: return B_D27_PRED; + case D63_PRED: return B_D63_PRED; default: assert(0); return B_MODE_COUNT; // Dummy value @@ -485,16 +485,16 @@ static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) { static TX_TYPE txfm_map(B_PREDICTION_MODE bmode) { switch (bmode) { case B_TM_PRED : - case B_RD_PRED : + case B_D135_PRED : return ADST_ADST; - case B_VE_PRED : - case B_VR_PRED : + case B_V_PRED : + case B_D117_PRED : return ADST_DCT; - case B_HE_PRED : - case B_HD_PRED : - case B_HU_PRED : + case B_H_PRED : + case B_D153_PRED : + case B_D27_PRED : return DCT_ADST; #if CONFIG_NEWBINTRAMODES @@ -514,6 +514,16 @@ extern const uint8_t vp9_block2left_sb[TX_SIZE_MAX_SB][96]; extern const uint8_t vp9_block2above_sb[TX_SIZE_MAX_SB][96]; extern const uint8_t vp9_block2left_sb64[TX_SIZE_MAX_SB][384]; extern const uint8_t vp9_block2above_sb64[TX_SIZE_MAX_SB][384]; +#if CONFIG_SBSEGMENT +extern const uint8_t vp9_block2left_sb16x32[TX_SIZE_MAX_MB][48]; +extern const uint8_t vp9_block2above_sb16x32[TX_SIZE_MAX_MB][48]; +extern const uint8_t vp9_block2left_sb32x16[TX_SIZE_MAX_MB][48]; +extern const uint8_t vp9_block2above_sb32x16[TX_SIZE_MAX_MB][48]; +extern const uint8_t vp9_block2left_sb32x64[TX_SIZE_MAX_SB][192]; +extern const uint8_t vp9_block2above_sb32x64[TX_SIZE_MAX_SB][192]; +extern const uint8_t vp9_block2left_sb64x32[TX_SIZE_MAX_SB][192]; +extern const uint8_t vp9_block2above_sb64x32[TX_SIZE_MAX_SB][192]; +#endif #define USE_ADST_FOR_I16X16_8X8 1 #define USE_ADST_FOR_I16X16_4X4 1 diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h index 3ffa513ea..dbfb9ed46 100644 --- a/vp9/common/vp9_common.h +++ b/vp9/common/vp9_common.h @@ -19,9 +19,6 @@ #include "vpx_mem/vpx_mem.h" #include "vpx/vpx_integer.h" -#define TRUE 1 -#define FALSE 0 - #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #define MAX(x, y) (((x) > (y)) ? (x) : (y)) diff --git a/vp9/common/vp9_context.c b/vp9/common/vp9_context.c deleted file mode 100644 index 271b45541..000000000 --- a/vp9/common/vp9_context.c +++ /dev/null @@ -1,397 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vp9/common/vp9_entropy.h" - -/* *** GENERATED FILE: DO NOT EDIT *** */ - -#if 0 -int Contexts[vp8_coef_counter_dimen]; - -const int default_contexts[vp8_coef_counter_dimen] = { - { - // Block Type ( 0 ) - { - // Coeff Band ( 0 ) - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - }, - { - // Coeff Band ( 1 ) - {30190, 26544, 225, 24, 4, 0, 0, 0, 0, 0, 0, 4171593}, - {26846, 25157, 1241, 130, 26, 6, 1, 0, 0, 0, 0, 149987}, - {10484, 9538, 1006, 160, 36, 18, 0, 0, 0, 0, 0, 15104}, - }, - { - // Coeff Band ( 2 ) - {25842, 40456, 1126, 83, 11, 2, 0, 0, 0, 0, 0, 0}, - {9338, 8010, 512, 73, 7, 3, 2, 0, 0, 0, 0, 43294}, - {1047, 751, 149, 31, 13, 6, 1, 0, 0, 0, 0, 879}, - }, - { - // Coeff Band ( 3 ) - {26136, 9826, 252, 13, 0, 0, 0, 0, 0, 0, 0, 0}, - {8134, 5574, 191, 14, 2, 0, 0, 0, 0, 0, 0, 35302}, - { 605, 677, 116, 9, 1, 0, 0, 0, 0, 0, 0, 611}, - }, - { - // Coeff Band ( 4 ) - {10263, 15463, 283, 17, 0, 0, 0, 0, 0, 0, 0, 0}, - {2773, 2191, 128, 9, 2, 2, 0, 0, 0, 0, 0, 10073}, - { 134, 125, 32, 4, 0, 2, 0, 0, 0, 0, 0, 50}, - }, - { - // Coeff Band ( 5 ) - {10483, 2663, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0}, - {2137, 1251, 27, 1, 1, 0, 0, 0, 0, 0, 0, 14362}, - { 116, 156, 14, 2, 1, 0, 0, 0, 0, 0, 0, 190}, - }, - { - // Coeff Band ( 6 ) - {40977, 27614, 412, 28, 0, 0, 0, 0, 0, 0, 0, 0}, - {6113, 5213, 261, 22, 3, 0, 0, 0, 0, 0, 0, 26164}, - { 382, 312, 50, 14, 2, 0, 0, 0, 0, 0, 0, 345}, - }, - { - // Coeff Band ( 7 ) - { 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 319}, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8}, - }, - }, - { - // Block Type ( 1 ) - { - // Coeff Band ( 0 ) - {3268, 19382, 1043, 250, 93, 82, 49, 26, 17, 8, 25, 82289}, - {8758, 32110, 5436, 1832, 827, 668, 420, 153, 24, 0, 3, 52914}, - {9337, 23725, 8487, 3954, 2107, 1836, 1069, 399, 59, 0, 0, 18620}, - }, - { - // Coeff Band ( 1 ) - {12419, 8420, 452, 62, 9, 1, 0, 0, 0, 0, 0, 0}, - {11715, 8705, 693, 92, 15, 7, 2, 0, 0, 0, 0, 53988}, - {7603, 8585, 2306, 778, 270, 145, 39, 5, 0, 0, 0, 9136}, - }, - { - // Coeff Band ( 2 ) - {15938, 14335, 1207, 184, 55, 13, 4, 1, 0, 0, 0, 0}, - {7415, 6829, 1138, 244, 71, 26, 7, 0, 0, 0, 0, 9980}, - {1580, 1824, 655, 241, 89, 46, 10, 2, 0, 0, 0, 429}, - }, - { - // Coeff Band ( 3 ) - {19453, 5260, 201, 19, 0, 0, 0, 0, 0, 0, 0, 0}, - {9173, 3758, 213, 22, 1, 1, 0, 0, 0, 0, 0, 9820}, - {1689, 1277, 276, 51, 17, 4, 0, 0, 0, 0, 0, 679}, - }, - { - // Coeff Band ( 4 ) - {12076, 10667, 620, 85, 19, 9, 5, 0, 0, 0, 0, 0}, - {4665, 3625, 423, 55, 19, 9, 0, 0, 0, 0, 0, 5127}, - { 415, 440, 143, 34, 20, 7, 2, 0, 0, 0, 0, 101}, - }, - { - // Coeff Band ( 5 ) - {12183, 4846, 115, 11, 1, 0, 0, 0, 0, 0, 0, 0}, - {4226, 3149, 177, 21, 2, 0, 0, 0, 0, 0, 0, 7157}, - { 375, 621, 189, 51, 11, 4, 1, 0, 0, 0, 0, 198}, - }, - { - // Coeff Band ( 6 ) - {61658, 37743, 1203, 94, 10, 3, 0, 0, 0, 0, 0, 0}, - {15514, 11563, 903, 111, 14, 5, 0, 0, 0, 0, 0, 25195}, - { 929, 1077, 291, 78, 14, 7, 1, 0, 0, 0, 0, 507}, - }, - { - // Coeff Band ( 7 ) - { 0, 990, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 412, 13, 0, 0, 0, 0, 0, 0, 0, 0, 1641}, - { 0, 18, 7, 1, 0, 0, 0, 0, 0, 0, 0, 30}, - }, - }, - { - // Block Type ( 2 ) - { - // Coeff Band ( 0 ) - { 953, 24519, 628, 120, 28, 12, 4, 0, 0, 0, 0, 2248798}, - {1525, 25654, 2647, 617, 239, 143, 42, 5, 0, 0, 0, 66837}, - {1180, 11011, 3001, 1237, 532, 448, 239, 54, 5, 0, 0, 7122}, - }, - { - // Coeff Band ( 1 ) - {1356, 2220, 67, 10, 4, 1, 0, 0, 0, 0, 0, 0}, - {1450, 2544, 102, 18, 4, 3, 0, 0, 0, 0, 0, 57063}, - {1182, 2110, 470, 130, 41, 21, 0, 0, 0, 0, 0, 6047}, - }, - { - // Coeff Band ( 2 ) - { 370, 3378, 200, 30, 5, 4, 1, 0, 0, 0, 0, 0}, - { 293, 1006, 131, 29, 11, 0, 0, 0, 0, 0, 0, 5404}, - { 114, 387, 98, 23, 4, 8, 1, 0, 0, 0, 0, 236}, - }, - { - // Coeff Band ( 3 ) - { 579, 194, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 395, 213, 5, 1, 0, 0, 0, 0, 0, 0, 0, 4157}, - { 119, 122, 4, 0, 0, 0, 0, 0, 0, 0, 0, 300}, - }, - { - // Coeff Band ( 4 ) - { 38, 557, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 21, 114, 12, 1, 0, 0, 0, 0, 0, 0, 0, 427}, - { 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7}, - }, - { - // Coeff Band ( 5 ) - { 52, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 18, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 652}, - { 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30}, - }, - { - // Coeff Band ( 6 ) - { 640, 569, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 25, 77, 2, 0, 0, 0, 0, 0, 0, 0, 0, 517}, - { 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3}, - }, - { - // Coeff Band ( 7 ) - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - }, - }, - { - // Block Type ( 3 ) - { - // Coeff Band ( 0 ) - {2506, 20161, 2707, 767, 261, 178, 107, 30, 14, 3, 0, 100694}, - {8806, 36478, 8817, 3268, 1280, 850, 401, 114, 42, 0, 0, 58572}, - {11003, 27214, 11798, 5716, 2482, 2072, 1048, 175, 32, 0, 0, 19284}, - }, - { - // Coeff Band ( 1 ) - {9738, 11313, 959, 205, 70, 18, 11, 1, 0, 0, 0, 0}, - {12628, 15085, 1507, 273, 52, 19, 9, 0, 0, 0, 0, 54280}, - {10701, 15846, 5561, 1926, 813, 570, 249, 36, 0, 0, 0, 6460}, - }, - { - // Coeff Band ( 2 ) - {6781, 22539, 2784, 634, 182, 123, 20, 4, 0, 0, 0, 0}, - {6263, 11544, 2649, 790, 259, 168, 27, 5, 0, 0, 0, 20539}, - {3109, 4075, 2031, 896, 457, 386, 158, 29, 0, 0, 0, 1138}, - }, - { - // Coeff Band ( 3 ) - {11515, 4079, 465, 73, 5, 14, 2, 0, 0, 0, 0, 0}, - {9361, 5834, 650, 96, 24, 8, 4, 0, 0, 0, 0, 22181}, - {4343, 3974, 1360, 415, 132, 96, 14, 1, 0, 0, 0, 1267}, - }, - { - // Coeff Band ( 4 ) - {4787, 9297, 823, 168, 44, 12, 4, 0, 0, 0, 0, 0}, - {3619, 4472, 719, 198, 60, 31, 3, 0, 0, 0, 0, 8401}, - {1157, 1175, 483, 182, 88, 31, 8, 0, 0, 0, 0, 268}, - }, - { - // Coeff Band ( 5 ) - {8299, 1226, 32, 5, 1, 0, 0, 0, 0, 0, 0, 0}, - {3502, 1568, 57, 4, 1, 1, 0, 0, 0, 0, 0, 9811}, - {1055, 1070, 166, 29, 6, 1, 0, 0, 0, 0, 0, 527}, - }, - { - // Coeff Band ( 6 ) - {27414, 27927, 1989, 347, 69, 26, 0, 0, 0, 0, 0, 0}, - {5876, 10074, 1574, 341, 91, 24, 4, 0, 0, 0, 0, 21954}, - {1571, 2171, 778, 324, 124, 65, 16, 0, 0, 0, 0, 979}, - }, - { - // Coeff Band ( 7 ) - { 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459}, - { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13}, - }, - }, -}; - -// Update probabilities for the nodes in the token entropy tree. -const vp9_prob tree_update_probs[vp9_coef_tree_dimen] = { - { - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255, }, - {249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255, }, - {234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255, }, - {250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - }, - { - { - {217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255, }, - {234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255, }, - }, - { - {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - }, - { - { - {186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255, }, - {234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255, }, - {251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255, }, - }, - { - {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - }, - { - { - {248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255, }, - {248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255, }, - {248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - }, -}; -#endif diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index 500a278ff..5e6cba2ed 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -386,7 +386,7 @@ const vp9_tree_index vp9_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */ -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */ }; -struct vp9_token_struct vp9_coef_encodings[MAX_ENTROPY_TOKENS]; +struct vp9_token vp9_coef_encodings[MAX_ENTROPY_TOKENS]; /* Trees for extra bits. Probabilities are constant and do not depend on previously encoded bits */ @@ -408,7 +408,7 @@ const vp9_tree_index vp9_nzc4x4_tree[2 * NZC4X4_NODES] = { -NZC_3TO4, 8, -NZC_5TO8, -NZC_9TO16, }; -struct vp9_token_struct vp9_nzc4x4_encodings[NZC4X4_TOKENS]; +struct vp9_token vp9_nzc4x4_encodings[NZC4X4_TOKENS]; const vp9_tree_index vp9_nzc8x8_tree[2 * NZC8X8_NODES] = { -NZC_0, 2, @@ -419,7 +419,7 @@ const vp9_tree_index vp9_nzc8x8_tree[2 * NZC8X8_NODES] = { -NZC_9TO16, 12, -NZC_17TO32, -NZC_33TO64, }; -struct vp9_token_struct vp9_nzc8x8_encodings[NZC8X8_TOKENS]; +struct vp9_token vp9_nzc8x8_encodings[NZC8X8_TOKENS]; const vp9_tree_index vp9_nzc16x16_tree[2 * NZC16X16_NODES] = { -NZC_0, 2, @@ -432,7 +432,7 @@ const vp9_tree_index vp9_nzc16x16_tree[2 * NZC16X16_NODES] = { -NZC_33TO64, 16, -NZC_65TO128, -NZC_129TO256, }; -struct vp9_token_struct vp9_nzc16x16_encodings[NZC16X16_TOKENS]; +struct vp9_token vp9_nzc16x16_encodings[NZC16X16_TOKENS]; const vp9_tree_index vp9_nzc32x32_tree[2 * NZC32X32_NODES] = { -NZC_0, 2, @@ -447,7 +447,7 @@ const vp9_tree_index vp9_nzc32x32_tree[2 * NZC32X32_NODES] = { -NZC_129TO256, 20, -NZC_257TO512, -NZC_513TO1024, }; -struct vp9_token_struct vp9_nzc32x32_encodings[NZC32X32_TOKENS]; +struct vp9_token vp9_nzc32x32_encodings[NZC32X32_TOKENS]; const int vp9_extranzcbits[NZC32X32_TOKENS] = { 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index 645faa2c6..db167420c 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -40,7 +40,7 @@ extern const int vp9_i8x8_block[4]; extern const vp9_tree_index vp9_coef_tree[]; -extern struct vp9_token_struct vp9_coef_encodings[MAX_ENTROPY_TOKENS]; +extern struct vp9_token vp9_coef_encodings[MAX_ENTROPY_TOKENS]; typedef struct { vp9_tree_p tree; @@ -215,10 +215,10 @@ extern const vp9_tree_index vp9_nzc16x16_tree[]; extern const vp9_tree_index vp9_nzc32x32_tree[]; /* nzc encodings */ -extern struct vp9_token_struct vp9_nzc4x4_encodings[NZC4X4_TOKENS]; -extern struct vp9_token_struct vp9_nzc8x8_encodings[NZC8X8_TOKENS]; -extern struct vp9_token_struct vp9_nzc16x16_encodings[NZC16X16_TOKENS]; -extern struct vp9_token_struct vp9_nzc32x32_encodings[NZC32X32_TOKENS]; +extern struct vp9_token vp9_nzc4x4_encodings[NZC4X4_TOKENS]; +extern struct vp9_token vp9_nzc8x8_encodings[NZC8X8_TOKENS]; +extern struct vp9_token vp9_nzc16x16_encodings[NZC16X16_TOKENS]; +extern struct vp9_token vp9_nzc32x32_encodings[NZC32X32_TOKENS]; #define codenzc(x) (\ (x) <= 3 ? (x) : (x) <= 4 ? 3 : (x) <= 8 ? 4 : \ diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index 29855b633..f4182443a 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -72,18 +72,18 @@ static const unsigned int kf_uv_mode_cts [VP9_YMODES] [VP9_UV_MODES] = { static const unsigned int bmode_cts[VP9_NKF_BINTRAMODES] = { #if CONFIG_NEWBINTRAMODES #if CONTEXT_PRED_REPLACEMENTS == 6 - /* DC TM VE HE CONTEXT */ + /* DC TM V H CONTEXT */ 43891, 17694, 10036, 3920, 20000 #elif CONTEXT_PRED_REPLACEMENTS == 4 - /* DC TM VE HE LD RD CONTEXT */ + /* DC TM V H D45 D135 CONTEXT */ 43891, 17694, 10036, 3920, 3363, 2546, 14000 #elif CONTEXT_PRED_REPLACEMENTS == 0 - /* DC TM VE HE LD RD VR VL HD HU CONTEXT */ - 43891, 17694, 10036, 3920, 3363, 2546, 5119, 3221, 2471, 1723, 50000 + /* DC V H D45 D135 D117 D153 D27 D63 TM CONTEXT */ + 43891, 10036, 3920, 3363, 2546, 5119, 2471, 1723, 3221, 17694, 50000 #endif #else - /* DC TM VE HE LD RD VR VL HD HU */ - 43891, 17694, 10036, 3920, 3363, 2546, 5119, 3221, 2471, 1723 + /* DC V H D45 D135 D117 D153 D27 D63 TM */ + 43891, 10036, 3920, 3363, 2546, 5119, 2471, 1723, 3221, 17694 #endif }; @@ -151,18 +151,29 @@ const int vp9_mbsplit_count [VP9_NUMMBSPLITS] = { 2, 2, 4, 16}; const vp9_prob vp9_mbsplit_probs [VP9_NUMMBSPLITS - 1] = { 110, 111, 150}; +#if CONFIG_SBSEGMENT +const vp9_prob vp9_partition_probs[PARTITION_PLANES][PARTITION_TYPES - 1] = { + {110, 111, 150}, + {110, 111, 150}, +}; +#else +const vp9_prob vp9_partition_probs[PARTITION_PLANES][PARTITION_TYPES - 1] = { + {200}, {200}, +}; +#endif + /* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */ const vp9_tree_index vp9_kf_bmode_tree[VP9_KF_BINTRAMODES * 2 - 2] = { -B_DC_PRED, 2, /* 0 = DC_NODE */ -B_TM_PRED, 4, /* 1 = TM_NODE */ - -B_VE_PRED, 6, /* 2 = VE_NODE */ + -B_V_PRED, 6, /* 2 = V_NODE */ 8, 12, /* 3 = COM_NODE */ - -B_HE_PRED, 10, /* 4 = HE_NODE */ - -B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */ - -B_LD_PRED, 14, /* 6 = LD_NODE */ - -B_VL_PRED, 16, /* 7 = VL_NODE */ - -B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */ + -B_H_PRED, 10, /* 4 = H_NODE */ + -B_D135_PRED, -B_D117_PRED, /* 5 = D135_NODE */ + -B_D45_PRED, 14, /* 6 = D45_NODE */ + -B_D63_PRED, 16, /* 7 = D63_NODE */ + -B_D153_PRED, -B_D27_PRED /* 8 = D153_NODE */ }; const vp9_tree_index vp9_bmode_tree[VP9_NKF_BINTRAMODES * 2 - 2] = { @@ -171,36 +182,36 @@ const vp9_tree_index vp9_bmode_tree[VP9_NKF_BINTRAMODES * 2 - 2] = { -B_DC_PRED, 2, -B_TM_PRED, 4, 6, -(B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS), - -B_VE_PRED, -B_HE_PRED + -B_V_PRED, -B_H_PRED #elif CONTEXT_PRED_REPLACEMENTS == 4 -B_DC_PRED, 2, -B_TM_PRED, 4, 6, 8, - -B_VE_PRED, -B_HE_PRED, + -B_V_PRED, -B_H_PRED, 10, -(B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS), - -B_RD_PRED, -B_LD_PRED, + -B_D135_PRED, -B_D45_PRED, #elif CONTEXT_PRED_REPLACEMENTS == 0 -B_DC_PRED, 2, /* 0 = DC_NODE */ -B_TM_PRED, 4, /* 1 = TM_NODE */ - -B_VE_PRED, 6, /* 2 = VE_NODE */ + -B_V_PRED, 6, /* 2 = V_NODE */ 8, 12, /* 3 = COM_NODE */ - -B_HE_PRED, 10, /* 4 = HE_NODE */ - -B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */ - -B_LD_PRED, 14, /* 6 = LD_NODE */ - -B_VL_PRED, 16, /* 7 = VL_NODE */ - -B_HD_PRED, 18, - -B_HU_PRED, -B_CONTEXT_PRED + -B_H_PRED, 10, /* 4 = H_NODE */ + -B_D135_PRED, -B_D117_PRED, /* 5 = D135_NODE */ + -B_D45_PRED, 14, /* 6 = D45_NODE */ + -B_D63_PRED, 16, /* 7 = D63_NODE */ + -B_D153_PRED, 18, /* 8 = D153_NODE */ + -B_D27_PRED, -B_CONTEXT_PRED /* 9 = D27_NODE */ #endif #else -B_DC_PRED, 2, /* 0 = DC_NODE */ -B_TM_PRED, 4, /* 1 = TM_NODE */ - -B_VE_PRED, 6, /* 2 = VE_NODE */ + -B_V_PRED, 6, /* 2 = V_NODE */ 8, 12, /* 3 = COM_NODE */ - -B_HE_PRED, 10, /* 4 = HE_NODE */ - -B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */ - -B_LD_PRED, 14, /* 6 = LD_NODE */ - -B_VL_PRED, 16, /* 7 = VL_NODE */ - -B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */ + -B_H_PRED, 10, /* 4 = H_NODE */ + -B_D135_PRED, -B_D117_PRED, /* 5 = D135_NODE */ + -B_D45_PRED, 14, /* 6 = D45_NODE */ + -B_D63_PRED, 16, /* 7 = D63_NODE */ + -B_D153_PRED, -B_D27_PRED /* 8 = D153_NODE */ #endif }; @@ -283,19 +294,33 @@ const vp9_tree_index vp9_sub_mv_ref_tree[6] = { -ZERO4X4, -NEW4X4 }; -struct vp9_token_struct vp9_bmode_encodings[VP9_NKF_BINTRAMODES]; -struct vp9_token_struct vp9_kf_bmode_encodings[VP9_KF_BINTRAMODES]; -struct vp9_token_struct vp9_ymode_encodings[VP9_YMODES]; -struct vp9_token_struct vp9_sb_ymode_encodings[VP9_I32X32_MODES]; -struct vp9_token_struct vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES]; -struct vp9_token_struct vp9_kf_ymode_encodings[VP9_YMODES]; -struct vp9_token_struct vp9_uv_mode_encodings[VP9_UV_MODES]; -struct vp9_token_struct vp9_i8x8_mode_encodings[VP9_I8X8_MODES]; -struct vp9_token_struct vp9_mbsplit_encodings[VP9_NUMMBSPLITS]; +#if CONFIG_SBSEGMENT +const vp9_tree_index vp9_partition_tree[6] = { + -PARTITION_NONE, 2, + -PARTITION_HORZ, 4, + -PARTITION_VERT, -PARTITION_SPLIT +}; +#else +const vp9_tree_index vp9_partition_tree[2] = { + -PARTITION_NONE, -PARTITION_SPLIT +}; +#endif + +struct vp9_token vp9_bmode_encodings[VP9_NKF_BINTRAMODES]; +struct vp9_token vp9_kf_bmode_encodings[VP9_KF_BINTRAMODES]; +struct vp9_token vp9_ymode_encodings[VP9_YMODES]; +struct vp9_token vp9_sb_ymode_encodings[VP9_I32X32_MODES]; +struct vp9_token vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES]; +struct vp9_token vp9_kf_ymode_encodings[VP9_YMODES]; +struct vp9_token vp9_uv_mode_encodings[VP9_UV_MODES]; +struct vp9_token vp9_i8x8_mode_encodings[VP9_I8X8_MODES]; +struct vp9_token vp9_mbsplit_encodings[VP9_NUMMBSPLITS]; -struct vp9_token_struct vp9_mv_ref_encoding_array[VP9_MVREFS]; -struct vp9_token_struct vp9_sb_mv_ref_encoding_array[VP9_MVREFS]; -struct vp9_token_struct vp9_sub_mv_ref_encoding_array[VP9_SUBMVREFS]; +struct vp9_token vp9_mv_ref_encoding_array[VP9_MVREFS]; +struct vp9_token vp9_sb_mv_ref_encoding_array[VP9_MVREFS]; +struct vp9_token vp9_sub_mv_ref_encoding_array[VP9_SUBMVREFS]; + +struct vp9_token vp9_partition_encodings[PARTITION_TYPES]; void vp9_init_mbmode_probs(VP9_COMMON *x) { unsigned int bct [VP9_YMODES] [2]; /* num Ymodes > num UV modes */ @@ -332,6 +357,10 @@ void vp9_init_mbmode_probs(VP9_COMMON *x) { vpx_memcpy(x->fc.mbsplit_prob, vp9_mbsplit_probs, sizeof(vp9_mbsplit_probs)); vpx_memcpy(x->fc.switchable_interp_prob, vp9_switchable_interp_prob, sizeof(vp9_switchable_interp_prob)); + + vpx_memcpy(x->fc.partition_prob, vp9_partition_probs, + sizeof(vp9_partition_probs)); + #if CONFIG_COMP_INTERINTRA_PRED x->fc.interintra_prob = VP9_DEF_INTERINTRA_PROB; #endif @@ -379,7 +408,7 @@ const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = { -0, 2, -1, -2 }; -struct vp9_token_struct vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS]; +struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS]; #if CONFIG_ENABLE_6TAP const INTERPOLATIONFILTERTYPE vp9_switchable_interp[VP9_SWITCHABLE_FILTERS] = { SIXTAP, EIGHTTAP, EIGHTTAP_SHARP}; @@ -397,7 +426,7 @@ const vp9_prob vp9_switchable_interp_prob [VP9_SWITCHABLE_FILTERS+1] const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = { -0, -1, }; -struct vp9_token_struct vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS]; +struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS]; const vp9_prob vp9_switchable_interp_prob [VP9_SWITCHABLE_FILTERS+1] [VP9_SWITCHABLE_FILTERS-1] = { {248}, @@ -433,6 +462,7 @@ void vp9_entropy_mode_init() { vp9_tokens_from_tree(vp9_mbsplit_encodings, vp9_mbsplit_tree); vp9_tokens_from_tree(vp9_switchable_interp_encodings, vp9_switchable_interp_tree); + vp9_tokens_from_tree(vp9_partition_encodings, vp9_partition_tree); vp9_tokens_from_tree_offset(vp9_mv_ref_encoding_array, vp9_mv_ref_tree, NEARESTMV); @@ -631,6 +661,10 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) { interintra_prob, factor); } #endif + for (i = 0; i < PARTITION_PLANES; i++) + update_mode_probs(PARTITION_TYPES, vp9_partition_tree, + cm->fc.partition_counts[i], cm->fc.pre_partition_prob[i], + cm->fc.partition_prob[i], 0); } static void set_default_lf_deltas(MACROBLOCKD *xd) { diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h index 8b0caf6eb..665569578 100644 --- a/vp9/common/vp9_entropymode.h +++ b/vp9/common/vp9_entropymode.h @@ -54,21 +54,25 @@ extern const vp9_tree_index vp9_mv_ref_tree[]; extern const vp9_tree_index vp9_sb_mv_ref_tree[]; extern const vp9_tree_index vp9_sub_mv_ref_tree[]; -extern struct vp9_token_struct vp9_bmode_encodings[VP9_NKF_BINTRAMODES]; -extern struct vp9_token_struct vp9_kf_bmode_encodings[VP9_KF_BINTRAMODES]; -extern struct vp9_token_struct vp9_ymode_encodings[VP9_YMODES]; -extern struct vp9_token_struct vp9_sb_ymode_encodings[VP9_I32X32_MODES]; -extern struct vp9_token_struct vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES]; -extern struct vp9_token_struct vp9_kf_ymode_encodings[VP9_YMODES]; -extern struct vp9_token_struct vp9_i8x8_mode_encodings[VP9_I8X8_MODES]; -extern struct vp9_token_struct vp9_uv_mode_encodings[VP9_UV_MODES]; -extern struct vp9_token_struct vp9_mbsplit_encodings[VP9_NUMMBSPLITS]; +extern struct vp9_token vp9_bmode_encodings[VP9_NKF_BINTRAMODES]; +extern struct vp9_token vp9_kf_bmode_encodings[VP9_KF_BINTRAMODES]; +extern struct vp9_token vp9_ymode_encodings[VP9_YMODES]; +extern struct vp9_token vp9_sb_ymode_encodings[VP9_I32X32_MODES]; +extern struct vp9_token vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES]; +extern struct vp9_token vp9_kf_ymode_encodings[VP9_YMODES]; +extern struct vp9_token vp9_i8x8_mode_encodings[VP9_I8X8_MODES]; +extern struct vp9_token vp9_uv_mode_encodings[VP9_UV_MODES]; +extern struct vp9_token vp9_mbsplit_encodings[VP9_NUMMBSPLITS]; /* Inter mode values do not start at zero */ -extern struct vp9_token_struct vp9_mv_ref_encoding_array[VP9_MVREFS]; -extern struct vp9_token_struct vp9_sb_mv_ref_encoding_array[VP9_MVREFS]; -extern struct vp9_token_struct vp9_sub_mv_ref_encoding_array[VP9_SUBMVREFS]; +extern struct vp9_token vp9_mv_ref_encoding_array[VP9_MVREFS]; +extern struct vp9_token vp9_sb_mv_ref_encoding_array[VP9_MVREFS]; +extern struct vp9_token vp9_sub_mv_ref_encoding_array[VP9_SUBMVREFS]; + +// probability models for partition information +extern const vp9_tree_index vp9_partition_tree[]; +extern struct vp9_token vp9_partition_encodings[PARTITION_TYPES]; void vp9_entropy_mode_init(void); @@ -107,8 +111,7 @@ extern const int vp9_is_interpolating_filter[SWITCHABLE + 1]; extern const vp9_tree_index vp9_switchable_interp_tree [2 * (VP9_SWITCHABLE_FILTERS - 1)]; -extern struct vp9_token_struct vp9_switchable_interp_encodings - [VP9_SWITCHABLE_FILTERS]; +extern struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS]; extern const vp9_prob vp9_switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1] [VP9_SWITCHABLE_FILTERS - 1]; diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c index 8330befbe..fe3667725 100644 --- a/vp9/common/vp9_entropymv.c +++ b/vp9/common/vp9_entropymv.c @@ -33,7 +33,7 @@ const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2] = { -MV_JOINT_HNZVZ, 4, -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ }; -struct vp9_token_struct vp9_mv_joint_encodings[MV_JOINTS]; +struct vp9_token vp9_mv_joint_encodings[MV_JOINTS]; const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2] = { -MV_CLASS_0, 2, @@ -47,19 +47,19 @@ const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2] = { -MV_CLASS_7, -MV_CLASS_8, -MV_CLASS_9, -MV_CLASS_10, }; -struct vp9_token_struct vp9_mv_class_encodings[MV_CLASSES]; +struct vp9_token vp9_mv_class_encodings[MV_CLASSES]; const vp9_tree_index vp9_mv_class0_tree [2 * CLASS0_SIZE - 2] = { -0, -1, }; -struct vp9_token_struct vp9_mv_class0_encodings[CLASS0_SIZE]; +struct vp9_token vp9_mv_class0_encodings[CLASS0_SIZE]; const vp9_tree_index vp9_mv_fp_tree [2 * 4 - 2] = { -0, 2, -1, 4, -2, -3 }; -struct vp9_token_struct vp9_mv_fp_encodings[4]; +struct vp9_token vp9_mv_fp_encodings[4]; const nmv_context vp9_default_nmv_context = { {32, 64, 96}, @@ -212,10 +212,10 @@ void vp9_increment_nmv(const MV *mv, const MV *ref, nmv_context_counts *mvctx, const MV_JOINT_TYPE type = vp9_get_mv_joint(*mv); mvctx->joints[type]++; usehp = usehp && vp9_use_nmv_hp(ref); - if (type == MV_JOINT_HZVNZ || type == MV_JOINT_HNZVNZ) + if (mv_joint_vertical(type)) increment_nmv_component_count(mv->row, &mvctx->comps[0], 1, usehp); - if (type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ) + if (mv_joint_horizontal(type)) increment_nmv_component_count(mv->col, &mvctx->comps[1], 1, usehp); } diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h index 162d2b44f..715b5bb2b 100644 --- a/vp9/common/vp9_entropymv.h +++ b/vp9/common/vp9_entropymv.h @@ -45,8 +45,16 @@ typedef enum { MV_JOINT_HNZVNZ = 3, /* Both components nonzero */ } MV_JOINT_TYPE; +static INLINE int mv_joint_vertical(MV_JOINT_TYPE type) { + return type == MV_JOINT_HZVNZ || type == MV_JOINT_HNZVNZ; +} + +static INLINE int mv_joint_horizontal(MV_JOINT_TYPE type) { + return type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ; +} + extern const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2]; -extern struct vp9_token_struct vp9_mv_joint_encodings [MV_JOINTS]; +extern struct vp9_token vp9_mv_joint_encodings[MV_JOINTS]; /* Symbols for coding magnitude class of nonzero components */ #define MV_CLASSES 11 @@ -65,7 +73,7 @@ typedef enum { } MV_CLASS_TYPE; extern const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2]; -extern struct vp9_token_struct vp9_mv_class_encodings [MV_CLASSES]; +extern struct vp9_token vp9_mv_class_encodings[MV_CLASSES]; #define CLASS0_BITS 1 /* bits at integer precision for class 0 */ #define CLASS0_SIZE (1 << CLASS0_BITS) @@ -76,10 +84,10 @@ extern struct vp9_token_struct vp9_mv_class_encodings [MV_CLASSES]; #define MV_VALS ((MV_MAX << 1) + 1) extern const vp9_tree_index vp9_mv_class0_tree[2 * CLASS0_SIZE - 2]; -extern struct vp9_token_struct vp9_mv_class0_encodings[CLASS0_SIZE]; +extern struct vp9_token vp9_mv_class0_encodings[CLASS0_SIZE]; extern const vp9_tree_index vp9_mv_fp_tree[2 * 4 - 2]; -extern struct vp9_token_struct vp9_mv_fp_encodings[4]; +extern struct vp9_token vp9_mv_fp_encodings[4]; typedef struct { vp9_prob sign; diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h index efa84c40f..930a5975f 100644 --- a/vp9/common/vp9_enums.h +++ b/vp9/common/vp9_enums.h @@ -27,4 +27,16 @@ typedef enum BLOCK_SIZE_TYPE { BLOCK_SIZE_SB64X64, } BLOCK_SIZE_TYPE; +typedef enum PARTITION_TYPE { + PARTITION_NONE, +#if CONFIG_SBSEGMENT + PARTITION_HORZ, + PARTITION_VERT, +#endif + PARTITION_SPLIT, + PARTITION_TYPES +} PARTITION_TYPE; + +#define PARTITION_PLANES 2 // number of probability models + #endif // VP9_COMMON_VP9_ENUMS_H_ diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c index 053895840..bbbc2f62c 100644 --- a/vp9/common/vp9_findnearmv.c +++ b/vp9/common/vp9_findnearmv.c @@ -141,7 +141,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, unsigned int sse; unsigned int ref_scores[MAX_MV_REF_CANDIDATES] = {0}; int_mv sorted_mvs[MAX_MV_REF_CANDIDATES]; - int zero_seen = FALSE; + int zero_seen = 0; if (ref_y_buffer) { diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 9a5087326..a57f766db 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -16,24 +16,6 @@ #include "vp9/common/vp9_seg_common.h" static void lf_init_lut(loop_filter_info_n *lfi) { - int filt_lvl; - - for (filt_lvl = 0; filt_lvl <= MAX_LOOP_FILTER; filt_lvl++) { - if (filt_lvl >= 40) { - lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 2; - lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 3; - } else if (filt_lvl >= 20) { - lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1; - lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 2; - } else if (filt_lvl >= 15) { - lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1; - lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 1; - } else { - lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 0; - lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 0; - } - } - lfi->mode_lf_lut[DC_PRED] = 1; lfi->mode_lf_lut[D45_PRED] = 1; lfi->mode_lf_lut[D135_PRED] = 1; @@ -194,25 +176,212 @@ static int mb_lf_skip(const MB_MODE_INFO *const mbmi) { static int sb_mb_lf_skip(const MODE_INFO *const mip0, const MODE_INFO *const mip1) { const MB_MODE_INFO *mbmi0 = &mip0->mbmi; - const MB_MODE_INFO *mbmi1 = &mip0->mbmi; + const MB_MODE_INFO *mbmi1 = &mip1->mbmi; return mb_lf_skip(mbmi0) && mb_lf_skip(mbmi1) && - (mbmi0->ref_frame == mbmi1->ref_frame) && - (mbmi0->mv[mbmi0->ref_frame].as_int == - mbmi1->mv[mbmi1->ref_frame].as_int) && - mbmi0->ref_frame != INTRA_FRAME; + mbmi0->ref_frame != INTRA_FRAME && + mbmi1->ref_frame != INTRA_FRAME; +} + +static void lpf_mb(VP9_COMMON *cm, const MODE_INFO *mi, + int do_left_mb_v, int do_above_mb_h, + int do_left_mbuv_v, int do_above_mbuv_h, + uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr, + int y_stride, int uv_stride, int dering) { + loop_filter_info_n *lfi_n = &cm->lf_info; + struct loop_filter_info lfi; + int mode = mi->mbmi.mode; + int mode_index = lfi_n->mode_lf_lut[mode]; + int seg = mi->mbmi.segment_id; + int ref_frame = mi->mbmi.ref_frame; + int filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; + + if (filter_level) { + const int skip_lf = mb_lf_skip(&mi->mbmi); + const int tx_size = mi->mbmi.txfm_size; + if (cm->filter_type == NORMAL_LOOPFILTER) { + const int hev_index = filter_level >> 4; + lfi.mblim = lfi_n->mblim[filter_level]; + lfi.blim = lfi_n->blim[filter_level]; + lfi.lim = lfi_n->lim[filter_level]; + lfi.hev_thr = lfi_n->hev_thr[hev_index]; + + if (do_above_mb_h) { + if (tx_size >= TX_16X16) + vp9_lpf_mbh_w(y_ptr, + do_above_mbuv_h ? u_ptr : NULL, + do_above_mbuv_h ? v_ptr : NULL, + y_stride, uv_stride, &lfi); + else + vp9_loop_filter_mbh(y_ptr, u_ptr, v_ptr, y_stride, uv_stride, &lfi); + } + + if (!skip_lf) { + if (tx_size >= TX_8X8) { + if (tx_size == TX_8X8 && (mode == I8X8_PRED || mode == SPLITMV)) + vp9_loop_filter_bh8x8(y_ptr, u_ptr, v_ptr, + y_stride, uv_stride, &lfi); + else + vp9_loop_filter_bh8x8(y_ptr, NULL, NULL, + y_stride, uv_stride, &lfi); + } else { + vp9_loop_filter_bh(y_ptr, u_ptr, v_ptr, + y_stride, uv_stride, &lfi); + } + } + + if (do_left_mb_v) { + if (tx_size >= TX_16X16) + vp9_lpf_mbv_w(y_ptr, + do_left_mbuv_v ? u_ptr : NULL, + do_left_mbuv_v ? v_ptr : NULL, + y_stride, uv_stride, &lfi); + else + vp9_loop_filter_mbv(y_ptr, u_ptr, v_ptr, y_stride, uv_stride, &lfi); + } + + if (!skip_lf) { + if (tx_size >= TX_8X8) { + if (tx_size == TX_8X8 && (mode == I8X8_PRED || mode == SPLITMV)) + vp9_loop_filter_bv8x8(y_ptr, u_ptr, v_ptr, + y_stride, uv_stride, &lfi); + else + vp9_loop_filter_bv8x8(y_ptr, NULL, NULL, + y_stride, uv_stride, &lfi); + } else { + vp9_loop_filter_bv(y_ptr, u_ptr, v_ptr, + y_stride, uv_stride, &lfi); + } + } + if (dering) { +#if CONFIG_LOOP_DERING + vp9_post_proc_down_and_across(y_ptr, y_ptr, + y_stride, y_stride, + 16, 16, dering); + if (u_ptr && v_ptr) { + vp9_post_proc_down_and_across(u_ptr, u_ptr, + uv_stride, uv_stride, + 8, 8, dering); + vp9_post_proc_down_and_across(v_ptr, v_ptr, + uv_stride, uv_stride, + 8, 8, dering); + } +#endif + } + } else { + // TODO(yaowu): simple loop filter + } + } } +static void lpf_sb32(VP9_COMMON *cm, const MODE_INFO *mode_info_context, + int mb_row, int mb_col, + uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr, + int y_stride, int uv_stride, + int y_only, int dering) { + BLOCK_SIZE_TYPE sb_type = mode_info_context->mbmi.sb_type; + TX_SIZE tx_size = mode_info_context->mbmi.txfm_size; + int do_left_v, do_above_h; + int do_left_v_mbuv, do_above_h_mbuv; + int mis = cm->mode_info_stride; + const MODE_INFO *mi; + + // process 1st MB top-left + mi = mode_info_context; + do_left_v = (mb_col > 0); + do_above_h = (mb_row > 0); + do_left_v_mbuv = !(sb_type >= BLOCK_SIZE_SB64X64 && + tx_size >= TX_32X32 && (mb_col & 2)); + do_above_h_mbuv = !(sb_type >= BLOCK_SIZE_SB64X64 && + tx_size >= TX_32X32 && (mb_row & 2)); + lpf_mb(cm, mi, do_left_v, do_above_h, + do_left_v_mbuv, do_above_h_mbuv, + y_ptr, + y_only? 0 : u_ptr, + y_only? 0 : v_ptr, + y_stride, uv_stride, dering); + // process 2nd MB top-right + mi = mode_info_context + 1; + do_left_v = !(sb_type && (tx_size >= TX_32X32 || + sb_mb_lf_skip(mode_info_context, mi))); + do_above_h = (mb_row > 0); + do_left_v_mbuv = do_left_v; + do_above_h_mbuv = !(sb_type >= BLOCK_SIZE_SB64X64 && + tx_size >= TX_32X32 && (mb_row & 2)); + lpf_mb(cm, mi, do_left_v, do_above_h, + do_left_v_mbuv, do_above_h_mbuv, + y_ptr + 16, + y_only ? 0 : (u_ptr + 8), + y_only ? 0 : (v_ptr + 8), + y_stride, uv_stride, dering); + + // process 3rd MB bottom-left + mi = mode_info_context + mis; + do_left_v = (mb_col > 0); + do_above_h =!(sb_type && (tx_size >= TX_32X32 || + sb_mb_lf_skip(mode_info_context, mi))); + do_left_v_mbuv = !(sb_type >= BLOCK_SIZE_SB64X64 && + tx_size >= TX_32X32 && (mb_col & 2)); + do_above_h_mbuv = do_above_h; + lpf_mb(cm, mi, do_left_v, do_above_h, + do_left_v_mbuv, do_above_h_mbuv, + y_ptr + 16 * y_stride, + y_only ? 0 : (u_ptr + 8 * uv_stride), + y_only ? 0 : (v_ptr + 8 * uv_stride), + y_stride, uv_stride, dering); + + // process 4th MB bottom right + mi = mode_info_context + mis + 1; + do_left_v = !(sb_type && (tx_size >= TX_32X32 || + sb_mb_lf_skip(mi - 1, mi))); + do_above_h =!(sb_type && (tx_size >= TX_32X32 || + sb_mb_lf_skip(mode_info_context + 1, mi))); + do_left_v_mbuv = do_left_v; + do_above_h_mbuv = do_above_h; + lpf_mb(cm, mi, do_left_v, do_above_h, + do_left_v_mbuv, do_above_h_mbuv, + y_ptr + 16 * y_stride + 16, + y_only ? 0 : (u_ptr + 8 * uv_stride + 8), + y_only ? 0 : (v_ptr + 8 * uv_stride + 8), + y_stride, uv_stride, dering); +} + +static void lpf_sb64(VP9_COMMON *cm, const MODE_INFO *mode_info_context, + int mb_row, int mb_col, + uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr, + int y_stride, int uv_stride, + int y_only, int dering) { + lpf_sb32(cm, mode_info_context, mb_row, mb_col, + y_ptr, u_ptr, v_ptr, + y_stride, uv_stride, y_only, dering); + lpf_sb32(cm, mode_info_context + 2, mb_row, mb_col + 2, + y_ptr + 32, u_ptr + 16, v_ptr + 16, + y_stride, uv_stride, y_only, dering); + lpf_sb32(cm, mode_info_context + cm->mode_info_stride * 2, + mb_row + 2, mb_col, + y_ptr + 32 * y_stride, + u_ptr + 16 * uv_stride, + v_ptr + 16 * uv_stride, + y_stride, uv_stride, y_only, dering); + lpf_sb32(cm, mode_info_context + cm->mode_info_stride * 2 + 2, + mb_row + 2, mb_col + 2, + y_ptr + 32 * y_stride + 32, + u_ptr + 16 * uv_stride + 16, + v_ptr + 16 * uv_stride + 16, + y_stride, uv_stride, y_only, dering); +} void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd, int frame_filter_level, int y_only, int dering) { YV12_BUFFER_CONFIG *post = cm->frame_to_show; - loop_filter_info_n *lfi_n = &cm->lf_info; - struct loop_filter_info lfi; - const FRAME_TYPE frame_type = cm->frame_type; int mb_row, mb_col; - + const int sb64_rows = cm->mb_rows / 4; + const int sb64_cols = cm->mb_cols / 4; + const int extra_sb32_row = (cm->mb_rows & 2) != 0; + const int extra_sb32_col = (cm->mb_cols & 2) != 0; + const int extra_mb_col = cm->mb_cols & 1; + const int extra_mb_row = cm->mb_rows & 1; // Set up the buffer pointers uint8_t *y_ptr = post->y_buffer; uint8_t *u_ptr = y_only ? 0 : post->u_buffer; @@ -220,172 +389,197 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, // Point at base of Mb MODE_INFO list const MODE_INFO *mode_info_context = cm->mi; + const MODE_INFO *mi; const int mis = cm->mode_info_stride; + const int y_stride = post->y_stride; + const int uv_stride = post->uv_stride; + // These two flags signal if MB left edge and above edge + // should be filtered using MB edge filter. Currently, MB + // edge filtering is not applied on MB edge internal to a + // 32x32 superblock if: + // 1) SB32 is using 32x32 prediction and 32x32 transform + // 2) SB32 is using 32x32 prediction and 16x16 transform + // but all coefficients are zero. + // MB edges are on 32x32 superblock boundary are always + // filtered except on image frame boundary. + int do_left_v, do_above_h; + // These two flags signal if MB UV left edge and above edge + // should be filtered using MB edge filter. Currently, MB + // edge filtering is not applied for MB edges internal to + // a 32x32 superblock if: + // 1) SB32 is using 32x32 prediction and 32x32 transform + // 2) SB32 is using 32x32 prediction and 16x16 transform + // but all coefficients are zero. + // 3) SB32 UV edges internal to a SB64 and 32x32 transform + // is used, i.e. UV is doing 32x32 transform hence no + // transform boundary exists inside the SB64 for UV + int do_left_v_mbuv, do_above_h_mbuv; // Initialize the loop filter for this frame. vp9_loop_filter_frame_init(cm, xd, frame_filter_level); - // vp9_filter each macro block - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - const MB_PREDICTION_MODE mode = mode_info_context->mbmi.mode; - const int mode_index = lfi_n->mode_lf_lut[mode]; - const int seg = mode_info_context->mbmi.segment_id; - const int ref_frame = mode_info_context->mbmi.ref_frame; - const int filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - if (filter_level) { - const int skip_lf = mb_lf_skip(&mode_info_context->mbmi); - const int tx_size = mode_info_context->mbmi.txfm_size; - if (cm->filter_type == NORMAL_LOOPFILTER) { - const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; - lfi.mblim = lfi_n->mblim[filter_level]; - lfi.blim = lfi_n->blim[filter_level]; - lfi.lim = lfi_n->lim[filter_level]; - lfi.hev_thr = lfi_n->hev_thr[hev_index]; - - if (mb_col > 0 && - !((mb_col & 1) && mode_info_context->mbmi.sb_type && - (sb_mb_lf_skip(mode_info_context - 1, mode_info_context) || - tx_size >= TX_32X32)) - ) { - if (tx_size >= TX_16X16) - vp9_lpf_mbv_w(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); - else - vp9_loop_filter_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); - } - if (!skip_lf) { - if (tx_size >= TX_8X8) { - if (tx_size == TX_8X8 && (mode == I8X8_PRED || mode == SPLITMV)) - vp9_loop_filter_bv8x8(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); - else - vp9_loop_filter_bv8x8(y_ptr, NULL, NULL, post->y_stride, - post->uv_stride, &lfi); - } else { - vp9_loop_filter_bv(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); - } - } - /* don't apply across umv border */ - if (mb_row > 0 && - !((mb_row & 1) && mode_info_context->mbmi.sb_type && - (sb_mb_lf_skip(mode_info_context - mis, mode_info_context) || - tx_size >= TX_32X32)) - ) { - if (tx_size >= TX_16X16) - vp9_lpf_mbh_w(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); - else - vp9_loop_filter_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); - } - if (!skip_lf) { - if (tx_size >= TX_8X8) { - if (tx_size == TX_8X8 && (mode == I8X8_PRED || mode == SPLITMV)) - vp9_loop_filter_bh8x8(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); - else - vp9_loop_filter_bh8x8(y_ptr, NULL, NULL, post->y_stride, - post->uv_stride, &lfi); - } else { - vp9_loop_filter_bh(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); - } - } -#if CONFIG_LOOP_DERING - if (dering) { - if (mb_row && mb_row < cm->mb_rows - 1 && - mb_col && mb_col < cm->mb_cols - 1) { - vp9_post_proc_down_and_across(y_ptr, y_ptr, - post->y_stride, post->y_stride, - 16, 16, dering); - if (!y_only) { - vp9_post_proc_down_and_across(u_ptr, u_ptr, - post->uv_stride, post->uv_stride, - 8, 8, dering); - vp9_post_proc_down_and_across(v_ptr, v_ptr, - post->uv_stride, post->uv_stride, - 8, 8, dering); - } - } else { - // Adjust the filter so that no out-of-frame data is used. - uint8_t *dr_y = y_ptr, *dr_u = u_ptr, *dr_v = v_ptr; - int w_adjust = 0; - int h_adjust = 0; - - if (mb_col == 0) { - dr_y += 2; - dr_u += 2; - dr_v += 2; - w_adjust += 2; - } - if (mb_col == cm->mb_cols - 1) - w_adjust += 2; - if (mb_row == 0) { - dr_y += 2 * post->y_stride; - dr_u += 2 * post->uv_stride; - dr_v += 2 * post->uv_stride; - h_adjust += 2; - } - if (mb_row == cm->mb_rows - 1) - h_adjust += 2; - vp9_post_proc_down_and_across_c(dr_y, dr_y, - post->y_stride, post->y_stride, - 16 - w_adjust, 16 - h_adjust, - dering); - if (!y_only) { - vp9_post_proc_down_and_across_c(dr_u, dr_u, - post->uv_stride, - post->uv_stride, - 8 - w_adjust, 8 - h_adjust, - dering); - vp9_post_proc_down_and_across_c(dr_v, dr_v, - post->uv_stride, - post->uv_stride, - 8 - w_adjust, 8 - h_adjust, - dering); - } - } - } -#endif - } else { - // FIXME: Not 8x8 aware - if (mb_col > 0 && - !(skip_lf && mb_lf_skip(&mode_info_context[-1].mbmi)) && - !((mb_col & 1) && mode_info_context->mbmi.sb_type)) - vp9_loop_filter_simple_mbv(y_ptr, post->y_stride, - lfi_n->mblim[filter_level]); - if (!skip_lf) - vp9_loop_filter_simple_bv(y_ptr, post->y_stride, - lfi_n->blim[filter_level]); - - /* don't apply across umv border */ - if (mb_row > 0 && - !(skip_lf && mb_lf_skip(&mode_info_context[-mis].mbmi)) && - !((mb_row & 1) && mode_info_context->mbmi.sb_type)) - vp9_loop_filter_simple_mbh(y_ptr, post->y_stride, - lfi_n->mblim[filter_level]); - if (!skip_lf) - vp9_loop_filter_simple_bh(y_ptr, post->y_stride, - lfi_n->blim[filter_level]); - } - } + // vp9_filter each 64x64 SB + // For each SB64: the 4 SB32 are filtered in raster scan order + // For each SB32: the 4 MBs are filtered in raster scan order + // For each MB: the left and above MB edges as well as the + // internal block edges are processed together + for (mb_row = 0; mb_row < sb64_rows * 4; mb_row += 4) { + for (mb_col = 0; mb_col < sb64_cols * 4; mb_col += 4) { + lpf_sb64(cm, mode_info_context, mb_row, mb_col, + y_ptr, u_ptr, v_ptr, + y_stride, uv_stride, y_only, dering); + y_ptr += 64; + u_ptr = y_only? 0 : u_ptr + 32; + v_ptr = y_only? 0 : v_ptr + 32; + mode_info_context += 4; // step to next SB64 + } + if (extra_sb32_col) { + // process 2 SB32s in the extra SB32 col + lpf_sb32(cm, mode_info_context, mb_row, mb_col, + y_ptr, u_ptr, v_ptr, + y_stride, uv_stride, y_only, dering); + lpf_sb32(cm, mode_info_context + mis * 2, + mb_row + 2, mb_col, + y_ptr + 32 * y_stride, + u_ptr + 16 * uv_stride, + v_ptr + 16 * uv_stride, + y_stride, uv_stride, y_only, dering); + y_ptr += 32; + u_ptr = y_only? 0 : u_ptr + 16; + v_ptr = y_only? 0 : v_ptr + 16; + mode_info_context += 2; // step to next SB32 + mb_col += 2; + } + if (extra_mb_col) { + // process 4 MB in the extra MB col + // process 1st MB + mi = mode_info_context; + do_left_v = (mb_col > 0); + do_above_h = (mb_row > 0); + do_left_v_mbuv = 1; + do_above_h_mbuv = 1; + lpf_mb(cm, mi, do_left_v, do_above_h, + do_left_v_mbuv, do_above_h_mbuv, + y_ptr, + y_only? 0 : u_ptr, + y_only? 0 : v_ptr, + y_stride, uv_stride, dering); + // process 2nd MB + mi = mode_info_context + mis; + do_left_v = (mb_col > 0); + do_above_h = 1; + do_left_v_mbuv = 1; + do_above_h_mbuv = 1; + lpf_mb(cm, mi, do_left_v, do_above_h, + do_left_v_mbuv, do_above_h_mbuv, + y_ptr + 16 * y_stride, + y_only ? 0 : (u_ptr + 8 * uv_stride), + y_only ? 0 : (v_ptr + 8 * uv_stride), + y_stride, uv_stride, dering); + // process 3nd MB + mi = mode_info_context + mis * 2; + do_left_v = (mb_col > 0); + do_above_h = 1; + do_left_v_mbuv = 1; + do_above_h_mbuv = 1; + lpf_mb(cm, mi, do_left_v, do_above_h, + do_left_v_mbuv, do_above_h_mbuv, + y_ptr + 32 * y_stride, + y_only ? 0 : (u_ptr + 16 * uv_stride), + y_only ? 0 : (v_ptr + 16 * uv_stride), + y_stride, uv_stride, dering); + // process 4th MB + mi = mode_info_context + mis * 3; + do_left_v = (mb_col > 0); + do_above_h = 1; + do_left_v_mbuv = 1; + do_above_h_mbuv = 1; + lpf_mb(cm, mi, do_left_v, do_above_h, + do_left_v_mbuv, do_above_h_mbuv, + y_ptr + 48 * y_stride, + y_only ? 0 : (u_ptr + 24 * uv_stride), + y_only ? 0 : (v_ptr + 24 * uv_stride), + y_stride, uv_stride, dering); y_ptr += 16; - if (!y_only) { - u_ptr += 8; - v_ptr += 8; - } - mode_info_context++; // step to next MB + u_ptr = y_only? 0 : u_ptr + 8; + v_ptr = y_only? 0 : v_ptr + 8; + mode_info_context++; // step to next MB } - y_ptr += post->y_stride * 16 - post->y_width; + // move pointers to the begining of next sb64 row + y_ptr += y_stride * 64 - post->y_width; if (!y_only) { - u_ptr += post->uv_stride * 8 - post->uv_width; - v_ptr += post->uv_stride * 8 - post->uv_width; + u_ptr += uv_stride * 32 - post->uv_width; + v_ptr += uv_stride * 32 - post->uv_width; + } + /* skip to next SB64 row */ + mode_info_context += mis * 4 - cm->mb_cols; + } + if (extra_sb32_row) { + const int sb32_cols = sb64_cols * 2 + extra_sb32_col; + for (mb_col = 0; mb_col < sb32_cols * 2; mb_col += 2) { + lpf_sb32(cm, mode_info_context, mb_row, mb_col, + y_ptr, u_ptr, v_ptr, + y_stride, uv_stride, y_only, dering); + y_ptr += 32; + u_ptr = y_only? 0 : u_ptr + 16; + v_ptr = y_only? 0 : v_ptr + 16; + mode_info_context += 2; // step to next SB32 + } + if (extra_mb_col) { + // process 1st MB + mi = mode_info_context; + do_left_v = (mb_col > 0); + do_above_h = (mb_row > 0); + do_left_v_mbuv = 1; + do_above_h_mbuv = 1; + lpf_mb(cm, mi, do_left_v, do_above_h, + do_left_v_mbuv, do_above_h_mbuv, + y_ptr, + y_only? NULL : u_ptr, + y_only? NULL : v_ptr, + y_stride, uv_stride, dering); + // process 2nd MB + mi = mode_info_context + mis; + do_left_v = (mb_col > 0); + do_above_h = 1; + do_left_v_mbuv = 1; + do_above_h_mbuv = 1; + lpf_mb(cm, mi, do_left_v, do_above_h, + do_left_v_mbuv, do_above_h_mbuv, + y_ptr + 16 * y_stride, + y_only ? NULL : (u_ptr + 8 * uv_stride), + y_only ? NULL : (v_ptr + 8 * uv_stride), + y_stride, uv_stride, dering); + y_ptr += 16; + u_ptr = y_only? 0 : u_ptr + 8; + v_ptr = y_only? 0 : v_ptr + 8; + mode_info_context++; /* step to next MB */ + } + // move pointers to the beginning of next sb64 row + y_ptr += y_stride * 32 - post->y_width; + u_ptr += y_only? 0 : uv_stride * 16 - post->uv_width; + v_ptr += y_only? 0 : uv_stride * 16 - post->uv_width; + // skip to next MB row if exist + mode_info_context += mis * 2 - cm->mb_cols; + mb_row += 2; + } + if (extra_mb_row) { + for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { + const MODE_INFO *mi = mode_info_context; + do_left_v = (mb_col > 0); + do_above_h = (mb_row > 0); + do_left_v_mbuv = 1; + do_above_h_mbuv = 1; + lpf_mb(cm, mi, do_left_v, do_above_h, + do_left_v_mbuv, do_above_h_mbuv, + y_ptr, + y_only? 0 : u_ptr, + y_only? 0 : v_ptr, + y_stride, uv_stride, dering); + y_ptr += 16; + u_ptr = y_only? 0 : u_ptr + 8; + v_ptr = y_only? 0 : v_ptr + 8; + mode_info_context++; // step to next MB } - mode_info_context++; // Skip border mb } } - - diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h index bd9a35125..3b81146e2 100644 --- a/vp9/common/vp9_loopfilter.h +++ b/vp9/common/vp9_loopfilter.h @@ -37,7 +37,6 @@ typedef struct { DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]); unsigned char lvl[4][4][4]; - unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1]; unsigned char mode_lf_lut[MB_MODE_COUNT]; } loop_filter_info_n; diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c index e1a12e411..666197366 100644 --- a/vp9/common/vp9_mvref_common.c +++ b/vp9/common/vp9_mvref_common.c @@ -158,7 +158,7 @@ static void addmv_and_shuffle( int i; int insert_point; - int duplicate_found = FALSE; + int duplicate_found = 0; // Check for duplicates. If there is one increase its score. // We only compare vs the current top candidates. @@ -171,7 +171,7 @@ static void addmv_and_shuffle( while (i > 0) { i--; if (candidate_mv.as_int == mv_list[i].as_int) { - duplicate_found = TRUE; + duplicate_found = 1; mv_scores[i] += weight; break; } @@ -251,7 +251,7 @@ void vp9_find_mv_refs(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here, int split_count = 0; int (*mv_ref_search)[2]; int *ref_distance_weight; - int zero_seen = FALSE; + int zero_seen = 0; const int mb_col = (-xd->mb_to_left_edge) >> 7; // Blank the reference vector lists and other local structures. @@ -395,7 +395,7 @@ void vp9_find_mv_refs(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here, // Scan for 0,0 case and clamp non zero choices for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { if (candidate_mvs[i].as_int == 0) { - zero_seen = TRUE; + zero_seen = 1; } else { clamp_mv_ref(xd, &candidate_mvs[i]); } diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index c7ca67efe..66698f71a 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -68,6 +68,7 @@ typedef struct frame_contexts { vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1]; vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1]; vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1]; + vp9_prob partition_prob[PARTITION_PLANES][PARTITION_TYPES - 1]; vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES]; vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES]; @@ -95,6 +96,7 @@ typedef struct frame_contexts { vp9_prob pre_i8x8_mode_prob[VP9_I8X8_MODES - 1]; vp9_prob pre_sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1]; vp9_prob pre_mbsplit_prob[VP9_NUMMBSPLITS - 1]; + vp9_prob pre_partition_prob[PARTITION_PLANES][PARTITION_TYPES - 1]; unsigned int bmode_counts[VP9_NKF_BINTRAMODES]; unsigned int ymode_counts[VP9_YMODES]; /* interframe intra mode probs */ unsigned int sb_ymode_counts[VP9_I32X32_MODES]; @@ -102,6 +104,7 @@ typedef struct frame_contexts { unsigned int i8x8_mode_counts[VP9_I8X8_MODES]; /* interframe intra probs */ unsigned int sub_mv_ref_counts[SUBMVREF_COUNT][VP9_SUBMVREFS]; unsigned int mbsplit_counts[VP9_NUMMBSPLITS]; + unsigned int partition_counts[PARTITION_PLANES][PARTITION_TYPES]; vp9_coeff_probs pre_coef_probs_4x4[BLOCK_TYPES]; vp9_coeff_probs pre_coef_probs_8x8[BLOCK_TYPES]; @@ -220,7 +223,6 @@ typedef struct VP9Common { /* profile settings */ int experimental; - int mb_no_coeff_skip; TXFM_MODE txfm_mode; COMPPREDMODE_TYPE comp_pred_mode; int no_lpf; @@ -280,8 +282,6 @@ typedef struct VP9Common { vp9_prob prob_intra_coded; vp9_prob prob_last_coded; vp9_prob prob_gf_coded; - vp9_prob prob_sb32_coded; - vp9_prob prob_sb64_coded; // Context probabilities when using predictive coding of segment id vp9_prob segment_pred_probs[PREDICTION_PROBS]; diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index ee378d239..716781170 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -33,6 +33,24 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale, scale->y_offset_q4 = 0; // calculated per-mb scale->y_step_q4 = 16 * other_h / this_h; + if (scale->x_num == scale->x_den && scale->y_num == scale->y_den) { + scale->scale_value_x = unscaled_value; + scale->scale_value_y = unscaled_value; + scale->set_scaled_offsets = set_offsets_without_scaling; + scale->scale_motion_vector_q3_to_q4 = + motion_vector_q3_to_q4_without_scaling; + scale->scale_motion_vector_component_q4 = + motion_vector_component_q4_without_scaling; + } else { + scale->scale_value_x = scale_value_x_with_scaling; + scale->scale_value_y = scale_value_y_with_scaling; + scale->set_scaled_offsets = set_offsets_with_scaling; + scale->scale_motion_vector_q3_to_q4 = + motion_vector_q3_to_q4_with_scaling; + scale->scale_motion_vector_component_q4 = + motion_vector_component_q4_with_scaling; + } + // TODO(agrange): Investigate the best choice of functions to use here // for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what // to do at full-pel offsets. The current selection, where the filter is @@ -325,60 +343,13 @@ void vp9_copy_mem8x4_c(const uint8_t *src, } } -static void set_scaled_offsets(struct scale_factors *scale, - int row, int col) { - const int x_q4 = 16 * col; - const int y_q4 = 16 * row; - - scale->x_offset_q4 = (x_q4 * scale->x_num / scale->x_den) & 0xf; - scale->y_offset_q4 = (y_q4 * scale->y_num / scale->y_den) & 0xf; -} - -static int32_t scale_motion_vector_component_q3(int mv_q3, - int num, - int den, - int offset_q4) { - // returns the scaled and offset value of the mv component. - const int32_t mv_q4 = mv_q3 << 1; - - /* TODO(jkoleszar): make fixed point, or as a second multiply? */ - return mv_q4 * num / den + offset_q4; -} - -static int32_t scale_motion_vector_component_q4(int mv_q4, - int num, - int den, - int offset_q4) { - // returns the scaled and offset value of the mv component. - - /* TODO(jkoleszar): make fixed point, or as a second multiply? */ - return mv_q4 * num / den + offset_q4; -} - -static int_mv32 scale_motion_vector_q3_to_q4( - const int_mv *src_mv, - const struct scale_factors *scale) { - // returns mv * scale + offset - int_mv32 result; - - result.as_mv.row = scale_motion_vector_component_q3(src_mv->as_mv.row, - scale->y_num, - scale->y_den, - scale->y_offset_q4); - result.as_mv.col = scale_motion_vector_component_q3(src_mv->as_mv.col, - scale->x_num, - scale->x_den, - scale->x_offset_q4); - return result; -} - void vp9_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int_mv *mv_q3, const struct scale_factors *scale, int w, int h, int weight, const struct subpix_fn_table *subpix) { - int_mv32 mv = scale_motion_vector_q3_to_q4(mv_q3, scale); + int_mv32 mv = scale->scale_motion_vector_q3_to_q4(mv_q3, scale); src += (mv.as_mv.row >> 4) * src_stride + (mv.as_mv.col >> 4); scale->predict[!!(mv.as_mv.col & 15)][!!(mv.as_mv.row & 15)][weight]( src, src_stride, dst, dst_stride, @@ -402,11 +373,11 @@ void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride, const int mv_col_q4 = ((fullpel_mv_q3->as_mv.col >> 3) << 4) + (frac_mv_q4->as_mv.col & 0xf); const int scaled_mv_row_q4 = - scale_motion_vector_component_q4(mv_row_q4, scale->y_num, scale->y_den, - scale->y_offset_q4); + scale->scale_motion_vector_component_q4(mv_row_q4, scale->y_num, + scale->y_den, scale->y_offset_q4); const int scaled_mv_col_q4 = - scale_motion_vector_component_q4(mv_col_q4, scale->x_num, scale->x_den, - scale->x_offset_q4); + scale->scale_motion_vector_component_q4(mv_col_q4, scale->x_num, + scale->x_den, scale->x_offset_q4); const int subpel_x = scaled_mv_col_q4 & 15; const int subpel_y = scaled_mv_row_q4 & 15; @@ -419,17 +390,19 @@ void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride, } static void build_2x1_inter_predictor_wh(const BLOCKD *d0, const BLOCKD *d1, - struct scale_factors *scale, + struct scale_factors *s, uint8_t *predictor, int block_size, int stride, int which_mv, int weight, int width, int height, const struct subpix_fn_table *subpix, int row, int col) { + struct scale_factors * scale = &s[which_mv]; + assert(d1->predictor - d0->predictor == block_size); assert(d1->pre == d0->pre + block_size); - set_scaled_offsets(&scale[which_mv], row, col); + scale->set_scaled_offsets(scale, row, col); if (d0->bmi.as_mv[which_mv].as_int == d1->bmi.as_mv[which_mv].as_int) { uint8_t **base_pre = which_mv ? d0->base_second_pre : d0->base_pre; @@ -438,7 +411,7 @@ static void build_2x1_inter_predictor_wh(const BLOCKD *d0, const BLOCKD *d1, d0->pre_stride, predictor, stride, &d0->bmi.as_mv[which_mv], - &scale[which_mv], + scale, width, height, weight, subpix); @@ -450,37 +423,39 @@ static void build_2x1_inter_predictor_wh(const BLOCKD *d0, const BLOCKD *d1, d0->pre_stride, predictor, stride, &d0->bmi.as_mv[which_mv], - &scale[which_mv], + scale, width > block_size ? block_size : width, height, weight, subpix); if (width <= block_size) return; - set_scaled_offsets(&scale[which_mv], row, col + block_size); + scale->set_scaled_offsets(scale, row, col + block_size); vp9_build_inter_predictor(*base_pre1 + d1->pre, d1->pre_stride, predictor + block_size, stride, &d1->bmi.as_mv[which_mv], - &scale[which_mv], + scale, width - block_size, height, weight, subpix); } } static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1, - struct scale_factors *scale, + struct scale_factors *s, int block_size, int stride, int which_mv, int weight, const struct subpix_fn_table *subpix, int row, int col, int use_dst) { uint8_t *d0_predictor = use_dst ? *(d0->base_dst) + d0->dst : d0->predictor; uint8_t *d1_predictor = use_dst ? *(d1->base_dst) + d1->dst : d1->predictor; + struct scale_factors * scale = &s[which_mv]; stride = use_dst ? d0->dst_stride : stride; + assert(d1_predictor - d0_predictor == block_size); assert(d1->pre == d0->pre + block_size); - set_scaled_offsets(&scale[which_mv], row, col); + scale->set_scaled_offsets(scale, row, col); if (d0->bmi.as_mv[which_mv].as_int == d1->bmi.as_mv[which_mv].as_int) { uint8_t **base_pre = which_mv ? d0->base_second_pre : d0->base_pre; @@ -489,7 +464,7 @@ static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1, d0->pre_stride, d0_predictor, stride, &d0->bmi.as_mv[which_mv], - &scale[which_mv], + scale, 2 * block_size, block_size, weight, subpix); } else { @@ -500,17 +475,17 @@ static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1, d0->pre_stride, d0_predictor, stride, &d0->bmi.as_mv[which_mv], - &scale[which_mv], + scale, block_size, block_size, weight, subpix); - set_scaled_offsets(&scale[which_mv], row, col + block_size); + scale->set_scaled_offsets(scale, row, col + block_size); vp9_build_inter_predictor(*base_pre1 + d1->pre, d1->pre_stride, d1_predictor, stride, &d1->bmi.as_mv[which_mv], - &scale[which_mv], + scale, block_size, block_size, weight, subpix); } @@ -774,6 +749,7 @@ static int get_implicit_compoundinter_weight(MACROBLOCKD *xd, int weight; int edge[4]; int block_size = 16 << xd->mode_info_context->mbmi.sb_type; + struct scale_factors *scale; if (!use_second_ref) return 0; if (!(xd->up_available || xd->left_available)) @@ -789,17 +765,17 @@ static int get_implicit_compoundinter_weight(MACROBLOCKD *xd, pre_stride = xd->second_pre.y_stride; ymv.as_int = xd->mode_info_context->mbmi.mv[1].as_int; // First generate the second predictor + scale = &xd->scale_factor[1]; for (n = 0; n < block_size; n += 16) { xd->mb_to_left_edge = edge[2] - (n << 3); xd->mb_to_right_edge = edge[3] + ((16 - n) << 3); if (clamp_mvs) clamp_mv_to_umv_border(&ymv.as_mv, xd); - set_scaled_offsets(&xd->scale_factor[1], mb_row * 16, mb_col * 16 + n); + scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16 + n); // predict a single row of pixels - vp9_build_inter_predictor( - base_pre + scaled_buffer_offset(n, 0, pre_stride, &xd->scale_factor[1]), - pre_stride, tmp_y + n, tmp_ystride, &ymv, &xd->scale_factor[1], - 16, 1, 0, &xd->subpix); + vp9_build_inter_predictor(base_pre + + scaled_buffer_offset(n, 0, pre_stride, scale), + pre_stride, tmp_y + n, tmp_ystride, &ymv, scale, 16, 1, 0, &xd->subpix); } xd->mb_to_left_edge = edge[2]; xd->mb_to_right_edge = edge[3]; @@ -808,12 +784,12 @@ static int get_implicit_compoundinter_weight(MACROBLOCKD *xd, xd->mb_to_bottom_edge = edge[1] + ((16 - n) << 3); if (clamp_mvs) clamp_mv_to_umv_border(&ymv.as_mv, xd); - set_scaled_offsets(&xd->scale_factor[1], mb_row * 16 + n, mb_col * 16); + scale->set_scaled_offsets(scale, mb_row * 16 + n, mb_col * 16); // predict a single col of pixels - vp9_build_inter_predictor( - base_pre + scaled_buffer_offset(0, n, pre_stride, &xd->scale_factor[1]), + vp9_build_inter_predictor(base_pre + + scaled_buffer_offset(0, n, pre_stride, scale), pre_stride, tmp_y + n * tmp_ystride, tmp_ystride, &ymv, - &xd->scale_factor[1], 1, 16, 0, &xd->subpix); + scale, 1, 16, 0, &xd->subpix); } xd->mb_to_top_edge = edge[0]; xd->mb_to_bottom_edge = edge[1]; @@ -825,17 +801,17 @@ static int get_implicit_compoundinter_weight(MACROBLOCKD *xd, pre_stride = xd->pre.y_stride; ymv.as_int = xd->mode_info_context->mbmi.mv[0].as_int; // Now generate the first predictor + scale = &xd->scale_factor[0]; for (n = 0; n < block_size; n += 16) { xd->mb_to_left_edge = edge[2] - (n << 3); xd->mb_to_right_edge = edge[3] + ((16 - n) << 3); if (clamp_mvs) clamp_mv_to_umv_border(&ymv.as_mv, xd); - set_scaled_offsets(&xd->scale_factor[0], mb_row * 16, mb_col * 16 + n); + scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16 + n); // predict a single row of pixels - vp9_build_inter_predictor( - base_pre + scaled_buffer_offset(n, 0, pre_stride, &xd->scale_factor[0]), - pre_stride, tmp_y + n, tmp_ystride, &ymv, &xd->scale_factor[0], - 16, 1, 0, &xd->subpix); + vp9_build_inter_predictor(base_pre + + scaled_buffer_offset(n, 0, pre_stride, scale), + pre_stride, tmp_y + n, tmp_ystride, &ymv, scale, 16, 1, 0, &xd->subpix); } xd->mb_to_left_edge = edge[2]; xd->mb_to_right_edge = edge[3]; @@ -844,12 +820,12 @@ static int get_implicit_compoundinter_weight(MACROBLOCKD *xd, xd->mb_to_bottom_edge = edge[1] + ((16 - n) << 3); if (clamp_mvs) clamp_mv_to_umv_border(&ymv.as_mv, xd); - set_scaled_offsets(&xd->scale_factor[0], mb_row * 16 + n, mb_col * 16); + scale->set_scaled_offsets(scale, mb_row * 16 + n, mb_col * 16); // predict a single col of pixels - vp9_build_inter_predictor( - base_pre + scaled_buffer_offset(0, n, pre_stride, &xd->scale_factor[0]), + vp9_build_inter_predictor(base_pre + + scaled_buffer_offset(0, n, pre_stride, scale), pre_stride, tmp_y + n * tmp_ystride, tmp_ystride, &ymv, - &xd->scale_factor[0], 1, 16, 0, &xd->subpix); + scale, 1, 16, 0, &xd->subpix); } xd->mb_to_top_edge = edge[0]; xd->mb_to_bottom_edge = edge[1]; @@ -877,17 +853,18 @@ static void build_inter16x16_predictors_mby_w(MACROBLOCKD *xd, uint8_t *base_pre = which_mv ? xd->second_pre.y_buffer : xd->pre.y_buffer; int pre_stride = which_mv ? xd->second_pre.y_stride : xd->pre.y_stride; int_mv ymv; + struct scale_factors *scale = &xd->scale_factor[which_mv]; + ymv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int; if (clamp_mvs) clamp_mv_to_umv_border(&ymv.as_mv, xd); - set_scaled_offsets(&xd->scale_factor[which_mv], mb_row * 16, mb_col * 16); + scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16); - vp9_build_inter_predictor(base_pre, pre_stride, - dst_y, dst_ystride, - &ymv, &xd->scale_factor[which_mv], - 16, 16, which_mv ? weight : 0, &xd->subpix); + vp9_build_inter_predictor(base_pre, pre_stride, dst_y, dst_ystride, + &ymv, scale, 16, 16, + which_mv ? weight : 0, &xd->subpix); } } @@ -920,17 +897,17 @@ void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd, uint8_t *base_pre = which_mv ? xd->second_pre.y_buffer : xd->pre.y_buffer; int pre_stride = which_mv ? xd->second_pre.y_stride : xd->pre.y_stride; int_mv ymv; + struct scale_factors *scale = &xd->scale_factor[which_mv]; + ymv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int; if (clamp_mvs) clamp_mv_to_umv_border(&ymv.as_mv, xd); - set_scaled_offsets(&xd->scale_factor[which_mv], mb_row * 16, mb_col * 16); + scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16); - vp9_build_inter_predictor(base_pre, pre_stride, - dst_y, dst_ystride, - &ymv, &xd->scale_factor[which_mv], - 16, 16, which_mv, &xd->subpix); + vp9_build_inter_predictor(base_pre, pre_stride, dst_y, dst_ystride, + &ymv, scale, 16, 16, which_mv, &xd->subpix); } } #endif @@ -956,6 +933,8 @@ static void build_inter16x16_predictors_mbuv_w(MACROBLOCKD *xd, int_mv _o16x16mv; int_mv _16x16mv; + struct scale_factors *scale = &xd->scale_factor_uv[which_mv]; + _16x16mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int; if (clamp_mvs) @@ -979,18 +958,15 @@ static void build_inter16x16_predictors_mbuv_w(MACROBLOCKD *xd, uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer); vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer); - set_scaled_offsets(&xd->scale_factor_uv[which_mv], - mb_row * 16, mb_col * 16); + scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16); vp9_build_inter_predictor_q4( uptr, pre_stride, dst_u, dst_uvstride, &_16x16mv, &_o16x16mv, - &xd->scale_factor_uv[which_mv], 8, 8, - which_mv ? weight : 0, &xd->subpix); + scale, 8, 8, which_mv ? weight : 0, &xd->subpix); vp9_build_inter_predictor_q4( vptr, pre_stride, dst_v, dst_uvstride, &_16x16mv, &_o16x16mv, - &xd->scale_factor_uv[which_mv], 8, 8, - which_mv ? weight : 0, &xd->subpix); + scale, 8, 8, which_mv ? weight : 0, &xd->subpix); } } @@ -1030,6 +1006,8 @@ void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, int_mv _o16x16mv; int_mv _16x16mv; + struct scale_factors *scale = &xd->scale_factor_uv[which_mv]; + _16x16mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int; if (clamp_mvs) @@ -1053,17 +1031,16 @@ void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer); vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer); - set_scaled_offsets(&xd->scale_factor_uv[which_mv], - mb_row * 16, mb_col * 16); + scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16); vp9_build_inter_predictor_q4( uptr, pre_stride, dst_u, dst_uvstride, &_16x16mv, &_o16x16mv, - &xd->scale_factor_uv[which_mv], 8, 8, + scale, 8, 8, which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix); vp9_build_inter_predictor_q4( vptr, pre_stride, dst_v, dst_uvstride, &_16x16mv, &_o16x16mv, - &xd->scale_factor_uv[which_mv], 8, 8, + scale, 8, 8, which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix); } } diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index 585fcfd6d..068853d1d 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -77,20 +77,27 @@ void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride, int w, int h, int do_avg, const struct subpix_fn_table *subpix); -static int scale_value_x(int val, const struct scale_factors *scale) { +static int scale_value_x_with_scaling(int val, + const struct scale_factors *scale) { return val * scale->x_num / scale->x_den; } -static int scale_value_y(int val, const struct scale_factors *scale) { +static int scale_value_y_with_scaling(int val, + const struct scale_factors *scale) { return val * scale->y_num / scale->y_den; } +static int unscaled_value(int val, const struct scale_factors *scale) { + (void) scale; + return val; +} + static int scaled_buffer_offset(int x_offset, int y_offset, int stride, const struct scale_factors *scale) { - return scale_value_y(y_offset, scale) * stride + - scale_value_x(x_offset, scale); + return scale->scale_value_y(y_offset, scale) * stride + + scale->scale_value_x(x_offset, scale); } static void setup_pred_block(YV12_BUFFER_CONFIG *dst, @@ -112,6 +119,7 @@ static void setup_pred_block(YV12_BUFFER_CONFIG *dst, recon_yoffset = 16 * mb_row * recon_y_stride + 16 * mb_col; recon_uvoffset = 8 * mb_row * recon_uv_stride + 8 * mb_col; } + *dst = *src; dst->y_buffer += recon_yoffset; dst->u_buffer += recon_uvoffset; @@ -128,4 +136,66 @@ static void set_scale_factors(MACROBLOCKD *xd, xd->scale_factor_uv[1] = xd->scale_factor[1]; } +static void set_offsets_with_scaling(struct scale_factors *scale, + int row, int col) { + const int x_q4 = 16 * col; + const int y_q4 = 16 * row; + + scale->x_offset_q4 = (x_q4 * scale->x_num / scale->x_den) & 0xf; + scale->y_offset_q4 = (y_q4 * scale->y_num / scale->y_den) & 0xf; +} + +static void set_offsets_without_scaling(struct scale_factors *scale, + int row, int col) { + scale->x_offset_q4 = 0; + scale->y_offset_q4 = 0; +} + +static int_mv32 motion_vector_q3_to_q4_with_scaling( + const int_mv *src_mv, + const struct scale_factors *scale) { + // returns mv * scale + offset + int_mv32 result; + const int32_t mv_row_q4 = src_mv->as_mv.row << 1; + const int32_t mv_col_q4 = src_mv->as_mv.col << 1; + + /* TODO(jkoleszar): make fixed point, or as a second multiply? */ + result.as_mv.row = mv_row_q4 * scale->y_num / scale->y_den + + scale->y_offset_q4; + result.as_mv.col = mv_col_q4 * scale->x_num / scale->x_den + + scale->x_offset_q4; + return result; +} + +static int_mv32 motion_vector_q3_to_q4_without_scaling( + const int_mv *src_mv, + const struct scale_factors *scale) { + // returns mv * scale + offset + int_mv32 result; + + result.as_mv.row = src_mv->as_mv.row << 1; + result.as_mv.col = src_mv->as_mv.col << 1; + return result; +} + +static int32_t motion_vector_component_q4_with_scaling(int mv_q4, + int num, + int den, + int offset_q4) { + // returns the scaled and offset value of the mv component. + + /* TODO(jkoleszar): make fixed point, or as a second multiply? */ + return mv_q4 * num / den + offset_q4; +} + +static int32_t motion_vector_component_q4_without_scaling(int mv_q4, + int num, + int den, + int offset_q4) { + // returns the scaled and offset value of the mv component. + (void)num; + (void)den; + (void)offset_q4; + return mv_q4; +} #endif // VP9_COMMON_VP9_RECONINTER_H_ diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index 34e95a252..632191183 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -31,7 +31,7 @@ static const unsigned int iscale[64] = { }; static INLINE int iscale_round(int value, int i) { - return ROUND_POWER_OF_TWO(value * iscale[i], 16); + return ROUND_POWER_OF_TWO(value * iscale[i], 16); } static void d27_predictor(uint8_t *ypred_ptr, int y_stride, @@ -70,7 +70,7 @@ static void d27_predictor(uint8_t *ypred_ptr, int y_stride, ypred_ptr[r * y_stride] = ROUND_POWER_OF_TWO(ypred_ptr[(r - 1) * y_stride] + yleft_col[r], 1); for (r = bh - 2; r >= bh / 2; --r) { - int w = c + (bh - 1 - r) * 2; + const int w = c + (bh - 1 - r) * 2; ypred_ptr[r * y_stride + w] = ROUND_POWER_OF_TWO(ypred_ptr[(r - 1) * y_stride + w] + ypred_ptr[r * y_stride + w - 1], 1); @@ -78,7 +78,7 @@ static void d27_predictor(uint8_t *ypred_ptr, int y_stride, for (c = 1; c < bw; c++) { for (r = bh - 1; r >= bh / 2 + c / 2; --r) { - int w = c + (bh - 1 - r) * 2; + const int w = c + (bh - 1 - r) * 2; ypred_ptr[r * y_stride + w] = ROUND_POWER_OF_TWO(ypred_ptr[(r - 1) * y_stride + w] + ypred_ptr[r * y_stride + w - 1], 1); @@ -121,7 +121,7 @@ static void d63_predictor(uint8_t *ypred_ptr, int y_stride, c = bw - 1; ypred_ptr[c] = ROUND_POWER_OF_TWO(ypred_ptr[(c - 1)] + yabove_row[c], 1); for (c = bw - 2; c >= bw / 2; --c) { - int h = r + (bw - 1 - c) * 2; + const int h = r + (bw - 1 - c) * 2; ypred_ptr[h * y_stride + c] = ROUND_POWER_OF_TWO(ypred_ptr[h * y_stride + c - 1] + ypred_ptr[(h - 1) * y_stride + c], 1); @@ -129,7 +129,7 @@ static void d63_predictor(uint8_t *ypred_ptr, int y_stride, for (r = 1; r < bh; r++) { for (c = bw - 1; c >= bw / 2 + r / 2; --c) { - int h = r + (bw - 1 - c) * 2; + const int h = r + (bw - 1 - c) * 2; ypred_ptr[h * y_stride + c] = ROUND_POWER_OF_TWO(ypred_ptr[h * y_stride + c - 1] + ypred_ptr[(h - 1) * y_stride + c], 1); @@ -197,9 +197,8 @@ static void d135_predictor(uint8_t *ypred_ptr, int y_stride, ypred_ptr += y_stride; for (r = 1; r < bh; ++r) { - for (c = 1; c < bw; c++) { + for (c = 1; c < bw; c++) ypred_ptr[c] = ypred_ptr[-y_stride + c - 1]; - } ypred_ptr += y_stride; } } @@ -300,14 +299,13 @@ void vp9_build_intra_predictors(uint8_t *src, int src_stride, int r, c, i; uint8_t yleft_col[64], yabove_data[65], ytop_left; uint8_t *yabove_row = yabove_data + 1; - /* - * 127 127 127 .. 127 127 127 127 127 127 - * 129 A B .. Y Z - * 129 C D .. W X - * 129 E F .. U V - * 129 G H .. S T T T T T - * .. - */ + + // 127 127 127 .. 127 127 127 127 127 127 + // 129 A B .. Y Z + // 129 C D .. W X + // 129 E F .. U V + // 129 G H .. S T T T T T + // .. if (left_available) { for (i = 0; i < bh; i++) @@ -319,42 +317,34 @@ void vp9_build_intra_predictors(uint8_t *src, int src_stride, if (up_available) { uint8_t *yabove_ptr = src - src_stride; vpx_memcpy(yabove_row, yabove_ptr, bw); - if (left_available) { - ytop_left = yabove_ptr[-1]; - } else { - ytop_left = 127; - } + ytop_left = left_available ? yabove_ptr[-1] : 127; } else { vpx_memset(yabove_row, 127, bw); ytop_left = 127; } yabove_row[-1] = ytop_left; - /* for Y */ + switch (mode) { case DC_PRED: { - int expected_dc; int i; + int expected_dc = 128; int average = 0; int count = 0; if (up_available || left_available) { if (up_available) { - for (i = 0; i < bw; i++) { + for (i = 0; i < bw; i++) average += yabove_row[i]; - } count += bw; } if (left_available) { - for (i = 0; i < bh; i++) { + for (i = 0; i < bh; i++) average += yleft_col[i]; - } count += bh; } expected_dc = (average + (count >> 1)) / count; - } else { - expected_dc = 128; } for (r = 0; r < bh; r++) { @@ -377,39 +367,102 @@ void vp9_build_intra_predictors(uint8_t *src, int src_stride, break; case TM_PRED: for (r = 0; r < bh; r++) { - for (c = 0; c < bw; c++) { + for (c = 0; c < bw; c++) ypred_ptr[c] = clip_pixel(yleft_col[r] + yabove_row[c] - ytop_left); - } ypred_ptr += y_stride; } break; +#if CONFIG_SBSEGMENT case D45_PRED: - d45_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col); - break; case D135_PRED: - d135_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col); - break; case D117_PRED: - d117_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col); - break; case D153_PRED: - d153_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col); - break; case D27_PRED: - d27_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col); - break; case D63_PRED: - d63_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col); + if (bw == bh) { + switch (mode) { +#endif + case D45_PRED: + d45_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col); + break; + case D135_PRED: + d135_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col); + break; + case D117_PRED: + d117_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col); + break; + case D153_PRED: + d153_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col); + break; + case D27_PRED: + d27_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col); + break; + case D63_PRED: + d63_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col); + break; +#if CONFIG_SBSEGMENT + default: + assert(0); + } + } else if (bw > bh) { + uint8_t pred[64*64]; + memset(yleft_col + bh, yleft_col[bh - 1], bw - bh); + switch (mode) { + case D45_PRED: + d45_predictor(pred, 64, bw, bw, yabove_row, yleft_col); + break; + case D135_PRED: + d135_predictor(pred, 64, bw, bw, yabove_row, yleft_col); + break; + case D117_PRED: + d117_predictor(pred, 64, bw, bw, yabove_row, yleft_col); + break; + case D153_PRED: + d153_predictor(pred, 64, bw, bw, yabove_row, yleft_col); + break; + case D27_PRED: + d27_predictor(pred, 64, bw, bw, yabove_row, yleft_col); + break; + case D63_PRED: + d63_predictor(pred, 64, bw, bw, yabove_row, yleft_col); + break; + default: + assert(0); + } + for (i = 0; i < bh; i++) + memcpy(ypred_ptr + y_stride * i, pred + i * 64, bw); + } else { + uint8_t pred[64 * 64]; + memset(yabove_row + bw, yabove_row[bw - 1], bh - bw); + switch (mode) { + case D45_PRED: + d45_predictor(pred, 64, bh, bh, yabove_row, yleft_col); + break; + case D135_PRED: + d135_predictor(pred, 64, bh, bh, yabove_row, yleft_col); + break; + case D117_PRED: + d117_predictor(pred, 64, bh, bh, yabove_row, yleft_col); + break; + case D153_PRED: + d153_predictor(pred, 64, bh, bh, yabove_row, yleft_col); + break; + case D27_PRED: + d27_predictor(pred, 64, bh, bh, yabove_row, yleft_col); + break; + case D63_PRED: + d63_predictor(pred, 64, bh, bh, yabove_row, yleft_col); + break; + default: + assert(0); + } + for (i = 0; i < bh; i++) + memcpy(ypred_ptr + y_stride * i, pred + i * 64, bw); + } break; - case I8X8_PRED: - case I4X4_PRED: - case NEARESTMV: - case NEARMV: - case ZEROMV: - case NEWMV: - case SPLITMV: - case MB_MODE_COUNT: +#endif + default: break; } } @@ -746,7 +799,7 @@ void vp9_intra8x8_predict(MACROBLOCKD *xd, const int block4x4_idx = (b - xd->block); const int block_idx = (block4x4_idx >> 2) | !!(block4x4_idx & 2); const int have_top = (block_idx >> 1) || xd->up_available; - const int have_left = (block_idx & 1) || xd->left_available; + const int have_left = (block_idx & 1) || xd->left_available; const int have_right = !(block_idx & 1) || xd->right_available; vp9_build_intra_predictors(*(b->base_dst) + b->dst, @@ -761,7 +814,7 @@ void vp9_intra_uv4x4_predict(MACROBLOCKD *xd, uint8_t *predictor, int pre_stride) { const int block_idx = (b - xd->block) & 3; const int have_top = (block_idx >> 1) || xd->up_available; - const int have_left = (block_idx & 1) || xd->left_available; + const int have_left = (block_idx & 1) || xd->left_available; const int have_right = !(block_idx & 1) || xd->right_available; vp9_build_intra_predictors(*(b->base_dst) + b->dst, diff --git a/vp9/common/vp9_reconintra4x4.c b/vp9/common/vp9_reconintra4x4.c index 08bfd1d8e..a0700010b 100644 --- a/vp9/common/vp9_reconintra4x4.c +++ b/vp9/common/vp9_reconintra4x4.c @@ -57,17 +57,17 @@ B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, */ switch (imin) { case 1: - return B_HD_PRED; + return B_D153_PRED; case 2: - return B_RD_PRED; + return B_D135_PRED; case 3: - return B_VR_PRED; + return B_D117_PRED; case 5: - return B_VL_PRED; + return B_D63_PRED; case 6: - return B_LD_PRED; + return B_D45_PRED; case 7: - return B_HU_PRED; + return B_D27_PRED; default: assert(0); } @@ -93,13 +93,13 @@ B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, */ switch (imin) { case 1: - return B_HD_PRED; + return B_D153_PRED; case 3: - return B_VR_PRED; + return B_D117_PRED; case 5: - return B_VL_PRED; + return B_D63_PRED; case 7: - return B_HU_PRED; + return B_D27_PRED; default: assert(0); } @@ -126,21 +126,21 @@ B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, switch (imin) { case 0: - return B_HE_PRED; + return B_H_PRED; case 1: - return B_HD_PRED; + return B_D153_PRED; case 2: - return B_RD_PRED; + return B_D135_PRED; case 3: - return B_VR_PRED; + return B_D117_PRED; case 4: - return B_VE_PRED; + return B_V_PRED; case 5: - return B_VL_PRED; + return B_D63_PRED; case 6: - return B_LD_PRED; + return B_D45_PRED; case 7: - return B_HU_PRED; + return B_D27_PRED; default: assert(0); } @@ -195,12 +195,7 @@ void vp9_intra4x4_predict(MACROBLOCKD *xd, if (have_top) { uint8_t *above_ptr = *(x->base_dst) + x->dst - x->dst_stride; - - if (have_left) { - top_left = above_ptr[-1]; - } else { - top_left = 127; - } + top_left = have_left ? above_ptr[-1] : 127; above[0] = above_ptr[0]; above[1] = above_ptr[1]; @@ -270,13 +265,11 @@ void vp9_intra4x4_predict(MACROBLOCKD *xd, expected_dc += left[i]; } - expected_dc = (expected_dc + 4) >> 3; + expected_dc = ROUND_POWER_OF_TWO(expected_dc, 3); for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { + for (c = 0; c < 4; c++) predictor[c] = expected_dc; - } - predictor += ps; } } @@ -284,210 +277,160 @@ void vp9_intra4x4_predict(MACROBLOCKD *xd, case B_TM_PRED: { /* prediction similar to true_motion prediction */ for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { + for (c = 0; c < 4; c++) predictor[c] = clip_pixel(above[c] - top_left + left[r]); - } - predictor += ps; } } break; - - case B_VE_PRED: { - unsigned int ap[4]; - - ap[0] = above[0]; - ap[1] = above[1]; - ap[2] = above[2]; - ap[3] = above[3]; - + case B_V_PRED: for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - predictor[c] = ap[c]; - } - + for (c = 0; c < 4; c++) + predictor[c] = above[c]; predictor += ps; } - } - break; - - case B_HE_PRED: { - unsigned int lp[4]; - - lp[0] = left[0]; - lp[1] = left[1]; - lp[2] = left[2]; - lp[3] = left[3]; - + break; + case B_H_PRED: for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - predictor[c] = lp[r]; - } - + for (c = 0; c < 4; c++) + predictor[c] = left[r]; predictor += ps; } - } - break; - case B_LD_PRED: { - uint8_t *ptr = above; + break; + case B_D45_PRED: { + uint8_t *p = above; - predictor[0 * ps + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2; + predictor[0 * ps + 0] = ROUND_POWER_OF_TWO(p[0] + p[1] * 2 + p[2], 2); predictor[0 * ps + 1] = - predictor[1 * ps + 0] = (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2; + predictor[1 * ps + 0] = ROUND_POWER_OF_TWO(p[1] + p[2] * 2 + p[3], 2); predictor[0 * ps + 2] = predictor[1 * ps + 1] = - predictor[2 * ps + 0] = (ptr[2] + ptr[3] * 2 + ptr[4] + 2) >> 2; + predictor[2 * ps + 0] = ROUND_POWER_OF_TWO(p[2] + p[3] * 2 + p[4], 2); predictor[0 * ps + 3] = predictor[1 * ps + 2] = predictor[2 * ps + 1] = - predictor[3 * ps + 0] = (ptr[3] + ptr[4] * 2 + ptr[5] + 2) >> 2; + predictor[3 * ps + 0] = + ROUND_POWER_OF_TWO(p[3] + p[4] * 2 + p[5], 2); predictor[1 * ps + 3] = predictor[2 * ps + 2] = - predictor[3 * ps + 1] = (ptr[4] + ptr[5] * 2 + ptr[6] + 2) >> 2; + predictor[3 * ps + 1] = ROUND_POWER_OF_TWO(p[4] + p[5] * 2 + p[6], 2); predictor[2 * ps + 3] = - predictor[3 * ps + 2] = (ptr[5] + ptr[6] * 2 + ptr[7] + 2) >> 2; - predictor[3 * ps + 3] = (ptr[6] + ptr[7] * 2 + ptr[7] + 2) >> 2; + predictor[3 * ps + 2] = ROUND_POWER_OF_TWO(p[5] + p[6] * 2 + p[7], 2); + predictor[3 * ps + 3] = ROUND_POWER_OF_TWO(p[6] + p[7] * 2 + p[7], 2); } break; - case B_RD_PRED: { - uint8_t pp[9]; - - pp[0] = left[3]; - pp[1] = left[2]; - pp[2] = left[1]; - pp[3] = left[0]; - pp[4] = top_left; - pp[5] = above[0]; - pp[6] = above[1]; - pp[7] = above[2]; - pp[8] = above[3]; + case B_D135_PRED: { + uint8_t p[9] = { left[3], left[2], left[1], left[0], + top_left, + above[0], above[1], above[2], above[3] }; - predictor[3 * ps + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; + predictor[3 * ps + 0] = ROUND_POWER_OF_TWO(p[0] + p[1] * 2 + p[2], 2); predictor[3 * ps + 1] = - predictor[2 * ps + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; + predictor[2 * ps + 0] = ROUND_POWER_OF_TWO(p[1] + p[2] * 2 + p[3], 2); predictor[3 * ps + 2] = predictor[2 * ps + 1] = - predictor[1 * ps + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; + predictor[1 * ps + 0] = ROUND_POWER_OF_TWO(p[2] + p[3] * 2 + p[4], 2); predictor[3 * ps + 3] = predictor[2 * ps + 2] = predictor[1 * ps + 1] = - predictor[0 * ps + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; + predictor[0 * ps + 0] = + ROUND_POWER_OF_TWO(p[3] + p[4] * 2 + p[5], 2); predictor[2 * ps + 3] = predictor[1 * ps + 2] = - predictor[0 * ps + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; + predictor[0 * ps + 1] = ROUND_POWER_OF_TWO(p[4] + p[5] * 2 + p[6], 2); predictor[1 * ps + 3] = - predictor[0 * ps + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; - predictor[0 * ps + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2; + predictor[0 * ps + 2] = ROUND_POWER_OF_TWO(p[5] + p[6] * 2 + p[7], 2); + predictor[0 * ps + 3] = ROUND_POWER_OF_TWO(p[6] + p[7] * 2 + p[8], 2); } break; - case B_VR_PRED: { - uint8_t pp[9]; + case B_D117_PRED: { + uint8_t p[9] = { left[3], left[2], left[1], left[0], + top_left, + above[0], above[1], above[2], above[3] }; - pp[0] = left[3]; - pp[1] = left[2]; - pp[2] = left[1]; - pp[3] = left[0]; - pp[4] = top_left; - pp[5] = above[0]; - pp[6] = above[1]; - pp[7] = above[2]; - pp[8] = above[3]; - - predictor[3 * ps + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - predictor[2 * ps + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; + predictor[3 * ps + 0] = ROUND_POWER_OF_TWO(p[1] + p[2] * 2 + p[3], 2); + predictor[2 * ps + 0] = ROUND_POWER_OF_TWO(p[2] + p[3] * 2 + p[4], 2); predictor[3 * ps + 1] = - predictor[1 * ps + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; + predictor[1 * ps + 0] = ROUND_POWER_OF_TWO(p[3] + p[4] * 2 + p[5], 2); predictor[2 * ps + 1] = - predictor[0 * ps + 0] = (pp[4] + pp[5] + 1) >> 1; + predictor[0 * ps + 0] = ROUND_POWER_OF_TWO(p[4] + p[5], 1); predictor[3 * ps + 2] = - predictor[1 * ps + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; + predictor[1 * ps + 1] = ROUND_POWER_OF_TWO(p[4] + p[5] * 2 + p[6], 2); predictor[2 * ps + 2] = - predictor[0 * ps + 1] = (pp[5] + pp[6] + 1) >> 1; + predictor[0 * ps + 1] = ROUND_POWER_OF_TWO(p[5] + p[6], 1); predictor[3 * ps + 3] = - predictor[1 * ps + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; + predictor[1 * ps + 2] = ROUND_POWER_OF_TWO(p[5] + p[6] * 2 + p[7], 2); predictor[2 * ps + 3] = - predictor[0 * ps + 2] = (pp[6] + pp[7] + 1) >> 1; - predictor[1 * ps + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2; - predictor[0 * ps + 3] = (pp[7] + pp[8] + 1) >> 1; + predictor[0 * ps + 2] = ROUND_POWER_OF_TWO(p[6] + p[7], 1); + predictor[1 * ps + 3] = ROUND_POWER_OF_TWO(p[6] + p[7] * 2 + p[8], 2); + predictor[0 * ps + 3] = ROUND_POWER_OF_TWO(p[7] + p[8], 1); } break; - case B_VL_PRED: { - uint8_t *pp = above; + case B_D63_PRED: { + uint8_t *p = above; - predictor[0 * ps + 0] = (pp[0] + pp[1] + 1) >> 1; - predictor[1 * ps + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; + predictor[0 * ps + 0] = ROUND_POWER_OF_TWO(p[0] + p[1], 1); + predictor[1 * ps + 0] = ROUND_POWER_OF_TWO(p[0] + p[1] * 2 + p[2], 2); predictor[2 * ps + 0] = - predictor[0 * ps + 1] = (pp[1] + pp[2] + 1) >> 1; + predictor[0 * ps + 1] = ROUND_POWER_OF_TWO(p[1] + p[2], 1); predictor[1 * ps + 1] = - predictor[3 * ps + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; + predictor[3 * ps + 0] = ROUND_POWER_OF_TWO(p[1] + p[2] * 2 + p[3], 2); predictor[2 * ps + 1] = - predictor[0 * ps + 2] = (pp[2] + pp[3] + 1) >> 1; + predictor[0 * ps + 2] = ROUND_POWER_OF_TWO(p[2] + p[3], 1); predictor[3 * ps + 1] = - predictor[1 * ps + 2] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; + predictor[1 * ps + 2] = ROUND_POWER_OF_TWO(p[2] + p[3] * 2 + p[4], 2); predictor[0 * ps + 3] = - predictor[2 * ps + 2] = (pp[3] + pp[4] + 1) >> 1; + predictor[2 * ps + 2] = ROUND_POWER_OF_TWO(p[3] + p[4], 1); predictor[1 * ps + 3] = - predictor[3 * ps + 2] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; - predictor[2 * ps + 3] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; - predictor[3 * ps + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; + predictor[3 * ps + 2] = ROUND_POWER_OF_TWO(p[3] + p[4] * 2 + p[5], 2); + predictor[2 * ps + 3] = ROUND_POWER_OF_TWO(p[4] + p[5] * 2 + p[6], 2); + predictor[3 * ps + 3] = ROUND_POWER_OF_TWO(p[5] + p[6] * 2 + p[7], 2); } break; + case B_D153_PRED: { + uint8_t p[9] = { left[3], left[2], left[1], left[0], + top_left, + above[0], above[1], above[2], above[3] }; - case B_HD_PRED: { - uint8_t pp[9]; - - pp[0] = left[3]; - pp[1] = left[2]; - pp[2] = left[1]; - pp[3] = left[0]; - pp[4] = top_left; - pp[5] = above[0]; - pp[6] = above[1]; - pp[7] = above[2]; - pp[8] = above[3]; - - - predictor[3 * ps + 0] = (pp[0] + pp[1] + 1) >> 1; - predictor[3 * ps + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; + predictor[3 * ps + 0] = ROUND_POWER_OF_TWO(p[0] + p[1], 1); + predictor[3 * ps + 1] = ROUND_POWER_OF_TWO(p[0] + p[1] * 2 + p[2], 2); predictor[2 * ps + 0] = - predictor[3 * ps + 2] = (pp[1] + pp[2] + 1) >> 1; + predictor[3 * ps + 2] = ROUND_POWER_OF_TWO(p[1] + p[2], 1); predictor[2 * ps + 1] = - predictor[3 * ps + 3] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; + predictor[3 * ps + 3] = ROUND_POWER_OF_TWO(p[1] + p[2] * 2 + p[3], 2); predictor[2 * ps + 2] = - predictor[1 * ps + 0] = (pp[2] + pp[3] + 1) >> 1; + predictor[1 * ps + 0] = ROUND_POWER_OF_TWO(p[2] + p[3], 1); predictor[2 * ps + 3] = - predictor[1 * ps + 1] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; + predictor[1 * ps + 1] = ROUND_POWER_OF_TWO(p[2] + p[3] * 2 + p[4], 2); predictor[1 * ps + 2] = - predictor[0 * ps + 0] = (pp[3] + pp[4] + 1) >> 1; + predictor[0 * ps + 0] = ROUND_POWER_OF_TWO(p[3] + p[4], 1); predictor[1 * ps + 3] = - predictor[0 * ps + 1] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; - predictor[0 * ps + 2] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; - predictor[0 * ps + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; + predictor[0 * ps + 1] = ROUND_POWER_OF_TWO(p[3] + p[4] * 2 + p[5], 2); + predictor[0 * ps + 2] = ROUND_POWER_OF_TWO(p[4] + p[5] * 2 + p[6], 2); + predictor[0 * ps + 3] = ROUND_POWER_OF_TWO(p[5] + p[6] * 2 + p[7], 2); } break; - - - case B_HU_PRED: { - uint8_t *pp = left; - predictor[0 * ps + 0] = (pp[0] + pp[1] + 1) >> 1; - predictor[0 * ps + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; + case B_D27_PRED: { + uint8_t *p = left; + predictor[0 * ps + 0] = ROUND_POWER_OF_TWO(p[0] + p[1], 1); + predictor[0 * ps + 1] = ROUND_POWER_OF_TWO(p[0] + p[1] * 2 + p[2], 2); predictor[0 * ps + 2] = - predictor[1 * ps + 0] = (pp[1] + pp[2] + 1) >> 1; + predictor[1 * ps + 0] = ROUND_POWER_OF_TWO(p[1] + p[2], 1); predictor[0 * ps + 3] = - predictor[1 * ps + 1] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; + predictor[1 * ps + 1] = ROUND_POWER_OF_TWO(p[1] + p[2] * 2 + p[3], 2); predictor[1 * ps + 2] = - predictor[2 * ps + 0] = (pp[2] + pp[3] + 1) >> 1; + predictor[2 * ps + 0] = ROUND_POWER_OF_TWO(p[2] + p[3], 1); predictor[1 * ps + 3] = - predictor[2 * ps + 1] = (pp[2] + pp[3] * 2 + pp[3] + 2) >> 2; + predictor[2 * ps + 1] = ROUND_POWER_OF_TWO(p[2] + p[3] * 2 + p[3], 2); predictor[2 * ps + 2] = predictor[2 * ps + 3] = predictor[3 * ps + 0] = predictor[3 * ps + 1] = predictor[3 * ps + 2] = - predictor[3 * ps + 3] = pp[3]; + predictor[3 * ps + 3] = p[3]; } break; diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 8ce9574c2..6db44a0a0 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -27,25 +27,25 @@ forward_decls vp9_common_forward_decls # # Dequant # -prototype void vp9_dequant_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, int pre_stride, uint8_t *dst, int stride, struct macroblockd *xd" +prototype void vp9_dequant_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *dst, int stride, struct macroblockd *xd" specialize vp9_dequant_idct_add_y_block_8x8 -prototype void vp9_dequant_idct_add_16x16 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob" +prototype void vp9_dequant_idct_add_16x16 "int16_t *input, const int16_t *dq, uint8_t *dest, int stride, int eob" specialize vp9_dequant_idct_add_16x16 -prototype void vp9_dequant_idct_add_8x8 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob" +prototype void vp9_dequant_idct_add_8x8 "int16_t *input, const int16_t *dq, uint8_t *dest, int stride, int eob" specialize vp9_dequant_idct_add_8x8 -prototype void vp9_dequant_idct_add "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob" +prototype void vp9_dequant_idct_add "int16_t *input, const int16_t *dq, uint8_t *dest, int stride, int eob" specialize vp9_dequant_idct_add -prototype void vp9_dequant_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, int pre_stride, uint8_t *dst, int stride, struct macroblockd *xd" +prototype void vp9_dequant_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *dst, int stride, struct macroblockd *xd" specialize vp9_dequant_idct_add_y_block -prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, int pre_stride, uint8_t *dst, int stride, uint16_t *eobs" +prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *dst, int stride, uint16_t *eobs" specialize vp9_dequant_idct_add_uv_block -prototype void vp9_dequant_idct_add_32x32 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int pitch, int stride, int eob" +prototype void vp9_dequant_idct_add_32x32 "int16_t *q, const int16_t *dq, uint8_t *dst, int stride, int eob" specialize vp9_dequant_idct_add_32x32 # @@ -108,25 +108,25 @@ prototype void vp9_intra_uv4x4_predict "struct macroblockd *xd, struct blockd *x specialize vp9_intra_uv4x4_predict; if [ "$CONFIG_VP9_DECODER" = "yes" ]; then -prototype void vp9_add_residual_4x4 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride" +prototype void vp9_add_residual_4x4 "const int16_t *diff, uint8_t *dest, int stride" specialize vp9_add_residual_4x4 sse2 -prototype void vp9_add_residual_8x8 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride" +prototype void vp9_add_residual_8x8 "const int16_t *diff, uint8_t *dest, int stride" specialize vp9_add_residual_8x8 sse2 -prototype void vp9_add_residual_16x16 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride" +prototype void vp9_add_residual_16x16 "const int16_t *diff, uint8_t *dest, int stride" specialize vp9_add_residual_16x16 sse2 -prototype void vp9_add_residual_32x32 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride" +prototype void vp9_add_residual_32x32 "const int16_t *diff, uint8_t *dest, int stride" specialize vp9_add_residual_32x32 sse2 -prototype void vp9_add_constant_residual_8x8 "const int16_t diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride" +prototype void vp9_add_constant_residual_8x8 "const int16_t diff, uint8_t *dest, int stride" specialize vp9_add_constant_residual_8x8 sse2 -prototype void vp9_add_constant_residual_16x16 "const int16_t diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride" +prototype void vp9_add_constant_residual_16x16 "const int16_t diff, uint8_t *dest, int stride" specialize vp9_add_constant_residual_16x16 sse2 -prototype void vp9_add_constant_residual_32x32 "const int16_t diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride" +prototype void vp9_add_constant_residual_32x32 "const int16_t diff, uint8_t *dest, int stride" specialize vp9_add_constant_residual_32x32 sse2 fi @@ -376,6 +376,22 @@ if [ "$CONFIG_VP9_ENCODER" = "yes" ]; then # variance [ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 +if [ "$CONFIG_SBSEGMENT" = "yes" ]; then + +prototype unsigned int vp9_variance32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp9_variance32x16 + +prototype unsigned int vp9_variance16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp9_variance16x32 + +prototype unsigned int vp9_variance64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp9_variance64x32 + +prototype unsigned int vp9_variance32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp9_variance32x64 + +fi + prototype unsigned int vp9_variance32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" specialize vp9_variance32x32 @@ -410,6 +426,22 @@ vp9_variance4x4_mmx=vp9_variance4x4_mmx prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" specialize vp9_sub_pixel_variance64x64 sse2 +if [ "$CONFIG_SBSEGMENT" = "yes" ]; then + +prototype unsigned int vp9_sub_pixel_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp9_sub_pixel_variance32x64 + +prototype unsigned int vp9_sub_pixel_variance64x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp9_sub_pixel_variance64x32 + +prototype unsigned int vp9_sub_pixel_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp9_sub_pixel_variance32x16 + +prototype unsigned int vp9_sub_pixel_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp9_sub_pixel_variance16x32 + +fi + prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" specialize vp9_sub_pixel_variance32x32 sse2 @@ -436,6 +468,22 @@ vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt prototype unsigned int vp9_sad64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" specialize vp9_sad64x64 sse2 +if [ "$CONFIG_SBSEGMENT" = "yes" ]; then + +prototype unsigned int vp9_sad32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" +specialize vp9_sad32x64 sse2 + +prototype unsigned int vp9_sad64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" +specialize vp9_sad64x32 sse2 + +prototype unsigned int vp9_sad32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" +specialize vp9_sad32x16 sse2 + +prototype unsigned int vp9_sad16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" +specialize vp9_sad16x32 sse2 + +fi + prototype unsigned int vp9_sad32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" specialize vp9_sad32x32 sse2 @@ -529,6 +577,22 @@ specialize vp9_sad4x4x8 sse4 prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" specialize vp9_sad64x64x4d sse2 +if [ "$CONFIG_SBSEGMENT" = "yes" ]; then + +prototype void vp9_sad32x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" +specialize vp9_sad32x64x4d sse2 + +prototype void vp9_sad64x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" +specialize vp9_sad64x32x4d sse2 + +prototype void vp9_sad32x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" +specialize vp9_sad32x16x4d sse2 + +prototype void vp9_sad16x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" +specialize vp9_sad16x32x4d sse2 + +fi + prototype void vp9_sad32x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" specialize vp9_sad32x32x4d sse2 @@ -606,10 +670,10 @@ prototype void vp9_short_fdct8x8 "int16_t *InputData, int16_t *OutputData, int p specialize vp9_short_fdct8x8 sse2 prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int pitch" -specialize vp9_short_fdct4x4 +specialize vp9_short_fdct4x4 sse2 prototype void vp9_short_fdct8x4 "int16_t *InputData, int16_t *OutputData, int pitch" -specialize vp9_short_fdct8x4 +specialize vp9_short_fdct8x4 sse2 prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int pitch" specialize vp9_short_fdct32x32 diff --git a/vp9/common/vp9_treecoder.c b/vp9/common/vp9_treecoder.c index 6e2597954..3f049b5b3 100644 --- a/vp9/common/vp9_treecoder.c +++ b/vp9/common/vp9_treecoder.c @@ -18,32 +18,27 @@ #include "vp9/common/vp9_treecoder.h" -static void tree2tok( - struct vp9_token_struct *const p, - vp9_tree t, - int i, - int v, - int L -) { +static void tree2tok(struct vp9_token *const p, vp9_tree t, + int i, int v, int l) { v += v; - ++L; + ++l; do { const vp9_tree_index j = t[i++]; if (j <= 0) { p[-j].value = v; - p[-j].Len = L; + p[-j].len = l; } else - tree2tok(p, t, j, v, L); + tree2tok(p, t, j, v, l); } while (++v & 1); } -void vp9_tokens_from_tree(struct vp9_token_struct *p, vp9_tree t) { +void vp9_tokens_from_tree(struct vp9_token *p, vp9_tree t) { tree2tok(p, t, 0, 0, 0); } -void vp9_tokens_from_tree_offset(struct vp9_token_struct *p, vp9_tree t, +void vp9_tokens_from_tree_offset(struct vp9_token *p, vp9_tree t, int offset) { tree2tok(p - offset, t, 0, 0, 0); } diff --git a/vp9/common/vp9_treecoder.h b/vp9/common/vp9_treecoder.h index 9297d5280..ebcd4116f 100644 --- a/vp9/common/vp9_treecoder.h +++ b/vp9/common/vp9_treecoder.h @@ -13,6 +13,7 @@ #include "./vpx_config.h" #include "vpx/vpx_integer.h" +#include "vp9/common/vp9_common.h" typedef uint8_t vp9_prob; @@ -31,16 +32,15 @@ typedef int8_t vp9_tree_index; typedef const vp9_tree_index vp9_tree[], *vp9_tree_p; -typedef const struct vp9_token_struct { +struct vp9_token { int value; - int Len; -} vp9_token; + int len; +}; /* Construct encoding array from tree. */ -void vp9_tokens_from_tree(struct vp9_token_struct *, vp9_tree); -void vp9_tokens_from_tree_offset(struct vp9_token_struct *, vp9_tree, - int offset); +void vp9_tokens_from_tree(struct vp9_token*, vp9_tree); +void vp9_tokens_from_tree_offset(struct vp9_token*, vp9_tree, int offset); /* Convert array of token occurrence counts into a table of probabilities for the associated binary encoding tree. Also writes count of branches @@ -76,7 +76,7 @@ static INLINE vp9_prob get_binary_prob(int n0, int n1) { /* this function assumes prob1 and prob2 are already within [1,255] range */ static INLINE vp9_prob weighted_prob(int prob1, int prob2, int factor) { - return (prob1 * (256 - factor) + prob2 * factor + 128) >> 8; + return ROUND_POWER_OF_TWO(prob1 * (256 - factor) + prob2 * factor, 8); } #endif // VP9_COMMON_VP9_TREECODER_H_ diff --git a/vp9/decoder/vp9_dboolhuff.c b/vp9/decoder/vp9_dboolhuff.c index 7e3b4646b..dcd591642 100644 --- a/vp9/decoder/vp9_dboolhuff.c +++ b/vp9/decoder/vp9_dboolhuff.c @@ -13,34 +13,29 @@ #include "vp9/decoder/vp9_dboolhuff.h" -int vp9_start_decode(BOOL_DECODER *br, - const unsigned char *source, - unsigned int source_sz) { - br->user_buffer_end = source + source_sz; - br->user_buffer = source; +int vp9_start_decode(BOOL_DECODER *br, const uint8_t *buffer, size_t size) { + br->buffer_end = buffer + size; + br->buffer = buffer; br->value = 0; br->count = -8; br->range = 255; - if (source_sz && !source) + if (size && !buffer) return 1; - /* Populate the buffer */ - vp9_bool_decoder_fill(br); - + vp9_reader_fill(br); return 0; } - -void vp9_bool_decoder_fill(BOOL_DECODER *br) { - const unsigned char *bufptr = br->user_buffer; - const unsigned char *bufend = br->user_buffer_end; +void vp9_reader_fill(BOOL_DECODER *br) { + const uint8_t *const buffer_end = br->buffer_end; + const uint8_t *buffer = br->buffer; VP9_BD_VALUE value = br->value; int count = br->count; int shift = VP9_BD_VALUE_SIZE - 8 - (count + 8); int loop_end = 0; - int bits_left = (int)((bufend - bufptr)*CHAR_BIT); - int x = shift + CHAR_BIT - bits_left; + const int bits_left = (int)((buffer_end - buffer)*CHAR_BIT); + const int x = shift + CHAR_BIT - bits_left; if (x >= 0) { count += VP9_LOTS_OF_BITS; @@ -50,18 +45,18 @@ void vp9_bool_decoder_fill(BOOL_DECODER *br) { if (x < 0 || bits_left) { while (shift >= loop_end) { count += CHAR_BIT; - value |= (VP9_BD_VALUE)*bufptr++ << shift; + value |= (VP9_BD_VALUE)*buffer++ << shift; shift -= CHAR_BIT; } } - br->user_buffer = bufptr; + br->buffer = buffer; br->value = value; br->count = count; } -static int get_unsigned_bits(unsigned num_values) { +static int get_unsigned_bits(unsigned int num_values) { int cat = 0; if (num_values <= 1) return 0; @@ -84,30 +79,29 @@ int vp9_inv_recenter_nonneg(int v, int m) { int vp9_decode_uniform(BOOL_DECODER *br, int n) { int v; - int l = get_unsigned_bits(n); - int m = (1 << l) - n; - if (!l) return 0; - v = decode_value(br, l - 1); - if (v < m) - return v; - else - return (v << 1) - m + decode_value(br, 1); + const int l = get_unsigned_bits(n); + const int m = (1 << l) - n; + if (!l) + return 0; + + v = vp9_read_literal(br, l - 1); + return v < m ? v : (v << 1) - m + vp9_read_bit(br); } int vp9_decode_term_subexp(BOOL_DECODER *br, int k, int num_syms) { int i = 0, mk = 0, word; while (1) { - int b = (i ? k + i - 1 : k); - int a = (1 << b); + const int b = i ? k + i - 1 : k; + const int a = 1 << b; if (num_syms <= mk + 3 * a) { word = vp9_decode_uniform(br, num_syms - mk) + mk; break; } else { - if (decode_value(br, 1)) { + if (vp9_read_bit(br)) { i++; mk += a; } else { - word = decode_value(br, b) + mk; + word = vp9_read_literal(br, b) + mk; break; } } @@ -119,10 +113,8 @@ int vp9_decode_unsigned_max(BOOL_DECODER *br, int max) { int data = 0, bit = 0, lmax = max; while (lmax) { - data |= decode_bool(br, 128) << bit++; + data |= vp9_read_bit(br) << bit++; lmax >>= 1; } - if (data > max) - return max; - return data; + return data > max ? max : data; } diff --git a/vp9/decoder/vp9_dboolhuff.h b/vp9/decoder/vp9_dboolhuff.h index 02ae1d3c8..10b7a1af3 100644 --- a/vp9/decoder/vp9_dboolhuff.h +++ b/vp9/decoder/vp9_dboolhuff.h @@ -21,32 +21,40 @@ typedef size_t VP9_BD_VALUE; #define VP9_BD_VALUE_SIZE ((int)sizeof(VP9_BD_VALUE)*CHAR_BIT) -/*This is meant to be a large, positive constant that can still be efficiently - loaded as an immediate (on platforms like ARM, for example). - Even relatively modest values like 100 would work fine.*/ -#define VP9_LOTS_OF_BITS (0x40000000) + +// This is meant to be a large, positive constant that can still be efficiently +// loaded as an immediate (on platforms like ARM, for example). +// Even relatively modest values like 100 would work fine. +#define VP9_LOTS_OF_BITS 0x40000000 typedef struct { - const unsigned char *user_buffer_end; - const unsigned char *user_buffer; - VP9_BD_VALUE value; - int count; - unsigned int range; + const uint8_t *buffer_end; + const uint8_t *buffer; + VP9_BD_VALUE value; + int count; + unsigned int range; } BOOL_DECODER; DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]); -int vp9_start_decode(BOOL_DECODER *br, - const unsigned char *source, - unsigned int source_sz); +int vp9_start_decode(BOOL_DECODER *br, const uint8_t *buffer, size_t size); -void vp9_bool_decoder_fill(BOOL_DECODER *br); +void vp9_reader_fill(BOOL_DECODER *br); int vp9_decode_uniform(BOOL_DECODER *br, int n); int vp9_decode_term_subexp(BOOL_DECODER *br, int k, int num_syms); int vp9_inv_recenter_nonneg(int v, int m); -static int decode_bool(BOOL_DECODER *br, int probability) { +static INLINE const uint8_t *vp9_reader_find_end(BOOL_DECODER *br) { + // Find the end of the coded buffer + while (br->count > CHAR_BIT && br->count < VP9_BD_VALUE_SIZE) { + br->count -= CHAR_BIT; + br->buffer--; + } + return br->buffer; +} + +static int vp9_read(BOOL_DECODER *br, int probability) { unsigned int bit = 0; VP9_BD_VALUE value; VP9_BD_VALUE bigsplit; @@ -55,7 +63,7 @@ static int decode_bool(BOOL_DECODER *br, int probability) { unsigned int split = 1 + (((br->range - 1) * probability) >> 8); if (br->count < 0) - vp9_bool_decoder_fill(br); + vp9_reader_fill(br); value = br->value; count = br->count; @@ -83,12 +91,15 @@ static int decode_bool(BOOL_DECODER *br, int probability) { return bit; } -static int decode_value(BOOL_DECODER *br, int bits) { - int z = 0; - int bit; +static int vp9_read_bit(BOOL_DECODER *r) { + return vp9_read(r, 128); // vp9_prob_half +} + +static int vp9_read_literal(BOOL_DECODER *br, int bits) { + int z = 0, bit; for (bit = bits - 1; bit >= 0; bit--) { - z |= decode_bool(br, 0x80) << bit; + z |= vp9_read_bit(br) << bit; } return z; diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 60ef89118..aaa9b2ef0 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -35,8 +35,8 @@ int dec_mvcount = 0; extern int dec_debug; #endif -static B_PREDICTION_MODE read_bmode(vp9_reader *bc, const vp9_prob *p) { - B_PREDICTION_MODE m = treed_read(bc, vp9_bmode_tree, p); +static B_PREDICTION_MODE read_bmode(vp9_reader *r, const vp9_prob *p) { + B_PREDICTION_MODE m = treed_read(r, vp9_bmode_tree, p); #if CONFIG_NEWBINTRAMODES if (m == B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS) m = B_CONTEXT_PRED; @@ -45,32 +45,32 @@ static B_PREDICTION_MODE read_bmode(vp9_reader *bc, const vp9_prob *p) { return m; } -static B_PREDICTION_MODE read_kf_bmode(vp9_reader *bc, const vp9_prob *p) { - return (B_PREDICTION_MODE)treed_read(bc, vp9_kf_bmode_tree, p); +static B_PREDICTION_MODE read_kf_bmode(vp9_reader *r, const vp9_prob *p) { + return (B_PREDICTION_MODE)treed_read(r, vp9_kf_bmode_tree, p); } -static MB_PREDICTION_MODE read_ymode(vp9_reader *bc, const vp9_prob *p) { - return (MB_PREDICTION_MODE)treed_read(bc, vp9_ymode_tree, p); +static MB_PREDICTION_MODE read_ymode(vp9_reader *r, const vp9_prob *p) { + return (MB_PREDICTION_MODE)treed_read(r, vp9_ymode_tree, p); } -static MB_PREDICTION_MODE read_sb_ymode(vp9_reader *bc, const vp9_prob *p) { - return (MB_PREDICTION_MODE)treed_read(bc, vp9_sb_ymode_tree, p); +static MB_PREDICTION_MODE read_sb_ymode(vp9_reader *r, const vp9_prob *p) { + return (MB_PREDICTION_MODE)treed_read(r, vp9_sb_ymode_tree, p); } -static MB_PREDICTION_MODE read_kf_sb_ymode(vp9_reader *bc, const vp9_prob *p) { - return (MB_PREDICTION_MODE)treed_read(bc, vp9_uv_mode_tree, p); +static MB_PREDICTION_MODE read_kf_sb_ymode(vp9_reader *r, const vp9_prob *p) { + return (MB_PREDICTION_MODE)treed_read(r, vp9_uv_mode_tree, p); } -static MB_PREDICTION_MODE read_kf_mb_ymode(vp9_reader *bc, const vp9_prob *p) { - return (MB_PREDICTION_MODE)treed_read(bc, vp9_kf_ymode_tree, p); +static MB_PREDICTION_MODE read_kf_mb_ymode(vp9_reader *r, const vp9_prob *p) { + return (MB_PREDICTION_MODE)treed_read(r, vp9_kf_ymode_tree, p); } -static int read_i8x8_mode(vp9_reader *bc, const vp9_prob *p) { - return treed_read(bc, vp9_i8x8_mode_tree, p); +static int read_i8x8_mode(vp9_reader *r, const vp9_prob *p) { + return treed_read(r, vp9_i8x8_mode_tree, p); } -static MB_PREDICTION_MODE read_uv_mode(vp9_reader *bc, const vp9_prob *p) { - return (MB_PREDICTION_MODE)treed_read(bc, vp9_uv_mode_tree, p); +static MB_PREDICTION_MODE read_uv_mode(vp9_reader *r, const vp9_prob *p) { + return (MB_PREDICTION_MODE)treed_read(r, vp9_uv_mode_tree, p); } // This function reads the current macro block's segnent id from the bitstream @@ -117,24 +117,20 @@ int vp9_read_mv_ref_id(vp9_reader *r, vp9_prob *ref_id_probs) { #endif extern const int vp9_i8x8_block[4]; -static void kfread_modes(VP9D_COMP *pbi, - MODE_INFO *m, - int mb_row, - int mb_col, - BOOL_DECODER* const bc) { +static void kfread_modes(VP9D_COMP *pbi, MODE_INFO *m, + int mb_row, int mb_col, + vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - const int mis = pbi->common.mode_info_stride; - int map_index = mb_row * pbi->common.mb_cols + mb_col; - MB_PREDICTION_MODE y_mode; - + MACROBLOCKD *const xd = &pbi->mb; + const int mis = cm->mode_info_stride; + const int map_index = mb_row * cm->mb_cols + mb_col; m->mbmi.ref_frame = INTRA_FRAME; // Read the Macroblock segmentation map if it is being updated explicitly // this frame (reset to 0 by default). m->mbmi.segment_id = 0; - if (pbi->mb.update_mb_segmentation_map) { - read_mb_segid(bc, &m->mbmi, &pbi->mb); + if (xd->update_mb_segmentation_map) { + read_mb_segid(r, &m->mbmi, xd); if (m->mbmi.sb_type) { const int bw = 1 << mb_width_log2(m->mbmi.sb_type); const int bh = 1 << mb_height_log2(m->mbmi.sb_type); @@ -144,8 +140,8 @@ static void kfread_modes(VP9D_COMP *pbi, for (y = 0; y < ymbs; y++) { for (x = 0; x < xmbs; x++) { - cm->last_frame_seg_map[map_index + x + y * cm->mb_cols] = - m->mbmi.segment_id; + const int index = y * cm->mb_cols + x; + cm->last_frame_seg_map[map_index + index] = m->mbmi.segment_id; } } } else { @@ -153,41 +149,33 @@ static void kfread_modes(VP9D_COMP *pbi, } } - m->mbmi.mb_skip_coeff = 0; - if (pbi->common.mb_no_coeff_skip && - (!vp9_segfeature_active(&pbi->mb, m->mbmi.segment_id, SEG_LVL_SKIP))) { - m->mbmi.mb_skip_coeff = vp9_read(bc, vp9_get_pred_prob(cm, &pbi->mb, - PRED_MBSKIP)); - } else { - m->mbmi.mb_skip_coeff = vp9_segfeature_active(&pbi->mb, m->mbmi.segment_id, - SEG_LVL_SKIP); - } + m->mbmi.mb_skip_coeff = vp9_segfeature_active(&pbi->mb, m->mbmi.segment_id, + SEG_LVL_SKIP); + if (!m->mbmi.mb_skip_coeff) + m->mbmi.mb_skip_coeff = vp9_read(r, vp9_get_pred_prob(cm, xd, PRED_MBSKIP)); - y_mode = m->mbmi.sb_type ? - read_kf_sb_ymode(bc, - pbi->common.sb_kf_ymode_prob[pbi->common.kf_ymode_probs_index]): - read_kf_mb_ymode(bc, - pbi->common.kf_ymode_prob[pbi->common.kf_ymode_probs_index]); + m->mbmi.mode = m->mbmi.sb_type ? + read_kf_sb_ymode(r, cm->sb_kf_ymode_prob[cm->kf_ymode_probs_index]): + read_kf_mb_ymode(r, cm->kf_ymode_prob[cm->kf_ymode_probs_index]); m->mbmi.ref_frame = INTRA_FRAME; - if ((m->mbmi.mode = y_mode) == I4X4_PRED) { + if (m->mbmi.mode == I4X4_PRED) { int i = 0; do { const B_PREDICTION_MODE a = above_block_mode(m, i, mis); - const B_PREDICTION_MODE l = (xd->left_available || (i & 3)) ? + const B_PREDICTION_MODE l = xd->left_available || (i & 3) ? left_block_mode(m, i) : B_DC_PRED; - m->bmi[i].as_mode.first = read_kf_bmode(bc, - pbi->common.kf_bmode_prob[a][l]); + m->bmi[i].as_mode.first = read_kf_bmode(r, cm->kf_bmode_prob[a][l]); } while (++i < 16); } - if ((m->mbmi.mode = y_mode) == I8X8_PRED) { + if (m->mbmi.mode == I8X8_PRED) { int i; for (i = 0; i < 4; i++) { const int ib = vp9_i8x8_block[i]; - const int mode8x8 = read_i8x8_mode(bc, pbi->common.fc.i8x8_mode_prob); + const int mode8x8 = read_i8x8_mode(r, cm->fc.i8x8_mode_prob); m->bmi[ib + 0].as_mode.first = mode8x8; m->bmi[ib + 1].as_mode.first = mode8x8; @@ -195,19 +183,18 @@ static void kfread_modes(VP9D_COMP *pbi, m->bmi[ib + 5].as_mode.first = mode8x8; } } else { - m->mbmi.uv_mode = read_uv_mode(bc, - pbi->common.kf_uv_mode_prob[m->mbmi.mode]); + m->mbmi.uv_mode = read_uv_mode(r, cm->kf_uv_mode_prob[m->mbmi.mode]); } if (cm->txfm_mode == TX_MODE_SELECT && m->mbmi.mb_skip_coeff == 0 && m->mbmi.mode <= I8X8_PRED) { // FIXME(rbultje) code ternary symbol once all experiments are merged - m->mbmi.txfm_size = vp9_read(bc, cm->prob_tx[0]); + m->mbmi.txfm_size = vp9_read(r, cm->prob_tx[0]); if (m->mbmi.txfm_size != TX_4X4 && m->mbmi.mode != I8X8_PRED) { - m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[1]); + m->mbmi.txfm_size += vp9_read(r, cm->prob_tx[1]); if (m->mbmi.txfm_size != TX_8X8 && m->mbmi.sb_type >= BLOCK_SIZE_SB32X32) - m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[2]); + m->mbmi.txfm_size += vp9_read(r, cm->prob_tx[2]); } } else if (cm->txfm_mode >= ALLOW_32X32 && m->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { @@ -271,104 +258,100 @@ static int read_nmv_component_fp(vp9_reader *r, static void read_nmv(vp9_reader *r, MV *mv, const MV *ref, const nmv_context *mvctx) { const MV_JOINT_TYPE j = treed_read(r, vp9_mv_joint_tree, mvctx->joints); - mv->row = mv-> col = 0; - if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { + mv->row = mv->col = 0; + + if (mv_joint_vertical(j)) mv->row = read_nmv_component(r, ref->row, &mvctx->comps[0]); - } - if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { + if (mv_joint_horizontal(j)) mv->col = read_nmv_component(r, ref->col, &mvctx->comps[1]); - } } static void read_nmv_fp(vp9_reader *r, MV *mv, const MV *ref, const nmv_context *mvctx, int usehp) { const MV_JOINT_TYPE j = vp9_get_mv_joint(*mv); usehp = usehp && vp9_use_nmv_hp(ref); - if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { + if (mv_joint_vertical(j)) mv->row = read_nmv_component_fp(r, mv->row, ref->row, &mvctx->comps[0], usehp); - } - if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { + + if (mv_joint_horizontal(j)) mv->col = read_nmv_component_fp(r, mv->col, ref->col, &mvctx->comps[1], usehp); - } - /* - printf("MV: %d %d REF: %d %d\n", mv->row + ref->row, mv->col + ref->col, - ref->row, ref->col); - */ } -static void update_nmv(vp9_reader *bc, vp9_prob *const p, +static void update_nmv(vp9_reader *r, vp9_prob *const p, const vp9_prob upd_p) { - if (vp9_read(bc, upd_p)) { + if (vp9_read(r, upd_p)) { #ifdef LOW_PRECISION_MV_UPDATE - *p = (vp9_read_literal(bc, 7) << 1) | 1; + *p = (vp9_read_literal(r, 7) << 1) | 1; #else - *p = (vp9_read_literal(bc, 8)); + *p = (vp9_read_literal(r, 8)); #endif } } -static void read_nmvprobs(vp9_reader *bc, nmv_context *mvctx, +static void read_nmvprobs(vp9_reader *r, nmv_context *mvctx, int usehp) { int i, j, k; #ifdef MV_GROUP_UPDATE - if (!vp9_read_bit(bc)) + if (!vp9_read_bit(r)) return; #endif for (j = 0; j < MV_JOINTS - 1; ++j) - update_nmv(bc, &mvctx->joints[j], VP9_NMV_UPDATE_PROB); + update_nmv(r, &mvctx->joints[j], VP9_NMV_UPDATE_PROB); for (i = 0; i < 2; ++i) { - update_nmv(bc, &mvctx->comps[i].sign, VP9_NMV_UPDATE_PROB); + update_nmv(r, &mvctx->comps[i].sign, VP9_NMV_UPDATE_PROB); for (j = 0; j < MV_CLASSES - 1; ++j) - update_nmv(bc, &mvctx->comps[i].classes[j], VP9_NMV_UPDATE_PROB); + update_nmv(r, &mvctx->comps[i].classes[j], VP9_NMV_UPDATE_PROB); for (j = 0; j < CLASS0_SIZE - 1; ++j) - update_nmv(bc, &mvctx->comps[i].class0[j], VP9_NMV_UPDATE_PROB); + update_nmv(r, &mvctx->comps[i].class0[j], VP9_NMV_UPDATE_PROB); for (j = 0; j < MV_OFFSET_BITS; ++j) - update_nmv(bc, &mvctx->comps[i].bits[j], VP9_NMV_UPDATE_PROB); + update_nmv(r, &mvctx->comps[i].bits[j], VP9_NMV_UPDATE_PROB); } for (i = 0; i < 2; ++i) { for (j = 0; j < CLASS0_SIZE; ++j) for (k = 0; k < 3; ++k) - update_nmv(bc, &mvctx->comps[i].class0_fp[j][k], VP9_NMV_UPDATE_PROB); + update_nmv(r, &mvctx->comps[i].class0_fp[j][k], VP9_NMV_UPDATE_PROB); for (j = 0; j < 3; ++j) - update_nmv(bc, &mvctx->comps[i].fp[j], VP9_NMV_UPDATE_PROB); + update_nmv(r, &mvctx->comps[i].fp[j], VP9_NMV_UPDATE_PROB); } if (usehp) { for (i = 0; i < 2; ++i) { - update_nmv(bc, &mvctx->comps[i].class0_hp, VP9_NMV_UPDATE_PROB); - update_nmv(bc, &mvctx->comps[i].hp, VP9_NMV_UPDATE_PROB); + update_nmv(r, &mvctx->comps[i].class0_hp, VP9_NMV_UPDATE_PROB); + update_nmv(r, &mvctx->comps[i].hp, VP9_NMV_UPDATE_PROB); } } } // Read the referncence frame static MV_REFERENCE_FRAME read_ref_frame(VP9D_COMP *pbi, - vp9_reader *const bc, + vp9_reader *r, unsigned char segment_id) { MV_REFERENCE_FRAME ref_frame; VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; int seg_ref_count = 0; - int seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME); + const int seg_ref_active = vp9_segfeature_active(xd, segment_id, + SEG_LVL_REF_FRAME); + + const int intra = vp9_check_segref(xd, segment_id, INTRA_FRAME); + const int last = vp9_check_segref(xd, segment_id, LAST_FRAME); + const int golden = vp9_check_segref(xd, segment_id, GOLDEN_FRAME); + const int altref = vp9_check_segref(xd, segment_id, ALTREF_FRAME); // If segment coding enabled does the segment allow for more than one // possible reference frame - if (seg_ref_active) { - seg_ref_count = vp9_check_segref(xd, segment_id, INTRA_FRAME) + - vp9_check_segref(xd, segment_id, LAST_FRAME) + - vp9_check_segref(xd, segment_id, GOLDEN_FRAME) + - vp9_check_segref(xd, segment_id, ALTREF_FRAME); - } + if (seg_ref_active) + seg_ref_count = intra + last + golden + altref; // Segment reference frame features not available or allows for // multiple reference frame options @@ -380,7 +363,7 @@ static MV_REFERENCE_FRAME read_ref_frame(VP9D_COMP *pbi, vp9_prob pred_prob = vp9_get_pred_prob(cm, xd, PRED_REF); // Read the prediction status flag - unsigned char prediction_flag = vp9_read(bc, pred_prob); + unsigned char prediction_flag = vp9_read(r, pred_prob); // Store the prediction flag. vp9_set_pred_flag(xd, PRED_REF, prediction_flag); @@ -394,19 +377,15 @@ static MV_REFERENCE_FRAME read_ref_frame(VP9D_COMP *pbi, } else { // decode the explicitly coded value vp9_prob mod_refprobs[PREDICTION_PROBS]; - vpx_memcpy(mod_refprobs, - cm->mod_refprobs[pred_ref], sizeof(mod_refprobs)); + vpx_memcpy(mod_refprobs, cm->mod_refprobs[pred_ref], + sizeof(mod_refprobs)); // If segment coding enabled blank out options that cant occur by // setting the branch probability to 0. if (seg_ref_active) { - mod_refprobs[INTRA_FRAME] *= - vp9_check_segref(xd, segment_id, INTRA_FRAME); - mod_refprobs[LAST_FRAME] *= - vp9_check_segref(xd, segment_id, LAST_FRAME); - mod_refprobs[GOLDEN_FRAME] *= - vp9_check_segref(xd, segment_id, GOLDEN_FRAME) * - vp9_check_segref(xd, segment_id, ALTREF_FRAME); + mod_refprobs[INTRA_FRAME] *= intra; + mod_refprobs[LAST_FRAME] *= last; + mod_refprobs[GOLDEN_FRAME] *= golden * altref; } // Default to INTRA_FRAME (value 0) @@ -414,32 +393,28 @@ static MV_REFERENCE_FRAME read_ref_frame(VP9D_COMP *pbi, // Do we need to decode the Intra/Inter branch if (mod_refprobs[0]) - ref_frame = vp9_read(bc, mod_refprobs[0]); + ref_frame = vp9_read(r, mod_refprobs[0]); else ref_frame++; if (ref_frame) { // Do we need to decode the Last/Gf_Arf branch if (mod_refprobs[1]) - ref_frame += vp9_read(bc, mod_refprobs[1]); + ref_frame += vp9_read(r, mod_refprobs[1]); else ref_frame++; if (ref_frame > 1) { // Do we need to decode the GF/Arf branch if (mod_refprobs[2]) { - ref_frame += vp9_read(bc, mod_refprobs[2]); + ref_frame += vp9_read(r, mod_refprobs[2]); } else { - if (seg_ref_active) { - ref_frame = pred_ref == GOLDEN_FRAME || - !vp9_check_segref(xd, segment_id, GOLDEN_FRAME) - ? ALTREF_FRAME - : GOLDEN_FRAME; - } else { - ref_frame = pred_ref == GOLDEN_FRAME - ? ALTREF_FRAME - : GOLDEN_FRAME; - } + if (seg_ref_active) + ref_frame = pred_ref == GOLDEN_FRAME || !golden ? ALTREF_FRAME + : GOLDEN_FRAME; + else + ref_frame = pred_ref == GOLDEN_FRAME ? ALTREF_FRAME + : GOLDEN_FRAME; } } } @@ -456,16 +431,16 @@ static MV_REFERENCE_FRAME read_ref_frame(VP9D_COMP *pbi, return ref_frame; } -static MB_PREDICTION_MODE read_sb_mv_ref(vp9_reader *bc, const vp9_prob *p) { - return (MB_PREDICTION_MODE) treed_read(bc, vp9_sb_mv_ref_tree, p); +static MB_PREDICTION_MODE read_sb_mv_ref(vp9_reader *r, const vp9_prob *p) { + return (MB_PREDICTION_MODE) treed_read(r, vp9_sb_mv_ref_tree, p); } -static MB_PREDICTION_MODE read_mv_ref(vp9_reader *bc, const vp9_prob *p) { - return (MB_PREDICTION_MODE) treed_read(bc, vp9_mv_ref_tree, p); +static MB_PREDICTION_MODE read_mv_ref(vp9_reader *r, const vp9_prob *p) { + return (MB_PREDICTION_MODE) treed_read(r, vp9_mv_ref_tree, p); } -static B_PREDICTION_MODE sub_mv_ref(vp9_reader *bc, const vp9_prob *p) { - return (B_PREDICTION_MODE) treed_read(bc, vp9_sub_mv_ref_tree, p); +static B_PREDICTION_MODE sub_mv_ref(vp9_reader *r, const vp9_prob *p) { + return (B_PREDICTION_MODE) treed_read(r, vp9_sub_mv_ref_tree, p); } #ifdef VPX_MODE_COUNT @@ -486,68 +461,72 @@ static const unsigned char mbsplit_fill_offset[4][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } }; -static void read_switchable_interp_probs(VP9D_COMP* const pbi, - BOOL_DECODER* const bc) { +static void read_switchable_interp_probs(VP9D_COMP* const pbi, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; int i, j; - for (j = 0; j < VP9_SWITCHABLE_FILTERS + 1; ++j) { - for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i) { - cm->fc.switchable_interp_prob[j][i] = vp9_read_prob(bc); - } - } - //printf("DECODER: %d %d\n", cm->fc.switchable_interp_prob[0], - //cm->fc.switchable_interp_prob[1]); + for (j = 0; j < VP9_SWITCHABLE_FILTERS + 1; ++j) + for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i) + cm->fc.switchable_interp_prob[j][i] = vp9_read_prob(r); +} + +static INLINE COMPPREDMODE_TYPE read_comp_pred_mode(vp9_reader *r) { + COMPPREDMODE_TYPE mode = vp9_read_bit(r); + if (mode) + mode += vp9_read_bit(r); + return mode; } -static void mb_mode_mv_init(VP9D_COMP *pbi, vp9_reader *bc) { +static void mb_mode_mv_init(VP9D_COMP *pbi, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; - nmv_context *const nmvc = &pbi->common.fc.nmvc; - MACROBLOCKD *const xd = &pbi->mb; if (cm->frame_type == KEY_FRAME) { if (!cm->kf_ymode_probs_update) - cm->kf_ymode_probs_index = vp9_read_literal(bc, 3); + cm->kf_ymode_probs_index = vp9_read_literal(r, 3); } else { + nmv_context *const nmvc = &pbi->common.fc.nmvc; + MACROBLOCKD *const xd = &pbi->mb; + int i, j; + if (cm->mcomp_filter_type == SWITCHABLE) - read_switchable_interp_probs(pbi, bc); + read_switchable_interp_probs(pbi, r); #if CONFIG_COMP_INTERINTRA_PRED if (cm->use_interintra) { - if (vp9_read(bc, VP9_UPD_INTERINTRA_PROB)) - cm->fc.interintra_prob = vp9_read_prob(bc); + if (vp9_read(r, VP9_UPD_INTERINTRA_PROB)) + cm->fc.interintra_prob = vp9_read_prob(r); } #endif - // Decode the baseline probabilities for decoding reference frame - cm->prob_intra_coded = vp9_read_prob(bc); - cm->prob_last_coded = vp9_read_prob(bc); - cm->prob_gf_coded = vp9_read_prob(bc); + // Baseline probabilities for decoding reference frame + cm->prob_intra_coded = vp9_read_prob(r); + cm->prob_last_coded = vp9_read_prob(r); + cm->prob_gf_coded = vp9_read_prob(r); // Computes a modified set of probabilities for use when reference // frame prediction fails. vp9_compute_mod_refprobs(cm); - cm->comp_pred_mode = vp9_read_bit(bc); - if (cm->comp_pred_mode) - cm->comp_pred_mode += vp9_read_bit(bc); - - if (cm->comp_pred_mode == HYBRID_PREDICTION) { - int i; + cm->comp_pred_mode = read_comp_pred_mode(r); + if (cm->comp_pred_mode == HYBRID_PREDICTION) for (i = 0; i < COMP_PRED_CONTEXTS; i++) - cm->prob_comppred[i] = vp9_read_prob(bc); - } + cm->prob_comppred[i] = vp9_read_prob(r); - if (vp9_read_bit(bc)) { - int i; + // VP9_YMODES + if (vp9_read_bit(r)) for (i = 0; i < VP9_YMODES - 1; ++i) - cm->fc.ymode_prob[i] = vp9_read_prob(bc); - } + cm->fc.ymode_prob[i] = vp9_read_prob(r); - if (vp9_read_bit(bc)) { - int i; + // VP9_I32X32_MODES + if (vp9_read_bit(r)) for (i = 0; i < VP9_I32X32_MODES - 1; ++i) - cm->fc.sb_ymode_prob[i] = vp9_read_prob(bc); + cm->fc.sb_ymode_prob[i] = vp9_read_prob(r); + + for (j = 0; j < PARTITION_PLANES; j++) { + if (vp9_read_bit(r)) { + for (i = 0; i < PARTITION_TYPES - 1; i++) + cm->fc.partition_prob[j][i] = vp9_read_prob(r); + } } - read_nmvprobs(bc, nmvc, xd->allow_high_precision_mv); + read_nmvprobs(r, nmvc, xd->allow_high_precision_mv); } } @@ -556,12 +535,12 @@ static void mb_mode_mv_init(VP9D_COMP *pbi, vp9_reader *bc) { // value static void read_mb_segment_id(VP9D_COMP *pbi, int mb_row, int mb_col, - BOOL_DECODER* const bc) { + vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; MODE_INFO *mi = xd->mode_info_context; MB_MODE_INFO *mbmi = &mi->mbmi; - int mb_index = mb_row * pbi->common.mb_cols + mb_col; + int mb_index = mb_row * cm->mb_cols + mb_col; if (xd->segmentation_enabled) { if (xd->update_mb_segmentation_map) { @@ -572,7 +551,7 @@ static void read_mb_segment_id(VP9D_COMP *pbi, vp9_prob pred_prob = vp9_get_pred_prob(cm, xd, PRED_SEG_ID); // Read the prediction status flag - unsigned char seg_pred_flag = vp9_read(bc, pred_prob); + unsigned char seg_pred_flag = vp9_read(r, pred_prob); // Store the prediction flag. vp9_set_pred_flag(xd, PRED_SEG_ID, seg_pred_flag); @@ -583,11 +562,11 @@ static void read_mb_segment_id(VP9D_COMP *pbi, mbmi->segment_id = vp9_get_pred_mb_segid(cm, xd, mb_index); } else { // Decode it explicitly - read_mb_segid_except(cm, bc, mbmi, xd, mb_row, mb_col); + read_mb_segid_except(cm, r, mbmi, xd, mb_row, mb_col); } } else { // Normal unpredicted coding mode - read_mb_segid(bc, mbmi, xd); + read_mb_segid(r, mbmi, xd); } if (mbmi->sb_type) { @@ -599,8 +578,8 @@ static void read_mb_segment_id(VP9D_COMP *pbi, for (y = 0; y < ymbs; y++) { for (x = 0; x < xmbs; x++) { - cm->last_frame_seg_map[mb_index + x + y * cm->mb_cols] = - mbmi->segment_id; + const int index = y * cm->mb_cols + x; + cm->last_frame_seg_map[mb_index + index] = mbmi->segment_id; } } } else { @@ -644,20 +623,19 @@ static INLINE void assign_and_clamp_mv(int_mv *dst, const int_mv *src, mb_to_bottom_edge); } -static INLINE void process_mv(BOOL_DECODER* bc, MV *mv, MV *ref, +static INLINE void process_mv(vp9_reader *r, MV *mv, MV *ref, nmv_context *nmvc, nmv_context_counts *mvctx, int usehp) { - read_nmv(bc, mv, ref, nmvc); - read_nmv_fp(bc, mv, ref, nmvc, usehp); + read_nmv(r, mv, ref, nmvc); + read_nmv_fp(r, mv, ref, nmvc, usehp); vp9_increment_nmv(mv, ref, mvctx, usehp); mv->row += ref->row; mv->col += ref->col; } static INLINE INTERPOLATIONFILTERTYPE read_switchable_filter_type( - VP9D_COMP *pbi, BOOL_DECODER* bc) { - const int index = treed_read(bc, - vp9_switchable_interp_tree, + VP9D_COMP *pbi, vp9_reader *r) { + const int index = treed_read(r, vp9_switchable_interp_tree, vp9_get_pred_probs(&pbi->common, &pbi->mb, PRED_SWITCHABLE_INTERP)); return vp9_switchable_interp[index]; @@ -666,10 +644,10 @@ static INLINE INTERPOLATIONFILTERTYPE read_switchable_filter_type( static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, MODE_INFO *prev_mi, int mb_row, int mb_col, - BOOL_DECODER* const bc) { + vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; - nmv_context *const nmvc = &pbi->common.fc.nmvc; - const int mis = pbi->common.mode_info_stride; + nmv_context *const nmvc = &cm->fc.nmvc; + const int mis = cm->mode_info_stride; MACROBLOCKD *const xd = &pbi->mb; int_mv *const mv = &mbmi->mv[0]; @@ -703,23 +681,15 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mb_to_right_edge = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN; // Read the macroblock segment id. - read_mb_segment_id(pbi, mb_row, mb_col, bc); + read_mb_segment_id(pbi, mb_row, mb_col, r); - if (pbi->common.mb_no_coeff_skip && - (!vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP))) { - // Read the macroblock coeff skip flag if this feature is in use, - // else default to 0 - mbmi->mb_skip_coeff = vp9_read(bc, vp9_get_pred_prob(cm, xd, PRED_MBSKIP)); - } else { - mbmi->mb_skip_coeff = vp9_segfeature_active(xd, mbmi->segment_id, - SEG_LVL_SKIP); - } + mbmi->mb_skip_coeff = vp9_segfeature_active(xd, mbmi->segment_id, + SEG_LVL_SKIP); + if (!mbmi->mb_skip_coeff) + mbmi->mb_skip_coeff = vp9_read(r, vp9_get_pred_prob(cm, xd, PRED_MBSKIP)); // Read the reference frame - mbmi->ref_frame = read_ref_frame(pbi, bc, mbmi->segment_id); - - // if (pbi->common.current_video_frame == 1) - // printf("ref frame: %d [%d %d]\n", mbmi->ref_frame, mb_row, mb_col); + mbmi->ref_frame = read_ref_frame(pbi, r, mbmi->segment_id); // If reference frame is an Inter frame if (mbmi->ref_frame) { @@ -759,8 +729,8 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP)) { mbmi->mode = ZEROMV; } else { - mbmi->mode = mbmi->sb_type ? read_sb_mv_ref(bc, mv_ref_p) - : read_mv_ref(bc, mv_ref_p); + mbmi->mode = mbmi->sb_type ? read_sb_mv_ref(r, mv_ref_p) + : read_mv_ref(r, mv_ref_p); vp9_accum_mv_refs(cm, mbmi->mode, mbmi->mb_mode_context[ref_frame]); } @@ -785,13 +755,13 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, if (mbmi->mode >= NEARESTMV && mbmi->mode <= SPLITMV) { mbmi->interp_filter = cm->mcomp_filter_type == SWITCHABLE - ? read_switchable_filter_type(pbi, bc) + ? read_switchable_filter_type(pbi, r) : cm->mcomp_filter_type; } if (cm->comp_pred_mode == COMP_PREDICTION_ONLY || (cm->comp_pred_mode == HYBRID_PREDICTION && - vp9_read(bc, vp9_get_pred_prob(cm, xd, PRED_COMP)))) { + vp9_read(r, vp9_get_pred_prob(cm, xd, PRED_COMP)))) { /* Since we have 3 reference frames, we can only have 3 unique * combinations of combinations of 2 different reference frames * (A-G, G-L or A-L). In the bitstream, we use this to simply @@ -838,17 +808,17 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, if (pbi->common.use_interintra && mbmi->mode >= NEARESTMV && mbmi->mode < SPLITMV && mbmi->second_ref_frame == NONE) { - mbmi->second_ref_frame = (vp9_read(bc, pbi->common.fc.interintra_prob) ? + mbmi->second_ref_frame = (vp9_read(r, pbi->common.fc.interintra_prob) ? INTRA_FRAME : NONE); // printf("-- %d (%d)\n", mbmi->second_ref_frame == INTRA_FRAME, // pbi->common.fc.interintra_prob); pbi->common.fc.interintra_counts[ mbmi->second_ref_frame == INTRA_FRAME]++; if (mbmi->second_ref_frame == INTRA_FRAME) { - mbmi->interintra_mode = read_ymode(bc, pbi->common.fc.ymode_prob); + mbmi->interintra_mode = read_ymode(r, pbi->common.fc.ymode_prob); pbi->common.fc.ymode_counts[mbmi->interintra_mode]++; #if SEPARATE_INTERINTRA_UV - mbmi->interintra_uv_mode = read_uv_mode(bc, + mbmi->interintra_uv_mode = read_uv_mode(r, pbi->common.fc.uv_mode_prob[mbmi->interintra_mode]); pbi->common.fc.uv_mode_counts[mbmi->interintra_mode] [mbmi->interintra_uv_mode]++; @@ -870,7 +840,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, // Encode the index of the choice. best_index = - vp9_read_mv_ref_id(bc, xd->mb_mv_ref_probs[ref_frame]); + vp9_read_mv_ref_id(r, xd->mb_mv_ref_probs[ref_frame]); best_mv.as_int = mbmi->ref_mvs[ref_frame][best_index].as_int; @@ -879,7 +849,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, // Encode the index of the choice. best_index = - vp9_read_mv_ref_id(bc, xd->mb_mv_ref_probs[ref_frame]); + vp9_read_mv_ref_id(r, xd->mb_mv_ref_probs[ref_frame]); best_mv_second.as_int = mbmi->ref_mvs[ref_frame][best_index].as_int; } } @@ -888,7 +858,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->uv_mode = DC_PRED; switch (mbmi->mode) { case SPLITMV: { - const int s = treed_read(bc, vp9_mbsplit_tree, cm->fc.mbsplit_prob); + const int s = treed_read(r, vp9_mbsplit_tree, cm->fc.mbsplit_prob); const int num_p = vp9_mbsplit_count[s]; int j = 0; @@ -911,16 +881,16 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, second_abovemv.as_int = above_block_second_mv(mi, k, mis); } mv_contz = vp9_mv_cont(&leftmv, &abovemv); - blockmode = sub_mv_ref(bc, cm->fc.sub_mv_ref_prob [mv_contz]); + blockmode = sub_mv_ref(r, cm->fc.sub_mv_ref_prob[mv_contz]); cm->fc.sub_mv_ref_counts[mv_contz][blockmode - LEFT4X4]++; switch (blockmode) { case NEW4X4: - process_mv(bc, &blockmv.as_mv, &best_mv.as_mv, nmvc, + process_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc, &cm->fc.NMVcount, xd->allow_high_precision_mv); if (mbmi->second_ref_frame > 0) - process_mv(bc, &secondmv.as_mv, &best_mv_second.as_mv, nmvc, + process_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc, &cm->fc.NMVcount, xd->allow_high_precision_mv); #ifdef VPX_MODE_COUNT @@ -1029,7 +999,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, break; case NEWMV: - process_mv(bc, &mv->as_mv, &best_mv.as_mv, nmvc, &cm->fc.NMVcount, + process_mv(r, &mv->as_mv, &best_mv.as_mv, nmvc, &cm->fc.NMVcount, xd->allow_high_precision_mv); mbmi->need_to_clamp_mvs = check_mv_bounds(mv, mb_to_left_edge, @@ -1038,7 +1008,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mb_to_bottom_edge); if (mbmi->second_ref_frame > 0) { - process_mv(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc, + process_mv(r, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc, &cm->fc.NMVcount, xd->allow_high_precision_mv); mbmi->need_to_clamp_secondmv = check_mv_bounds(&mbmi->mv[1], mb_to_left_edge, @@ -1058,23 +1028,23 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->mv[0].as_int = 0; if (mbmi->sb_type) { - mbmi->mode = read_sb_ymode(bc, pbi->common.fc.sb_ymode_prob); - pbi->common.fc.sb_ymode_counts[mbmi->mode]++; + mbmi->mode = read_sb_ymode(r, cm->fc.sb_ymode_prob); + cm->fc.sb_ymode_counts[mbmi->mode]++; } else { - mbmi->mode = read_ymode(bc, pbi->common.fc.ymode_prob); - pbi->common.fc.ymode_counts[mbmi->mode]++; + mbmi->mode = read_ymode(r, cm->fc.ymode_prob); + cm->fc.ymode_counts[mbmi->mode]++; } // If MB mode is I4X4_PRED read the block modes if (mbmi->mode == I4X4_PRED) { int j = 0; do { - int m = read_bmode(bc, pbi->common.fc.bmode_prob); + int m = read_bmode(r, cm->fc.bmode_prob); mi->bmi[j].as_mode.first = m; #if CONFIG_NEWBINTRAMODES if (m == B_CONTEXT_PRED) m -= CONTEXT_PRED_REPLACEMENTS; #endif - pbi->common.fc.bmode_counts[m]++; + cm->fc.bmode_counts[m]++; } while (++j < 16); } @@ -1082,21 +1052,21 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, int i; for (i = 0; i < 4; i++) { const int ib = vp9_i8x8_block[i]; - const int mode8x8 = read_i8x8_mode(bc, pbi->common.fc.i8x8_mode_prob); + const int mode8x8 = read_i8x8_mode(r, cm->fc.i8x8_mode_prob); mi->bmi[ib + 0].as_mode.first = mode8x8; mi->bmi[ib + 1].as_mode.first = mode8x8; mi->bmi[ib + 4].as_mode.first = mode8x8; mi->bmi[ib + 5].as_mode.first = mode8x8; - pbi->common.fc.i8x8_mode_counts[mode8x8]++; + cm->fc.i8x8_mode_counts[mode8x8]++; } } else { - mbmi->uv_mode = read_uv_mode(bc, pbi->common.fc.uv_mode_prob[mbmi->mode]); - pbi->common.fc.uv_mode_counts[mbmi->mode][mbmi->uv_mode]++; + mbmi->uv_mode = read_uv_mode(r, cm->fc.uv_mode_prob[mbmi->mode]); + cm->fc.uv_mode_counts[mbmi->mode][mbmi->uv_mode]++; } } /* - if (pbi->common.current_video_frame == 1) + if (cm->current_video_frame == 1) printf("mode: %d skip: %d\n", mbmi->mode, mbmi->mb_skip_coeff); */ @@ -1105,12 +1075,12 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, (mbmi->ref_frame != INTRA_FRAME && !(mbmi->mode == SPLITMV && mbmi->partitioning == PARTITIONING_4X4)))) { // FIXME(rbultje) code ternary symbol once all experiments are merged - mbmi->txfm_size = vp9_read(bc, cm->prob_tx[0]); + mbmi->txfm_size = vp9_read(r, cm->prob_tx[0]); if (mbmi->txfm_size != TX_4X4 && mbmi->mode != I8X8_PRED && mbmi->mode != SPLITMV) { - mbmi->txfm_size += vp9_read(bc, cm->prob_tx[1]); + mbmi->txfm_size += vp9_read(r, cm->prob_tx[1]); if (mbmi->sb_type >= BLOCK_SIZE_SB32X32 && mbmi->txfm_size != TX_8X8) - mbmi->txfm_size += vp9_read(bc, cm->prob_tx[2]); + mbmi->txfm_size += vp9_read(r, cm->prob_tx[2]); } } else if (mbmi->sb_type >= BLOCK_SIZE_SB32X32 && cm->txfm_mode >= ALLOW_32X32) { @@ -1129,18 +1099,16 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, } } -void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, BOOL_DECODER* const bc) { +void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, vp9_reader *r) { VP9_COMMON *cm = &pbi->common; + int k; + // TODO(jkoleszar): does this clear more than MBSKIP_CONTEXTS? Maybe remove. vpx_memset(cm->mbskip_pred_probs, 0, sizeof(cm->mbskip_pred_probs)); - if (pbi->common.mb_no_coeff_skip) { - int k; - for (k = 0; k < MBSKIP_CONTEXTS; ++k) { - cm->mbskip_pred_probs[k] = vp9_read_prob(bc); - } - } + for (k = 0; k < MBSKIP_CONTEXTS; ++k) + cm->mbskip_pred_probs[k] = vp9_read_prob(r); - mb_mode_mv_init(pbi, bc); + mb_mode_mv_init(pbi, r); } #if CONFIG_CODE_NONZEROCOUNT @@ -1149,24 +1117,24 @@ static uint16_t read_nzc(VP9_COMMON *const cm, TX_SIZE tx_size, int ref, int type, - BOOL_DECODER* const bc) { + vp9_reader *r) { int c, e; uint16_t nzc; if (!get_nzc_used(tx_size)) return 0; if (tx_size == TX_32X32) { - c = treed_read(bc, vp9_nzc32x32_tree, + c = treed_read(r, vp9_nzc32x32_tree, cm->fc.nzc_probs_32x32[nzc_context][ref][type]); cm->fc.nzc_counts_32x32[nzc_context][ref][type][c]++; } else if (tx_size == TX_16X16) { - c = treed_read(bc, vp9_nzc16x16_tree, + c = treed_read(r, vp9_nzc16x16_tree, cm->fc.nzc_probs_16x16[nzc_context][ref][type]); cm->fc.nzc_counts_16x16[nzc_context][ref][type][c]++; } else if (tx_size == TX_8X8) { - c = treed_read(bc, vp9_nzc8x8_tree, + c = treed_read(r, vp9_nzc8x8_tree, cm->fc.nzc_probs_8x8[nzc_context][ref][type]); cm->fc.nzc_counts_8x8[nzc_context][ref][type][c]++; } else if (tx_size == TX_4X4) { - c = treed_read(bc, vp9_nzc4x4_tree, + c = treed_read(r, vp9_nzc4x4_tree, cm->fc.nzc_probs_4x4[nzc_context][ref][type]); cm->fc.nzc_counts_4x4[nzc_context][ref][type][c]++; } else { @@ -1177,7 +1145,7 @@ static uint16_t read_nzc(VP9_COMMON *const cm, int x = 0; while (e--) { int b = vp9_read( - bc, cm->fc.nzc_pcat_probs[nzc_context][c - NZC_TOKENS_NOEXTRA][e]); + r, cm->fc.nzc_pcat_probs[nzc_context][c - NZC_TOKENS_NOEXTRA][e]); x |= (b << e); cm->fc.nzc_pcat_counts[nzc_context][c - NZC_TOKENS_NOEXTRA][e][b]++; } @@ -1198,7 +1166,7 @@ static void read_nzcs_sb64(VP9_COMMON *const cm, MACROBLOCKD* xd, int mb_row, int mb_col, - BOOL_DECODER* const bc) { + vp9_reader *r) { MODE_INFO *m = xd->mode_info_context; MB_MODE_INFO *const mi = &m->mbmi; int j, nzc_context; @@ -1216,44 +1184,44 @@ static void read_nzcs_sb64(VP9_COMMON *const cm, case TX_32X32: for (j = 0; j < 256; j += 64) { nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 0, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 0, r); } for (j = 256; j < 384; j += 64) { nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 1, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 1, r); } break; case TX_16X16: for (j = 0; j < 256; j += 16) { nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, r); } for (j = 256; j < 384; j += 16) { nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, r); } break; case TX_8X8: for (j = 0; j < 256; j += 4) { nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, r); } for (j = 256; j < 384; j += 4) { nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, r); } break; case TX_4X4: for (j = 0; j < 256; ++j) { nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, r); } for (j = 256; j < 384; ++j) { nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, r); } break; @@ -1266,7 +1234,7 @@ static void read_nzcs_sb32(VP9_COMMON *const cm, MACROBLOCKD* xd, int mb_row, int mb_col, - BOOL_DECODER* const bc) { + vp9_reader *r) { MODE_INFO *m = xd->mode_info_context; MB_MODE_INFO *const mi = &m->mbmi; int j, nzc_context; @@ -1284,44 +1252,44 @@ static void read_nzcs_sb32(VP9_COMMON *const cm, case TX_32X32: for (j = 0; j < 64; j += 64) { nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 0, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 0, r); } for (j = 64; j < 96; j += 16) { nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, r); } break; case TX_16X16: for (j = 0; j < 64; j += 16) { nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, r); } for (j = 64; j < 96; j += 16) { nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, r); } break; case TX_8X8: for (j = 0; j < 64; j += 4) { nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, r); } for (j = 64; j < 96; j += 4) { nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, r); } break; case TX_4X4: for (j = 0; j < 64; ++j) { nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, r); } for (j = 64; j < 96; ++j) { nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, r); } break; @@ -1334,7 +1302,7 @@ static void read_nzcs_mb16(VP9_COMMON *const cm, MACROBLOCKD* xd, int mb_row, int mb_col, - BOOL_DECODER* const bc) { + vp9_reader *r) { MODE_INFO *m = xd->mode_info_context; MB_MODE_INFO *const mi = &m->mbmi; int j, nzc_context; @@ -1352,28 +1320,28 @@ static void read_nzcs_mb16(VP9_COMMON *const cm, case TX_16X16: for (j = 0; j < 16; j += 16) { nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, r); } for (j = 16; j < 24; j += 4) { nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, r); } break; case TX_8X8: for (j = 0; j < 16; j += 4) { nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, r); } if (mi->mode == I8X8_PRED || mi->mode == SPLITMV) { for (j = 16; j < 24; ++j) { nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, r); } } else { for (j = 16; j < 24; j += 4) { nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, r); } } break; @@ -1381,11 +1349,11 @@ static void read_nzcs_mb16(VP9_COMMON *const cm, case TX_4X4: for (j = 0; j < 16; ++j) { nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, r); } for (j = 16; j < 24; ++j) { nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, r); } break; @@ -1399,27 +1367,27 @@ void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi, MACROBLOCKD* const xd, int mb_row, int mb_col, - BOOL_DECODER* const bc) { + vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; MODE_INFO *mi = xd->mode_info_context; MODE_INFO *prev_mi = xd->prev_mode_info_context; MB_MODE_INFO *const mbmi = &mi->mbmi; - if (pbi->common.frame_type == KEY_FRAME) { - kfread_modes(pbi, mi, mb_row, mb_col, bc); + if (cm->frame_type == KEY_FRAME) { + kfread_modes(pbi, mi, mb_row, mb_col, r); } else { - read_mb_modes_mv(pbi, mi, &mi->mbmi, prev_mi, mb_row, mb_col, bc); + read_mb_modes_mv(pbi, mi, &mi->mbmi, prev_mi, mb_row, mb_col, r); set_scale_factors(xd, mi->mbmi.ref_frame - 1, mi->mbmi.second_ref_frame - 1, - pbi->common.active_ref_scale); + cm->active_ref_scale); } #if CONFIG_CODE_NONZEROCOUNT if (mbmi->sb_type == BLOCK_SIZE_SB64X64) - read_nzcs_sb64(cm, xd, mb_row, mb_col, bc); + read_nzcs_sb64(cm, xd, mb_row, mb_col, r); else if (mbmi->sb_type == BLOCK_SIZE_SB32X32) - read_nzcs_sb32(cm, xd, mb_row, mb_col, bc); + read_nzcs_sb32(cm, xd, mb_row, mb_col, r); else - read_nzcs_mb16(cm, xd, mb_row, mb_col, bc); + read_nzcs_mb16(cm, xd, mb_row, mb_col, r); #endif // CONFIG_CODE_NONZEROCOUNT if (mbmi->sb_type) { diff --git a/vp9/decoder/vp9_decodemv.h b/vp9/decoder/vp9_decodemv.h index 5cd935760..bf5e83c77 100644 --- a/vp9/decoder/vp9_decodemv.h +++ b/vp9/decoder/vp9_decodemv.h @@ -17,7 +17,7 @@ void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi, MACROBLOCKD* const xd, int mb_row, int mb_col, - BOOL_DECODER* const bc); -void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, BOOL_DECODER* const bc); + vp9_reader *r); +void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, vp9_reader *r); #endif // VP9_DECODER_VP9_DECODEMV_H_ diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 6376090ed..47498c1ca 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -204,26 +204,16 @@ static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd, BOOL_DECODER* const bc) { const TX_TYPE tx_type = get_tx_type_16x16(xd, 0); - if (tx_type != DCT_DCT) { - vp9_dequant_iht_add_16x16_c(tx_type, xd->plane[0].qcoeff, - xd->block[0].dequant, xd->dst.y_buffer, - xd->dst.y_buffer, xd->dst.y_stride, - xd->dst.y_stride, xd->plane[0].eobs[0]); - } else { - vp9_dequant_idct_add_16x16(xd->plane[0].qcoeff, xd->block[0].dequant, - xd->dst.y_buffer, xd->dst.y_buffer, - xd->dst.y_stride, xd->dst.y_stride, - xd->plane[0].eobs[0]); - } + vp9_dequant_iht_add_16x16_c(tx_type, xd->plane[0].qcoeff, + xd->block[0].dequant, xd->dst.y_buffer, + xd->dst.y_stride, xd->plane[0].eobs[0]); vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant, - xd->dst.u_buffer, xd->dst.u_buffer, - xd->dst.uv_stride, xd->dst.uv_stride, + xd->dst.u_buffer, xd->dst.uv_stride, xd->plane[1].eobs[0]); vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[20].dequant, - xd->dst.v_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->dst.uv_stride, + xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs[0]); } @@ -248,22 +238,13 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_intra8x8_predict(xd, b, i8x8mode, dst, stride); } tx_type = get_tx_type_8x8(xd, ib); - if (tx_type != DCT_DCT) { - vp9_dequant_iht_add_8x8_c(tx_type, q, dq, dst, dst, stride, stride, - xd->plane[0].eobs[idx]); - } else { - vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride, - xd->plane[0].eobs[idx]); - } + vp9_dequant_iht_add_8x8_c(tx_type, q, dq, dst, stride, + xd->plane[0].eobs[idx]); } } else { vp9_dequant_idct_add_y_block_8x8(xd->plane[0].qcoeff, - xd->block[0].dequant, - xd->dst.y_buffer, - xd->dst.y_stride, - xd->dst.y_buffer, - xd->dst.y_stride, - xd); + xd->block[0].dequant, xd->dst.y_buffer, + xd->dst.y_stride, xd); } // chroma @@ -278,38 +259,48 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_intra_uv4x4_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst, b->dst_stride); xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16), - b->dequant, *(b->base_dst) + b->dst, - *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride, + b->dequant, *(b->base_dst) + b->dst, b->dst_stride, xd->plane[1].eobs[i]); b = &xd->block[20 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst, b->dst_stride); xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16), - b->dequant, *(b->base_dst) + b->dst, - *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride, + b->dequant, *(b->base_dst) + b->dst, b->dst_stride, xd->plane[2].eobs[i]); } } else if (mode == SPLITMV) { xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant, - xd->dst.u_buffer, xd->dst.uv_stride, xd->dst.u_buffer, - xd->dst.uv_stride, xd->plane[1].eobs); + xd->dst.u_buffer, xd->dst.uv_stride, xd->plane[1].eobs); xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant, - xd->dst.v_buffer, xd->dst.uv_stride, xd->dst.v_buffer, - xd->dst.uv_stride, xd->plane[2].eobs); + xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs); } else { vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant, - xd->dst.u_buffer, xd->dst.u_buffer, - xd->dst.uv_stride, xd->dst.uv_stride, + xd->dst.u_buffer, xd->dst.uv_stride, xd->plane[1].eobs[0]); vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[16].dequant, - xd->dst.v_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->dst.uv_stride, + xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs[0]); } } +static INLINE void dequant_add_y(MACROBLOCKD *xd, TX_TYPE tx_type, int idx) { + BLOCKD *const b = &xd->block[idx]; + struct mb_plane *const y = &xd->plane[0]; + if (tx_type != DCT_DCT) { + vp9_dequant_iht_add_c(tx_type, + BLOCK_OFFSET(y->qcoeff, idx, 16), + b->dequant, *(b->base_dst) + b->dst, + b->dst_stride, y->eobs[idx]); + } else { + xd->itxm_add(BLOCK_OFFSET(y->qcoeff, idx, 16), + b->dequant, *(b->base_dst) + b->dst, + b->dst_stride, y->eobs[idx]); + } +} + + static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, BOOL_DECODER* const bc) { TX_TYPE tx_type; @@ -325,35 +316,20 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_intra8x8_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst, b->dst_stride); for (j = 0; j < 4; j++) { - b = &xd->block[ib + iblock[j]]; tx_type = get_tx_type_4x4(xd, ib + iblock[j]); - if (tx_type != DCT_DCT) { - vp9_dequant_iht_add_c(tx_type, - BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16), - b->dequant, *(b->base_dst) + b->dst, - *(b->base_dst) + b->dst, b->dst_stride, - b->dst_stride, - xd->plane[0].eobs[ib + iblock[j]]); - } else { - xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16), - b->dequant, *(b->base_dst) + b->dst, - *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride, - xd->plane[0].eobs[ib + iblock[j]]); - } + dequant_add_y(xd, tx_type, ib + iblock[j]); } b = &xd->block[16 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst, b->dst_stride); xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16), - b->dequant, *(b->base_dst) + b->dst, - *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride, + b->dequant, *(b->base_dst) + b->dst, b->dst_stride, xd->plane[1].eobs[i]); b = &xd->block[20 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst, b->dst_stride); xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16), - b->dequant, *(b->base_dst) + b->dst, - *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride, + b->dequant, *(b->base_dst) + b->dst, b->dst_stride, xd->plane[2].eobs[i]); } } else if (mode == I4X4_PRED) { @@ -369,18 +345,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_intra4x4_predict(xd, b, b_mode, *(b->base_dst) + b->dst, b->dst_stride); tx_type = get_tx_type_4x4(xd, i); - if (tx_type != DCT_DCT) { - vp9_dequant_iht_add_c(tx_type, - BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), - b->dequant, *(b->base_dst) + b->dst, - *(b->base_dst) + b->dst, b->dst_stride, - b->dst_stride, xd->plane[0].eobs[i]); - } else { - xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), - b->dequant, *(b->base_dst) + b->dst, - *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride, - xd->plane[0].eobs[i]); - } + dequant_add_y(xd, tx_type, i); } #if CONFIG_NEWBINTRAMODES if (!xd->mode_info_context->mbmi.mb_skip_coeff) @@ -388,47 +353,28 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, #endif vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_MB16X16); xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant, - xd->dst.u_buffer, xd->dst.uv_stride, xd->dst.u_buffer, - xd->dst.uv_stride, xd->plane[1].eobs); + xd->dst.u_buffer, xd->dst.uv_stride, xd->plane[1].eobs); xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant, - xd->dst.v_buffer, xd->dst.uv_stride, xd->dst.v_buffer, - xd->dst.uv_stride, xd->plane[2].eobs); + xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs); } else if (mode == SPLITMV || get_tx_type_4x4(xd, 0) == DCT_DCT) { xd->itxm_add_y_block(xd->plane[0].qcoeff, xd->block[0].dequant, - xd->dst.y_buffer, xd->dst.y_stride, - xd->dst.y_buffer, - xd->dst.y_stride, - xd); + xd->dst.y_buffer, xd->dst.y_stride, xd); xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant, - xd->dst.u_buffer, xd->dst.uv_stride, xd->dst.u_buffer, - xd->dst.uv_stride, xd->plane[1].eobs); + xd->dst.u_buffer, xd->dst.uv_stride, xd->plane[1].eobs); xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant, - xd->dst.v_buffer, xd->dst.uv_stride, xd->dst.v_buffer, - xd->dst.uv_stride, xd->plane[2].eobs); + xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs); } else { for (i = 0; i < 16; i++) { - BLOCKD *b = &xd->block[i]; tx_type = get_tx_type_4x4(xd, i); - if (tx_type != DCT_DCT) { - vp9_dequant_iht_add_c(tx_type, - BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), - b->dequant, *(b->base_dst) + b->dst, - *(b->base_dst) + b->dst, b->dst_stride, - b->dst_stride, xd->plane[0].eobs[i]); - } else { - xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), - b->dequant, *(b->base_dst) + b->dst, - *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride, - xd->plane[0].eobs[i]); - } + dequant_add_y(xd, tx_type, i); } xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant, - xd->dst.u_buffer, xd->dst.uv_stride, xd->dst.u_buffer, - xd->dst.uv_stride, xd->plane[1].eobs); + xd->dst.u_buffer, xd->dst.uv_stride, + xd->plane[1].eobs); xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant, - xd->dst.v_buffer, xd->dst.uv_stride, xd->dst.v_buffer, - xd->dst.uv_stride, xd->plane[2].eobs); + xd->dst.v_buffer, xd->dst.uv_stride, + xd->plane[2].eobs); } } @@ -444,9 +390,7 @@ static INLINE void decode_sby_32x32(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) { const int y_offset = (y_idx * 32) * mb->dst.y_stride + (x_idx * 32); vp9_dequant_idct_add_32x32(BLOCK_OFFSET(mb->plane[0].qcoeff, n, 1024), mb->block[0].dequant , - mb->dst.y_buffer + y_offset, - mb->dst.y_buffer + y_offset, - mb->dst.y_stride, mb->dst.y_stride, + mb->dst.y_buffer + y_offset, mb->dst.y_stride, mb->plane[0].eobs[n * 64]); } } @@ -463,15 +407,11 @@ static INLINE void decode_sbuv_32x32(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) { vp9_dequant_idct_add_32x32(BLOCK_OFFSET(mb->plane[1].qcoeff, n, 1024), mb->block[16].dequant, mb->dst.u_buffer + uv_offset, - mb->dst.u_buffer + uv_offset, - mb->dst.uv_stride, mb->dst.uv_stride, - mb->plane[1].eobs[n * 64]); + mb->dst.uv_stride, mb->plane[1].eobs[n * 64]); vp9_dequant_idct_add_32x32(BLOCK_OFFSET(mb->plane[2].qcoeff, n, 1024), mb->block[20].dequant, mb->dst.v_buffer + uv_offset, - mb->dst.v_buffer + uv_offset, - mb->dst.uv_stride, mb->dst.uv_stride, - mb->plane[2].eobs[n * 64]); + mb->dst.uv_stride, mb->plane[2].eobs[n * 64]); } } @@ -487,22 +427,12 @@ static INLINE void decode_sby_16x16(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) { const int y_offset = (y_idx * 16) * mb->dst.y_stride + (x_idx * 16); const TX_TYPE tx_type = get_tx_type_16x16(mb, (y_idx * (4 * bw) + x_idx) * 4); - if (tx_type == DCT_DCT) { - vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256), - mb->block[0].dequant , - mb->dst.y_buffer + y_offset, - mb->dst.y_buffer + y_offset, - mb->dst.y_stride, mb->dst.y_stride, - mb->plane[0].eobs[n * 16]); - } else { - vp9_dequant_iht_add_16x16_c(tx_type, - BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256), - mb->block[0].dequant, - mb->dst.y_buffer + y_offset, - mb->dst.y_buffer + y_offset, - mb->dst.y_stride, mb->dst.y_stride, - mb->plane[0].eobs[n * 16]); - } + vp9_dequant_iht_add_16x16_c(tx_type, + BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256), + mb->block[0].dequant, + mb->dst.y_buffer + y_offset, + mb->dst.y_stride, + mb->plane[0].eobs[n * 16]); } } @@ -520,15 +450,11 @@ static INLINE void decode_sbuv_16x16(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) { const int uv_offset = (y_idx * 16) * mb->dst.uv_stride + (x_idx * 16); vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[1].qcoeff, n, 256), mb->block[16].dequant, - mb->dst.u_buffer + uv_offset, - mb->dst.u_buffer + uv_offset, - mb->dst.uv_stride, mb->dst.uv_stride, + mb->dst.u_buffer + uv_offset, mb->dst.uv_stride, mb->plane[1].eobs[n * 16]); vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[2].qcoeff, n, 256), mb->block[20].dequant, - mb->dst.v_buffer + uv_offset, - mb->dst.v_buffer + uv_offset, - mb->dst.uv_stride, mb->dst.uv_stride, + mb->dst.v_buffer + uv_offset, mb->dst.uv_stride, mb->plane[2].eobs[n * 16]); } } @@ -546,22 +472,12 @@ static INLINE void decode_sby_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { const int y_offset = (y_idx * 8) * xd->dst.y_stride + (x_idx * 8); const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * (2 * bw) + x_idx) * 2); - if (tx_type == DCT_DCT) { - vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64), - xd->block[0].dequant, - xd->dst.y_buffer + y_offset, - xd->dst.y_buffer + y_offset, - xd->dst.y_stride, xd->dst.y_stride, - xd->plane[0].eobs[n * 4]); - } else { - vp9_dequant_iht_add_8x8_c(tx_type, - BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64), - xd->block[0].dequant, - xd->dst.y_buffer + y_offset, - xd->dst.y_buffer + y_offset, - xd->dst.y_stride, xd->dst.y_stride, - xd->plane[0].eobs[n * 4]); - } + + vp9_dequant_iht_add_8x8_c(tx_type, + BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64), + xd->block[0].dequant, + xd->dst.y_buffer + y_offset, xd->dst.y_stride, + xd->plane[0].eobs[n * 4]); } } @@ -576,18 +492,14 @@ static INLINE void decode_sbuv_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { const int x_idx = n & (bw - 1); const int y_idx = n >> (bwl - 1); const int uv_offset = (y_idx * 8) * xd->dst.uv_stride + (x_idx * 8); - vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 64), - xd->block[16].dequant, - xd->dst.u_buffer + uv_offset, - xd->dst.u_buffer + uv_offset, - xd->dst.uv_stride, xd->dst.uv_stride, - xd->plane[1].eobs[n * 4]); - vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 64), - xd->block[20].dequant, - xd->dst.v_buffer + uv_offset, - xd->dst.v_buffer + uv_offset, - xd->dst.uv_stride, xd->dst.uv_stride, - xd->plane[2].eobs[n * 4]); + vp9_dequant_idct_add_8x8(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 64), + xd->block[16].dequant, + xd->dst.u_buffer + uv_offset, xd->dst.uv_stride, + xd->plane[1].eobs[n * 4]); + vp9_dequant_idct_add_8x8(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 64), + xd->block[20].dequant, + xd->dst.v_buffer + uv_offset, xd->dst.uv_stride, + xd->plane[2].eobs[n * 4]); } } @@ -605,19 +517,13 @@ static INLINE void decode_sby_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { if (tx_type == DCT_DCT) { xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16), xd->block[0].dequant, - xd->dst.y_buffer + y_offset, - xd->dst.y_buffer + y_offset, - xd->dst.y_stride, xd->dst.y_stride, + xd->dst.y_buffer + y_offset, xd->dst.y_stride, xd->plane[0].eobs[n]); } else { vp9_dequant_iht_add_c(tx_type, BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16), - xd->block[0].dequant, - xd->dst.y_buffer + y_offset, - xd->dst.y_buffer + y_offset, - xd->dst.y_stride, - xd->dst.y_stride, - xd->plane[0].eobs[n]); + xd->block[0].dequant, xd->dst.y_buffer + y_offset, + xd->dst.y_stride, xd->plane[0].eobs[n]); } } } @@ -634,14 +540,10 @@ static INLINE void decode_sbuv_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { const int uv_offset = (y_idx * 4) * xd->dst.uv_stride + (x_idx * 4); xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 16), xd->block[16].dequant, - xd->dst.u_buffer + uv_offset, - xd->dst.u_buffer + uv_offset, - xd->dst.uv_stride, xd->dst.uv_stride, xd->plane[1].eobs[n]); + xd->dst.u_buffer + uv_offset, xd->dst.uv_stride, xd->plane[1].eobs[n]); xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 16), xd->block[20].dequant, - xd->dst.v_buffer + uv_offset, - xd->dst.v_buffer + uv_offset, - xd->dst.uv_stride, xd->dst.uv_stride, xd->plane[2].eobs[n]); + xd->dst.v_buffer + uv_offset, xd->dst.uv_stride, xd->plane[2].eobs[n]); } } @@ -844,10 +746,8 @@ static int get_delta_q(vp9_reader *r, int *dq) { const int old_value = *dq; if (vp9_read_bit(r)) { // Update bit - int value = vp9_read_literal(r, 4); - if (vp9_read_bit(r)) // Sign bit - value = -value; - *dq = value; + const int value = vp9_read_literal(r, 4); + *dq = vp9_read_and_apply_sign(r, value); } // Trigger a quantizer update if the delta-q value has changed @@ -915,10 +815,83 @@ static void set_refs(VP9D_COMP *pbi, int mb_row, int mb_col) { } } -/* Decode a row of Superblocks (2x2 region of MBs) */ -static void decode_sb_row(VP9D_COMP *pbi, int mb_row, vp9_reader* r) { +static void decode_modes_b(VP9D_COMP *pbi, int mb_row, int mb_col, + vp9_reader *r, BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD *const xd = &pbi->mb; + + set_offsets(pbi, bsize, mb_row, mb_col); + vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, r); + set_refs(pbi, mb_row, mb_col); + + // TODO(jingning): merge decode_sb_ and decode_mb_ + if (bsize > BLOCK_SIZE_MB16X16) + decode_sb(pbi, xd, mb_row, mb_col, r, bsize); + else + decode_mb(pbi, xd, mb_row, mb_col, r); + + xd->corrupted |= bool_error(r); +} + +static void decode_modes_sb(VP9D_COMP *pbi, int mb_row, int mb_col, + vp9_reader* r, BLOCK_SIZE_TYPE bsize) { VP9_COMMON *const pc = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; + int bsl = mb_width_log2(bsize), bs = (1 << bsl) / 2; + int n; + PARTITION_TYPE partition = PARTITION_NONE; + BLOCK_SIZE_TYPE subsize; + + if (mb_row >= pc->mb_rows || mb_col >= pc->mb_cols) + return; + + if (bsize > BLOCK_SIZE_MB16X16) { + // read the partition information + partition = treed_read(r, vp9_partition_tree, + pc->fc.partition_prob[bsl - 1]); + pc->fc.partition_counts[bsl - 1][partition]++; + } + + switch (partition) { + case PARTITION_NONE: + subsize = bsize; + decode_modes_b(pbi, mb_row, mb_col, r, subsize); + break; +#if CONFIG_SBSEGMENT + case PARTITION_HORZ: + subsize = (bsize == BLOCK_SIZE_SB64X64) ? BLOCK_SIZE_SB64X32 : + BLOCK_SIZE_SB32X16; + decode_modes_b(pbi, mb_row, mb_col, r, subsize); + if ((mb_row + bs) < pc->mb_rows) + decode_modes_b(pbi, mb_row + bs, mb_col, r, subsize); + break; + case PARTITION_VERT: + subsize = (bsize == BLOCK_SIZE_SB64X64) ? BLOCK_SIZE_SB32X64 : + BLOCK_SIZE_SB16X32; + decode_modes_b(pbi, mb_row, mb_col, r, subsize); + if ((mb_col + bs) < pc->mb_cols) + decode_modes_b(pbi, mb_row, mb_col + bs, r, subsize); + break; +#endif + case PARTITION_SPLIT: + subsize = (bsize == BLOCK_SIZE_SB64X64) ? BLOCK_SIZE_SB32X32 : + BLOCK_SIZE_MB16X16; + for (n = 0; n < 4; n++) { + int j = n >> 1, i = n & 0x01; + if (subsize == BLOCK_SIZE_SB32X32) + xd->sb_index = n; + else + xd->mb_index = n; + decode_modes_sb(pbi, mb_row + j * bs, mb_col + i * bs, r, subsize); + } + break; + default: + assert(0); + } +} + +/* Decode a row of Superblocks (4x4 region of MBs) */ +static void decode_sb_row(VP9D_COMP *pbi, int mb_row, vp9_reader* r) { + VP9_COMMON *const pc = &pbi->common; int mb_col; // For a SB there are 2 left contexts, each pertaining to a MB row within @@ -926,60 +899,10 @@ static void decode_sb_row(VP9D_COMP *pbi, int mb_row, vp9_reader* r) { for (mb_col = pc->cur_tile_mb_col_start; mb_col < pc->cur_tile_mb_col_end; mb_col += 4) { - if (vp9_read(r, pc->prob_sb64_coded)) { - // SB64 decoding - set_offsets(pbi, BLOCK_SIZE_SB64X64, mb_row, mb_col); - vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, r); - set_refs(pbi, mb_row, mb_col); - decode_sb(pbi, xd, mb_row, mb_col, r, BLOCK_SIZE_SB64X64); - xd->corrupted |= bool_error(r); - } else { - // not SB64 - int j; - for (j = 0; j < 4; j++) { - const int x_idx_sb = mb_col + 2 * (j % 2); - const int y_idx_sb = mb_row + 2 * (j / 2); - - if (y_idx_sb >= pc->mb_rows || x_idx_sb >= pc->mb_cols) - continue; // MB lies outside frame, skip on to next - - xd->sb_index = j; - - if (vp9_read(r, pc->prob_sb32_coded)) { - // SB32 decoding - set_offsets(pbi, BLOCK_SIZE_SB32X32, y_idx_sb, x_idx_sb); - vp9_decode_mb_mode_mv(pbi, xd, y_idx_sb, x_idx_sb, r); - set_refs(pbi, y_idx_sb, x_idx_sb); - decode_sb(pbi, xd, y_idx_sb, x_idx_sb, r, BLOCK_SIZE_SB32X32); - xd->corrupted |= bool_error(r); - } else { - // not SB32 - // Process the 4 MBs within the SB in the order: - // top-left, top-right, bottom-left, bottom-right - int i; - for (i = 0; i < 4; i++) { - const int x_idx_mb = x_idx_sb + (i % 2); - const int y_idx_mb = y_idx_sb + (i / 2); - - if (y_idx_mb >= pc->mb_rows || x_idx_mb >= pc->mb_cols) - continue; // MB lies outside frame, skip on to next - - xd->mb_index = i; - - // MB decoding - set_offsets(pbi, BLOCK_SIZE_MB16X16, y_idx_mb, x_idx_mb); - vp9_decode_mb_mode_mv(pbi, xd, y_idx_mb, x_idx_mb, r); - set_refs(pbi, y_idx_mb, x_idx_mb); - decode_mb(pbi, xd, y_idx_mb, x_idx_mb, r); - xd->corrupted |= bool_error(r); - } - } - } - } + decode_modes_sb(pbi, mb_row, mb_col, r, BLOCK_SIZE_SB64X64); } } - static void setup_token_decoder(VP9D_COMP *pbi, const uint8_t *data, vp9_reader *r) { @@ -1232,9 +1155,8 @@ static void setup_segmentation(VP9_COMMON *pc, MACROBLOCKD *xd, vp9_reader *r) { if (feature_enabled) { vp9_enable_segfeature(xd, i, j); data = vp9_decode_unsigned_max(r, vp9_seg_feature_data_max(j)); - if (vp9_is_segfeature_signed(j) && vp9_read_bit(r)) { - data = -data; - } + if (vp9_is_segfeature_signed(j)) + data = vp9_read_and_apply_sign(r, data); } vp9_set_segdata(xd, i, j, data); } @@ -1283,19 +1205,15 @@ static void setup_loopfilter(VP9_COMMON *pc, MACROBLOCKD *xd, vp9_reader *r) { for (i = 0; i < MAX_REF_LF_DELTAS; i++) { if (vp9_read_bit(r)) { - int value = vp9_read_literal(r, 6); - if (vp9_read_bit(r)) - value = -value; - xd->ref_lf_deltas[i] = value; + const int value = vp9_read_literal(r, 6); + xd->ref_lf_deltas[i] = vp9_read_and_apply_sign(r, value); } } for (i = 0; i < MAX_MODE_LF_DELTAS; i++) { if (vp9_read_bit(r)) { - int value = vp9_read_literal(r, 6); - if (vp9_read_bit(r)) - value = -value; - xd->mode_lf_deltas[i] = value; + const int value = vp9_read_literal(r, 6); + xd->mode_lf_deltas[i] = vp9_read_and_apply_sign(r, value); } } } @@ -1395,6 +1313,7 @@ static void update_frame_context(VP9D_COMP *pbi, vp9_reader *r) { vp9_copy(fc->pre_i8x8_mode_prob, fc->i8x8_mode_prob); vp9_copy(fc->pre_sub_mv_ref_prob, fc->sub_mv_ref_prob); vp9_copy(fc->pre_mbsplit_prob, fc->mbsplit_prob); + vp9_copy(fc->pre_partition_prob, fc->partition_prob); fc->pre_nmvc = fc->nmvc; vp9_zero(fc->coef_counts_4x4); @@ -1411,6 +1330,7 @@ static void update_frame_context(VP9D_COMP *pbi, vp9_reader *r) { vp9_zero(fc->mbsplit_counts); vp9_zero(fc->NMVcount); vp9_zero(fc->mv_ref_ct); + vp9_zero(fc->partition_counts); #if CONFIG_COMP_INTERINTRA_PRED fc->pre_interintra_prob = fc->interintra_prob; @@ -1602,8 +1522,6 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { setup_pred_probs(pc, &header_bc); - pc->prob_sb64_coded = vp9_read_prob(&header_bc); - pc->prob_sb32_coded = vp9_read_prob(&header_bc); xd->lossless = vp9_read_bit(&header_bc); pc->txfm_mode = xd->lossless ? ONLY_4X4 : read_txfm_mode(&header_bc); if (pc->txfm_mode == TX_MODE_SELECT) { @@ -1743,8 +1661,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { vpx_memset(xd->plane[1].qcoeff, 0, sizeof(xd->plane[1].qcoeff)); vpx_memset(xd->plane[2].qcoeff, 0, sizeof(xd->plane[2].qcoeff)); - // Read the mb_no_coeff_skip flag - pc->mb_no_coeff_skip = vp9_read_bit(&header_bc); + vp9_read_bit(&header_bc); // unused vp9_decode_mode_mvs_init(pbi, &header_bc); @@ -1799,12 +1716,6 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { } #endif - // Find the end of the coded buffer - while (residual_bc.count > CHAR_BIT && - residual_bc.count < VP9_BD_VALUE_SIZE) { - residual_bc.count -= CHAR_BIT; - residual_bc.user_buffer--; - } - *p_data_end = residual_bc.user_buffer; + *p_data_end = vp9_reader_find_end(&residual_bc); return 0; } diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index ade216a0c..09302014e 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -16,105 +16,104 @@ #include "vp9/common/vp9_common.h" -static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch, - uint8_t *dest, int stride, int width, int height) { +static void add_residual(const int16_t *diff, uint8_t *dest, int stride, + int width, int height) { int r, c; for (r = 0; r < height; r++) { for (c = 0; c < width; c++) - dest[c] = clip_pixel(diff[c] + pred[c]); + dest[c] = clip_pixel(diff[c] + dest[c]); dest += stride; diff += width; - pred += pitch; } } -void vp9_add_residual_4x4_c(const int16_t *diff, const uint8_t *pred, int pitch, - uint8_t *dest, int stride) { - add_residual(diff, pred, pitch, dest, stride, 4, 4); +void vp9_add_residual_4x4_c(const int16_t *diff, uint8_t *dest, int stride) { + add_residual(diff, dest, stride, 4, 4); } -void vp9_add_residual_8x8_c(const int16_t *diff, const uint8_t *pred, int pitch, - uint8_t *dest, int stride) { - add_residual(diff, pred, pitch, dest, stride, 8, 8); +void vp9_add_residual_8x8_c(const int16_t *diff, uint8_t *dest, int stride) { + add_residual(diff, dest, stride, 8, 8); } -void vp9_add_residual_16x16_c(const int16_t *diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { - add_residual(diff, pred, pitch, dest, stride, 16, 16); +void vp9_add_residual_16x16_c(const int16_t *diff, uint8_t *dest, int stride) { + add_residual(diff, dest, stride, 16, 16); } -void vp9_add_residual_32x32_c(const int16_t *diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { - add_residual(diff, pred, pitch, dest, stride, 32, 32); +void vp9_add_residual_32x32_c(const int16_t *diff,uint8_t *dest, int stride) { + add_residual(diff, dest, stride, 32, 32); } -static void add_constant_residual(const int16_t diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride, +static void add_constant_residual(const int16_t diff, uint8_t *dest, int stride, int width, int height) { int r, c; for (r = 0; r < height; r++) { for (c = 0; c < width; c++) - dest[c] = clip_pixel(diff + pred[c]); + dest[c] = clip_pixel(diff + dest[c]); dest += stride; - pred += pitch; } } -void vp9_add_constant_residual_8x8_c(const int16_t diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { - add_constant_residual(diff, pred, pitch, dest, stride, 8, 8); +void vp9_add_constant_residual_8x8_c(const int16_t diff, uint8_t *dest, + int stride) { + add_constant_residual(diff, dest, stride, 8, 8); } -void vp9_add_constant_residual_16x16_c(const int16_t diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { - add_constant_residual(diff, pred, pitch, dest, stride, 16, 16); +void vp9_add_constant_residual_16x16_c(const int16_t diff, uint8_t *dest, + int stride) { + add_constant_residual(diff, dest, stride, 16, 16); } -void vp9_add_constant_residual_32x32_c(const int16_t diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { - add_constant_residual(diff, pred, pitch, dest, stride, 32, 32); +void vp9_add_constant_residual_32x32_c(const int16_t diff, uint8_t *dest, + int stride) { + add_constant_residual(diff, dest, stride, 32, 32); } void vp9_dequant_iht_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, - uint8_t *pred, uint8_t *dest, - int pitch, int stride, int eob) { - int i; - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16); + uint8_t *dest, int stride, int eob) { - for (i = 0; i < 16; i++) - input[i] *= dq[i]; + if (tx_type == DCT_DCT) { + vp9_dequant_idct_add(input, dq, dest, stride, eob); + } else { + int i; + DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16); - vp9_short_iht4x4(input, output, 4, tx_type); - vpx_memset(input, 0, 32); - vp9_add_residual_4x4(output, pred, pitch, dest, stride); + for (i = 0; i < 16; i++) + input[i] *= dq[i]; + + vp9_short_iht4x4(input, output, 4, tx_type); + vpx_memset(input, 0, 32); + vp9_add_residual_4x4(output, dest, stride); + } } void vp9_dequant_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, - const int16_t *dq, - uint8_t *pred, uint8_t *dest, - int pitch, int stride, int eob) { - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 64); - - if (eob > 0) { - int i; + const int16_t *dq, uint8_t *dest, + int stride, int eob) { + if (tx_type == DCT_DCT) { + vp9_dequant_idct_add_8x8(input, dq, dest, stride, eob); + } else { + if (eob > 0) { + int i; + DECLARE_ALIGNED_ARRAY(16, int16_t, output, 64); - input[0] *= dq[0]; - for (i = 1; i < 64; i++) - input[i] *= dq[1]; + input[0] *= dq[0]; + for (i = 1; i < 64; i++) + input[i] *= dq[1]; - vp9_short_iht8x8(input, output, 8, tx_type); - vpx_memset(input, 0, 128); - vp9_add_residual_8x8(output, pred, pitch, dest, stride); + vp9_short_iht8x8(input, output, 8, tx_type); + vpx_memset(input, 0, 128); + vp9_add_residual_8x8(output, dest, stride); + } } } -void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred, - uint8_t *dest, int pitch, int stride, int eob) { +void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *dest, + int stride, int eob) { int i; DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16); @@ -125,15 +124,15 @@ void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred, // the idct halves ( >> 1) the pitch vp9_short_idct4x4(input, output, 4 << 1); vpx_memset(input, 0, 32); - vp9_add_residual_4x4(output, pred, pitch, dest, stride); + vp9_add_residual_4x4(output, dest, stride); } else { - vp9_dc_only_idct_add(input[0]*dq[0], pred, dest, pitch, stride); + vp9_dc_only_idct_add(input[0]*dq[0], dest, dest, stride, stride); ((int *)input)[0] = 0; } } -void vp9_dequant_dc_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred, - uint8_t *dest, int pitch, int stride, int dc) { +void vp9_dequant_dc_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *dest, + int stride, int dc) { int i; DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16); @@ -145,12 +144,11 @@ void vp9_dequant_dc_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred, // the idct halves ( >> 1) the pitch vp9_short_idct4x4(input, output, 4 << 1); vpx_memset(input, 0, 32); - vp9_add_residual_4x4(output, pred, pitch, dest, stride); + vp9_add_residual_4x4(output, dest, stride); } void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq, - uint8_t *pred, uint8_t *dest, - int pitch, int stride, int eob) { + uint8_t *dest, int stride, int eob) { int i; DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16); @@ -160,17 +158,15 @@ void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq, vp9_short_iwalsh4x4_c(input, output, 4 << 1); vpx_memset(input, 0, 32); - vp9_add_residual_4x4(output, pred, pitch, dest, stride); + vp9_add_residual_4x4(output, dest, stride); } else { - vp9_dc_only_inv_walsh_add(input[0]*dq[0], pred, dest, pitch, stride); + vp9_dc_only_inv_walsh_add(input[0]*dq[0], dest, dest, stride, stride); ((int *)input)[0] = 0; } } void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq, - uint8_t *pred, - uint8_t *dest, - int pitch, int stride, int dc) { + uint8_t *dest, int stride, int dc) { int i; DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16); @@ -181,12 +177,11 @@ void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq, vp9_short_iwalsh4x4_c(input, output, 4 << 1); vpx_memset(input, 0, 32); - vp9_add_residual_4x4(output, pred, pitch, dest, stride); + vp9_add_residual_4x4(output, dest, stride); } void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq, - uint8_t *pred, uint8_t *dest, int pitch, - int stride, int eob) { + uint8_t *dest, int stride, int eob) { DECLARE_ALIGNED_ARRAY(16, int16_t, output, 64); // If dc is 1, then input[0] is the reconstructed value, do not need @@ -208,7 +203,7 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq, vp9_short_idct1_8x8_c(&in, &out); input[0] = 0; - vp9_add_constant_residual_8x8(out, pred, pitch, dest, stride); + vp9_add_constant_residual_8x8(out, dest, stride); #if !CONFIG_SCATTERSCAN } else if (eob <= 10) { input[1] *= dq[1]; @@ -228,7 +223,7 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq, input[16] = input[17] = 0; input[24] = 0; - vp9_add_residual_8x8(output, pred, pitch, dest, stride); + vp9_add_residual_8x8(output, dest, stride); #endif } else { int i; @@ -240,41 +235,36 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq, // the idct halves ( >> 1) the pitch vp9_short_idct8x8(input, output, 8 << 1); vpx_memset(input, 0, 128); - vp9_add_residual_8x8(output, pred, pitch, dest, stride); + vp9_add_residual_8x8(output, dest, stride); } } } void vp9_dequant_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, - const int16_t *dq, uint8_t *pred, - uint8_t *dest, int pitch, int stride, + const int16_t *dq, + uint8_t *dest, int stride, int eob) { - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256); - - if (eob > 0) { - int i; - - input[0] *= dq[0]; - - // recover quantizer for 4 4x4 blocks - for (i = 1; i < 256; i++) - input[i] *= dq[1]; - - // inverse hybrid transform - vp9_short_iht16x16(input, output, 16, tx_type); + if (tx_type == DCT_DCT) { + vp9_dequant_idct_add_16x16(input, dq, dest, stride, eob); + } else { + DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256); - // the idct halves ( >> 1) the pitch - // vp9_short_idct16x16(input, output, 32); + if (eob > 0) { + int i; - vpx_memset(input, 0, 512); + input[0] *= dq[0]; + for (i = 1; i < 256; i++) + input[i] *= dq[1]; - vp9_add_residual_16x16(output, pred, pitch, dest, stride); + vp9_short_iht16x16(input, output, 16, tx_type); + vpx_memset(input, 0, 512); + vp9_add_residual_16x16(output, dest, stride); + } } } void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq, - uint8_t *pred, uint8_t *dest, int pitch, - int stride, int eob) { + uint8_t *dest, int stride, int eob) { DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256); /* The calculation can be simplified if there are not many non-zero dct @@ -289,7 +279,7 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq, vp9_short_idct1_16x16_c(&in, &out); input[0] = 0; - vp9_add_constant_residual_16x16(out, pred, pitch, dest, stride); + vp9_add_constant_residual_16x16(out, dest, stride); #if !CONFIG_SCATTERSCAN } else if (eob <= 10) { input[0] *= dq[0]; @@ -312,7 +302,7 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq, input[32] = input[33] = 0; input[48] = 0; - vp9_add_residual_16x16(output, pred, pitch, dest, stride); + vp9_add_residual_16x16(output, dest, stride); #endif } else { int i; @@ -326,21 +316,20 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq, // the idct halves ( >> 1) the pitch vp9_short_idct16x16(input, output, 16 << 1); vpx_memset(input, 0, 512); - vp9_add_residual_16x16(output, pred, pitch, dest, stride); + vp9_add_residual_16x16(output, dest, stride); } } } void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq, - uint8_t *pred, uint8_t *dest, int pitch, - int stride, int eob) { + uint8_t *dest, int stride, int eob) { DECLARE_ALIGNED_ARRAY(16, int16_t, output, 1024); if (eob) { input[0] = input[0] * dq[0] / 2; if (eob == 1) { vp9_short_idct1_32x32(input, output); - vp9_add_constant_residual_32x32(output[0], pred, pitch, dest, stride); + vp9_add_constant_residual_32x32(output[0], dest, stride); input[0] = 0; #if !CONFIG_SCATTERSCAN } else if (eob <= 10) { @@ -362,7 +351,7 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq, input[64] = input[65] = 0; input[96] = 0; - vp9_add_residual_32x32(output, pred, pitch, dest, stride); + vp9_add_residual_32x32(output, dest, stride); #endif } else { int i; @@ -370,7 +359,7 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq, input[i] = input[i] * dq[1] / 2; vp9_short_idct32x32(input, output, 64); vpx_memset(input, 0, 2048); - vp9_add_residual_32x32(output, pred, pitch, dest, stride); + vp9_add_residual_32x32(output, dest, stride); } } } diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h index 8b53dd9cb..a635a3b17 100644 --- a/vp9/decoder/vp9_dequantize.h +++ b/vp9/decoder/vp9_dequantize.h @@ -16,14 +16,11 @@ void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq, - unsigned char *pred, - unsigned char *output, - int pitch, int stride, int eob); + unsigned char *dest, int stride, int eob); void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq, - unsigned char *pred, - unsigned char *output, - int pitch, int stride, int dc); + unsigned char *output, int stride, + int dc); void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, @@ -33,30 +30,23 @@ void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dc); void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, - unsigned char *pre, int pre_stride, - unsigned char *dst, - int stride, + unsigned char *dst, int stride, struct macroblockd *xd); void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, - unsigned char *pre, - int pre_stride, unsigned char *dst, int stride, uint16_t *eobs); void vp9_dequant_iht_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, - unsigned char *pred, unsigned char *dest, - int pitch, int stride, int eob); + unsigned char *dest, int stride, int eob); void vp9_dequant_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, - const int16_t *dq, unsigned char *pred, - unsigned char *dest, int pitch, int stride, - int eob); + const int16_t *dq, unsigned char *dest, + int stride, int eob); void vp9_dequant_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, - const int16_t *dq, unsigned char *pred, - unsigned char *dest, - int pitch, int stride, int eob); + const int16_t *dq, unsigned char *dest, + int stride, int eob); #endif // VP9_DECODER_VP9_DEQUANTIZE_H_ diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 3df841b88..acb3710e4 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -60,11 +60,6 @@ static const vp9_prob cat6_prob[15] = { DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]); -static int16_t get_signed(BOOL_DECODER *br, int16_t value_to_sign) { - return decode_bool(br, 128) ? -value_to_sign : value_to_sign; -} - - #define INCREMENT_COUNT(token) \ do { \ coef_counts[type][ref][get_coef_band(scan, txfm_size, c)] \ @@ -77,7 +72,7 @@ static int16_t get_signed(BOOL_DECODER *br, int16_t value_to_sign) { #if CONFIG_CODE_NONZEROCOUNT #define WRITE_COEF_CONTINUE(val, token) \ { \ - qcoeff_ptr[scan[c]] = get_signed(br, val); \ + qcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(br, val); \ INCREMENT_COUNT(token); \ c++; \ nzc++; \ @@ -86,7 +81,7 @@ static int16_t get_signed(BOOL_DECODER *br, int16_t value_to_sign) { #else #define WRITE_COEF_CONTINUE(val, token) \ { \ - qcoeff_ptr[scan[c]] = get_signed(br, val); \ + qcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(br, val); \ INCREMENT_COUNT(token); \ c++; \ continue; \ @@ -125,9 +120,25 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) { aidx = vp9_block2above_sb64[txfm_size][block_idx]; lidx = vp9_block2left_sb64[txfm_size][block_idx]; +#if CONFIG_SBSEGMENT + } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X32) { + aidx = vp9_block2above_sb64x32[txfm_size][block_idx]; + lidx = vp9_block2left_sb64x32[txfm_size][block_idx]; + } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X64) { + aidx = vp9_block2above_sb32x64[txfm_size][block_idx]; + lidx = vp9_block2left_sb32x64[txfm_size][block_idx]; +#endif } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) { aidx = vp9_block2above_sb[txfm_size][block_idx]; lidx = vp9_block2left_sb[txfm_size][block_idx]; +#if CONFIG_SBSEGMENT + } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X16) { + aidx = vp9_block2above_sb32x16[txfm_size][block_idx]; + lidx = vp9_block2left_sb32x16[txfm_size][block_idx]; + } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB16X32) { + aidx = vp9_block2above_sb16x32[txfm_size][block_idx]; + lidx = vp9_block2left_sb16x32[txfm_size][block_idx]; +#endif } else { aidx = vp9_block2above[txfm_size][block_idx]; lidx = vp9_block2left[txfm_size][block_idx]; diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c index 0e3560189..7dd503baa 100644 --- a/vp9/decoder/vp9_idct_blk.c +++ b/vp9/decoder/vp9_idct_blk.c @@ -13,101 +13,78 @@ #include "vp9/decoder/vp9_dequantize.h" void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq, - uint8_t *pre, int pre_stride, - uint8_t *dst, - int stride, MACROBLOCKD *xd) { + uint8_t *dst, int stride, MACROBLOCKD *xd) { int i, j; for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { - vp9_dequant_idct_add(q, dq, pre, dst, pre_stride, stride, - xd->plane[0].eobs[i * 4 + j]); + vp9_dequant_idct_add(q, dq, dst, stride, xd->plane[0].eobs[i * 4 + j]); q += 16; - pre += 4; dst += 4; } - pre += 4 * pre_stride - 16; dst += 4 * stride - 16; } } void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq, - uint8_t *pre, int pre_stride, uint8_t *dst, - int stride, uint16_t *eobs) { + uint8_t *dst, int stride, uint16_t *eobs) { int i, j; for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - vp9_dequant_idct_add(q, dq, pre, dst, pre_stride, stride, - eobs[i * 2 + j]); + vp9_dequant_idct_add(q, dq, dst, stride, eobs[i * 2 + j]); q += 16; - pre += 4; dst += 4; } - pre += 4 * pre_stride - 8; dst += 4 * stride - 8; } } void vp9_dequant_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq, - uint8_t *pre, int pre_stride, - uint8_t *dst, - int stride, MACROBLOCKD *xd) { + uint8_t *dst, int stride, + MACROBLOCKD *xd) { uint8_t *origdest = dst; - uint8_t *origpred = pre; - vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, pre_stride, stride, - xd->plane[0].eobs[0]); - vp9_dequant_idct_add_8x8_c(&q[64], dq, origpred + 8, - origdest + 8, pre_stride, stride, + vp9_dequant_idct_add_8x8_c(q, dq, dst, stride, xd->plane[0].eobs[0]); + vp9_dequant_idct_add_8x8_c(&q[64], dq, origdest + 8, stride, xd->plane[0].eobs[4]); - vp9_dequant_idct_add_8x8_c(&q[128], dq, origpred + 8 * pre_stride, - origdest + 8 * stride, pre_stride, stride, + vp9_dequant_idct_add_8x8_c(&q[128], dq, origdest + 8 * stride, stride, xd->plane[0].eobs[8]); - vp9_dequant_idct_add_8x8_c(&q[192], dq, origpred + 8 * pre_stride + 8, - origdest + 8 * stride + 8, pre_stride, stride, + vp9_dequant_idct_add_8x8_c(&q[192], dq, origdest + 8 * stride + 8, stride, xd->plane[0].eobs[12]); } void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, - uint8_t *pre, int pre_stride, - uint8_t *dst, - int stride, MACROBLOCKD *xd) { + uint8_t *dst, int stride, + MACROBLOCKD *xd) { int i, j; for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { - vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, pre_stride, stride, + vp9_dequant_idct_add_lossless_c(q, dq, dst, stride, xd->plane[0].eobs[i * 4 + j]); q += 16; - pre += 4; dst += 4; } - pre += 4 * pre_stride - 16; dst += 4 * stride - 16; } } void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, - uint8_t *pre, int pre_stride, - uint8_t *dst, - int stride, + uint8_t *dst, int stride, uint16_t *eobs) { int i, j; for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, pre_stride, stride, - eobs[i * 2 + j]); + vp9_dequant_idct_add_lossless_c(q, dq, dst, stride, eobs[i * 2 + j]); q += 16; - pre += 4; dst += 4; } - pre += 4 * pre_stride - 8; dst += 4 * stride - 8; } } diff --git a/vp9/decoder/vp9_treereader.h b/vp9/decoder/vp9_treereader.h index 4ec6de99d..c9832e11d 100644 --- a/vp9/decoder/vp9_treereader.h +++ b/vp9/decoder/vp9_treereader.h @@ -17,10 +17,8 @@ typedef BOOL_DECODER vp9_reader; -#define vp9_read decode_bool -#define vp9_read_literal decode_value -#define vp9_read_bit(r) vp9_read(r, vp9_prob_half) #define vp9_read_prob(r) ((vp9_prob)vp9_read_literal(r, 8)) +#define vp9_read_and_apply_sign(r, value) (vp9_read_bit(r) ? -(value) : (value)) // Intent of tree data structure is to make decoding trivial. static int treed_read(vp9_reader *const r, /* !!! must return a 0 or 1 !!! */ diff --git a/vp9/decoder/x86/vp9_dequantize_x86.c b/vp9/decoder/x86/vp9_dequantize_x86.c index acfae2a27..cbe818143 100644 --- a/vp9/decoder/x86/vp9_dequantize_x86.c +++ b/vp9/decoder/x86/vp9_dequantize_x86.c @@ -17,8 +17,7 @@ #if HAVE_SSE2 -void vp9_add_residual_4x4_sse2(const int16_t *diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { +void vp9_add_residual_4x4_sse2(const int16_t *diff, uint8_t *dest, int stride) { const int width = 4; const __m128i zero = _mm_setzero_si128(); @@ -29,10 +28,10 @@ void vp9_add_residual_4x4_sse2(const int16_t *diff, const uint8_t *pred, const __m128i d3 = _mm_loadl_epi64((const __m128i *)(diff + 3 * width)); // Prediction data. - __m128i p0 = _mm_cvtsi32_si128(*(const int *)(pred + 0 * pitch)); - __m128i p1 = _mm_cvtsi32_si128(*(const int *)(pred + 1 * pitch)); - __m128i p2 = _mm_cvtsi32_si128(*(const int *)(pred + 2 * pitch)); - __m128i p3 = _mm_cvtsi32_si128(*(const int *)(pred + 3 * pitch)); + __m128i p0 = _mm_cvtsi32_si128(*(const int *)(dest + 0 * stride)); + __m128i p1 = _mm_cvtsi32_si128(*(const int *)(dest + 1 * stride)); + __m128i p2 = _mm_cvtsi32_si128(*(const int *)(dest + 2 * stride)); + __m128i p3 = _mm_cvtsi32_si128(*(const int *)(dest + 3 * stride)); p0 = _mm_unpacklo_epi8(p0, zero); p1 = _mm_unpacklo_epi8(p1, zero); @@ -61,8 +60,7 @@ void vp9_add_residual_4x4_sse2(const int16_t *diff, const uint8_t *pred, *(int *)dest = _mm_cvtsi128_si32(p2); } -void vp9_add_residual_8x8_sse2(const int16_t *diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { +void vp9_add_residual_8x8_sse2(const int16_t *diff, uint8_t *dest, int stride) { const int width = 8; const __m128i zero = _mm_setzero_si128(); @@ -77,14 +75,14 @@ void vp9_add_residual_8x8_sse2(const int16_t *diff, const uint8_t *pred, const __m128i d7 = _mm_load_si128((const __m128i *)(diff + 7 * width)); // Prediction data. - __m128i p0 = _mm_loadl_epi64((const __m128i *)(pred + 0 * pitch)); - __m128i p1 = _mm_loadl_epi64((const __m128i *)(pred + 1 * pitch)); - __m128i p2 = _mm_loadl_epi64((const __m128i *)(pred + 2 * pitch)); - __m128i p3 = _mm_loadl_epi64((const __m128i *)(pred + 3 * pitch)); - __m128i p4 = _mm_loadl_epi64((const __m128i *)(pred + 4 * pitch)); - __m128i p5 = _mm_loadl_epi64((const __m128i *)(pred + 5 * pitch)); - __m128i p6 = _mm_loadl_epi64((const __m128i *)(pred + 6 * pitch)); - __m128i p7 = _mm_loadl_epi64((const __m128i *)(pred + 7 * pitch)); + __m128i p0 = _mm_loadl_epi64((const __m128i *)(dest + 0 * stride)); + __m128i p1 = _mm_loadl_epi64((const __m128i *)(dest + 1 * stride)); + __m128i p2 = _mm_loadl_epi64((const __m128i *)(dest + 2 * stride)); + __m128i p3 = _mm_loadl_epi64((const __m128i *)(dest + 3 * stride)); + __m128i p4 = _mm_loadl_epi64((const __m128i *)(dest + 4 * stride)); + __m128i p5 = _mm_loadl_epi64((const __m128i *)(dest + 5 * stride)); + __m128i p6 = _mm_loadl_epi64((const __m128i *)(dest + 6 * stride)); + __m128i p7 = _mm_loadl_epi64((const __m128i *)(dest + 7 * stride)); p0 = _mm_unpacklo_epi8(p0, zero); p1 = _mm_unpacklo_epi8(p1, zero); @@ -126,8 +124,8 @@ void vp9_add_residual_8x8_sse2(const int16_t *diff, const uint8_t *pred, _mm_storel_epi64((__m128i *)(dest + 7 * stride), p6); } -void vp9_add_residual_16x16_sse2(const int16_t *diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { +void vp9_add_residual_16x16_sse2(const int16_t *diff, uint8_t *dest, + int stride) { const int width = 16; int i = 4; const __m128i zero = _mm_setzero_si128(); @@ -147,10 +145,10 @@ void vp9_add_residual_16x16_sse2(const int16_t *diff, const uint8_t *pred, d7 = _mm_load_si128((const __m128i *)(diff + 3 * width + 8)); // Prediction data. - p1 = _mm_load_si128((const __m128i *)(pred + 0 * pitch)); - p3 = _mm_load_si128((const __m128i *)(pred + 1 * pitch)); - p5 = _mm_load_si128((const __m128i *)(pred + 2 * pitch)); - p7 = _mm_load_si128((const __m128i *)(pred + 3 * pitch)); + p1 = _mm_load_si128((const __m128i *)(dest + 0 * stride)); + p3 = _mm_load_si128((const __m128i *)(dest + 1 * stride)); + p5 = _mm_load_si128((const __m128i *)(dest + 2 * stride)); + p7 = _mm_load_si128((const __m128i *)(dest + 3 * stride)); p0 = _mm_unpacklo_epi8(p1, zero); p1 = _mm_unpackhi_epi8(p1, zero); @@ -181,13 +179,12 @@ void vp9_add_residual_16x16_sse2(const int16_t *diff, const uint8_t *pred, _mm_store_si128((__m128i *)(dest + 3 * stride), p3); diff += 4 * width; - pred += 4 * pitch; dest += 4 * stride; } while (--i); } -void vp9_add_residual_32x32_sse2(const int16_t *diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { +void vp9_add_residual_32x32_sse2(const int16_t *diff, uint8_t *dest, + int stride) { const int width = 32; int i = 16; const __m128i zero = _mm_setzero_si128(); @@ -207,10 +204,10 @@ void vp9_add_residual_32x32_sse2(const int16_t *diff, const uint8_t *pred, d7 = _mm_load_si128((const __m128i *)(diff + 1 * width + 24)); // Prediction data. - p1 = _mm_load_si128((const __m128i *)(pred + 0 * pitch)); - p3 = _mm_load_si128((const __m128i *)(pred + 0 * pitch + 16)); - p5 = _mm_load_si128((const __m128i *)(pred + 1 * pitch)); - p7 = _mm_load_si128((const __m128i *)(pred + 1 * pitch + 16)); + p1 = _mm_load_si128((const __m128i *)(dest + 0 * stride)); + p3 = _mm_load_si128((const __m128i *)(dest + 0 * stride + 16)); + p5 = _mm_load_si128((const __m128i *)(dest + 1 * stride)); + p7 = _mm_load_si128((const __m128i *)(dest + 1 * stride + 16)); p0 = _mm_unpacklo_epi8(p1, zero); p1 = _mm_unpackhi_epi8(p1, zero); @@ -241,25 +238,24 @@ void vp9_add_residual_32x32_sse2(const int16_t *diff, const uint8_t *pred, _mm_store_si128((__m128i *)(dest + 1 * stride + 16), p3); diff += 2 * width; - pred += 2 * pitch; dest += 2 * stride; } while (--i); } -void vp9_add_constant_residual_8x8_sse2(const int16_t diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { +void vp9_add_constant_residual_8x8_sse2(const int16_t diff, uint8_t *dest, + int stride) { uint8_t abs_diff; __m128i d; // Prediction data. - __m128i p0 = _mm_loadl_epi64((const __m128i *)(pred + 0 * pitch)); - __m128i p1 = _mm_loadl_epi64((const __m128i *)(pred + 1 * pitch)); - __m128i p2 = _mm_loadl_epi64((const __m128i *)(pred + 2 * pitch)); - __m128i p3 = _mm_loadl_epi64((const __m128i *)(pred + 3 * pitch)); - __m128i p4 = _mm_loadl_epi64((const __m128i *)(pred + 4 * pitch)); - __m128i p5 = _mm_loadl_epi64((const __m128i *)(pred + 5 * pitch)); - __m128i p6 = _mm_loadl_epi64((const __m128i *)(pred + 6 * pitch)); - __m128i p7 = _mm_loadl_epi64((const __m128i *)(pred + 7 * pitch)); + __m128i p0 = _mm_loadl_epi64((const __m128i *)(dest + 0 * stride)); + __m128i p1 = _mm_loadl_epi64((const __m128i *)(dest + 1 * stride)); + __m128i p2 = _mm_loadl_epi64((const __m128i *)(dest + 2 * stride)); + __m128i p3 = _mm_loadl_epi64((const __m128i *)(dest + 3 * stride)); + __m128i p4 = _mm_loadl_epi64((const __m128i *)(dest + 4 * stride)); + __m128i p5 = _mm_loadl_epi64((const __m128i *)(dest + 5 * stride)); + __m128i p6 = _mm_loadl_epi64((const __m128i *)(dest + 6 * stride)); + __m128i p7 = _mm_loadl_epi64((const __m128i *)(dest + 7 * stride)); p0 = _mm_unpacklo_epi64(p0, p1); p2 = _mm_unpacklo_epi64(p2, p3); @@ -303,29 +299,28 @@ void vp9_add_constant_residual_8x8_sse2(const int16_t diff, const uint8_t *pred, _mm_storel_epi64((__m128i *)(dest + 7 * stride), p6); } -void vp9_add_constant_residual_16x16_sse2(const int16_t diff, - const uint8_t *pred, int pitch, - uint8_t *dest, int stride) { +void vp9_add_constant_residual_16x16_sse2(const int16_t diff, uint8_t *dest, + int stride) { uint8_t abs_diff; __m128i d; // Prediction data. - __m128i p0 = _mm_load_si128((const __m128i *)(pred + 0 * pitch)); - __m128i p1 = _mm_load_si128((const __m128i *)(pred + 1 * pitch)); - __m128i p2 = _mm_load_si128((const __m128i *)(pred + 2 * pitch)); - __m128i p3 = _mm_load_si128((const __m128i *)(pred + 3 * pitch)); - __m128i p4 = _mm_load_si128((const __m128i *)(pred + 4 * pitch)); - __m128i p5 = _mm_load_si128((const __m128i *)(pred + 5 * pitch)); - __m128i p6 = _mm_load_si128((const __m128i *)(pred + 6 * pitch)); - __m128i p7 = _mm_load_si128((const __m128i *)(pred + 7 * pitch)); - __m128i p8 = _mm_load_si128((const __m128i *)(pred + 8 * pitch)); - __m128i p9 = _mm_load_si128((const __m128i *)(pred + 9 * pitch)); - __m128i p10 = _mm_load_si128((const __m128i *)(pred + 10 * pitch)); - __m128i p11 = _mm_load_si128((const __m128i *)(pred + 11 * pitch)); - __m128i p12 = _mm_load_si128((const __m128i *)(pred + 12 * pitch)); - __m128i p13 = _mm_load_si128((const __m128i *)(pred + 13 * pitch)); - __m128i p14 = _mm_load_si128((const __m128i *)(pred + 14 * pitch)); - __m128i p15 = _mm_load_si128((const __m128i *)(pred + 15 * pitch)); + __m128i p0 = _mm_load_si128((const __m128i *)(dest + 0 * stride)); + __m128i p1 = _mm_load_si128((const __m128i *)(dest + 1 * stride)); + __m128i p2 = _mm_load_si128((const __m128i *)(dest + 2 * stride)); + __m128i p3 = _mm_load_si128((const __m128i *)(dest + 3 * stride)); + __m128i p4 = _mm_load_si128((const __m128i *)(dest + 4 * stride)); + __m128i p5 = _mm_load_si128((const __m128i *)(dest + 5 * stride)); + __m128i p6 = _mm_load_si128((const __m128i *)(dest + 6 * stride)); + __m128i p7 = _mm_load_si128((const __m128i *)(dest + 7 * stride)); + __m128i p8 = _mm_load_si128((const __m128i *)(dest + 8 * stride)); + __m128i p9 = _mm_load_si128((const __m128i *)(dest + 9 * stride)); + __m128i p10 = _mm_load_si128((const __m128i *)(dest + 10 * stride)); + __m128i p11 = _mm_load_si128((const __m128i *)(dest + 11 * stride)); + __m128i p12 = _mm_load_si128((const __m128i *)(dest + 12 * stride)); + __m128i p13 = _mm_load_si128((const __m128i *)(dest + 13 * stride)); + __m128i p14 = _mm_load_si128((const __m128i *)(dest + 14 * stride)); + __m128i p15 = _mm_load_si128((const __m128i *)(dest + 15 * stride)); // Clip diff value to [0, 255] range. Then, do addition or subtraction // according to its sign. @@ -390,9 +385,8 @@ void vp9_add_constant_residual_16x16_sse2(const int16_t diff, _mm_store_si128((__m128i *)(dest + 15 * stride), p15); } -void vp9_add_constant_residual_32x32_sse2(const int16_t diff, - const uint8_t *pred, int pitch, - uint8_t *dest, int stride) { +void vp9_add_constant_residual_32x32_sse2(const int16_t diff, uint8_t *dest, + int stride) { uint8_t abs_diff; __m128i d; int i = 8; @@ -407,14 +401,14 @@ void vp9_add_constant_residual_32x32_sse2(const int16_t diff, do { // Prediction data. - __m128i p0 = _mm_load_si128((const __m128i *)(pred + 0 * pitch)); - __m128i p1 = _mm_load_si128((const __m128i *)(pred + 0 * pitch + 16)); - __m128i p2 = _mm_load_si128((const __m128i *)(pred + 1 * pitch)); - __m128i p3 = _mm_load_si128((const __m128i *)(pred + 1 * pitch + 16)); - __m128i p4 = _mm_load_si128((const __m128i *)(pred + 2 * pitch)); - __m128i p5 = _mm_load_si128((const __m128i *)(pred + 2 * pitch + 16)); - __m128i p6 = _mm_load_si128((const __m128i *)(pred + 3 * pitch)); - __m128i p7 = _mm_load_si128((const __m128i *)(pred + 3 * pitch + 16)); + __m128i p0 = _mm_load_si128((const __m128i *)(dest + 0 * stride)); + __m128i p1 = _mm_load_si128((const __m128i *)(dest + 0 * stride + 16)); + __m128i p2 = _mm_load_si128((const __m128i *)(dest + 1 * stride)); + __m128i p3 = _mm_load_si128((const __m128i *)(dest + 1 * stride + 16)); + __m128i p4 = _mm_load_si128((const __m128i *)(dest + 2 * stride)); + __m128i p5 = _mm_load_si128((const __m128i *)(dest + 2 * stride + 16)); + __m128i p6 = _mm_load_si128((const __m128i *)(dest + 3 * stride)); + __m128i p7 = _mm_load_si128((const __m128i *)(dest + 3 * stride + 16)); // Clip diff value to [0, 255] range. Then, do addition or subtraction // according to its sign. @@ -448,7 +442,6 @@ void vp9_add_constant_residual_32x32_sse2(const int16_t diff, _mm_store_si128((__m128i *)(dest + 3 * stride), p6); _mm_store_si128((__m128i *)(dest + 3 * stride + 16), p7); - pred += 4 * pitch; dest += 4 * stride; } while (--i); } diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 8da17a6e2..8a644d556 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -78,6 +78,18 @@ int count_mb_seg[4] = { 0, 0, 0, 0 }; #define SEARCH_NEWP static int update_bits[255]; +static INLINE void write_le16(uint8_t *p, int value) { + p[0] = value; + p[1] = value >> 8; +} + +static INLINE void write_le32(uint8_t *p, int value) { + p[0] = value; + p[1] = value >> 8; + p[2] = value >> 16; + p[3] = value >> 24; +} + static void compute_update_table() { int i; for (i = 0; i < 255; i++) @@ -118,7 +130,7 @@ static int prob_diff_update_cost(vp9_prob newp, vp9_prob oldp) { static void update_mode( vp9_writer *const bc, int n, - vp9_token tok [/* n */], + const struct vp9_token tok[/* n */], vp9_tree tree, vp9_prob Pnew [/* n-1 */], vp9_prob Pcur [/* n-1 */], @@ -191,7 +203,7 @@ static void update_switchable_interp_probs(VP9_COMP *cpi, for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i) { if (pc->fc.switchable_interp_prob[j][i] < 1) pc->fc.switchable_interp_prob[j][i] = 1; - vp9_write_literal(bc, pc->fc.switchable_interp_prob[j][i], 8); + vp9_write_prob(bc, pc->fc.switchable_interp_prob[j][i]); } } } @@ -444,7 +456,7 @@ static void vp9_cond_prob_update(vp9_writer *bc, vp9_prob *oldp, vp9_prob upd, savings = prob_update_savings(ct, *oldp, newp, upd); if (savings > 0) { vp9_write(bc, 1, upd); - vp9_write_literal(bc, newp, 8); + vp9_write_prob(bc, newp); *oldp = newp; } else { vp9_write(bc, 0, upd); @@ -458,12 +470,12 @@ static void pack_mb_tokens(vp9_writer* const bc, while (p < stop) { const int t = p->Token; - vp9_token *const a = vp9_coef_encodings + t; + const struct vp9_token *const a = vp9_coef_encodings + t; const vp9_extra_bit_struct *const b = vp9_extra_bits + t; int i = 0; const unsigned char *pp = p->context_tree; int v = a->value; - int n = a->Len; + int n = a->len; if (t == EOSB_TOKEN) { @@ -508,18 +520,6 @@ static void pack_mb_tokens(vp9_writer* const bc, *tp = p; } -static void write_partition_size(unsigned char *cx_data, int size) { - signed char csize; - - csize = size & 0xff; - *cx_data = csize; - csize = (size >> 8) & 0xff; - *(cx_data + 1) = csize; - csize = (size >> 16) & 0xff; - *(cx_data + 2) = csize; - -} - static void write_mv_ref ( vp9_writer *bc, MB_PREDICTION_MODE m, const vp9_prob *p @@ -801,9 +801,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, } } - if (!pc->mb_no_coeff_skip) { - skip_coeff = 0; - } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { + if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { skip_coeff = 1; } else { skip_coeff = m->mbmi.mb_skip_coeff; @@ -996,8 +994,8 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, (rf != INTRA_FRAME && !(mode == SPLITMV && mi->partitioning == PARTITIONING_4X4))) && pc->txfm_mode == TX_MODE_SELECT && - !((pc->mb_no_coeff_skip && skip_coeff) || - (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) { + !(skip_coeff || vp9_segfeature_active(xd, segment_id, + SEG_LVL_SKIP))) { TX_SIZE sz = mi->txfm_size; // FIXME(rbultje) code ternary symbol once all experiments are merged vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]); @@ -1024,9 +1022,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, write_mb_segid(bc, &m->mbmi, xd); } - if (!c->mb_no_coeff_skip) { - skip_coeff = 0; - } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { + if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { skip_coeff = 1; } else { skip_coeff = m->mbmi.mb_skip_coeff; @@ -1074,8 +1070,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]); if (ym <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT && - !((c->mb_no_coeff_skip && skip_coeff) || - (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) { + !(skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) { TX_SIZE sz = m->mbmi.txfm_size; // FIXME(rbultje) code ternary symbol once all experiments are merged vp9_write(bc, sz != TX_4X4, c->prob_tx[0]); @@ -1678,64 +1673,104 @@ static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, pack_mb_tokens(bc, tok, tok_end); } +static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, + TOKENEXTRA **tok, TOKENEXTRA *tok_end, + int mb_row, int mb_col, + BLOCK_SIZE_TYPE bsize) { + VP9_COMMON *const cm = &cpi->common; + const int mis = cm->mode_info_stride; + int bwl, bhl; +#if CONFIG_SBSEGMENT + int bw, bh; +#endif + int bsl = mb_width_log2(bsize), bs = (1 << bsl) / 2; + int n; + PARTITION_TYPE partition; + BLOCK_SIZE_TYPE subsize; + + if (mb_row >= cm->mb_rows || mb_col >= cm->mb_cols) + return; + + bwl = mb_width_log2(m->mbmi.sb_type); + bhl = mb_height_log2(m->mbmi.sb_type); +#if CONFIG_SBSEGMENT + bw = 1 << bwl; + bh = 1 << bhl; +#endif + + // parse the partition type + if ((bwl == bsl) && (bhl == bsl)) + partition = PARTITION_NONE; +#if CONFIG_SBSEGMENT + else if ((bwl == bsl) && (bhl < bsl)) + partition = PARTITION_HORZ; + else if ((bwl < bsl) && (bhl == bsl)) + partition = PARTITION_VERT; +#endif + else if ((bwl < bsl) && (bhl < bsl)) + partition = PARTITION_SPLIT; + else + assert(0); + + if (bsize > BLOCK_SIZE_MB16X16) + // encode the partition information + write_token(bc, vp9_partition_tree, cm->fc.partition_prob[bsl - 1], + vp9_partition_encodings + partition); + + switch (partition) { + case PARTITION_NONE: + write_modes_b(cpi, m, bc, tok, tok_end, mb_row, mb_col); + break; +#if CONFIG_SBSEGMENT + case PARTITION_HORZ: + write_modes_b(cpi, m, bc, tok, tok_end, mb_row, mb_col); + if ((mb_row + bh) < cm->mb_rows) + write_modes_b(cpi, m + bh * mis, bc, tok, tok_end, mb_row + bh, mb_col); + break; + case PARTITION_VERT: + write_modes_b(cpi, m, bc, tok, tok_end, mb_row, mb_col); + if ((mb_col + bw) < cm->mb_cols) + write_modes_b(cpi, m + bw, bc, tok, tok_end, mb_row, mb_col + bw); + break; +#endif + case PARTITION_SPLIT: + // TODO(jingning): support recursive partitioning down to 16x16 as for + // now. need to merge in 16x8, 8x16, 8x8, and smaller partitions. + if (bsize == BLOCK_SIZE_SB64X64) + subsize = BLOCK_SIZE_SB32X32; + else if (bsize == BLOCK_SIZE_SB32X32) + subsize = BLOCK_SIZE_MB16X16; + else + assert(0); + for (n = 0; n < 4; n++) { + int j = n >> 1, i = n & 0x01; + write_modes_sb(cpi, m + j * bs * mis + i * bs, bc, tok, tok_end, + mb_row + j * bs, mb_col + i * bs, subsize); + } + break; + default: + assert(0); + } +} + static void write_modes(VP9_COMP *cpi, vp9_writer* const bc, TOKENEXTRA **tok, TOKENEXTRA *tok_end) { VP9_COMMON *const c = &cpi->common; const int mis = c->mode_info_stride; MODE_INFO *m, *m_ptr = c->mi; - int i, mb_row, mb_col; + int mb_row, mb_col; m_ptr += c->cur_tile_mb_col_start + c->cur_tile_mb_row_start * mis; for (mb_row = c->cur_tile_mb_row_start; mb_row < c->cur_tile_mb_row_end; mb_row += 4, m_ptr += 4 * mis) { m = m_ptr; for (mb_col = c->cur_tile_mb_col_start; - mb_col < c->cur_tile_mb_col_end; mb_col += 4, m += 4) { - vp9_write(bc, m->mbmi.sb_type == BLOCK_SIZE_SB64X64, c->prob_sb64_coded); - if (m->mbmi.sb_type == BLOCK_SIZE_SB64X64) { - write_modes_b(cpi, m, bc, tok, tok_end, mb_row, mb_col); - } else { - int j; - - for (j = 0; j < 4; j++) { - const int x_idx_sb = (j & 1) << 1, y_idx_sb = j & 2; - MODE_INFO *sb_m = m + y_idx_sb * mis + x_idx_sb; - - if (mb_col + x_idx_sb >= c->mb_cols || - mb_row + y_idx_sb >= c->mb_rows) - continue; - - vp9_write(bc, sb_m->mbmi.sb_type == BLOCK_SIZE_SB32X32, - c->prob_sb32_coded); - if (sb_m->mbmi.sb_type) { - assert(sb_m->mbmi.sb_type == BLOCK_SIZE_SB32X32); - write_modes_b(cpi, sb_m, bc, tok, tok_end, - mb_row + y_idx_sb, mb_col + x_idx_sb); - } else { - // Process the 4 MBs in the order: - // top-left, top-right, bottom-left, bottom-right - for (i = 0; i < 4; i++) { - const int x_idx = x_idx_sb + (i & 1), y_idx = y_idx_sb + (i >> 1); - MODE_INFO *mb_m = m + x_idx + y_idx * mis; - - if (mb_row + y_idx >= c->mb_rows || - mb_col + x_idx >= c->mb_cols) { - // MB lies outside frame, move on - continue; - } - - assert(mb_m->mbmi.sb_type == BLOCK_SIZE_MB16X16); - write_modes_b(cpi, mb_m, bc, tok, tok_end, - mb_row + y_idx, mb_col + x_idx); - } - } - } - } - } + mb_col < c->cur_tile_mb_col_end; mb_col += 4, m += 4) + write_modes_sb(cpi, m, bc, tok, tok_end, mb_row, mb_col, + BLOCK_SIZE_SB64X64); } } - /* This function is used for debugging probability trees. */ static void print_prob_tree(vp9_coeff_probs *coef_probs, int block_types) { /* print coef probability tree */ @@ -2358,33 +2393,19 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, extra_bytes_packed = 3; cx_data += extra_bytes_packed; } - { - int v; - - if (pc->width != pc->display_width || pc->height != pc->display_height) { - v = pc->display_width; - cx_data[0] = v; - cx_data[1] = v >> 8; - - v = pc->display_height; - cx_data[2] = v; - cx_data[3] = v >> 8; - cx_data += 4; - extra_bytes_packed += 4; - } - - v = pc->width; - cx_data[0] = v; - cx_data[1] = v >> 8; - - v = pc->height; - cx_data[2] = v; - cx_data[3] = v >> 8; - extra_bytes_packed += 4; + if (pc->width != pc->display_width || pc->height != pc->display_height) { + write_le16(cx_data, pc->display_width); + write_le16(cx_data + 2, pc->display_height); cx_data += 4; + extra_bytes_packed += 4; } + write_le16(cx_data, pc->width); + write_le16(cx_data + 2, pc->height); + extra_bytes_packed += 4; + cx_data += 4; + vp9_start_encode(&header_bc, cx_data); // TODO(jkoleszar): remove these two unused bits? @@ -2412,20 +2433,20 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, const int prob = xd->mb_segment_tree_probs[i]; if (prob != 255) { vp9_write_bit(&header_bc, 1); - vp9_write_literal(&header_bc, prob, 8); + vp9_write_prob(&header_bc, prob); } else { vp9_write_bit(&header_bc, 0); } } // Write out the chosen coding method. - vp9_write_bit(&header_bc, (pc->temporal_update) ? 1 : 0); + vp9_write_bit(&header_bc, pc->temporal_update); if (pc->temporal_update) { for (i = 0; i < PREDICTION_PROBS; i++) { const int prob = pc->segment_pred_probs[i]; if (prob != 255) { vp9_write_bit(&header_bc, 1); - vp9_write_literal(&header_bc, prob, 8); + vp9_write_prob(&header_bc, prob); } else { vp9_write_bit(&header_bc, 0); } @@ -2480,18 +2501,13 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, for (i = 0; i < PREDICTION_PROBS; i++) { if (cpi->ref_pred_probs_update[i]) { vp9_write_bit(&header_bc, 1); - vp9_write_literal(&header_bc, pc->ref_pred_probs[i], 8); + vp9_write_prob(&header_bc, pc->ref_pred_probs[i]); } else { vp9_write_bit(&header_bc, 0); } } } - pc->prob_sb64_coded = get_binary_prob(cpi->sb64_count[0], cpi->sb64_count[1]); - vp9_write_literal(&header_bc, pc->prob_sb64_coded, 8); - pc->prob_sb32_coded = get_binary_prob(cpi->sb32_count[0], cpi->sb32_count[1]); - vp9_write_literal(&header_bc, pc->prob_sb32_coded, 8); - vp9_write_bit(&header_bc, cpi->mb.e_mbd.lossless); if (cpi->mb.e_mbd.lossless) { pc->txfm_mode = ONLY_4X4; @@ -2529,9 +2545,9 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, vp9_write_bit(&header_bc, pc->txfm_mode == TX_MODE_SELECT); } if (pc->txfm_mode == TX_MODE_SELECT) { - vp9_write_literal(&header_bc, pc->prob_tx[0], 8); - vp9_write_literal(&header_bc, pc->prob_tx[1], 8); - vp9_write_literal(&header_bc, pc->prob_tx[2], 8); + vp9_write_prob(&header_bc, pc->prob_tx[0]); + vp9_write_prob(&header_bc, pc->prob_tx[1]); + vp9_write_prob(&header_bc, pc->prob_tx[2]); } } @@ -2600,8 +2616,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, } } - // signal here is multi token partition is enabled - // vp9_write_literal(&header_bc, pc->multi_token_partition, 2); + // TODO(jkoleszar): remove these unused bits vp9_write_literal(&header_bc, 0, 2); // Frame Q baseline quantizer index @@ -2729,7 +2744,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, for (j = 0; j < 4; j++) { if (new_context[i][j] != pc->fc.vp9_mode_contexts[i][j]) { vp9_write(&header_bc, 1, 252); - vp9_write_literal(&header_bc, new_context[i][j], 8); + vp9_write_prob(&header_bc, new_context[i][j]); // Only update the persistent copy if this is the "real pack" if (!cpi->dummy_packing) { @@ -2759,7 +2774,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, for (j = 0; j < MAX_MV_REF_CANDIDATES - 1; ++j) { if (new_mvref_probs[i][j] != xd->mb_mv_ref_probs[i][j]) { vp9_write(&header_bc, 1, VP9_MVREF_UPDATE_PROB); - vp9_write_literal(&header_bc, new_mvref_probs[i][j], 8); + vp9_write_prob(&header_bc, new_mvref_probs[i][j]); // Only update the persistent copy if this is the "real pack" if (!cpi->dummy_packing) { @@ -2811,13 +2826,14 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, vp9_copy(cpi->common.fc.pre_sub_mv_ref_prob, cpi->common.fc.sub_mv_ref_prob); vp9_copy(cpi->common.fc.pre_mbsplit_prob, cpi->common.fc.mbsplit_prob); vp9_copy(cpi->common.fc.pre_i8x8_mode_prob, cpi->common.fc.i8x8_mode_prob); + vp9_copy(cpi->common.fc.pre_partition_prob, cpi->common.fc.partition_prob); cpi->common.fc.pre_nmvc = cpi->common.fc.nmvc; #if CONFIG_COMP_INTERINTRA_PRED cpi->common.fc.pre_interintra_prob = cpi->common.fc.interintra_prob; #endif vp9_zero(cpi->sub_mv_ref_count); vp9_zero(cpi->mbsplit_count); - vp9_zero(cpi->common.fc.mv_ref_ct) + vp9_zero(cpi->common.fc.mv_ref_ct); update_coef_probs(cpi, &header_bc); #if CONFIG_CODE_NONZEROCOUNT @@ -2828,15 +2844,12 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, active_section = 2; #endif - // Write out the mb_no_coeff_skip flag - vp9_write_bit(&header_bc, pc->mb_no_coeff_skip); - if (pc->mb_no_coeff_skip) { - int k; + // TODO(jkoleszar): remove this unused bit + vp9_write_bit(&header_bc, 1); - vp9_update_skip_probs(cpi); - for (k = 0; k < MBSKIP_CONTEXTS; ++k) { - vp9_write_literal(&header_bc, pc->mbskip_pred_probs[k], 8); - } + vp9_update_skip_probs(cpi); + for (i = 0; i < MBSKIP_CONTEXTS; ++i) { + vp9_write_prob(&header_bc, pc->mbskip_pred_probs[i]); } if (pc->frame_type == KEY_FRAME) { @@ -2863,9 +2876,9 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, } #endif - vp9_write_literal(&header_bc, pc->prob_intra_coded, 8); - vp9_write_literal(&header_bc, pc->prob_last_coded, 8); - vp9_write_literal(&header_bc, pc->prob_gf_coded, 8); + vp9_write_prob(&header_bc, pc->prob_intra_coded); + vp9_write_prob(&header_bc, pc->prob_last_coded); + vp9_write_prob(&header_bc, pc->prob_gf_coded); { const int comp_pred_mode = cpi->common.comp_pred_mode; @@ -2879,13 +2892,21 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, for (i = 0; i < COMP_PRED_CONTEXTS; i++) { pc->prob_comppred[i] = get_binary_prob(cpi->single_pred_count[i], cpi->comp_pred_count[i]); - vp9_write_literal(&header_bc, pc->prob_comppred[i], 8); + vp9_write_prob(&header_bc, pc->prob_comppred[i]); } } } } update_mbintra_mode_probs(cpi, &header_bc); + for (i = 0; i < PARTITION_PLANES; i++) { + vp9_prob Pnew[PARTITION_TYPES - 1]; + unsigned int bct[PARTITION_TYPES - 1][2]; + update_mode(&header_bc, PARTITION_TYPES, vp9_partition_encodings, + vp9_partition_tree, Pnew, pc->fc.partition_prob[i], bct, + (unsigned int *)cpi->partition_count[i]); + } + vp9_write_nmv_probs(cpi, xd->allow_high_precision_mv, &header_bc); } @@ -2961,11 +2982,8 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, write_modes(cpi, &residual_bc, &tok[tile_col], tok_end); vp9_stop_encode(&residual_bc); if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) { - /* size of this tile */ - data_ptr[total_size + 0] = residual_bc.pos; - data_ptr[total_size + 1] = residual_bc.pos >> 8; - data_ptr[total_size + 2] = residual_bc.pos >> 16; - data_ptr[total_size + 3] = residual_bc.pos >> 24; + // size of this tile + write_le32(data_ptr + total_size, residual_bc.pos); total_size += 4; } diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 7c50756a7..f4e3c2eab 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -174,6 +174,7 @@ struct macroblock { PICK_MODE_CONTEXT sb64x32_context[2]; #endif PICK_MODE_CONTEXT sb64_context; + int partition_cost[PARTITION_PLANES][PARTITION_TYPES]; void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch); void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch); diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 6365ed9a2..ecd3e2dd3 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -37,30 +37,68 @@ static void fdct4_1d(int16_t *input, int16_t *output) { } void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) { - int16_t out[4 * 4]; - int16_t *outptr = &out[0]; - const int short_pitch = pitch >> 1; - int i, j; - int16_t temp_in[4], temp_out[4]; - - // Columns - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = input[j * short_pitch + i] << 4; - if (i == 0 && temp_in[0]) - temp_in[0] += 1; - fdct4_1d(temp_in, temp_out); - for (j = 0; j < 4; ++j) - outptr[j * 4 + i] = temp_out[j]; + // The 2D transform is done with two passes which are actually pretty + // similar. In the first one, we transform the columns and transpose + // the results. In the second one, we transform the rows. To achieve that, + // as the first pass results are transposed, we tranpose the columns (that + // is the transposed rows) and transpose the results (so that it goes back + // in normal/row positions). + const int stride = pitch >> 1; + int pass; + // We need an intermediate buffer between passes. + int16_t intermediate[4 * 4]; + int16_t *in = input; + int16_t *out = intermediate; + // Do the two transform/transpose passes + for (pass = 0; pass < 2; ++pass) { + /*canbe16*/ int input[4]; + /*canbe16*/ int step[4]; + /*needs32*/ int temp1, temp2; + int i; + for (i = 0; i < 4; ++i) { + // Load inputs. + if (0 == pass) { + input[0] = in[0 * stride] << 4; + input[1] = in[1 * stride] << 4; + input[2] = in[2 * stride] << 4; + input[3] = in[3 * stride] << 4; + if (i == 0 && input[0]) { + input[0] += 1; + } + } else { + input[0] = in[0 * 4]; + input[1] = in[1 * 4]; + input[2] = in[2 * 4]; + input[3] = in[3 * 4]; + } + // Transform. + step[0] = input[0] + input[3]; + step[1] = input[1] + input[2]; + step[2] = input[1] - input[2]; + step[3] = input[0] - input[3]; + temp1 = (step[0] + step[1]) * cospi_16_64; + temp2 = (step[0] - step[1]) * cospi_16_64; + out[0] = dct_const_round_shift(temp1); + out[2] = dct_const_round_shift(temp2); + temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; + temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; + out[1] = dct_const_round_shift(temp1); + out[3] = dct_const_round_shift(temp2); + // Do next column (which is a transposed row in second/horizontal pass) + in++; + out += 4; + } + // Setup in/out for next pass. + in = intermediate; + out = output; } - // Rows - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j + i * 4]; - fdct4_1d(temp_in, temp_out); - for (j = 0; j < 4; ++j) - output[j + i * 4] = (temp_out[j] + 1) >> 2; + { + int i, j; + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) + output[j + i * 4] = (output[j + i * 4] + 1) >> 2; + } } } diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index b4ba8dc1f..6f0e8c7f4 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -386,7 +386,7 @@ static unsigned int pick_best_mv_ref(MACROBLOCK *x, int i; int best_index = 0; int cost, cost2; - int zero_seen = (mv_ref_list[0].as_int) ? FALSE : TRUE; + int zero_seen = (mv_ref_list[0].as_int) ? 0 : 1; MACROBLOCKD *xd = &x->e_mbd; int max_mv = MV_MAX; @@ -401,7 +401,7 @@ static unsigned int pick_best_mv_ref(MACROBLOCK *x, if (zero_seen) break; else - zero_seen = TRUE; + zero_seen = 1; } // Check for cases where the reference choice would give rise to an @@ -853,29 +853,68 @@ static void encode_sb(VP9_COMP *cpi, int mb_row, int mb_col, int output_enabled, - TOKENEXTRA **tp, int is_sb) { + TOKENEXTRA **tp, BLOCK_SIZE_TYPE is_sb) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; + BLOCK_SIZE_TYPE bsize = BLOCK_SIZE_SB32X32; - cpi->sb32_count[is_sb]++; - if (is_sb) { - set_offsets(cpi, mb_row, mb_col, BLOCK_SIZE_SB32X32); + if (is_sb == BLOCK_SIZE_SB32X32) { + set_offsets(cpi, mb_row, mb_col, bsize); update_state(cpi, &x->sb32_context[xd->sb_index], - BLOCK_SIZE_SB32X32, output_enabled); + bsize, output_enabled); encode_superblock(cpi, tp, - output_enabled, mb_row, mb_col, BLOCK_SIZE_SB32X32); + output_enabled, mb_row, mb_col, bsize); if (output_enabled) { update_stats(cpi, mb_row, mb_col); - } + cpi->partition_count[partition_plane(bsize)][PARTITION_NONE]++; - if (output_enabled) { (*tp)->Token = EOSB_TOKEN; (*tp)++; } +#if CONFIG_SBSEGMENT + } else if (is_sb == BLOCK_SIZE_SB16X32) { + int i; + + if (output_enabled) + cpi->partition_count[partition_plane(bsize)][PARTITION_VERT]++; + for (i = 0; i < 2 && mb_col + i != cm->mb_cols; i++) { + set_offsets(cpi, mb_row, mb_col + i, BLOCK_SIZE_SB16X32); + update_state(cpi, &x->sb16x32_context[xd->sb_index][i], + BLOCK_SIZE_SB16X32, output_enabled); + encode_superblock(cpi, tp, + output_enabled, mb_row, mb_col + i, BLOCK_SIZE_SB16X32); + if (output_enabled) { + update_stats(cpi, mb_row, mb_col + i); + + (*tp)->Token = EOSB_TOKEN; + (*tp)++; + } + } + } else if (is_sb == BLOCK_SIZE_SB32X16) { + int i; + + if (output_enabled) + cpi->partition_count[partition_plane(bsize)][PARTITION_HORZ]++; + for (i = 0; i < 2 && mb_row + i != cm->mb_rows; i++) { + set_offsets(cpi, mb_row + i, mb_col, BLOCK_SIZE_SB32X16); + update_state(cpi, &x->sb32x16_context[xd->sb_index][i], + BLOCK_SIZE_SB32X16, output_enabled); + encode_superblock(cpi, tp, + output_enabled, mb_row + i, mb_col, BLOCK_SIZE_SB32X16); + if (output_enabled) { + update_stats(cpi, mb_row + i, mb_col); + + (*tp)->Token = EOSB_TOKEN; + (*tp)++; + } + } +#endif } else { int i; + if (output_enabled) + cpi->partition_count[partition_plane(bsize)][PARTITION_SPLIT]++; for (i = 0; i < 4; i++) { const int x_idx = i & 1, y_idx = i >> 1; @@ -897,11 +936,9 @@ static void encode_sb(VP9_COMP *cpi, output_enabled, mb_row + y_idx, mb_col + x_idx); if (output_enabled) { update_stats(cpi, mb_row + y_idx, mb_col + x_idx); - } - if (output_enabled) { (*tp)->Token = EOSB_TOKEN; - (*tp)++; + (*tp)++; } } } @@ -920,24 +957,55 @@ static void encode_sb(VP9_COMP *cpi, static void encode_sb64(VP9_COMP *cpi, int mb_row, int mb_col, - TOKENEXTRA **tp, int is_sb[4]) { + TOKENEXTRA **tp, BLOCK_SIZE_TYPE is_sb[4]) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; + BLOCK_SIZE_TYPE bsize = BLOCK_SIZE_SB64X64; - cpi->sb64_count[is_sb[0] == 2]++; - if (is_sb[0] == 2) { - set_offsets(cpi, mb_row, mb_col, BLOCK_SIZE_SB64X64); - update_state(cpi, &x->sb64_context, BLOCK_SIZE_SB64X64, 1); + if (is_sb[0] == BLOCK_SIZE_SB64X64) { + set_offsets(cpi, mb_row, mb_col, bsize); + update_state(cpi, &x->sb64_context, bsize, 1); encode_superblock(cpi, tp, - 1, mb_row, mb_col, BLOCK_SIZE_SB64X64); + 1, mb_row, mb_col, bsize); update_stats(cpi, mb_row, mb_col); (*tp)->Token = EOSB_TOKEN; (*tp)++; - } else { + cpi->partition_count[partition_plane(bsize)][PARTITION_NONE]++; +#if CONFIG_SBSEGMENT + } else if (is_sb[0] == BLOCK_SIZE_SB32X64) { int i; + cpi->partition_count[partition_plane(bsize)][PARTITION_VERT]++; + for (i = 0; i < 2 && mb_col + i * 2 != cm->mb_cols; i++) { + set_offsets(cpi, mb_row, mb_col + i * 2, BLOCK_SIZE_SB32X64); + update_state(cpi, &x->sb32x64_context[i], BLOCK_SIZE_SB32X64, 1); + encode_superblock(cpi, tp, + 1, mb_row, mb_col + i * 2, BLOCK_SIZE_SB32X64); + update_stats(cpi, mb_row, mb_col + i * 2); + + (*tp)->Token = EOSB_TOKEN; + (*tp)++; + } + } else if (is_sb[0] == BLOCK_SIZE_SB64X32) { + int i; + + cpi->partition_count[partition_plane(bsize)][PARTITION_HORZ]++; + for (i = 0; i < 2 && mb_row + i * 2 != cm->mb_rows; i++) { + set_offsets(cpi, mb_row + i * 2, mb_col, BLOCK_SIZE_SB64X32); + update_state(cpi, &x->sb64x32_context[i], BLOCK_SIZE_SB64X32, 1); + encode_superblock(cpi, tp, + 1, mb_row + i * 2, mb_col, BLOCK_SIZE_SB64X32); + update_stats(cpi, mb_row + i * 2, mb_col); + + (*tp)->Token = EOSB_TOKEN; + (*tp)++; + } +#endif + } else { + int i; + cpi->partition_count[partition_plane(bsize)][PARTITION_SPLIT]++; for (i = 0; i < 4; i++) { const int x_idx = i & 1, y_idx = i >> 1; @@ -969,9 +1037,8 @@ static void encode_sb_row(VP9_COMP *cpi, for (mb_col = cm->cur_tile_mb_col_start; mb_col < cm->cur_tile_mb_col_end; mb_col += 4) { int i; - int sb32_rate = 0, sb32_dist = 0; - int is_sb[4]; - int sb64_rate = INT_MAX, sb64_dist; + BLOCK_SIZE_TYPE sb_partitioning[4]; + int sb64_rate = 0, sb64_dist = 0; int sb64_skip = 0; ENTROPY_CONTEXT_PLANES l[4], a[4]; TOKENEXTRA *tp_orig = *tp; @@ -980,8 +1047,7 @@ static void encode_sb_row(VP9_COMP *cpi, memcpy(&l, cm->left_context, sizeof(l)); for (i = 0; i < 4; i++) { const int x_idx = (i & 1) << 1, y_idx = i & 2; - int mb_rate = 0, mb_dist = 0; - int sb_rate = INT_MAX, sb_dist; + int sb32_rate = 0, sb32_dist = 0; int splitmodes_used = 0; int sb32_skip = 0; int j; @@ -997,6 +1063,7 @@ static void encode_sb_row(VP9_COMP *cpi, vpx_memcpy(a2, cm->above_context + mb_col + x_idx, sizeof(a2)); /* Encode MBs in raster order within the SB */ + sb_partitioning[i] = BLOCK_SIZE_MB16X16; for (j = 0; j < 4; j++) { const int x_idx_m = x_idx + (j & 1), y_idx_m = y_idx + (j >> 1); int r, d; @@ -1012,8 +1079,8 @@ static void encode_sb_row(VP9_COMP *cpi, splitmodes_used += pick_mb_mode(cpi, mb_row + y_idx_m, mb_col + x_idx_m, tp, &r, &d); - mb_rate += r; - mb_dist += d; + sb32_rate += r; + sb32_dist += d; // Dummy encode, do not do the tokenization encode_macroblock(cpi, tp, 0, mb_row + y_idx_m, @@ -1024,72 +1091,234 @@ static void encode_sb_row(VP9_COMP *cpi, vpx_memcpy(cm->left_context + y_idx, l2, sizeof(l2)); vpx_memcpy(cm->above_context + mb_col + x_idx, a2, sizeof(a2)); - mb_rate += vp9_cost_bit(cm->prob_sb32_coded, 0); + sb32_rate += x->partition_cost[partition_plane(BLOCK_SIZE_SB32X32)] + [PARTITION_SPLIT]; if (cpi->sf.splitmode_breakout) { sb32_skip = splitmodes_used; sb64_skip += splitmodes_used; } +#if CONFIG_SBSEGMENT + // check 32x16 + if (mb_col + x_idx + 1 < cm->mb_cols) { + int r, d; + + xd->mb_index = 0; + pick_sb_modes(cpi, mb_row + y_idx, mb_col + x_idx, + tp, &r, &d, BLOCK_SIZE_SB32X16, + &x->sb32x16_context[xd->sb_index][xd->mb_index]); + if (mb_row + y_idx + 1 < cm->mb_rows) { + int r2, d2; + + update_state(cpi, &x->sb32x16_context[xd->sb_index][xd->mb_index], + BLOCK_SIZE_SB32X16, 0); + encode_superblock(cpi, tp, + 0, mb_row + y_idx, mb_col + x_idx, + BLOCK_SIZE_SB32X16); + xd->mb_index = 1; + pick_sb_modes(cpi, mb_row + y_idx + 1, mb_col + x_idx, + tp, &r2, &d2, BLOCK_SIZE_SB32X16, + &x->sb32x16_context[xd->sb_index][xd->mb_index]); + r += r2; + d += d2; + } + + r += x->partition_cost[partition_plane(BLOCK_SIZE_SB32X32)] + [PARTITION_HORZ]; + + /* is this better than MB coding? */ + if (RDCOST(x->rdmult, x->rddiv, r, d) < + RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) { + sb32_rate = r; + sb32_dist = d; + sb_partitioning[i] = BLOCK_SIZE_SB32X16; + } + + vpx_memcpy(cm->left_context + y_idx, l2, sizeof(l2)); + vpx_memcpy(cm->above_context + mb_col + x_idx, a2, sizeof(a2)); + } + + // check 16x32 + if (mb_row + y_idx + 1 < cm->mb_rows) { + int r, d; + + xd->mb_index = 0; + pick_sb_modes(cpi, mb_row + y_idx, mb_col + x_idx, + tp, &r, &d, BLOCK_SIZE_SB16X32, + &x->sb16x32_context[xd->sb_index][xd->mb_index]); + if (mb_col + x_idx + 1 < cm->mb_cols) { + int r2, d2; + + update_state(cpi, &x->sb16x32_context[xd->sb_index][xd->mb_index], + BLOCK_SIZE_SB16X32, 0); + encode_superblock(cpi, tp, + 0, mb_row + y_idx, mb_col + x_idx, + BLOCK_SIZE_SB16X32); + xd->mb_index = 1; + pick_sb_modes(cpi, mb_row + y_idx, mb_col + x_idx + 1, + tp, &r2, &d2, BLOCK_SIZE_SB16X32, + &x->sb16x32_context[xd->sb_index][xd->mb_index]); + r += r2; + d += d2; + } + + r += x->partition_cost[partition_plane(BLOCK_SIZE_SB32X32)] + [PARTITION_VERT]; + + /* is this better than MB coding? */ + if (RDCOST(x->rdmult, x->rddiv, r, d) < + RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) { + sb32_rate = r; + sb32_dist = d; + sb_partitioning[i] = BLOCK_SIZE_SB16X32; + } + + vpx_memcpy(cm->left_context + y_idx, l2, sizeof(l2)); + vpx_memcpy(cm->above_context + mb_col + x_idx, a2, sizeof(a2)); + } +#endif + if (!sb32_skip && !(mb_col + x_idx + 1 >= cm->mb_cols || mb_row + y_idx + 1 >= cm->mb_rows)) { + int r, d; + /* Pick a mode assuming that it applies to all 4 of the MBs in the SB */ pick_sb_modes(cpi, mb_row + y_idx, mb_col + x_idx, - tp, &sb_rate, &sb_dist, BLOCK_SIZE_SB32X32, + tp, &r, &d, BLOCK_SIZE_SB32X32, &x->sb32_context[xd->sb_index]); - sb_rate += vp9_cost_bit(cm->prob_sb32_coded, 1); + r += x->partition_cost[partition_plane(BLOCK_SIZE_SB32X32)] + [PARTITION_NONE]; + + if (RDCOST(x->rdmult, x->rddiv, r, d) < + RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) { + sb32_rate = r; + sb32_dist = d; + sb_partitioning[i] = BLOCK_SIZE_SB32X32; + } } - /* Decide whether to encode as a SB or 4xMBs */ - if (sb_rate < INT_MAX && - RDCOST(x->rdmult, x->rddiv, sb_rate, sb_dist) < - RDCOST(x->rdmult, x->rddiv, mb_rate, mb_dist)) { - is_sb[i] = 1; - sb32_rate += sb_rate; - sb32_dist += sb_dist; - } else { - is_sb[i] = 0; - sb32_rate += mb_rate; - sb32_dist += mb_dist; - - // If we used 16x16 instead of 32x32 then skip 64x64 (if enabled). - if (cpi->sf.mb16_breakout) { - ++sb64_skip; - } + // If we used 16x16 instead of 32x32 then skip 64x64 (if enabled). + if (cpi->sf.mb16_breakout && sb_partitioning[i] != BLOCK_SIZE_SB32X32) { + ++sb64_skip; } + sb64_rate += sb32_rate; + sb64_dist += sb32_dist; + /* Encode SB using best computed mode(s) */ // FIXME(rbultje): there really shouldn't be any need to encode_mb/sb // for each level that we go up, we can just keep tokens and recon // pixels of the lower level; also, inverting SB/MB order (big->small // instead of small->big) means we can use as threshold for small, which // may enable breakouts if RD is not good enough (i.e. faster) - encode_sb(cpi, mb_row + y_idx, mb_col + x_idx, 0, tp, is_sb[i]); + encode_sb(cpi, mb_row + y_idx, mb_col + x_idx, 0, tp, + sb_partitioning[i]); } memcpy(cm->above_context + mb_col, &a, sizeof(a)); memcpy(cm->left_context, &l, sizeof(l)); - sb32_rate += vp9_cost_bit(cm->prob_sb64_coded, 0); + + sb64_rate += x->partition_cost[partition_plane(BLOCK_SIZE_SB64X64)] + [PARTITION_SPLIT]; + +#if CONFIG_SBSEGMENT + // check 64x32 + if (mb_col + 3 < cm->mb_cols && !(cm->mb_rows & 1)) { + int r, d; + + xd->sb_index = 0; + pick_sb_modes(cpi, mb_row, mb_col, + tp, &r, &d, BLOCK_SIZE_SB64X32, + &x->sb64x32_context[xd->sb_index]); + if (mb_row + 2 != cm->mb_rows) { + int r2, d2; + + update_state(cpi, &x->sb64x32_context[xd->sb_index], + BLOCK_SIZE_SB64X32, 0); + encode_superblock(cpi, tp, + 0, mb_row, mb_col, BLOCK_SIZE_SB64X32); + xd->sb_index = 1; + pick_sb_modes(cpi, mb_row + 2, mb_col, + tp, &r2, &d2, BLOCK_SIZE_SB64X32, + &x->sb64x32_context[xd->sb_index]); + r += r2; + d += d2; + } + + r += x->partition_cost[partition_plane(BLOCK_SIZE_SB64X64)] + [PARTITION_HORZ]; + + /* is this better than MB coding? */ + if (RDCOST(x->rdmult, x->rddiv, r, d) < + RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) { + sb64_rate = r; + sb64_dist = d; + sb_partitioning[0] = BLOCK_SIZE_SB64X32; + } + + vpx_memcpy(cm->left_context, l, sizeof(l)); + vpx_memcpy(cm->above_context + mb_col, a, sizeof(a)); + } + + // check 32x64 + if (mb_row + 3 < cm->mb_rows && !(cm->mb_cols & 1)) { + int r, d; + + xd->sb_index = 0; + pick_sb_modes(cpi, mb_row, mb_col, + tp, &r, &d, BLOCK_SIZE_SB32X64, + &x->sb32x64_context[xd->sb_index]); + if (mb_col + 2 != cm->mb_cols) { + int r2, d2; + + update_state(cpi, &x->sb32x64_context[xd->sb_index], + BLOCK_SIZE_SB32X64, 0); + encode_superblock(cpi, tp, + 0, mb_row, mb_col, BLOCK_SIZE_SB32X64); + xd->sb_index = 1; + pick_sb_modes(cpi, mb_row, mb_col + 2, + tp, &r2, &d2, BLOCK_SIZE_SB32X64, + &x->sb32x64_context[xd->sb_index]); + r += r2; + d += d2; + } + + r += x->partition_cost[partition_plane(BLOCK_SIZE_SB64X64)] + [PARTITION_VERT]; + + /* is this better than MB coding? */ + if (RDCOST(x->rdmult, x->rddiv, r, d) < + RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) { + sb64_rate = r; + sb64_dist = d; + sb_partitioning[0] = BLOCK_SIZE_SB32X64; + } + + vpx_memcpy(cm->left_context, l, sizeof(l)); + vpx_memcpy(cm->above_context + mb_col, a, sizeof(a)); + } +#endif if (!sb64_skip && !(mb_col + 3 >= cm->mb_cols || mb_row + 3 >= cm->mb_rows)) { - pick_sb_modes(cpi, mb_row, mb_col, tp, &sb64_rate, &sb64_dist, - BLOCK_SIZE_SB64X64, &x->sb64_context); - sb64_rate += vp9_cost_bit(cm->prob_sb64_coded, 1); - } + int r, d; - /* Decide whether to encode as a SB or 4xMBs */ - if (sb64_rate < INT_MAX && - RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist) < - RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) { - is_sb[0] = 2; - *totalrate += sb64_rate; - } else { - *totalrate += sb32_rate; + pick_sb_modes(cpi, mb_row, mb_col, tp, &r, &d, + BLOCK_SIZE_SB64X64, &x->sb64_context); + r += x->partition_cost[partition_plane(BLOCK_SIZE_SB64X64)] + [PARTITION_NONE]; + + if (RDCOST(x->rdmult, x->rddiv, r, d) < + RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) { + sb64_rate = r; + sb64_dist = d; + sb_partitioning[0] = BLOCK_SIZE_SB64X64; + } } assert(tp_orig == *tp); - encode_sb64(cpi, mb_row, mb_col, tp, is_sb); + encode_sb64(cpi, mb_row, mb_col, tp, sb_partitioning); assert(tp_orig < *tp); } } @@ -1139,8 +1368,8 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { vp9_zero(cpi->mbsplit_count) vp9_zero(cpi->common.fc.mv_ref_ct) vp9_zero(cpi->sb_ymode_count) - vp9_zero(cpi->sb32_count); - vp9_zero(cpi->sb64_count); + vp9_zero(cpi->partition_count); + #if CONFIG_COMP_INTERINTRA_PRED vp9_zero(cpi->interintra_count); vp9_zero(cpi->interintra_select_count); @@ -1158,7 +1387,7 @@ static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { cpi->mb.e_mbd.inv_txm4x4 = vp9_short_iwalsh4x4; cpi->mb.optimize = 0; cpi->common.filter_level = 0; - cpi->zbin_mode_boost_enabled = FALSE; + cpi->zbin_mode_boost_enabled = 0; cpi->common.txfm_mode = ONLY_4X4; } else { cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4; @@ -1348,7 +1577,6 @@ static void reset_skip_txfm_size_sb(VP9_COMP *cpi, MODE_INFO *mi, MB_MODE_INFO *const mbmi = &mi->mbmi; if (mbmi->txfm_size > txfm_max) { - VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; const int segment_id = mbmi->segment_id; @@ -1357,8 +1585,8 @@ static void reset_skip_txfm_size_sb(VP9_COMP *cpi, MODE_INFO *mi, const int xmbs = MIN(bw, mb_cols_left); xd->mode_info_context = mi; - assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) || - (cm->mb_no_coeff_skip && get_skip_flag(mi, mis, ymbs, xmbs))); + assert(vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) || + get_skip_flag(mi, mis, ymbs, xmbs)); set_txfm_flag(mi, mis, ymbs, xmbs, txfm_max); } } @@ -1376,6 +1604,26 @@ static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) { reset_skip_txfm_size_sb(cpi, mi, mis, txfm_max, cm->mb_rows - mb_row, cm->mb_cols - mb_col, BLOCK_SIZE_SB64X64); +#if CONFIG_SBSEGMENT + } else if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X32) { + reset_skip_txfm_size_sb(cpi, mi, mis, txfm_max, + cm->mb_rows - mb_row, cm->mb_cols - mb_col, + BLOCK_SIZE_SB64X32); + if (mb_row + 2 != cm->mb_rows) + reset_skip_txfm_size_sb(cpi, mi + 2 * mis, mis, txfm_max, + cm->mb_rows - mb_row - 2, + cm->mb_cols - mb_col, + BLOCK_SIZE_SB64X32); + } else if (mi->mbmi.sb_type == BLOCK_SIZE_SB32X64) { + reset_skip_txfm_size_sb(cpi, mi, mis, txfm_max, + cm->mb_rows - mb_row, cm->mb_cols - mb_col, + BLOCK_SIZE_SB32X64); + if (mb_col + 2 != cm->mb_cols) + reset_skip_txfm_size_sb(cpi, mi + 2, mis, txfm_max, + cm->mb_rows - mb_row, + cm->mb_cols - mb_col - 2, + BLOCK_SIZE_SB32X64); +#endif } else { int i; @@ -1387,11 +1635,33 @@ static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) { mb_col + x_idx_sb >= cm->mb_cols) continue; - if (sb_mi->mbmi.sb_type) { + if (sb_mi->mbmi.sb_type == BLOCK_SIZE_SB32X32) { reset_skip_txfm_size_sb(cpi, sb_mi, mis, txfm_max, cm->mb_rows - mb_row - y_idx_sb, cm->mb_cols - mb_col - x_idx_sb, BLOCK_SIZE_SB32X32); +#if CONFIG_SBSEGMENT + } else if (sb_mi->mbmi.sb_type == BLOCK_SIZE_SB32X16) { + reset_skip_txfm_size_sb(cpi, sb_mi, mis, txfm_max, + cm->mb_rows - mb_row - y_idx_sb, + cm->mb_cols - mb_col - x_idx_sb, + BLOCK_SIZE_SB32X16); + if (mb_row + y_idx_sb + 1 != cm->mb_rows) + reset_skip_txfm_size_sb(cpi, sb_mi + mis, mis, txfm_max, + cm->mb_rows - mb_row - y_idx_sb - 1, + cm->mb_cols - mb_col - x_idx_sb, + BLOCK_SIZE_SB32X16); + } else if (sb_mi->mbmi.sb_type == BLOCK_SIZE_SB16X32) { + reset_skip_txfm_size_sb(cpi, sb_mi, mis, txfm_max, + cm->mb_rows - mb_row - y_idx_sb, + cm->mb_cols - mb_col - x_idx_sb, + BLOCK_SIZE_SB16X32); + if (mb_col + x_idx_sb + 1 != cm->mb_cols) + reset_skip_txfm_size_sb(cpi, sb_mi + 1, mis, txfm_max, + cm->mb_rows - mb_row - y_idx_sb, + cm->mb_cols - mb_col - x_idx_sb - 1, + BLOCK_SIZE_SB16X32); +#endif } else { int m; @@ -1961,10 +2231,6 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, if (!x->skip) { vp9_encode_inter16x16(cm, x, mb_row, mb_col); - // Clear mb_skip_coeff if mb_no_coeff_skip is not set - if (!cpi->common.mb_no_coeff_skip) - mbmi->mb_skip_coeff = 0; - } else { vp9_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, @@ -2046,27 +2312,20 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, } else { // FIXME(rbultje): not tile-aware (mi - 1) - int mb_skip_context = cpi->common.mb_no_coeff_skip ? - (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0; - - if (cm->mb_no_coeff_skip) { - mbmi->mb_skip_coeff = 1; - if (output_enabled) - cpi->skip_true_count[mb_skip_context]++; - vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_MB16X16); - } else { - vp9_stuff_mb(cpi, xd, t, !output_enabled); - mbmi->mb_skip_coeff = 0; - if (output_enabled) - cpi->skip_false_count[mb_skip_context]++; - } + int mb_skip_context = + (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff; + + mbmi->mb_skip_coeff = 1; + if (output_enabled) + cpi->skip_true_count[mb_skip_context]++; + vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_MB16X16); } if (output_enabled) { int segment_id = mbmi->segment_id; if (cpi->common.txfm_mode == TX_MODE_SELECT && - !((cpi->common.mb_no_coeff_skip && mbmi->mb_skip_coeff) || - (vp9_segfeature_active(&x->e_mbd, segment_id, SEG_LVL_SKIP)))) { + !(mbmi->mb_skip_coeff || + vp9_segfeature_active(&x->e_mbd, segment_id, SEG_LVL_SKIP))) { assert(mbmi->txfm_size <= TX_16X16); if (mbmi->mode != I4X4_PRED && mbmi->mode != I8X8_PRED && mbmi->mode != SPLITMV) { @@ -2090,6 +2349,8 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, } } +void __attribute__((noinline)) hi(void) { } + static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, int mb_row, int mb_col, BLOCK_SIZE_TYPE bsize) { @@ -2279,19 +2540,13 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled, bsize); } else { // FIXME(rbultje): not tile-aware (mi - 1) - int mb_skip_context = cpi->common.mb_no_coeff_skip ? - (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0; + int mb_skip_context = + (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff; xd->mode_info_context->mbmi.mb_skip_coeff = 1; - if (cm->mb_no_coeff_skip) { - if (output_enabled) - cpi->skip_true_count[mb_skip_context]++; - vp9_reset_sb_tokens_context(xd, bsize); - } else { - vp9_stuff_sb(cpi, xd, t, !output_enabled, bsize); - if (output_enabled) - cpi->skip_false_count[mb_skip_context]++; - } + if (output_enabled) + cpi->skip_true_count[mb_skip_context]++; + vp9_reset_sb_tokens_context(xd, bsize); } // copy skip flag on all mb_mode_info contexts in this SB @@ -2304,8 +2559,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, if (output_enabled) { if (cm->txfm_mode == TX_MODE_SELECT && - !((cm->mb_no_coeff_skip && mi->mbmi.mb_skip_coeff) || - (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) { + !(mi->mbmi.mb_skip_coeff || + vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) { if (bsize >= BLOCK_SIZE_SB32X32) { cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++; } else { diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c index 9431f0781..7c0b3ddeb 100644 --- a/vp9/encoder/vp9_encodemv.c +++ b/vp9/encoder/vp9_encodemv.c @@ -561,10 +561,10 @@ void vp9_encode_nmv(vp9_writer* const bc, const MV* const mv, MV_JOINT_TYPE j = vp9_get_mv_joint(*mv); write_token(bc, vp9_mv_joint_tree, mvctx->joints, vp9_mv_joint_encodings + j); - if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { + if (mv_joint_vertical(j)) { encode_nmv_component(bc, mv->row, ref->col, &mvctx->comps[0]); } - if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { + if (mv_joint_horizontal(j)) { encode_nmv_component(bc, mv->col, ref->col, &mvctx->comps[1]); } } @@ -574,10 +574,10 @@ void vp9_encode_nmv_fp(vp9_writer* const bc, const MV* const mv, int usehp) { MV_JOINT_TYPE j = vp9_get_mv_joint(*mv); usehp = usehp && vp9_use_nmv_hp(ref); - if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { + if (mv_joint_vertical(j)) { encode_nmv_component_fp(bc, mv->row, ref->row, &mvctx->comps[0], usehp); } - if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { + if (mv_joint_horizontal(j)) { encode_nmv_component_fp(bc, mv->col, ref->col, &mvctx->comps[1], usehp); } } @@ -603,59 +603,33 @@ void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x, if (mbmi->mode == SPLITMV) { int i; - - for (i = 0; i < x->partition_info->count; i++) { - if (x->partition_info->bmi[i].mode == NEW4X4) { - if (x->e_mbd.allow_high_precision_mv) { - mv.row = (x->partition_info->bmi[i].mv.as_mv.row - - best_ref_mv->as_mv.row); - mv.col = (x->partition_info->bmi[i].mv.as_mv.col - - best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 1); - if (x->e_mbd.mode_info_context->mbmi.second_ref_frame > 0) { - mv.row = (x->partition_info->bmi[i].second_mv.as_mv.row - - second_best_ref_mv->as_mv.row); - mv.col = (x->partition_info->bmi[i].second_mv.as_mv.col - - second_best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, - &cpi->NMVcount, 1); - } - } else { - mv.row = (x->partition_info->bmi[i].mv.as_mv.row - - best_ref_mv->as_mv.row); - mv.col = (x->partition_info->bmi[i].mv.as_mv.col - - best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 0); - if (x->e_mbd.mode_info_context->mbmi.second_ref_frame > 0) { - mv.row = (x->partition_info->bmi[i].second_mv.as_mv.row - - second_best_ref_mv->as_mv.row); - mv.col = (x->partition_info->bmi[i].second_mv.as_mv.col - - second_best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, - &cpi->NMVcount, 0); - } + PARTITION_INFO *pi = x->partition_info; + for (i = 0; i < pi->count; i++) { + if (pi->bmi[i].mode == NEW4X4) { + mv.row = (pi->bmi[i].mv.as_mv.row - best_ref_mv->as_mv.row); + mv.col = (pi->bmi[i].mv.as_mv.col - best_ref_mv->as_mv.col); + vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, + x->e_mbd.allow_high_precision_mv); + if (x->e_mbd.mode_info_context->mbmi.second_ref_frame > 0) { + mv.row = pi->bmi[i].second_mv.as_mv.row - + second_best_ref_mv->as_mv.row; + mv.col = pi->bmi[i].second_mv.as_mv.col - + second_best_ref_mv->as_mv.col; + vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, + x->e_mbd.allow_high_precision_mv); } } } } else if (mbmi->mode == NEWMV) { - if (x->e_mbd.allow_high_precision_mv) { - mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row); - mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 1); - if (mbmi->second_ref_frame > 0) { - mv.row = (mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row); - mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 1); - } - } else { - mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row); - mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 0); - if (mbmi->second_ref_frame > 0) { - mv.row = (mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row); - mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 0); - } + mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row); + mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col); + vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, + x->e_mbd.allow_high_precision_mv); + if (mbmi->second_ref_frame > 0) { + mv.row = (mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row); + mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col); + vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, + x->e_mbd.allow_high_precision_mv); } } } diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index d333e8e31..04ef55513 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -1219,7 +1219,7 @@ static int detect_transition_to_still( int still_interval, double loop_decay_rate, double last_decay_rate) { - int trans_to_still = FALSE; + int trans_to_still = 0; // Break clause to detect very still sections after motion // For example a static image after a fade or other transition @@ -1248,7 +1248,7 @@ static int detect_transition_to_still( // Only if it does do we signal a transition to still if (j == still_interval) - trans_to_still = TRUE; + trans_to_still = 1; } return trans_to_still; @@ -1260,7 +1260,7 @@ static int detect_transition_to_still( static int detect_flash(VP9_COMP *cpi, int offset) { FIRSTPASS_STATS next_frame; - int flash_detected = FALSE; + int flash_detected = 0; // Read the frame data. // The return is FALSE (no flash detected) if not a valid frame @@ -1272,7 +1272,7 @@ static int detect_flash(VP9_COMP *cpi, int offset) { // comapred to pcnt_inter. if ((next_frame.pcnt_second_ref > next_frame.pcnt_inter) && (next_frame.pcnt_second_ref >= 0.5)) { - flash_detected = TRUE; + flash_detected = 1; } } @@ -1372,7 +1372,7 @@ static int calc_arf_boost( double mv_in_out_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0; int arf_boost; - int flash_detected = FALSE; + int flash_detected = 0; // Search forward from the proposed arf/next gf position for (i = 0; i < f_frames; i++) { @@ -1541,12 +1541,12 @@ void define_fixed_arf_period(VP9_COMP *cpi) { if (cpi->twopass.frames_to_key <= (FIXED_ARF_GROUP_SIZE + 8)) { // Setup a GF group close to the keyframe. - cpi->source_alt_ref_pending = FALSE; + cpi->source_alt_ref_pending = 0; cpi->baseline_gf_interval = cpi->twopass.frames_to_key; schedule_frames(cpi, 0, (cpi->baseline_gf_interval - 1), 2, 0, 0); } else { // Setup a fixed period ARF group. - cpi->source_alt_ref_pending = TRUE; + cpi->source_alt_ref_pending = 1; cpi->baseline_gf_interval = FIXED_ARF_GROUP_SIZE; schedule_frames(cpi, 0, -(cpi->baseline_gf_interval - 1), 2, 1, 0); } @@ -1691,7 +1691,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // (for example a static image after a fade or other transition). if (detect_transition_to_still(cpi, i, 5, loop_decay_rate, last_loop_decay_rate)) { - allow_alt_ref = FALSE; + allow_alt_ref = 0; break; } } @@ -1769,7 +1769,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { (boost_score > 100)) { // Alternative boost calculation for alt ref cpi->gfu_boost = calc_arf_boost(cpi, 0, (i - 1), (i - 1), &f_boost, &b_boost); - cpi->source_alt_ref_pending = TRUE; + cpi->source_alt_ref_pending = 1; #if CONFIG_MULTIPLE_ARF // Set the ARF schedule. @@ -1779,7 +1779,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { #endif } else { cpi->gfu_boost = (int)boost_score; - cpi->source_alt_ref_pending = FALSE; + cpi->source_alt_ref_pending = 0; #if CONFIG_MULTIPLE_ARF // Set the GF schedule. if (cpi->multi_arf_enabled) { @@ -2257,7 +2257,7 @@ static int test_candidate_kf(VP9_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTPASS_STATS *this_frame, FIRSTPASS_STATS *next_frame) { - int is_viable_kf = FALSE; + int is_viable_kf = 0; // Does the frame satisfy the primary criteria of a key frame // If so, then examine how well it predicts subsequent frames @@ -2329,12 +2329,12 @@ static int test_candidate_kf(VP9_COMP *cpi, // If there is tolerable prediction for at least the next 3 frames then // break out else discard this potential key frame and move on if (boost_score > 30.0 && (i > 3)) - is_viable_kf = TRUE; + is_viable_kf = 1; else { // Reset the file position reset_fpf_position(cpi, start_pos); - is_viable_kf = FALSE; + is_viable_kf = 0; } } @@ -2370,7 +2370,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { cpi->this_key_frame_forced = cpi->next_key_frame_forced; // Clear the alt ref active flag as this can never be active on a key frame - cpi->source_alt_ref_active = FALSE; + cpi->source_alt_ref_active = 0; // Kf is always a gf so clear frames till next gf counter cpi->frames_till_gf_update_due = 0; @@ -2478,9 +2478,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Reset to the start of the group reset_fpf_position(cpi, current_pos); - cpi->next_key_frame_forced = TRUE; + cpi->next_key_frame_forced = 1; } else - cpi->next_key_frame_forced = FALSE; + cpi->next_key_frame_forced = 0; // Special case for the last frame of the file if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end) { diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index e642b7487..1649ccade 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -79,9 +79,10 @@ static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvjsadcost, MV v; v.row = mv->as_mv.row - ref->as_mv.row; v.col = mv->as_mv.col - ref->as_mv.col; - return ((mvjsadcost[vp9_get_mv_joint(v)] + - mvsadcost[0][v.row] + mvsadcost[1][v.col]) * - error_per_bit + 128) >> 8; + + return ROUND_POWER_OF_TWO((mvjsadcost[vp9_get_mv_joint(v)] + + mvsadcost[0][v.row] + mvsadcost[1][v.col]) * + error_per_bit, 8); } return 0; } diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index ed21044bb..dc02f146e 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -1376,8 +1376,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cm->prob_last_coded = 128; cm->prob_gf_coded = 128; cm->prob_intra_coded = 63; - cm->prob_sb32_coded = 200; - cm->prob_sb64_coded = 200; for (i = 0; i < COMP_PRED_CONTEXTS; i++) cm->prob_comppred[i] = 128; for (i = 0; i < TX_SIZE_MAX_SB - 1; i++) @@ -1456,11 +1454,11 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->frames_since_key = 8; // Give a sensible default for the first frame. cpi->key_frame_frequency = cpi->oxcf.key_freq; - cpi->this_key_frame_forced = FALSE; - cpi->next_key_frame_forced = FALSE; + cpi->this_key_frame_forced = 0; + cpi->next_key_frame_forced = 0; - cpi->source_alt_ref_pending = FALSE; - cpi->source_alt_ref_active = FALSE; + cpi->source_alt_ref_pending = 0; + cpi->source_alt_ref_active = 0; cpi->refresh_alt_ref_frame = 0; #if CONFIG_MULTIPLE_ARF @@ -1587,6 +1585,27 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->fn_ptr[BT].sdx8f = SDX8F; \ cpi->fn_ptr[BT].sdx4df = SDX4DF; +#if CONFIG_SBSEGMENT + BFP(BLOCK_32X16, vp9_sad32x16, vp9_variance32x16, vp9_sub_pixel_variance32x16, + NULL, NULL, + NULL, NULL, NULL, + vp9_sad32x16x4d) + + BFP(BLOCK_16X32, vp9_sad16x32, vp9_variance16x32, vp9_sub_pixel_variance16x32, + NULL, NULL, + NULL, NULL, NULL, + vp9_sad16x32x4d) + + BFP(BLOCK_64X32, vp9_sad64x32, vp9_variance64x32, vp9_sub_pixel_variance64x32, + NULL, NULL, + NULL, NULL, NULL, + vp9_sad64x32x4d) + + BFP(BLOCK_32X64, vp9_sad32x64, vp9_variance32x64, vp9_sub_pixel_variance32x64, + NULL, NULL, + NULL, NULL, NULL, + vp9_sad32x64x4d) +#endif BFP(BLOCK_32X32, vp9_sad32x32, vp9_variance32x32, vp9_sub_pixel_variance32x32, vp9_variance_halfpixvar32x32_h, vp9_variance_halfpixvar32x32_v, @@ -2224,10 +2243,10 @@ static void update_alt_ref_frame_stats(VP9_COMP *cpi) { if (!cpi->multi_arf_enabled) #endif // Clear the alternate reference update pending flag. - cpi->source_alt_ref_pending = FALSE; + cpi->source_alt_ref_pending = 0; // Set the alternate reference frame active flag - cpi->source_alt_ref_active = TRUE; + cpi->source_alt_ref_active = 1; } @@ -2264,12 +2283,12 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { // If we are going to use the ALT reference for the next group of frames set a flag to say so. if (cpi->oxcf.fixed_q >= 0 && cpi->oxcf.play_alternate && !cpi->refresh_alt_ref_frame) { - cpi->source_alt_ref_pending = TRUE; + cpi->source_alt_ref_pending = 1; cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; } if (!cpi->source_alt_ref_pending) - cpi->source_alt_ref_active = FALSE; + cpi->source_alt_ref_active = 0; // Decrement count down till next gf if (cpi->frames_till_gf_update_due > 0) @@ -2388,7 +2407,7 @@ static double compute_edge_pixel_proportion(YV12_BUFFER_CONFIG *frame) { static int recode_loop_test(VP9_COMP *cpi, int high_limit, int low_limit, int q, int maxq, int minq) { - int force_recode = FALSE; + int force_recode = 0; VP9_COMMON *cm = &cpi->common; // Is frame recode allowed at all @@ -2402,19 +2421,19 @@ static int recode_loop_test(VP9_COMP *cpi, // General over and under shoot tests if (((cpi->projected_frame_size > high_limit) && (q < maxq)) || ((cpi->projected_frame_size < low_limit) && (q > minq))) { - force_recode = TRUE; + force_recode = 1; } // Special Constrained quality tests else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { // Undershoot and below auto cq level if (q > cpi->cq_target_quality && cpi->projected_frame_size < ((cpi->this_frame_target * 7) >> 3)) { - force_recode = TRUE; + force_recode = 1; } else if (q > cpi->oxcf.cq_level && cpi->projected_frame_size < cpi->min_frame_bandwidth && cpi->active_best_quality > cpi->oxcf.cq_level) { // Severe undershoot and between auto and user cq level - force_recode = TRUE; + force_recode = 1; cpi->active_best_quality = cpi->oxcf.cq_level; } } @@ -2602,7 +2621,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, int frame_over_shoot_limit; int frame_under_shoot_limit; - int loop = FALSE; + int loop = 0; int loop_count; int q_low; @@ -2610,10 +2629,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, int top_index; int bottom_index; - int active_worst_qchanged = FALSE; + int active_worst_qchanged = 0; - int overshoot_seen = FALSE; - int undershoot_seen = FALSE; + int overshoot_seen = 0; + int undershoot_seen = 0; SPEED_FEATURES *sf = &cpi->sf; #if RESET_FOREACH_FILTER @@ -2673,9 +2692,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cpi->zbin_mode_boost = 0; // if (cpi->oxcf.lossless) - cpi->zbin_mode_boost_enabled = FALSE; + cpi->zbin_mode_boost_enabled = 0; // else - // cpi->zbin_mode_boost_enabled = TRUE; + // cpi->zbin_mode_boost_enabled = 1; // Current default encoder behaviour for the altref sign bias cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = cpi->source_alt_ref_active; @@ -2707,7 +2726,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } // The alternate reference frame cannot be active for a key frame - cpi->source_alt_ref_active = FALSE; + cpi->source_alt_ref_active = 0; // Reset the RD threshold multipliers to default of * 1 (128) for (i = 0; i < MAX_MODES; i++) @@ -2964,43 +2983,41 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_set_quantizer(cpi, q); if (loop_count == 0) { + int k; // setup skip prob for costing in mode/mv decision - if (cpi->common.mb_no_coeff_skip) { - int k; - for (k = 0; k < MBSKIP_CONTEXTS; k++) - cm->mbskip_pred_probs[k] = cpi->base_skip_false_prob[q][k]; - - if (cm->frame_type != KEY_FRAME) { - if (cpi->refresh_alt_ref_frame) { - for (k = 0; k < MBSKIP_CONTEXTS; k++) { - if (cpi->last_skip_false_probs[2][k] != 0) - cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[2][k]; - } - } else if (cpi->refresh_golden_frame) { - for (k = 0; k < MBSKIP_CONTEXTS; k++) { - if (cpi->last_skip_false_probs[1][k] != 0) - cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[1][k]; - } - } else { - int k; - for (k = 0; k < MBSKIP_CONTEXTS; k++) { - if (cpi->last_skip_false_probs[0][k] != 0) - cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[0][k]; - } + for (k = 0; k < MBSKIP_CONTEXTS; k++) + cm->mbskip_pred_probs[k] = cpi->base_skip_false_prob[q][k]; + + if (cm->frame_type != KEY_FRAME) { + if (cpi->refresh_alt_ref_frame) { + for (k = 0; k < MBSKIP_CONTEXTS; k++) { + if (cpi->last_skip_false_probs[2][k] != 0) + cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[2][k]; + } + } else if (cpi->refresh_golden_frame) { + for (k = 0; k < MBSKIP_CONTEXTS; k++) { + if (cpi->last_skip_false_probs[1][k] != 0) + cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[1][k]; + } + } else { + int k; + for (k = 0; k < MBSKIP_CONTEXTS; k++) { + if (cpi->last_skip_false_probs[0][k] != 0) + cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[0][k]; } + } - // as this is for cost estimate, let's make sure it does not - // get extreme either way - { - int k; - for (k = 0; k < MBSKIP_CONTEXTS; ++k) { - cm->mbskip_pred_probs[k] = clamp(cm->mbskip_pred_probs[k], - 5, 250); + // as this is for cost estimate, let's make sure it does not + // get extreme either way + { + int k; + for (k = 0; k < MBSKIP_CONTEXTS; ++k) { + cm->mbskip_pred_probs[k] = clamp(cm->mbskip_pred_probs[k], + 5, 250); - if (cpi->is_src_frame_alt_ref) - cm->mbskip_pred_probs[k] = 1; - } + if (cpi->is_src_frame_alt_ref) + cm->mbskip_pred_probs[k] = 1; } } } @@ -3049,7 +3066,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1; - active_worst_qchanged = FALSE; + active_worst_qchanged = 0; // Special case handling for forced key frames if ((cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced) { @@ -3128,7 +3145,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } } - overshoot_seen = TRUE; + overshoot_seen = 1; } else { // Frame is too small q_high = q > q_low ? q - 1 : q_low; @@ -3161,7 +3178,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } } - undershoot_seen = TRUE; + undershoot_seen = 1; } // Clamp Q to upper and lower limits: @@ -3169,11 +3186,11 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, loop = q != last_q; } else { - loop = FALSE; + loop = 0; } if (cpi->is_src_frame_alt_ref) - loop = FALSE; + loop = 0; if (!loop && cm->frame_type != KEY_FRAME && sf->search_best_filter) { if (mcomp_filter_index < mcomp_filters) { @@ -3186,7 +3203,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, if (mcomp_filter_index < mcomp_filters) { cm->mcomp_filter_type = mcomp_filters_to_search[mcomp_filter_index]; loop_count = -1; - loop = TRUE; + loop = 1; } else { int f; int64_t best_cost = mcomp_filter_cost[0]; @@ -3199,7 +3216,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } if (mcomp_best_filter != mcomp_filters_to_search[mcomp_filters - 1]) { loop_count = -1; - loop = TRUE; + loop = 1; cm->mcomp_filter_type = mcomp_best_filter; } /* @@ -3210,8 +3227,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } #if RESET_FOREACH_FILTER if (loop) { - overshoot_seen = FALSE; - undershoot_seen = FALSE; + overshoot_seen = 0; + undershoot_seen = 0; q_low = q_low0; q_high = q_high0; q = Q0; @@ -3328,6 +3345,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_copy(cpi->common.fc.i8x8_mode_counts, cpi->i8x8_mode_count); vp9_copy(cpi->common.fc.sub_mv_ref_counts, cpi->sub_mv_ref_count); vp9_copy(cpi->common.fc.mbsplit_counts, cpi->mbsplit_count); + vp9_copy(cpi->common.fc.partition_counts, cpi->partition_count); #if CONFIG_COMP_INTERINTRA_PRED vp9_copy(cpi->common.fc.interintra_counts, cpi->interintra_count); #endif @@ -3791,7 +3809,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, #if CONFIG_MULTIPLE_ARF if (!cpi->multi_arf_enabled) #endif - cpi->source_alt_ref_pending = FALSE; // Clear Pending altf Ref flag. + cpi->source_alt_ref_pending = 0; // Clear Pending altf Ref flag. } } @@ -3804,11 +3822,11 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, #if CONFIG_MULTIPLE_ARF // Is this frame the ARF overlay. - cpi->is_src_frame_alt_ref = FALSE; + cpi->is_src_frame_alt_ref = 0; for (i = 0; i < cpi->arf_buffered; ++i) { if (cpi->source == cpi->alt_ref_source[i]) { - cpi->is_src_frame_alt_ref = TRUE; - cpi->refresh_golden_frame = TRUE; + cpi->is_src_frame_alt_ref = 1; + cpi->refresh_golden_frame = 1; break; } } diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index f3209a3f7..197ede20d 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -103,6 +103,7 @@ typedef struct { vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1]; vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1]; vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1]; + vp9_prob partition_prob[PARTITION_PLANES][PARTITION_TYPES - 1]; vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1] [VP9_SWITCHABLE_FILTERS - 1]; @@ -277,26 +278,6 @@ typedef struct { int static_segmentation; } SPEED_FEATURES; -typedef struct { - MACROBLOCK mb; - int totalrate; -} MB_ROW_COMP; - -typedef struct { - TOKENEXTRA *start; - TOKENEXTRA *stop; -} TOKENLIST; - -typedef struct { - int ithread; - void *ptr1; - void *ptr2; -} ENCODETHREAD_DATA; -typedef struct { - int ithread; - void *ptr1; -} LPFTHREAD_DATA; - enum BlockSize { BLOCK_16X8 = PARTITIONING_16X8, BLOCK_8X16 = PARTITIONING_8X16, @@ -305,6 +286,12 @@ enum BlockSize { BLOCK_16X16, BLOCK_MAX_SEGMENTS, BLOCK_32X32 = BLOCK_MAX_SEGMENTS, +#if CONFIG_SBSEGMENT + BLOCK_32X16, + BLOCK_16X32, + BLOCK_64X32, + BLOCK_32X64, +#endif BLOCK_64X64, BLOCK_MAX_SB_SEGMENTS, }; @@ -469,8 +456,6 @@ typedef struct VP9_COMP { int cq_target_quality; - int sb32_count[2]; - int sb64_count[2]; int sb_ymode_count [VP9_I32X32_MODES]; int ymode_count[VP9_YMODES]; /* intra MB type cts this frame */ int bmode_count[VP9_NKF_BINTRAMODES]; @@ -478,6 +463,7 @@ typedef struct VP9_COMP { int sub_mv_ref_count[SUBMVREF_COUNT][VP9_SUBMVREFS]; int mbsplit_count[VP9_NUMMBSPLITS]; int y_uv_mode_count[VP9_YMODES][VP9_UV_MODES]; + unsigned int partition_count[PARTITION_PLANES][PARTITION_TYPES]; #if CONFIG_COMP_INTERINTRA_PRED unsigned int interintra_count[2]; unsigned int interintra_select_count[2]; diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 83fa1dafd..1401bd64e 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -477,7 +477,7 @@ void vp9_quantize_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { void vp9_quantize_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { const int bwl = mb_width_log2(bsize); - const int bhl = mb_width_log2(bsize); + const int bhl = mb_height_log2(bsize); const int uoff = 16 << (bhl + bwl); int i; @@ -487,7 +487,7 @@ void vp9_quantize_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { void vp9_quantize_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { const int bwl = mb_width_log2(bsize); - const int bhl = mb_width_log2(bsize); + const int bhl = mb_height_log2(bsize); const int uoff = 16 << (bhl + bwl); int i; @@ -497,7 +497,7 @@ void vp9_quantize_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { void vp9_quantize_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { const int bwl = mb_width_log2(bsize); - const int bhl = mb_width_log2(bsize); + const int bhl = mb_height_log2(bsize); const int uoff = 16 << (bhl + bwl); int i; diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index fc22146c3..96d857fe7 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -141,6 +141,7 @@ void vp9_save_coding_context(VP9_COMP *cpi) { vp9_copy(cc->i8x8_mode_prob, cm->fc.i8x8_mode_prob); vp9_copy(cc->sub_mv_ref_prob, cm->fc.sub_mv_ref_prob); vp9_copy(cc->mbsplit_prob, cm->fc.mbsplit_prob); + vp9_copy(cc->partition_prob, cm->fc.partition_prob); // Stats #ifdef MODE_STATS @@ -202,6 +203,7 @@ void vp9_restore_coding_context(VP9_COMP *cpi) { vp9_copy(cm->fc.uv_mode_prob, cc->uv_mode_prob); vp9_copy(cm->fc.sub_mv_ref_prob, cc->sub_mv_ref_prob); vp9_copy(cm->fc.mbsplit_prob, cc->mbsplit_prob); + vp9_copy(cm->fc.partition_prob, cc->partition_prob); // Stats #ifdef MODE_STATS diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 1d0715499..2f29b1dc3 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -322,6 +322,11 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { fill_nzc_costs(cpi, TX_32X32); #endif + for (i = 0; i < 2; i++) + vp9_cost_tokens(cpi->mb.partition_cost[i], + cpi->common.fc.partition_prob[i], + vp9_partition_tree); + /*rough estimate for costing*/ cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4; vp9_init_mode_costs(cpi); @@ -347,50 +352,6 @@ int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) { return error; } -int vp9_uvsse(MACROBLOCK *x) { - uint8_t *uptr, *vptr; - uint8_t *upred_ptr = (*(x->block[16].base_src) + x->block[16].src); - uint8_t *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src); - int uv_stride = x->block[16].src_stride; - - unsigned int sse1 = 0; - unsigned int sse2 = 0; - int mv_row = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.row; - int mv_col = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.col; - int offset; - int pre_stride = x->e_mbd.block[16].pre_stride; - - if (mv_row < 0) - mv_row -= 1; - else - mv_row += 1; - - if (mv_col < 0) - mv_col -= 1; - else - mv_col += 1; - - mv_row /= 2; - mv_col /= 2; - - offset = (mv_row >> 3) * pre_stride + (mv_col >> 3); - uptr = x->e_mbd.pre.u_buffer + offset; - vptr = x->e_mbd.pre.v_buffer + offset; - - if ((mv_row | mv_col) & 7) { - vp9_sub_pixel_variance8x8(uptr, pre_stride, (mv_col & 7) << 1, - (mv_row & 7) << 1, upred_ptr, uv_stride, &sse2); - vp9_sub_pixel_variance8x8(vptr, pre_stride, (mv_col & 7) << 1, - (mv_row & 7) << 1, vpred_ptr, uv_stride, &sse1); - sse2 += sse1; - } else { - vp9_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2); - vp9_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1); - sse2 += sse1; - } - return sse2; -} - static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, int ib, PLANE_TYPE type, ENTROPY_CONTEXT *a, @@ -610,10 +571,10 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; - vp9_prob skip_prob = cm->mb_no_coeff_skip ? - vp9_get_pred_prob(cm, xd, PRED_MBSKIP) : 128; + vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP); int64_t rd[TX_SIZE_MAX_SB][2]; int n, m; + int s0, s1; for (n = TX_4X4; n <= max_txfm_size; n++) { r[n][1] = r[n][0]; @@ -625,25 +586,16 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, } } - if (cm->mb_no_coeff_skip) { - int s0, s1; + assert(skip_prob > 0); + s0 = vp9_cost_bit(skip_prob, 0); + s1 = vp9_cost_bit(skip_prob, 1); - assert(skip_prob > 0); - s0 = vp9_cost_bit(skip_prob, 0); - s1 = vp9_cost_bit(skip_prob, 1); - - for (n = TX_4X4; n <= max_txfm_size; n++) { - if (s[n]) { - rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]); - } else { - rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]); - rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); - } - } - } else { - for (n = TX_4X4; n <= max_txfm_size; n++) { - rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0], d[n]); - rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1], d[n]); + for (n = TX_4X4; n <= max_txfm_size; n++) { + if (s[n]) { + rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]); + } else { + rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]); + rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); } } @@ -2537,7 +2489,7 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; int_mv this_mv; int i; - int zero_seen = FALSE; + int zero_seen = 0; int best_index = 0; int best_sad = INT_MAX; int this_sad = INT_MAX; @@ -2753,7 +2705,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, int idx, MV_REFERENCE_FRAME frame_type, - int block_size, + enum BlockSize block_size, int mb_row, int mb_col, int_mv frame_nearest_mv[MAX_REF_FRAMES], int_mv frame_near_mv[MAX_REF_FRAMES], @@ -2854,6 +2806,44 @@ static void model_rd_from_var_lapndz(int var, int n, int qstep, vp9_clear_system_state(); } +static enum BlockSize y_to_uv_block_size(enum BlockSize bs) { + switch (bs) { + case BLOCK_64X64: return BLOCK_32X32; +#if CONFIG_SBSEGMENT + case BLOCK_64X32: return BLOCK_32X16; + case BLOCK_32X64: return BLOCK_16X32; +#endif + case BLOCK_32X32: return BLOCK_16X16; +#if CONFIG_SBSEGMENT + case BLOCK_32X16: return BLOCK_16X8; + case BLOCK_16X32: return BLOCK_8X16; +#endif + case BLOCK_16X16: return BLOCK_8X8; + default: + assert(0); + return -1; + } +} + +static enum BlockSize y_bsizet_to_block_size(BLOCK_SIZE_TYPE bs) { + switch (bs) { + case BLOCK_SIZE_SB64X64: return BLOCK_64X64; +#if CONFIG_SBSEGMENT + case BLOCK_SIZE_SB64X32: return BLOCK_64X32; + case BLOCK_SIZE_SB32X64: return BLOCK_32X64; +#endif + case BLOCK_SIZE_SB32X32: return BLOCK_32X32; +#if CONFIG_SBSEGMENT + case BLOCK_SIZE_SB32X16: return BLOCK_32X16; + case BLOCK_SIZE_SB16X32: return BLOCK_16X32; +#endif + case BLOCK_SIZE_MB16X16: return BLOCK_16X16; + default: + assert(0); + return -1; + } +} + static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int *saddone, int near_sadidx[], @@ -2872,9 +2862,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, [MAX_REF_FRAMES], YV12_BUFFER_CONFIG *scaled_ref_frame, int mb_row, int mb_col) { - const enum BlockSize block_size = - (bsize == BLOCK_SIZE_MB16X16) ? BLOCK_16X16 : - (bsize == BLOCK_SIZE_SB32X32) ? BLOCK_32X32 : BLOCK_64X64; + const int bw = 1 << mb_width_log2(bsize), bh = 1 << mb_height_log2(bsize); + const enum BlockSize block_size = y_bsizet_to_block_size(bsize); + const enum BlockSize uv_block_size = y_to_uv_block_size(block_size); VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; @@ -3044,7 +3034,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, (mbmi->mv[1].as_mv.col & 15) == 0; // Search for best switchable filter by checking the variance of // pred error irrespective of whether the filter will be used - if (bsize == BLOCK_SIZE_SB64X64) { + if (bsize != BLOCK_SIZE_MB16X16) { int switchable_filter_index, newbest; int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0; int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0; @@ -3070,20 +3060,26 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int tmp_rate_y, tmp_rate_u, tmp_rate_v; int tmp_dist_y, tmp_dist_u, tmp_dist_v; vp9_build_inter_predictors_sb(xd, mb_row, mb_col, bsize); - var = vp9_variance64x64(*(b->base_src), b->src_stride, - xd->dst.y_buffer, xd->dst.y_stride, &sse); + var = cpi->fn_ptr[block_size].vf(*(b->base_src), b->src_stride, + xd->dst.y_buffer, xd->dst.y_stride, + &sse); // Note our transform coeffs are 8 times an orthogonal transform. // Hence quantizer step is also 8 times. To get effective quantizer // we need to divide by 8 before sending to modeling function. - model_rd_from_var_lapndz(var, 64 * 64, xd->block[0].dequant[1] >> 3, + model_rd_from_var_lapndz(var, 16 * bw * 16 * bh, + xd->block[0].dequant[1] >> 3, &tmp_rate_y, &tmp_dist_y); - var = vp9_variance32x32(x->src.u_buffer, x->src.uv_stride, - xd->dst.u_buffer, xd->dst.uv_stride, &sse); - model_rd_from_var_lapndz(var, 32 * 32, xd->block[16].dequant[1] >> 3, + var = cpi->fn_ptr[uv_block_size].vf(x->src.u_buffer, x->src.uv_stride, + xd->dst.u_buffer, xd->dst.uv_stride, + &sse); + model_rd_from_var_lapndz(var, 8 * bw * 8 * bh, + xd->block[16].dequant[1] >> 3, &tmp_rate_u, &tmp_dist_u); - var = vp9_variance32x32(x->src.v_buffer, x->src.uv_stride, - xd->dst.v_buffer, xd->dst.uv_stride, &sse); - model_rd_from_var_lapndz(var, 32 * 32, xd->block[20].dequant[1] >> 3, + var = cpi->fn_ptr[uv_block_size].vf(x->src.v_buffer, x->src.uv_stride, + xd->dst.v_buffer, xd->dst.uv_stride, + &sse); + model_rd_from_var_lapndz(var, 8 * bw * 8 * bh, + xd->block[20].dequant[1] >> 3, &tmp_rate_v, &tmp_dist_v); rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_y + tmp_rate_u + tmp_rate_v, @@ -3107,97 +3103,18 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, (cm->mcomp_filter_type != SWITCHABLE && cm->mcomp_filter_type == mbmi->interp_filter)) { int i; - for (i = 0; i < 64; ++i) - vpx_memcpy(tmp_ybuf + i * 64, + for (i = 0; i < 16 * bh; ++i) + vpx_memcpy(tmp_ybuf + i * 16 * bw, xd->dst.y_buffer + i * xd->dst.y_stride, - sizeof(unsigned char) * 64); - for (i = 0; i < 32; ++i) - vpx_memcpy(tmp_ubuf + i * 32, + sizeof(unsigned char) * 16 * bw); + for (i = 0; i < 8 * bh; ++i) + vpx_memcpy(tmp_ubuf + i * 8 * bw, xd->dst.u_buffer + i * xd->dst.uv_stride, - sizeof(unsigned char) * 32); - for (i = 0; i < 32; ++i) - vpx_memcpy(tmp_vbuf + i * 32, + sizeof(unsigned char) * 8 * bw); + for (i = 0; i < 8 * bh; ++i) + vpx_memcpy(tmp_vbuf + i * 8 * bw, xd->dst.v_buffer + i * xd->dst.uv_stride, - sizeof(unsigned char) * 32); - pred_exists = 1; - } - interpolating_intpel_seen |= - intpel_mv && vp9_is_interpolating_filter[mbmi->interp_filter]; - } - } else if (bsize == BLOCK_SIZE_SB32X32) { - int switchable_filter_index, newbest; - int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0; - int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0; - for (switchable_filter_index = 0; - switchable_filter_index < VP9_SWITCHABLE_FILTERS; - ++switchable_filter_index) { - int rs = 0; - mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index]; - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); - if (cpi->common.mcomp_filter_type == SWITCHABLE) { - const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP); - const int m = vp9_switchable_interp_map[mbmi->interp_filter]; - rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m]; - } - if (interpolating_intpel_seen && intpel_mv && - vp9_is_interpolating_filter[mbmi->interp_filter]) { - rd = RDCOST(x->rdmult, x->rddiv, - rs + tmp_rate_y_i + tmp_rate_u_i + tmp_rate_v_i, - tmp_dist_y_i + tmp_dist_u_i + tmp_dist_v_i); - } else { - unsigned int sse, var; - int tmp_rate_y, tmp_rate_u, tmp_rate_v; - int tmp_dist_y, tmp_dist_u, tmp_dist_v; - vp9_build_inter_predictors_sb(xd, mb_row, mb_col, bsize); - var = vp9_variance32x32(*(b->base_src), b->src_stride, - xd->dst.y_buffer, xd->dst.y_stride, &sse); - // Note our transform coeffs are 8 times an orthogonal transform. - // Hence quantizer step is also 8 times. To get effective quantizer - // we need to divide by 8 before sending to modeling function. - model_rd_from_var_lapndz(var, 32 * 32, xd->block[0].dequant[1] >> 3, - &tmp_rate_y, &tmp_dist_y); - var = vp9_variance16x16(x->src.u_buffer, x->src.uv_stride, - xd->dst.u_buffer, xd->dst.uv_stride, &sse); - model_rd_from_var_lapndz(var, 16 * 16, xd->block[16].dequant[1] >> 3, - &tmp_rate_u, &tmp_dist_u); - var = vp9_variance16x16(x->src.v_buffer, x->src.uv_stride, - xd->dst.v_buffer, xd->dst.uv_stride, &sse); - model_rd_from_var_lapndz(var, 16 * 16, xd->block[20].dequant[1] >> 3, - &tmp_rate_v, &tmp_dist_v); - rd = RDCOST(x->rdmult, x->rddiv, - rs + tmp_rate_y + tmp_rate_u + tmp_rate_v, - tmp_dist_y + tmp_dist_u + tmp_dist_v); - if (!interpolating_intpel_seen && intpel_mv && - vp9_is_interpolating_filter[mbmi->interp_filter]) { - tmp_rate_y_i = tmp_rate_y; - tmp_rate_u_i = tmp_rate_u; - tmp_rate_v_i = tmp_rate_v; - tmp_dist_y_i = tmp_dist_y; - tmp_dist_u_i = tmp_dist_u; - tmp_dist_v_i = tmp_dist_v; - } - } - newbest = (switchable_filter_index == 0 || rd < best_rd); - if (newbest) { - best_rd = rd; - *best_filter = mbmi->interp_filter; - } - if ((cm->mcomp_filter_type == SWITCHABLE && newbest) || - (cm->mcomp_filter_type != SWITCHABLE && - cm->mcomp_filter_type == mbmi->interp_filter)) { - int i; - for (i = 0; i < 32; ++i) - vpx_memcpy(tmp_ybuf + i * 64, - xd->dst.y_buffer + i * xd->dst.y_stride, - sizeof(unsigned char) * 32); - for (i = 0; i < 16; ++i) - vpx_memcpy(tmp_ubuf + i * 32, - xd->dst.u_buffer + i * xd->dst.uv_stride, - sizeof(unsigned char) * 16); - for (i = 0; i < 16; ++i) - vpx_memcpy(tmp_vbuf + i * 32, - xd->dst.v_buffer + i * xd->dst.uv_stride, - sizeof(unsigned char) * 16); + sizeof(unsigned char) * 8 * bw); pred_exists = 1; } interpolating_intpel_seen |= @@ -3207,7 +3124,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int switchable_filter_index, newbest; int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0; int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0; - assert(bsize == BLOCK_SIZE_MB16X16); for (switchable_filter_index = 0; switchable_filter_index < VP9_SWITCHABLE_FILTERS; ++switchable_filter_index) { @@ -3286,26 +3202,17 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); if (pred_exists) { - if (bsize == BLOCK_SIZE_SB64X64) { - for (i = 0; i < 64; ++i) - vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride, tmp_ybuf + i * 64, - sizeof(unsigned char) * 64); - for (i = 0; i < 32; ++i) - vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride, tmp_ubuf + i * 32, - sizeof(unsigned char) * 32); - for (i = 0; i < 32; ++i) - vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride, tmp_vbuf + i * 32, - sizeof(unsigned char) * 32); - } else if (bsize == BLOCK_SIZE_SB32X32) { - for (i = 0; i < 32; ++i) - vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride, tmp_ybuf + i * 64, - sizeof(unsigned char) * 32); - for (i = 0; i < 16; ++i) - vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride, tmp_ubuf + i * 32, - sizeof(unsigned char) * 16); - for (i = 0; i < 16; ++i) - vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride, tmp_vbuf + i * 32, - sizeof(unsigned char) * 16); + // FIXME(rbultje): mb code still predicts into xd->predictor + if (bsize != BLOCK_SIZE_MB16X16) { + for (i = 0; i < bh * 16; ++i) + vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride, + tmp_ybuf + i * bw * 16, sizeof(unsigned char) * bw * 16); + for (i = 0; i < bh * 8; ++i) + vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride, + tmp_ubuf + i * bw * 8, sizeof(unsigned char) * bw * 8); + for (i = 0; i < bh * 8; ++i) + vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride, + tmp_vbuf + i * bw * 8, sizeof(unsigned char) * bw * 8); } else { vpx_memcpy(xd->predictor, tmp_ybuf, sizeof(unsigned char) * 256); vpx_memcpy(xd->predictor + 256, tmp_ubuf, sizeof(unsigned char) * 64); @@ -3340,14 +3247,11 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (threshold < x->encode_breakout) threshold = x->encode_breakout; - if (bsize == BLOCK_SIZE_SB64X64) { - var = vp9_variance64x64(*(b->base_src), b->src_stride, - xd->dst.y_buffer, xd->dst.y_stride, &sse); - } else if (bsize == BLOCK_SIZE_SB32X32) { - var = vp9_variance32x32(*(b->base_src), b->src_stride, - xd->dst.y_buffer, xd->dst.y_stride, &sse); + if (bsize != BLOCK_SIZE_MB16X16) { + var = cpi->fn_ptr[block_size].vf(*(b->base_src), b->src_stride, + xd->dst.y_buffer, xd->dst.y_stride, + &sse); } else { - assert(bsize == BLOCK_SIZE_MB16X16); var = vp9_variance16x16(*(b->base_src), b->src_stride, xd->predictor, 16, &sse); } @@ -3361,23 +3265,23 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Check u and v to make sure skip is ok int sse2; - if (bsize == BLOCK_SIZE_SB64X64) { + if (bsize != BLOCK_SIZE_MB16X16) { unsigned int sse2u, sse2v; - var = vp9_variance32x32(x->src.u_buffer, x->src.uv_stride, - xd->dst.u_buffer, xd->dst.uv_stride, &sse2u); - var = vp9_variance32x32(x->src.v_buffer, x->src.uv_stride, - xd->dst.v_buffer, xd->dst.uv_stride, &sse2v); + // FIXME(rbultje): mb predictors predict into xd->predictor + var = cpi->fn_ptr[uv_block_size].vf(x->src.u_buffer, x->src.uv_stride, + xd->dst.u_buffer, + xd->dst.uv_stride, &sse2u); + var = cpi->fn_ptr[uv_block_size].vf(x->src.v_buffer, x->src.uv_stride, + xd->dst.v_buffer, + xd->dst.uv_stride, &sse2v); sse2 = sse2u + sse2v; - } else if (bsize == BLOCK_SIZE_SB32X32) { + } else { unsigned int sse2u, sse2v; - var = vp9_variance16x16(x->src.u_buffer, x->src.uv_stride, - xd->dst.u_buffer, xd->dst.uv_stride, &sse2u); - var = vp9_variance16x16(x->src.v_buffer, x->src.uv_stride, - xd->dst.v_buffer, xd->dst.uv_stride, &sse2v); + var = vp9_variance8x8(x->src.u_buffer, x->src.uv_stride, + xd->predictor + 256, 8, &sse2u); + var = vp9_variance8x8(x->src.v_buffer, x->src.uv_stride, + xd->predictor + 320, 8, &sse2v); sse2 = sse2u + sse2v; - } else { - assert(bsize == BLOCK_SIZE_MB16X16); - sse2 = vp9_uvsse(x); } if (sse2 * 2 < threshold) { @@ -3954,44 +3858,41 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // because there are no non zero coefficients and make any // necessary adjustment for rate. Ignore if skip is coded at // segment level as the cost wont have been added in. - if (cpi->common.mb_no_coeff_skip) { - int mb_skip_allowed; + int mb_skip_allowed; - // Is Mb level skip allowed (i.e. not coded at segment level). - mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); + // Is Mb level skip allowed (i.e. not coded at segment level). + mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); - if (skippable) { - mbmi->mb_skip_coeff = 1; + if (skippable) { + mbmi->mb_skip_coeff = 1; - // Back out the coefficient coding costs - rate2 -= (rate_y + rate_uv); - // for best_yrd calculation - rate_uv = 0; + // Back out the coefficient coding costs + rate2 -= (rate_y + rate_uv); + // for best_yrd calculation + rate_uv = 0; - if (mb_skip_allowed) { - int prob_skip_cost; + if (mb_skip_allowed) { + int prob_skip_cost; - // Cost the skip mb case - vp9_prob skip_prob = - vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP); + // Cost the skip mb case + vp9_prob skip_prob = + vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP); - if (skip_prob) { - prob_skip_cost = vp9_cost_bit(skip_prob, 1); - rate2 += prob_skip_cost; - other_cost += prob_skip_cost; - } - } - } - // Add in the cost of the no skip flag. - else { - mbmi->mb_skip_coeff = 0; - if (mb_skip_allowed) { - int prob_skip_cost = vp9_cost_bit( - vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP), 0); + if (skip_prob) { + prob_skip_cost = vp9_cost_bit(skip_prob, 1); rate2 += prob_skip_cost; other_cost += prob_skip_cost; } } + } else { + // Add in the cost of the no skip flag. + mbmi->mb_skip_coeff = 0; + if (mb_skip_allowed) { + int prob_skip_cost = vp9_cost_bit( + vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP), 0); + rate2 += prob_skip_cost; + other_cost += prob_skip_cost; + } } // Calculate the final RD estimate for this mode. @@ -4181,8 +4082,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->ref_frame = ALTREF_FRAME; mbmi->mv[0].as_int = 0; mbmi->uv_mode = DC_PRED; - mbmi->mb_skip_coeff = - (cpi->common.mb_no_coeff_skip) ? 1 : 0; + mbmi->mb_skip_coeff = 1; mbmi->partitioning = 0; set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame, scale_factor); @@ -4261,22 +4161,22 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t txfm_cache[NB_TXFM_MODES], err; int i; + ctx->skip = 0; xd->mode_info_context->mbmi.mode = DC_PRED; err = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y, &y_skip, bsize, txfm_cache); rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip, bsize); - if (cpi->common.mb_no_coeff_skip && y_skip && uv_skip) { + if (y_skip && uv_skip) { *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1); *returndist = dist_y + (dist_uv >> 2); memset(ctx->txfm_rd_diff, 0, sizeof(x->sb32_context[xd->sb_index].txfm_rd_diff)); } else { - *returnrate = rate_y + rate_uv; - if (cpi->common.mb_no_coeff_skip) - *returnrate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); + *returnrate = rate_y + rate_uv + + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); *returndist = dist_y + (dist_uv >> 2); for (i = 0; i < NB_TXFM_MODES; i++) { ctx->txfm_rd_diff[i] = err - txfm_cache[i]; @@ -4310,6 +4210,7 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, TX_SIZE txfm_size_16x16, txfm_size_8x8; int i; + x->mb_context[xd->sb_index][xd->mb_index].skip = 0; mbmi->ref_frame = INTRA_FRAME; mbmi->mode = DC_PRED; for (i = 0; i <= TX_8X8; i++) { @@ -4327,7 +4228,7 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE_MB16X16, txfm_cache[1]); mode16x16 = mbmi->mode; txfm_size_16x16 = mbmi->txfm_size; - if (cpi->common.mb_no_coeff_skip && y_intra16x16_skippable && + if (y_intra16x16_skippable && ((cm->txfm_mode == ONLY_4X4 && uv_intra_skippable[TX_4X4]) || (cm->txfm_mode != ONLY_4X4 && uv_intra_skippable[TX_8X8]))) { error16x16 -= RDCOST(x->rdmult, x->rddiv, rate16x16_tokenonly, 0); @@ -4359,7 +4260,7 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, } mbmi->mb_skip_coeff = 0; - if (cpi->common.mb_no_coeff_skip && y_intra16x16_skippable && + if (y_intra16x16_skippable && ((cm->txfm_mode == ONLY_4X4 && uv_intra_skippable[TX_4X4]) || (cm->txfm_mode != ONLY_4X4 && uv_intra_skippable[TX_8X8]))) { mbmi->mb_skip_coeff = 1; @@ -4385,8 +4286,7 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, dist = dist16x16 + (distuv[mbmi->txfm_size != TX_4X4] >> 2); mbmi->uv_mode = modeuv[mbmi->txfm_size != TX_4X4]; } - if (cpi->common.mb_no_coeff_skip) - rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); + rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); } else { if (error4x4 < error8x8) { rate = rateuv[TX_4X4] + rate4x4; @@ -4401,8 +4301,7 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, rate = rate8x8 + rateuv[TX_4X4]; dist = dist8x8 + (distuv[TX_4X4] >> 2); } - if (cpi->common.mb_no_coeff_skip) - rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); + rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); } for (i = 0; i < NB_TXFM_MODES; i++) { @@ -4420,8 +4319,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int *returndistortion, BLOCK_SIZE_TYPE bsize, PICK_MODE_CONTEXT *ctx) { - const int block_size = (bsize == BLOCK_SIZE_SB64X64) ? - BLOCK_64X64 : BLOCK_32X32; + const enum BlockSize block_size = y_bsizet_to_block_size(bsize); VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; @@ -4686,39 +4584,36 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // because there are no non zero coefficients and make any // necessary adjustment for rate. Ignore if skip is coded at // segment level as the cost wont have been added in. - if (cpi->common.mb_no_coeff_skip) { - int mb_skip_allowed; + int mb_skip_allowed; - // Is Mb level skip allowed (i.e. not coded at segment level). - mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); + // Is Mb level skip allowed (i.e. not coded at segment level). + mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); - if (skippable) { - // Back out the coefficient coding costs - rate2 -= (rate_y + rate_uv); - // for best_yrd calculation - rate_uv = 0; + if (skippable) { + // Back out the coefficient coding costs + rate2 -= (rate_y + rate_uv); + // for best_yrd calculation + rate_uv = 0; - if (mb_skip_allowed) { - int prob_skip_cost; + if (mb_skip_allowed) { + int prob_skip_cost; - // Cost the skip mb case - vp9_prob skip_prob = - vp9_get_pred_prob(cm, xd, PRED_MBSKIP); + // Cost the skip mb case + vp9_prob skip_prob = + vp9_get_pred_prob(cm, xd, PRED_MBSKIP); - if (skip_prob) { - prob_skip_cost = vp9_cost_bit(skip_prob, 1); - rate2 += prob_skip_cost; - other_cost += prob_skip_cost; - } + if (skip_prob) { + prob_skip_cost = vp9_cost_bit(skip_prob, 1); + rate2 += prob_skip_cost; + other_cost += prob_skip_cost; } } + } else if (mb_skip_allowed) { // Add in the cost of the no skip flag. - else if (mb_skip_allowed) { - int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd, - PRED_MBSKIP), 0); - rate2 += prob_skip_cost; - other_cost += prob_skip_cost; - } + int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd, + PRED_MBSKIP), 0); + rate2 += prob_skip_cost; + other_cost += prob_skip_cost; } // Calculate the final RD estimate for this mode. @@ -4886,7 +4781,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mbmi->second_ref_frame = INTRA_FRAME; mbmi->mv[0].as_int = 0; mbmi->uv_mode = DC_PRED; - mbmi->mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0; + mbmi->mb_skip_coeff = 1; mbmi->partitioning = 0; mbmi->txfm_size = cm->txfm_mode == TX_MODE_SELECT ? TX_32X32 : cm->txfm_mode; diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad_c.c index af5526dce..dbadaea74 100644 --- a/vp9/encoder/vp9_sad_c.c +++ b/vp9/encoder/vp9_sad_c.c @@ -23,6 +23,54 @@ unsigned int vp9_sad64x64_c(const uint8_t *src_ptr, return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 64, 64); } +#if CONFIG_SBSEGMENT +unsigned int vp9_sad64x32_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, + unsigned int max_sad) { + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 64, 32); +} + +void vp9_sad64x32x4d_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t* const ref_ptr[], + int ref_stride, + unsigned int *sad_array) { + sad_array[0] = vp9_sad64x32(src_ptr, src_stride, + ref_ptr[0], ref_stride, 0x7fffffff); + sad_array[1] = vp9_sad64x32(src_ptr, src_stride, + ref_ptr[1], ref_stride, 0x7fffffff); + sad_array[2] = vp9_sad64x32(src_ptr, src_stride, + ref_ptr[2], ref_stride, 0x7fffffff); + sad_array[3] = vp9_sad64x32(src_ptr, src_stride, + ref_ptr[3], ref_stride, 0x7fffffff); +} + +unsigned int vp9_sad32x64_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, + unsigned int max_sad) { + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 64); +} + +void vp9_sad32x64x4d_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t* const ref_ptr[], + int ref_stride, + unsigned int *sad_array) { + sad_array[0] = vp9_sad32x64(src_ptr, src_stride, + ref_ptr[0], ref_stride, 0x7fffffff); + sad_array[1] = vp9_sad32x64(src_ptr, src_stride, + ref_ptr[1], ref_stride, 0x7fffffff); + sad_array[2] = vp9_sad32x64(src_ptr, src_stride, + ref_ptr[2], ref_stride, 0x7fffffff); + sad_array[3] = vp9_sad32x64(src_ptr, src_stride, + ref_ptr[3], ref_stride, 0x7fffffff); +} +#endif + unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, @@ -31,6 +79,54 @@ unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 32); } +#if CONFIG_SBSEGMENT +unsigned int vp9_sad32x16_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, + unsigned int max_sad) { + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 16); +} + +void vp9_sad32x16x4d_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t* const ref_ptr[], + int ref_stride, + unsigned int *sad_array) { + sad_array[0] = vp9_sad32x16(src_ptr, src_stride, + ref_ptr[0], ref_stride, 0x7fffffff); + sad_array[1] = vp9_sad32x16(src_ptr, src_stride, + ref_ptr[1], ref_stride, 0x7fffffff); + sad_array[2] = vp9_sad32x16(src_ptr, src_stride, + ref_ptr[2], ref_stride, 0x7fffffff); + sad_array[3] = vp9_sad32x16(src_ptr, src_stride, + ref_ptr[3], ref_stride, 0x7fffffff); +} + +unsigned int vp9_sad16x32_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, + unsigned int max_sad) { + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 32); +} + +void vp9_sad16x32x4d_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t* const ref_ptr[], + int ref_stride, + unsigned int *sad_array) { + sad_array[0] = vp9_sad16x32(src_ptr, src_stride, + ref_ptr[0], ref_stride, 0x7fffffff); + sad_array[1] = vp9_sad16x32(src_ptr, src_stride, + ref_ptr[1], ref_stride, 0x7fffffff); + sad_array[2] = vp9_sad16x32(src_ptr, src_stride, + ref_ptr[2], ref_stride, 0x7fffffff); + sad_array[3] = vp9_sad16x32(src_ptr, src_stride, + ref_ptr[3], ref_stride, 0x7fffffff); +} +#endif + unsigned int vp9_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c index a04a20c29..aac42f738 100644 --- a/vp9/encoder/vp9_segmentation.c +++ b/vp9/encoder/vp9_segmentation.c @@ -212,15 +212,15 @@ static void count_segs(VP9_COMP *cpi, int *no_pred_segcounts, int (*temporal_predictor_count)[2], int (*t_unpred_seg_counts)[MAX_MB_SEGMENTS], - int mb_size, int mb_row, int mb_col) { + int bw, int bh, int mb_row, int mb_col) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; const int segmap_index = mb_row * cm->mb_cols + mb_col; const int segment_id = mi->mbmi.segment_id; xd->mode_info_context = mi; - set_mb_row(cm, xd, mb_row, mb_size); - set_mb_col(cm, xd, mb_col, mb_size); + set_mb_row(cm, xd, mb_row, bh); + set_mb_col(cm, xd, mb_col, bw); // Count the number of hits on each segment with no prediction no_pred_segcounts[segment_id]++; @@ -290,7 +290,22 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { mb_col < cm->cur_tile_mb_col_end; mb_col += 4, mi += 4) { if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) { count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count, - t_unpred_seg_counts, 4, mb_row, mb_col); + t_unpred_seg_counts, 4, 4, mb_row, mb_col); +#if CONFIG_SBSEGMENT + } else if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X32) { + count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count, + t_unpred_seg_counts, 4, 2, mb_row, mb_col); + if (mb_row + 2 != cm->mb_rows) + count_segs(cpi, mi + 2 * mis, no_pred_segcounts, + temporal_predictor_count, + t_unpred_seg_counts, 4, 2, mb_row + 2, mb_col); + } else if (mi->mbmi.sb_type == BLOCK_SIZE_SB32X64) { + count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count, + t_unpred_seg_counts, 2, 4, mb_row, mb_col); + if (mb_col + 2 != cm->mb_cols) + count_segs(cpi, mi + 2, no_pred_segcounts, temporal_predictor_count, + t_unpred_seg_counts, 2, 4, mb_row, mb_col + 2); +#endif } else { for (i = 0; i < 4; i++) { int x_idx = (i & 1) << 1, y_idx = i & 2; @@ -301,11 +316,32 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { continue; } - if (sb_mi->mbmi.sb_type) { - assert(sb_mi->mbmi.sb_type == BLOCK_SIZE_SB32X32); + if (sb_mi->mbmi.sb_type == BLOCK_SIZE_SB32X32) { + count_segs(cpi, sb_mi, no_pred_segcounts, + temporal_predictor_count, t_unpred_seg_counts, 2, 2, + mb_row + y_idx, mb_col + x_idx); +#if CONFIG_SBSEGMENT + } else if (sb_mi->mbmi.sb_type == BLOCK_SIZE_SB32X16) { + count_segs(cpi, sb_mi, no_pred_segcounts, + temporal_predictor_count, + t_unpred_seg_counts, 2, 1, + mb_row + y_idx, mb_col + x_idx); + if (mb_row + y_idx + 1 != cm->mb_rows) + count_segs(cpi, sb_mi + mis, no_pred_segcounts, + temporal_predictor_count, + t_unpred_seg_counts, 2, 1, + mb_row + y_idx + 1, mb_col + x_idx); + } else if (sb_mi->mbmi.sb_type == BLOCK_SIZE_SB16X32) { count_segs(cpi, sb_mi, no_pred_segcounts, - temporal_predictor_count, t_unpred_seg_counts, 2, + temporal_predictor_count, + t_unpred_seg_counts, 1, 2, mb_row + y_idx, mb_col + x_idx); + if (mb_col + x_idx + 1 != cm->mb_cols) + count_segs(cpi, sb_mi + 1, no_pred_segcounts, + temporal_predictor_count, + t_unpred_seg_counts, 1, 2, + mb_row + y_idx, mb_col + x_idx + 1); +#endif } else { int j; @@ -322,7 +358,7 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { assert(mb_mi->mbmi.sb_type == BLOCK_SIZE_MB16X16); count_segs(cpi, mb_mi, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, - 1, mb_row + y_idx_mb, mb_col + x_idx_mb); + 1, 1, mb_row + y_idx_mb, mb_col + x_idx_mb); } } } diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 891eb25fd..c97f21e31 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -154,13 +154,50 @@ static void tokenize_b(VP9_COMP *cpi, l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); +#if CONFIG_SBSEGMENT + } else if (sb_type == BLOCK_SIZE_SB32X64) { + a = (ENTROPY_CONTEXT *)xd->above_context + + vp9_block2above_sb32x64[tx_size][ib]; + l = (ENTROPY_CONTEXT *)xd->left_context + + vp9_block2left_sb32x64[tx_size][ib]; + a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a2 = a3 = l2 = l3 = NULL; + } else if (sb_type == BLOCK_SIZE_SB64X32) { + a = (ENTROPY_CONTEXT *)xd->above_context + + vp9_block2above_sb64x32[tx_size][ib]; + l = (ENTROPY_CONTEXT *)xd->left_context + + vp9_block2left_sb64x32[tx_size][ib]; + a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a2 = a3 = l2 = l3 = NULL; +#endif } else if (sb_type == BLOCK_SIZE_SB32X32) { a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above_sb[tx_size][ib]; l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb[tx_size][ib]; a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); a2 = a3 = l2 = l3 = NULL; +#if CONFIG_SBSEGMENT + } else if (sb_type == BLOCK_SIZE_SB16X32) { + a = (ENTROPY_CONTEXT *)xd->above_context + + vp9_block2above_sb16x32[tx_size][ib]; + l = (ENTROPY_CONTEXT *)xd->left_context + + vp9_block2left_sb16x32[tx_size][ib]; + a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a1 = l1 = a2 = l2 = a3 = l3 = NULL; + } else if (sb_type == BLOCK_SIZE_SB32X16) { + a = (ENTROPY_CONTEXT *)xd->above_context + + vp9_block2above_sb32x16[tx_size][ib]; + l = (ENTROPY_CONTEXT *)xd->left_context + + vp9_block2left_sb32x16[tx_size][ib]; + a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a1 = l1 = a2 = l2 = a3 = l3 = NULL; +#endif } else { + assert(sb_type == BLOCK_SIZE_MB16X16); a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above[tx_size][ib]; l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left[tx_size][ib]; a1 = l1 = a2 = l2 = a3 = l3 = NULL; @@ -290,7 +327,7 @@ static void tokenize_b(VP9_COMP *cpi, else #endif t->skip_eob_node = (c > 0) && (token_cache[c - 1] == 0); - assert(vp9_coef_encodings[t->Token].Len - t->skip_eob_node > 0); + assert(vp9_coef_encodings[t->Token].len - t->skip_eob_node > 0); if (!dry_run) { ++counts[type][ref][band][pt][token]; if (!t->skip_eob_node) @@ -397,11 +434,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi, if (mbmi->mb_skip_coeff) { if (!dry_run) cpi->skip_true_count[mb_skip_context] += skip_inc; - if (!cm->mb_no_coeff_skip) { - vp9_stuff_sb(cpi, xd, t, dry_run, bsize); - } else { - vp9_reset_sb_tokens_context(xd, bsize); - } + vp9_reset_sb_tokens_context(xd, bsize); if (dry_run) *t = t_backup; return; @@ -490,11 +523,7 @@ void vp9_tokenize_mb(VP9_COMP *cpi, if (xd->mode_info_context->mbmi.mb_skip_coeff) { if (!dry_run) cpi->skip_true_count[mb_skip_context] += skip_inc; - if (!cpi->common.mb_no_coeff_skip) { - vp9_stuff_mb(cpi, xd, t, dry_run); - } else { - vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_MB16X16); - } + vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_MB16X16); if (dry_run) *t = t_backup; diff --git a/vp9/encoder/vp9_treewriter.h b/vp9/encoder/vp9_treewriter.h index 832471aa8..af2c122e0 100644 --- a/vp9/encoder/vp9_treewriter.h +++ b/vp9/encoder/vp9_treewriter.h @@ -23,7 +23,8 @@ typedef BOOL_CODER vp9_writer; #define vp9_write encode_bool #define vp9_write_literal vp9_encode_value -#define vp9_write_bit(W, V) vp9_write(W, V, vp9_prob_half) +#define vp9_write_bit(w, v) vp9_write((w), (v), vp9_prob_half) +#define vp9_write_prob(w, v) vp9_write_literal((w), (v), 8) /* Approximate length of an encoded bool in 256ths of a bit at given prob */ @@ -67,11 +68,9 @@ static INLINE void treed_write(vp9_writer *const w, } while (n); } -static INLINE void write_token(vp9_writer *const w, - vp9_tree t, - const vp9_prob *const p, - vp9_token *const x) { - treed_write(w, t, p, x->value, x->Len); +static INLINE void write_token(vp9_writer *w, vp9_tree t, const vp9_prob *p, + const struct vp9_token *x) { + treed_write(w, t, p, x->value, x->len); } static INLINE int treed_cost(vp9_tree t, @@ -91,10 +90,9 @@ static INLINE int treed_cost(vp9_tree t, return c; } -static INLINE int cost_token(vp9_tree t, - const vp9_prob *const p, - vp9_token *const x) { - return treed_cost(t, p, x->value, x->Len); +static INLINE int cost_token(vp9_tree t, const vp9_prob *p, + const struct vp9_token *x) { + return treed_cost(t, p, x->value, x->len); } /* Fill array of costs for all possible token values. */ diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c index d07a65b45..fb66f4f0b 100644 --- a/vp9/encoder/vp9_variance_c.c +++ b/vp9/encoder/vp9_variance_c.c @@ -24,6 +24,144 @@ unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { return sum; } +#if CONFIG_SBSEGMENT +unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, &var, &avg); + *sse = var; + return (var - (((int64_t)avg * avg) >> 11)); +} + +unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) { + uint16_t fdata3[65 * 64]; // Temp data bufffer used in filtering + uint8_t temp2[68 * 64]; + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 33, 64, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter); + + return vp9_variance64x32_c(temp2, 64, dst_ptr, dst_pixels_per_line, sse); +} + +unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, &var, &avg); + *sse = var; + return (var - (((int64_t)avg * avg) >> 11)); +} + +unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) { + uint16_t fdata3[65 * 64]; // Temp data bufffer used in filtering + uint8_t temp2[68 * 64]; + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 65, 32, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 32, vfilter); + + return vp9_variance32x64_c(temp2, 64, dst_ptr, dst_pixels_per_line, sse); +} + +unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, &var, &avg); + *sse = var; + return (var - (((int64_t)avg * avg) >> 9)); +} + +unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) { + uint16_t fdata3[33 * 32]; // Temp data bufffer used in filtering + uint8_t temp2[36 * 32]; + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 17, 32, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); + + return vp9_variance32x16_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse); +} + +unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, &var, &avg); + *sse = var; + return (var - (((int64_t)avg * avg) >> 9)); +} + +unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) { + uint16_t fdata3[33 * 32]; // Temp data bufffer used in filtering + uint8_t temp2[36 * 32]; + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 33, 16, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 16, vfilter); + + return vp9_variance16x32_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse); +} +#endif + unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -139,17 +277,18 @@ unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse) { uint8_t temp2[20 * 16]; - const int16_t *HFilter, *VFilter; - uint16_t FData3[5 * 4]; // Temp data bufffer used in filtering + const int16_t *hfilter, *vfilter; + uint16_t fdata3[5 * 4]; // Temp data bufffer used in filtering - HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); // First filter 1d Horizontal - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter); + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 5, 4, hfilter); // Now filter Verticaly - var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); return vp9_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse); } @@ -162,15 +301,16 @@ unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t FData3[9 * 8]; // Temp data bufffer used in filtering + uint16_t fdata3[9 * 8]; // Temp data bufffer used in filtering uint8_t temp2[20 * 16]; - const int16_t *HFilter, *VFilter; + const int16_t *hfilter, *vfilter; - HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter); + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 9, 8, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); return vp9_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -182,15 +322,16 @@ unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t FData3[17 * 16]; // Temp data bufffer used in filtering + uint16_t fdata3[17 * 16]; // Temp data bufffer used in filtering uint8_t temp2[20 * 16]; - const int16_t *HFilter, *VFilter; + const int16_t *hfilter, *vfilter; - HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter); + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 17, 16, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); return vp9_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); } @@ -202,16 +343,16 @@ unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t FData3[65 * 64]; // Temp data bufffer used in filtering + uint16_t fdata3[65 * 64]; // Temp data bufffer used in filtering uint8_t temp2[68 * 64]; - const int16_t *HFilter, *VFilter; + const int16_t *hfilter, *vfilter; - HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, - 1, 65, 64, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 64, 64, 64, 64, VFilter); + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 65, 64, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); return vp9_variance64x64_c(temp2, 64, dst_ptr, dst_pixels_per_line, sse); } @@ -223,15 +364,16 @@ unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t FData3[33 * 32]; // Temp data bufffer used in filtering + uint16_t fdata3[33 * 32]; // Temp data bufffer used in filtering uint8_t temp2[36 * 32]; - const int16_t *HFilter, *VFilter; + const int16_t *hfilter, *vfilter; - HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 33, 32, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 32, 32, 32, 32, VFilter); + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 33, 32, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); return vp9_variance32x32_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse); } @@ -363,15 +505,16 @@ unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t FData3[16 * 9]; // Temp data bufffer used in filtering + uint16_t fdata3[16 * 9]; // Temp data bufffer used in filtering uint8_t temp2[20 * 16]; - const int16_t *HFilter, *VFilter; + const int16_t *hfilter, *vfilter; - HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter); + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 9, 16, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter); return vp9_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); } @@ -383,16 +526,16 @@ unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t FData3[9 * 16]; // Temp data bufffer used in filtering + uint16_t fdata3[9 * 16]; // Temp data bufffer used in filtering uint8_t temp2[20 * 16]; - const int16_t *HFilter, *VFilter; + const int16_t *hfilter, *vfilter; - HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, - 1, 17, 8, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter); + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 17, 8, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); return vp9_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); } diff --git a/vp9/encoder/x86/vp9_dct_sse2.asm b/vp9/encoder/x86/vp9_dct_sse2.asm deleted file mode 100644 index bbd6086da..000000000 --- a/vp9/encoder/x86/vp9_dct_sse2.asm +++ /dev/null @@ -1,432 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -%macro STACK_FRAME_CREATE 0 -%if ABI_IS_32BIT - %define input rsi - %define output rdi - %define pitch rax - push rbp - mov rbp, rsp - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) - mov rdi, arg(1) - - movsxd rax, dword ptr arg(2) - lea rcx, [rsi + rax*2] -%else - %if LIBVPX_YASM_WIN64 - %define input rcx - %define output rdx - %define pitch r8 - SAVE_XMM 7, u - %else - %define input rdi - %define output rsi - %define pitch rdx - %endif -%endif -%endmacro - -%macro STACK_FRAME_DESTROY 0 - %define input - %define output - %define pitch - -%if ABI_IS_32BIT - pop rdi - pop rsi - RESTORE_GOT - pop rbp -%else - %if LIBVPX_YASM_WIN64 - RESTORE_XMM - %endif -%endif - ret -%endmacro - -;void vp9_short_fdct4x4_sse2(short *input, short *output, int pitch) -global sym(vp9_short_fdct4x4_sse2) PRIVATE -sym(vp9_short_fdct4x4_sse2): - - STACK_FRAME_CREATE - - movq xmm0, MMWORD PTR[input ] ;03 02 01 00 - movq xmm2, MMWORD PTR[input+ pitch] ;13 12 11 10 - lea input, [input+2*pitch] - movq xmm1, MMWORD PTR[input ] ;23 22 21 20 - movq xmm3, MMWORD PTR[input+ pitch] ;33 32 31 30 - - punpcklqdq xmm0, xmm2 ;13 12 11 10 03 02 01 00 - punpcklqdq xmm1, xmm3 ;33 32 31 30 23 22 21 20 - - movdqa xmm2, xmm0 - punpckldq xmm0, xmm1 ;23 22 03 02 21 20 01 00 - punpckhdq xmm2, xmm1 ;33 32 13 12 31 30 11 10 - movdqa xmm1, xmm0 - punpckldq xmm0, xmm2 ;31 21 30 20 11 10 01 00 - pshufhw xmm1, xmm1, 0b1h ;22 23 02 03 xx xx xx xx - pshufhw xmm2, xmm2, 0b1h ;32 33 12 13 xx xx xx xx - - punpckhdq xmm1, xmm2 ;32 33 22 23 12 13 02 03 - movdqa xmm3, xmm0 - paddw xmm0, xmm1 ;b1 a1 b1 a1 b1 a1 b1 a1 - psubw xmm3, xmm1 ;c1 d1 c1 d1 c1 d1 c1 d1 - psllw xmm0, 3 ;b1 <<= 3 a1 <<= 3 - psllw xmm3, 3 ;c1 <<= 3 d1 <<= 3 - - movdqa xmm1, xmm0 - pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1 - pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1 - movdqa xmm4, xmm3 - pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352 - pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)];d1*2217 - c1*5352 - - paddd xmm3, XMMWORD PTR[GLOBAL(_14500)] - paddd xmm4, XMMWORD PTR[GLOBAL(_7500)] - psrad xmm3, 12 ;(c1 * 2217 + d1 * 5352 + 14500)>>12 - psrad xmm4, 12 ;(d1 * 2217 - c1 * 5352 + 7500)>>12 - - packssdw xmm0, xmm1 ;op[2] op[0] - packssdw xmm3, xmm4 ;op[3] op[1] - ; 23 22 21 20 03 02 01 00 - ; - ; 33 32 31 30 13 12 11 10 - ; - movdqa xmm2, xmm0 - punpcklqdq xmm0, xmm3 ;13 12 11 10 03 02 01 00 - punpckhqdq xmm2, xmm3 ;23 22 21 20 33 32 31 30 - - movdqa xmm3, xmm0 - punpcklwd xmm0, xmm2 ;32 30 22 20 12 10 02 00 - punpckhwd xmm3, xmm2 ;33 31 23 21 13 11 03 01 - movdqa xmm2, xmm0 - punpcklwd xmm0, xmm3 ;13 12 11 10 03 02 01 00 - punpckhwd xmm2, xmm3 ;33 32 31 30 23 22 21 20 - - movdqa xmm5, XMMWORD PTR[GLOBAL(_7)] - pshufd xmm2, xmm2, 04eh - movdqa xmm3, xmm0 - paddw xmm0, xmm2 ;b1 b1 b1 b1 a1 a1 a1 a1 - psubw xmm3, xmm2 ;c1 c1 c1 c1 d1 d1 d1 d1 - - pshufd xmm0, xmm0, 0d8h ;b1 b1 a1 a1 b1 b1 a1 a1 - movdqa xmm2, xmm3 ;save d1 for compare - pshufd xmm3, xmm3, 0d8h ;c1 c1 d1 d1 c1 c1 d1 d1 - pshuflw xmm0, xmm0, 0d8h ;b1 b1 a1 a1 b1 a1 b1 a1 - pshuflw xmm3, xmm3, 0d8h ;c1 c1 d1 d1 c1 d1 c1 d1 - pshufhw xmm0, xmm0, 0d8h ;b1 a1 b1 a1 b1 a1 b1 a1 - pshufhw xmm3, xmm3, 0d8h ;c1 d1 c1 d1 c1 d1 c1 d1 - movdqa xmm1, xmm0 - pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1 - pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1 - - pxor xmm4, xmm4 ;zero out for compare - paddd xmm0, xmm5 - paddd xmm1, xmm5 - pcmpeqw xmm2, xmm4 - psrad xmm0, 4 ;(a1 + b1 + 7)>>4 - psrad xmm1, 4 ;(a1 - b1 + 7)>>4 - pandn xmm2, XMMWORD PTR[GLOBAL(_cmp_mask)] ;clear upper, - ;and keep bit 0 of lower - - movdqa xmm4, xmm3 - pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352 - pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)] ;d1*2217 - c1*5352 - paddd xmm3, XMMWORD PTR[GLOBAL(_12000)] - paddd xmm4, XMMWORD PTR[GLOBAL(_51000)] - packssdw xmm0, xmm1 ;op[8] op[0] - psrad xmm3, 16 ;(c1 * 2217 + d1 * 5352 + 12000)>>16 - psrad xmm4, 16 ;(d1 * 2217 - c1 * 5352 + 51000)>>16 - - packssdw xmm3, xmm4 ;op[12] op[4] - movdqa xmm1, xmm0 - paddw xmm3, xmm2 ;op[4] += (d1!=0) - punpcklqdq xmm0, xmm3 ;op[4] op[0] - punpckhqdq xmm1, xmm3 ;op[12] op[8] - - movdqa XMMWORD PTR[output + 0], xmm0 - movdqa XMMWORD PTR[output + 16], xmm1 - - STACK_FRAME_DESTROY - -;void vp9_short_fdct8x4_sse2(short *input, short *output, int pitch) -global sym(vp9_short_fdct8x4_sse2) PRIVATE -sym(vp9_short_fdct8x4_sse2): - - STACK_FRAME_CREATE - - ; read the input data - movdqa xmm0, [input ] - movdqa xmm2, [input+ pitch] - lea input, [input+2*pitch] - movdqa xmm4, [input ] - movdqa xmm3, [input+ pitch] - - ; transpose for the first stage - movdqa xmm1, xmm0 ; 00 01 02 03 04 05 06 07 - movdqa xmm5, xmm4 ; 20 21 22 23 24 25 26 27 - - punpcklwd xmm0, xmm2 ; 00 10 01 11 02 12 03 13 - punpckhwd xmm1, xmm2 ; 04 14 05 15 06 16 07 17 - - punpcklwd xmm4, xmm3 ; 20 30 21 31 22 32 23 33 - punpckhwd xmm5, xmm3 ; 24 34 25 35 26 36 27 37 - - movdqa xmm2, xmm0 ; 00 10 01 11 02 12 03 13 - punpckldq xmm0, xmm4 ; 00 10 20 30 01 11 21 31 - - punpckhdq xmm2, xmm4 ; 02 12 22 32 03 13 23 33 - - movdqa xmm4, xmm1 ; 04 14 05 15 06 16 07 17 - punpckldq xmm4, xmm5 ; 04 14 24 34 05 15 25 35 - - punpckhdq xmm1, xmm5 ; 06 16 26 36 07 17 27 37 - movdqa xmm3, xmm2 ; 02 12 22 32 03 13 23 33 - - punpckhqdq xmm3, xmm1 ; 03 13 23 33 07 17 27 37 - punpcklqdq xmm2, xmm1 ; 02 12 22 32 06 16 26 36 - - movdqa xmm1, xmm0 ; 00 10 20 30 01 11 21 31 - punpcklqdq xmm0, xmm4 ; 00 10 20 30 04 14 24 34 - - punpckhqdq xmm1, xmm4 ; 01 11 21 32 05 15 25 35 - - ; xmm0 0 - ; xmm1 1 - ; xmm2 2 - ; xmm3 3 - - ; first stage - movdqa xmm5, xmm0 - movdqa xmm4, xmm1 - - paddw xmm0, xmm3 ; a1 = 0 + 3 - paddw xmm1, xmm2 ; b1 = 1 + 2 - - psubw xmm4, xmm2 ; c1 = 1 - 2 - psubw xmm5, xmm3 ; d1 = 0 - 3 - - psllw xmm5, 3 - psllw xmm4, 3 - - psllw xmm0, 3 - psllw xmm1, 3 - - ; output 0 and 2 - movdqa xmm2, xmm0 ; a1 - - paddw xmm0, xmm1 ; op[0] = a1 + b1 - psubw xmm2, xmm1 ; op[2] = a1 - b1 - - ; output 1 and 3 - ; interleave c1, d1 - movdqa xmm1, xmm5 ; d1 - punpcklwd xmm1, xmm4 ; c1 d1 - punpckhwd xmm5, xmm4 ; c1 d1 - - movdqa xmm3, xmm1 - movdqa xmm4, xmm5 - - pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - - pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - - paddd xmm1, XMMWORD PTR[GLOBAL(_14500)] - paddd xmm4, XMMWORD PTR[GLOBAL(_14500)] - paddd xmm3, XMMWORD PTR[GLOBAL(_7500)] - paddd xmm5, XMMWORD PTR[GLOBAL(_7500)] - - psrad xmm1, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12 - psrad xmm4, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12 - psrad xmm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 - psrad xmm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 - - packssdw xmm1, xmm4 ; op[1] - packssdw xmm3, xmm5 ; op[3] - - ; done with vertical - ; transpose for the second stage - movdqa xmm4, xmm0 ; 00 10 20 30 04 14 24 34 - movdqa xmm5, xmm2 ; 02 12 22 32 06 16 26 36 - - punpcklwd xmm0, xmm1 ; 00 01 10 11 20 21 30 31 - punpckhwd xmm4, xmm1 ; 04 05 14 15 24 25 34 35 - - punpcklwd xmm2, xmm3 ; 02 03 12 13 22 23 32 33 - punpckhwd xmm5, xmm3 ; 06 07 16 17 26 27 36 37 - - movdqa xmm1, xmm0 ; 00 01 10 11 20 21 30 31 - punpckldq xmm0, xmm2 ; 00 01 02 03 10 11 12 13 - - punpckhdq xmm1, xmm2 ; 20 21 22 23 30 31 32 33 - - movdqa xmm2, xmm4 ; 04 05 14 15 24 25 34 35 - punpckldq xmm2, xmm5 ; 04 05 06 07 14 15 16 17 - - punpckhdq xmm4, xmm5 ; 24 25 26 27 34 35 36 37 - movdqa xmm3, xmm1 ; 20 21 22 23 30 31 32 33 - - punpckhqdq xmm3, xmm4 ; 30 31 32 33 34 35 36 37 - punpcklqdq xmm1, xmm4 ; 20 21 22 23 24 25 26 27 - - movdqa xmm4, xmm0 ; 00 01 02 03 10 11 12 13 - punpcklqdq xmm0, xmm2 ; 00 01 02 03 04 05 06 07 - - punpckhqdq xmm4, xmm2 ; 10 11 12 13 14 15 16 17 - - ; xmm0 0 - ; xmm1 4 - ; xmm2 1 - ; xmm3 3 - - movdqa xmm5, xmm0 - movdqa xmm2, xmm1 - - paddw xmm0, xmm3 ; a1 = 0 + 3 - paddw xmm1, xmm4 ; b1 = 1 + 2 - - psubw xmm4, xmm2 ; c1 = 1 - 2 - psubw xmm5, xmm3 ; d1 = 0 - 3 - - pxor xmm6, xmm6 ; zero out for compare - - pcmpeqw xmm6, xmm5 ; d1 != 0 - - pandn xmm6, XMMWORD PTR[GLOBAL(_cmp_mask8x4)] ; clear upper, - ; and keep bit 0 of lower - - ; output 0 and 2 - movdqa xmm2, xmm0 ; a1 - - paddw xmm0, xmm1 ; a1 + b1 - psubw xmm2, xmm1 ; a1 - b1 - - paddw xmm0, XMMWORD PTR[GLOBAL(_7w)] - paddw xmm2, XMMWORD PTR[GLOBAL(_7w)] - - psraw xmm0, 4 ; op[0] = (a1 + b1 + 7)>>4 - psraw xmm2, 4 ; op[8] = (a1 - b1 + 7)>>4 - - ; output 1 and 3 - ; interleave c1, d1 - movdqa xmm1, xmm5 ; d1 - punpcklwd xmm1, xmm4 ; c1 d1 - punpckhwd xmm5, xmm4 ; c1 d1 - - movdqa xmm3, xmm1 - movdqa xmm4, xmm5 - - pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - - pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - - paddd xmm1, XMMWORD PTR[GLOBAL(_12000)] - paddd xmm4, XMMWORD PTR[GLOBAL(_12000)] - paddd xmm3, XMMWORD PTR[GLOBAL(_51000)] - paddd xmm5, XMMWORD PTR[GLOBAL(_51000)] - - psrad xmm1, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 - psrad xmm4, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 - psrad xmm3, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 - psrad xmm5, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 - - packssdw xmm1, xmm4 ; op[4] - packssdw xmm3, xmm5 ; op[12] - - paddw xmm1, xmm6 ; op[4] += (d1!=0) - - movdqa xmm4, xmm0 - movdqa xmm5, xmm2 - - punpcklqdq xmm0, xmm1 - punpckhqdq xmm4, xmm1 - - punpcklqdq xmm2, xmm3 - punpckhqdq xmm5, xmm3 - - movdqa XMMWORD PTR[output + 0 ], xmm0 - movdqa XMMWORD PTR[output + 16], xmm2 - movdqa XMMWORD PTR[output + 32], xmm4 - movdqa XMMWORD PTR[output + 48], xmm5 - - STACK_FRAME_DESTROY - -SECTION_RODATA -align 16 -_5352_2217: - dw 5352 - dw 2217 - dw 5352 - dw 2217 - dw 5352 - dw 2217 - dw 5352 - dw 2217 -align 16 -_2217_neg5352: - dw 2217 - dw -5352 - dw 2217 - dw -5352 - dw 2217 - dw -5352 - dw 2217 - dw -5352 -align 16 -_mult_add: - times 8 dw 1 -align 16 -_cmp_mask: - times 4 dw 1 - times 4 dw 0 -align 16 -_cmp_mask8x4: - times 8 dw 1 -align 16 -_mult_sub: - dw 1 - dw -1 - dw 1 - dw -1 - dw 1 - dw -1 - dw 1 - dw -1 -align 16 -_7: - times 4 dd 7 -align 16 -_7w: - times 8 dw 7 -align 16 -_14500: - times 4 dd 14500 -align 16 -_7500: - times 4 dd 7500 -align 16 -_12000: - times 4 dd 12000 -align 16 -_51000: - times 4 dd 51000 diff --git a/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c b/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c index 358d979eb..49cb837e0 100644 --- a/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c +++ b/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c @@ -11,6 +11,111 @@ #include <emmintrin.h> // SSE2 #include "vp9/common/vp9_idct.h" // for cospi constants +void vp9_short_fdct4x4_sse2(int16_t *input, int16_t *output, int pitch) { + // The 2D transform is done with two passes which are actually pretty + // similar. In the first one, we transform the columns and transpose + // the results. In the second one, we transform the rows. To achieve that, + // as the first pass results are transposed, we tranpose the columns (that + // is the transposed rows) and transpose the results (so that it goes back + // in normal/row positions). + const int stride = pitch >> 1; + int pass; + // Constants + // When we use them, in one case, they are all the same. In all others + // it's a pair of them that we need to repeat four times. This is done + // by constructing the 32 bit constant corresponding to that pair. + const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); + const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); + const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); + const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); + const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); + const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1); + const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); + const __m128i kOne = _mm_set1_epi16(1); + __m128i in0, in1, in2, in3; + // Load inputs. + { + in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride)); + in1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride)); + in2 = _mm_loadl_epi64((const __m128i *)(input + 2 * stride)); + in3 = _mm_loadl_epi64((const __m128i *)(input + 3 * stride)); + // x = x << 4 + in0 = _mm_slli_epi16(in0, 4); + in1 = _mm_slli_epi16(in1, 4); + in2 = _mm_slli_epi16(in2, 4); + in3 = _mm_slli_epi16(in3, 4); + // if (i == 0 && input[0]) input[0] += 1; + { + // The mask will only contain wether the first value is zero, all + // other comparison will fail as something shifted by 4 (above << 4) + // can never be equal to one. To increment in the non-zero case, we + // add the mask and one for the first element: + // - if zero, mask = -1, v = v - 1 + 1 = v + // - if non-zero, mask = 0, v = v + 0 + 1 = v + 1 + __m128i mask = _mm_cmpeq_epi16(in0, k__nonzero_bias_a); + in0 = _mm_add_epi16(in0, mask); + in0 = _mm_add_epi16(in0, k__nonzero_bias_b); + } + } + // Do the two transform/transpose passes + for (pass = 0; pass < 2; ++pass) { + // Transform 1/2: Add/substract + const __m128i r0 = _mm_add_epi16(in0, in3); + const __m128i r1 = _mm_add_epi16(in1, in2); + const __m128i r2 = _mm_sub_epi16(in1, in2); + const __m128i r3 = _mm_sub_epi16(in0, in3); + // Transform 1/2: Interleave to do the multiply by constants which gets us + // into 32 bits. + const __m128i t0 = _mm_unpacklo_epi16(r0, r1); + const __m128i t2 = _mm_unpacklo_epi16(r2, r3); + const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16); + const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16); + const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p24_p08); + const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m08_p24); + const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING); + const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING); + const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING); + const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING); + const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS); + const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS); + const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS); + const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS); + // Combine and transpose + const __m128i res0 = _mm_packs_epi32(w0, w2); + const __m128i res1 = _mm_packs_epi32(w4, w6); + // 00 01 02 03 20 21 22 23 + // 10 11 12 13 30 31 32 33 + const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1); + const __m128i tr0_1 = _mm_unpackhi_epi16(res0, res1); + // 00 10 01 11 02 12 03 13 + // 20 30 21 31 22 32 23 33 + in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); + in2 = _mm_unpackhi_epi32(tr0_0, tr0_1); + // 00 10 20 30 01 11 21 31 in0 contains 0 followed by 1 + // 02 12 22 32 03 13 23 33 in2 contains 2 followed by 3 + if (0 == pass) { + // Extract values in the high part for second pass as transform code + // only uses the first four values. + in1 = _mm_unpackhi_epi64(in0, in0); + in3 = _mm_unpackhi_epi64(in2, in2); + } else { + // Post-condition output and store it (v + 1) >> 2, taking advantage + // of the fact 1/3 are stored just after 0/2. + __m128i out01 = _mm_add_epi16(in0, kOne); + __m128i out23 = _mm_add_epi16(in2, kOne); + out01 = _mm_srai_epi16(out01, 2); + out23 = _mm_srai_epi16(out23, 2); + _mm_storeu_si128((__m128i *)(output + 0 * 4), out01); + _mm_storeu_si128((__m128i *)(output + 2 * 4), out23); + } + } +} + +void vp9_short_fdct8x4_sse2(int16_t *input, int16_t *output, int pitch) { + vp9_short_fdct4x4_sse2(input, output, pitch); + vp9_short_fdct4x4_sse2(input + 4, output + 16, pitch); +} + void vp9_short_fdct8x8_sse2(int16_t *input, int16_t *output, int pitch) { const int stride = pitch >> 1; int pass; diff --git a/vp9/encoder/x86/vp9_sad4d_sse2.asm b/vp9/encoder/x86/vp9_sad4d_sse2.asm index 3716d91ec..25dd064e1 100644 --- a/vp9/encoder/x86/vp9_sad4d_sse2.asm +++ b/vp9/encoder/x86/vp9_sad4d_sse2.asm @@ -215,7 +215,11 @@ cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \ INIT_XMM sse2 SADNXN4D 64, 64 +SADNXN4D 64, 32 +SADNXN4D 32, 64 SADNXN4D 32, 32 +SADNXN4D 32, 16 +SADNXN4D 16, 32 SADNXN4D 16, 16 SADNXN4D 16, 8 SADNXN4D 8, 16 diff --git a/vp9/encoder/x86/vp9_sad_sse2.asm b/vp9/encoder/x86/vp9_sad_sse2.asm index ea482e071..ea92377ee 100644 --- a/vp9/encoder/x86/vp9_sad_sse2.asm +++ b/vp9/encoder/x86/vp9_sad_sse2.asm @@ -14,11 +14,11 @@ SECTION .text ; unsigned int vp9_sad64x64_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); -INIT_XMM sse2 -cglobal sad64x64, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows +%macro SAD64XN 1 +cglobal sad64x%1, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows movsxdifnidn src_strideq, src_strided movsxdifnidn ref_strideq, ref_strided - mov n_rowsd, 64 + mov n_rowsd, %1 pxor m0, m0 .loop: movu m1, [refq] @@ -42,14 +42,19 @@ cglobal sad64x64, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows paddd m0, m1 movd eax, m0 RET +%endmacro + +INIT_XMM sse2 +SAD64XN 64 ; sad64x64_sse2 +SAD64XN 32 ; sad64x32_sse2 ; unsigned int vp9_sad32x32_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); -INIT_XMM sse2 -cglobal sad32x32, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows +%macro SAD32XN 1 +cglobal sad32x%1, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows movsxdifnidn src_strideq, src_strided movsxdifnidn ref_strideq, ref_strided - mov n_rowsd, 16 + mov n_rowsd, %1/2 pxor m0, m0 .loop: @@ -74,6 +79,12 @@ cglobal sad32x32, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows paddd m0, m1 movd eax, m0 RET +%endmacro + +INIT_XMM sse2 +SAD32XN 64 ; sad32x64_sse2 +SAD32XN 32 ; sad32x32_sse2 +SAD32XN 16 ; sad32x16_sse2 ; unsigned int vp9_sad16x{8,16}_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); @@ -112,6 +123,7 @@ cglobal sad16x%1, 4, 7, 5, src, src_stride, ref, ref_stride, \ %endmacro INIT_XMM sse2 +SAD16XN 32 ; sad16x32_sse2 SAD16XN 16 ; sad16x16_sse2 SAD16XN 8 ; sad16x8_sse2 diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index 43dba1373..13785f71b 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -90,7 +90,6 @@ VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.h VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_subtract_mmx.asm -VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm |