-rw-r--r--  test/vp9_boolcoder_test.cc | 2
-rw-r--r--  vp9/common/vp9_alloccommon.c | 1
-rw-r--r--  vp9/common/vp9_blockd.c | 513
-rw-r--r--  vp9/common/vp9_blockd.h | 92
-rw-r--r--  vp9/common/vp9_common.h | 3
-rw-r--r--  vp9/common/vp9_context.c | 397
-rw-r--r--  vp9/common/vp9_entropy.c | 10
-rw-r--r--  vp9/common/vp9_entropy.h | 10
-rw-r--r--  vp9/common/vp9_entropymode.c | 118
-rw-r--r--  vp9/common/vp9_entropymode.h | 31
-rw-r--r--  vp9/common/vp9_entropymv.c | 12
-rw-r--r--  vp9/common/vp9_entropymv.h | 16
-rw-r--r--  vp9/common/vp9_enums.h | 12
-rw-r--r--  vp9/common/vp9_findnearmv.c | 2
-rw-r--r--  vp9/common/vp9_loopfilter.c | 564
-rw-r--r--  vp9/common/vp9_loopfilter.h | 1
-rw-r--r--  vp9/common/vp9_mvref_common.c | 8
-rw-r--r--  vp9/common/vp9_onyxc_int.h | 6
-rw-r--r--  vp9/common/vp9_reconinter.c | 181
-rw-r--r--  vp9/common/vp9_reconinter.h | 78
-rw-r--r--  vp9/common/vp9_reconintra.c | 155
-rw-r--r--  vp9/common/vp9_reconintra4x4.c | 259
-rw-r--r--  vp9/common/vp9_rtcd_defs.sh | 96
-rw-r--r--  vp9/common/vp9_treecoder.c | 19
-rw-r--r--  vp9/common/vp9_treecoder.h | 14
-rw-r--r--  vp9/decoder/vp9_dboolhuff.c | 60
-rw-r--r--  vp9/decoder/vp9_dboolhuff.h | 49
-rw-r--r--  vp9/decoder/vp9_decodemv.c | 494
-rw-r--r--  vp9/decoder/vp9_decodemv.h | 4
-rw-r--r--  vp9/decoder/vp9_decodframe.c | 415
-rw-r--r--  vp9/decoder/vp9_dequantize.c | 189
-rw-r--r--  vp9/decoder/vp9_dequantize.h | 28
-rw-r--r--  vp9/decoder/vp9_detokenize.c | 25
-rw-r--r--  vp9/decoder/vp9_idct_blk.c | 53
-rw-r--r--  vp9/decoder/vp9_treereader.h | 4
-rw-r--r--  vp9/decoder/x86/vp9_dequantize_x86.c | 135
-rw-r--r--  vp9/encoder/vp9_bitstream.c | 274
-rw-r--r--  vp9/encoder/vp9_block.h | 1
-rw-r--r--  vp9/encoder/vp9_dct.c | 82
-rw-r--r--  vp9/encoder/vp9_encodeframe.c | 457
-rw-r--r--  vp9/encoder/vp9_encodemv.c | 80
-rw-r--r--  vp9/encoder/vp9_firstpass.c | 32
-rw-r--r--  vp9/encoder/vp9_mcomp.c | 7
-rw-r--r--  vp9/encoder/vp9_onyx_if.c | 150
-rw-r--r--  vp9/encoder/vp9_onyx_int.h | 30
-rw-r--r--  vp9/encoder/vp9_quantize.c | 6
-rw-r--r--  vp9/encoder/vp9_ratectrl.c | 2
-rw-r--r--  vp9/encoder/vp9_rdopt.c | 447
-rw-r--r--  vp9/encoder/vp9_sad_c.c | 96
-rw-r--r--  vp9/encoder/vp9_segmentation.c | 52
-rw-r--r--  vp9/encoder/vp9_tokenize.c | 51
-rw-r--r--  vp9/encoder/vp9_treewriter.h | 18
-rw-r--r--  vp9/encoder/vp9_variance_c.c | 231
-rw-r--r--  vp9/encoder/x86/vp9_dct_sse2.asm | 432
-rw-r--r--  vp9/encoder/x86/vp9_dct_sse2_intrinsics.c | 105
-rw-r--r--  vp9/encoder/x86/vp9_sad4d_sse2.asm | 4
-rw-r--r--  vp9/encoder/x86/vp9_sad_sse2.asm | 24
-rw-r--r--  vp9/vp9cx.mk | 1
58 files changed, 3549 insertions(+), 3089 deletions(-)
diff --git a/test/vp9_boolcoder_test.cc b/test/vp9_boolcoder_test.cc
index bde28a5f3..6405a6cb5 100644
--- a/test/vp9_boolcoder_test.cc
+++ b/test/vp9_boolcoder_test.cc
@@ -77,7 +77,7 @@ TEST(VP9, TestBitIO) {
} else if (bit_method == 3) {
bit = bit_rnd(2);
}
- GTEST_ASSERT_EQ(decode_bool(&br, probas[i]), bit)
+ GTEST_ASSERT_EQ(vp9_read(&br, probas[i]), bit)
<< "pos: " << i << " / " << bits_to_test
<< " bit_method: " << bit_method
<< " method: " << method;
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 48f9be1b3..0628a88b7 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -179,7 +179,6 @@ void vp9_create_common(VP9_COMMON *oci) {
vp9_default_bmode_probs(oci->fc.bmode_prob);
oci->txfm_mode = ONLY_4X4;
- oci->mb_no_coeff_skip = 1;
oci->comp_pred_mode = HYBRID_PREDICTION;
oci->no_lpf = 0;
oci->filter_type = NORMAL_LOOPFILTER;
diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c
index 9151622d3..6a68f6ee2 100644
--- a/vp9/common/vp9_blockd.c
+++ b/vp9/common/vp9_blockd.c
@@ -9,6 +9,7 @@
*/
+#include "./vpx_config.h"
#include "vp9/common/vp9_blockd.h"
#include "vpx_mem/vpx_mem.h"
@@ -58,6 +59,139 @@ const uint8_t vp9_block2above[TX_SIZE_MAX_MB][24] = {
};
#define S(x) x + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT)
+
+#if CONFIG_SBSEGMENT
+const uint8_t vp9_block2left_sb16x32[TX_SIZE_MAX_MB][48] = {
+ { 0, 0, 0, 0,
+ 1, 1, 1, 1,
+ 2, 2, 2, 2,
+ 3, 3, 3, 3,
+ S(0), S(0), S(0), S(0),
+ S(1), S(1), S(1), S(1),
+ S(2), S(2), S(2), S(2),
+ S(3), S(3), S(3), S(3),
+ 4, 4,
+ 5, 5,
+ S(4), S(4),
+ S(5), S(5),
+ 6, 6,
+ 7, 7,
+ S(6), S(6),
+ S(7), S(7) },
+ { 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0),
+ S(2), S(2), S(2), S(2),
+ S(2), S(2), S(2), S(2),
+ 4, 4,
+ 4, 4,
+ S(4), S(4),
+ S(4), S(4),
+ 6, 6,
+ 6, 6,
+ S(6), S(6),
+ S(6), S(6) },
+ { 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0) },
+};
+const uint8_t vp9_block2above_sb16x32[TX_SIZE_MAX_MB][48] = {
+ { 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 4, 5,
+ 4, 5,
+ 4, 5,
+ 4, 5,
+ 6, 7,
+ 6, 7,
+ 6, 7,
+ 6, 7 },
+ { 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ 4, 4,
+ 4, 4,
+ 4, 4,
+ 4, 4,
+ 6, 6,
+ 6, 6,
+ 6, 6,
+ 6, 6 },
+ { 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0 },
+};
+
+const uint8_t vp9_block2left_sb32x16[TX_SIZE_MAX_MB][48] = {
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4,
+ 5, 5, 5, 5,
+ 6, 6, 6, 6,
+ 7, 7, 7, 7 },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6 },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 },
+};
+const uint8_t vp9_block2above_sb32x16[TX_SIZE_MAX_MB][48] = {
+ { 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 4, 5, S(4), S(5),
+ 4, 5, S(4), S(5),
+ 6, 7, S(6), S(7),
+ 6, 7, S(6), S(7) },
+ { 0, 0, 0, 0, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ 0, 0, 0, 0, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ 4, 4, 4, 4,
+ S(4), S(4), S(4), S(4),
+ 6, 6, 6, 6,
+ S(6), S(6), S(6), S(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 },
+};
+#endif
+
const uint8_t vp9_block2left_sb[TX_SIZE_MAX_SB][96] = {
{ 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1,
@@ -177,6 +311,353 @@ const uint8_t vp9_block2above_sb[TX_SIZE_MAX_SB][96] = {
#define T(x) x + 2 * (sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT))
#define U(x) x + 3 * (sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT))
+
+#if CONFIG_SBSEGMENT
+const uint8_t vp9_block2left_sb32x64[TX_SIZE_MAX_SB][192] = {
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1),
+ S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2),
+ S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1),
+ T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2),
+ T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1),
+ U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2),
+ U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3),
+ 4, 4, 4, 4,
+ 5, 5, 5, 5,
+ S(4), S(4), S(4), S(4),
+ S(5), S(5), S(5), S(5),
+ T(4), T(4), T(4), T(4),
+ T(5), T(5), T(5), T(5),
+ U(4), U(4), U(4), U(4),
+ U(5), U(5), U(5), U(5),
+ 6, 6, 6, 6,
+ 7, 7, 7, 7,
+ S(6), S(6), S(6), S(6),
+ S(7), S(7), S(7), S(7),
+ T(6), T(6), T(6), T(6),
+ T(7), T(7), T(7), T(7) },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2),
+ S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2),
+ T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2),
+ U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2),
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ S(4), S(4), S(4), S(4),
+ S(4), S(4), S(4), S(4),
+ T(4), T(4), T(4), T(4),
+ T(4), T(4), T(4), T(4),
+ U(4), U(4), U(4), U(4),
+ U(4), U(4), U(4), U(4),
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ S(6), S(6), S(6), S(6),
+ S(6), S(6), S(6), S(6),
+ T(6), T(6), T(6), T(6),
+ T(6), T(6), T(6), T(6),
+ U(6), U(6), U(6), U(6),
+ U(6), U(6), U(6), U(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ T(4), T(4), T(4), T(4),
+ T(4), T(4), T(4), T(4),
+ T(4), T(4), T(4), T(4),
+ T(4), T(4), T(4), T(4),
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ T(6), T(6), T(6), T(6),
+ T(6), T(6), T(6), T(6),
+ T(6), T(6), T(6), T(6),
+ T(6), T(6), T(6), T(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0) },
+};
+const uint8_t vp9_block2above_sb32x64[TX_SIZE_MAX_SB][192] = {
+ { 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 4, 5, S(4), S(5),
+ 4, 5, S(4), S(5),
+ 4, 5, S(4), S(5),
+ 4, 5, S(4), S(5),
+ 4, 5, S(4), S(5),
+ 4, 5, S(4), S(5),
+ 4, 5, S(4), S(5),
+ 4, 5, S(4), S(5),
+ 6, 7, S(6), S(7),
+ 6, 7, S(6), S(7),
+ 6, 7, S(6), S(7),
+ 6, 7, S(6), S(7),
+ 6, 7, S(6), S(7),
+ 6, 7, S(6), S(7),
+ 6, 7, S(6), S(7),
+ 6, 7, S(6), S(7) },
+ { 0, 0, 0, 0, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ 0, 0, 0, 0, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ 0, 0, 0, 0, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ 0, 0, 0, 0, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ 0, 0, 0, 0, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ 0, 0, 0, 0, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ 0, 0, 0, 0, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ 0, 0, 0, 0, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ 4, 4, 4, 4,
+ S(4), S(4), S(4), S(4),
+ 4, 4, 4, 4,
+ S(4), S(4), S(4), S(4),
+ 4, 4, 4, 4,
+ S(4), S(4), S(4), S(4),
+ 4, 4, 4, 4,
+ S(4), S(4), S(4), S(4),
+ 6, 6, 6, 6,
+ S(6), S(6), S(6), S(6),
+ 6, 6, 6, 6,
+ S(6), S(6), S(6), S(6),
+ 6, 6, 6, 6,
+ S(6), S(6), S(6), S(6),
+ 6, 6, 6, 6,
+ S(6), S(6), S(6), S(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6 },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 },
+};
+
+const uint8_t vp9_block2left_sb64x32[TX_SIZE_MAX_SB][192] = {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1),
+ S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2),
+ S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3),
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4),
+ S(5), S(5), S(5), S(5), S(5), S(5), S(5), S(5),
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+ S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6),
+ S(7), S(7), S(7), S(7), S(7), S(7), S(7), S(7) },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2),
+ S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2),
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4),
+ S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4),
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6),
+ S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+};
+const uint8_t vp9_block2above_sb64x32[TX_SIZE_MAX_SB][192] = {
+ { 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7) },
+ { 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 4, 4, 4, 4, S(4), S(4), S(4), S(4),
+ T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4),
+ 4, 4, 4, 4, S(4), S(4), S(4), S(4),
+ T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4),
+ 6, 6, 6, 6, S(6), S(6), S(6), S(6),
+ T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6),
+ 6, 6, 6, 6, S(6), S(6), S(6), S(6),
+ T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0) },
+};
+#endif
+
const uint8_t vp9_block2left_sb64[TX_SIZE_MAX_SB][384] = {
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -282,14 +763,14 @@ const uint8_t vp9_block2left_sb64[TX_SIZE_MAX_SB][384] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4,
@@ -408,18 +889,18 @@ const uint8_t vp9_block2above_sb64[TX_SIZE_MAX_SB][384] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4,
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 3cfdb95eb..b5a3d4530 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -21,9 +21,6 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_enums.h"
-#define TRUE 1
-#define FALSE 0
-
// #define MODE_STATS
#define MB_FEATURE_TREE_PROBS 3
@@ -131,18 +128,15 @@ typedef enum {
typedef enum {
B_DC_PRED, /* average of above and left pixels */
+ B_V_PRED, /* vertical prediction */
+ B_H_PRED, /* horizontal prediction */
+ B_D45_PRED,
+ B_D135_PRED,
+ B_D117_PRED,
+ B_D153_PRED,
+ B_D27_PRED,
+ B_D63_PRED,
B_TM_PRED,
-
- B_VE_PRED, /* vertical prediction */
- B_HE_PRED, /* horizontal prediction */
-
- B_LD_PRED,
- B_RD_PRED,
-
- B_VR_PRED,
- B_VL_PRED,
- B_HD_PRED,
- B_HU_PRED,
#if CONFIG_NEWBINTRAMODES
B_CONTEXT_PRED,
#endif
@@ -246,13 +240,10 @@ static INLINE int b_height_log2(BLOCK_SIZE_TYPE sb_type) {
return mb_height_log2(sb_type) + 2;
}
-typedef enum {
- BLOCK_4X4_LG2 = 0,
- BLOCK_8X8_LG2 = 2,
- BLOCK_16X16_LG2 = 4,
- BLOCK_32X32_LG2 = 6,
- BLOCK_64X64_LG2 = 8
-} BLOCK_SIZE_LG2;
+static INLINE int partition_plane(BLOCK_SIZE_TYPE sb_type) {
+ assert(mb_width_log2(sb_type) == mb_height_log2(sb_type));
+ return (mb_width_log2(sb_type) - 1);
+}
typedef struct {
MB_PREDICTION_MODE mode, uv_mode;
@@ -325,6 +316,17 @@ struct scale_factors {
int y_den;
int y_offset_q4;
int y_step_q4;
+
+ int (*scale_value_x)(int val, const struct scale_factors *scale);
+ int (*scale_value_y)(int val, const struct scale_factors *scale);
+ void (*set_scaled_offsets)(struct scale_factors *scale, int row, int col);
+ int_mv32 (*scale_motion_vector_q3_to_q4)(const int_mv *src_mv,
+ const struct scale_factors *scale);
+ int32_t (*scale_motion_vector_component_q4)(int mv_q4,
+ int num,
+ int den,
+ int offset_q4);
+
#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
convolve_fn_t predict[2][2][8]; // horiz, vert, weight (0 - 7)
#else
@@ -435,14 +437,12 @@ typedef struct macroblockd {
/* Inverse transform function pointers. */
void (*inv_txm4x4_1)(int16_t *input, int16_t *output, int pitch);
void (*inv_txm4x4)(int16_t *input, int16_t *output, int pitch);
- void (*itxm_add)(int16_t *input, const int16_t *dq,
- uint8_t *pred, uint8_t *output, int pitch, int stride, int eob);
+ void (*itxm_add)(int16_t *input, const int16_t *dq, uint8_t *dest,
+ int stride, int eob);
void (*itxm_add_y_block)(int16_t *q, const int16_t *dq,
- uint8_t *pre, int pre_stride, uint8_t *dst, int stride,
- struct macroblockd *xd);
+ uint8_t *dst, int stride, struct macroblockd *xd);
void (*itxm_add_uv_block)(int16_t *q, const int16_t *dq,
- uint8_t *pre, int pre_stride, uint8_t *dst, int stride,
- uint16_t *eobs);
+ uint8_t *dst, int stride, uint16_t *eobs);
struct subpix_fn_table subpix;
@@ -466,15 +466,15 @@ typedef struct macroblockd {
static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) {
switch (mode) {
case DC_PRED: return B_DC_PRED;
- case V_PRED: return B_VE_PRED;
- case H_PRED: return B_HE_PRED;
+ case V_PRED: return B_V_PRED;
+ case H_PRED: return B_H_PRED;
case TM_PRED: return B_TM_PRED;
- case D45_PRED: return B_LD_PRED;
- case D135_PRED: return B_RD_PRED;
- case D117_PRED: return B_VR_PRED;
- case D153_PRED: return B_HD_PRED;
- case D27_PRED: return B_HU_PRED;
- case D63_PRED: return B_VL_PRED;
+ case D45_PRED: return B_D45_PRED;
+ case D135_PRED: return B_D135_PRED;
+ case D117_PRED: return B_D117_PRED;
+ case D153_PRED: return B_D153_PRED;
+ case D27_PRED: return B_D27_PRED;
+ case D63_PRED: return B_D63_PRED;
default:
assert(0);
return B_MODE_COUNT; // Dummy value
@@ -485,16 +485,16 @@ static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) {
static TX_TYPE txfm_map(B_PREDICTION_MODE bmode) {
switch (bmode) {
case B_TM_PRED :
- case B_RD_PRED :
+ case B_D135_PRED :
return ADST_ADST;
- case B_VE_PRED :
- case B_VR_PRED :
+ case B_V_PRED :
+ case B_D117_PRED :
return ADST_DCT;
- case B_HE_PRED :
- case B_HD_PRED :
- case B_HU_PRED :
+ case B_H_PRED :
+ case B_D153_PRED :
+ case B_D27_PRED :
return DCT_ADST;
#if CONFIG_NEWBINTRAMODES
@@ -514,6 +514,16 @@ extern const uint8_t vp9_block2left_sb[TX_SIZE_MAX_SB][96];
extern const uint8_t vp9_block2above_sb[TX_SIZE_MAX_SB][96];
extern const uint8_t vp9_block2left_sb64[TX_SIZE_MAX_SB][384];
extern const uint8_t vp9_block2above_sb64[TX_SIZE_MAX_SB][384];
+#if CONFIG_SBSEGMENT
+extern const uint8_t vp9_block2left_sb16x32[TX_SIZE_MAX_MB][48];
+extern const uint8_t vp9_block2above_sb16x32[TX_SIZE_MAX_MB][48];
+extern const uint8_t vp9_block2left_sb32x16[TX_SIZE_MAX_MB][48];
+extern const uint8_t vp9_block2above_sb32x16[TX_SIZE_MAX_MB][48];
+extern const uint8_t vp9_block2left_sb32x64[TX_SIZE_MAX_SB][192];
+extern const uint8_t vp9_block2above_sb32x64[TX_SIZE_MAX_SB][192];
+extern const uint8_t vp9_block2left_sb64x32[TX_SIZE_MAX_SB][192];
+extern const uint8_t vp9_block2above_sb64x32[TX_SIZE_MAX_SB][192];
+#endif
#define USE_ADST_FOR_I16X16_8X8 1
#define USE_ADST_FOR_I16X16_4X4 1
diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h
index 3ffa513ea..dbfb9ed46 100644
--- a/vp9/common/vp9_common.h
+++ b/vp9/common/vp9_common.h
@@ -19,9 +19,6 @@
#include "vpx_mem/vpx_mem.h"
#include "vpx/vpx_integer.h"
-#define TRUE 1
-#define FALSE 0
-
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
diff --git a/vp9/common/vp9_context.c b/vp9/common/vp9_context.c
deleted file mode 100644
index 271b45541..000000000
--- a/vp9/common/vp9_context.c
+++ /dev/null
@@ -1,397 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vp9/common/vp9_entropy.h"
-
-/* *** GENERATED FILE: DO NOT EDIT *** */
-
-#if 0
-int Contexts[vp8_coef_counter_dimen];
-
-const int default_contexts[vp8_coef_counter_dimen] = {
- {
- // Block Type ( 0 )
- {
- // Coeff Band ( 0 )
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- },
- {
- // Coeff Band ( 1 )
- {30190, 26544, 225, 24, 4, 0, 0, 0, 0, 0, 0, 4171593},
- {26846, 25157, 1241, 130, 26, 6, 1, 0, 0, 0, 0, 149987},
- {10484, 9538, 1006, 160, 36, 18, 0, 0, 0, 0, 0, 15104},
- },
- {
- // Coeff Band ( 2 )
- {25842, 40456, 1126, 83, 11, 2, 0, 0, 0, 0, 0, 0},
- {9338, 8010, 512, 73, 7, 3, 2, 0, 0, 0, 0, 43294},
- {1047, 751, 149, 31, 13, 6, 1, 0, 0, 0, 0, 879},
- },
- {
- // Coeff Band ( 3 )
- {26136, 9826, 252, 13, 0, 0, 0, 0, 0, 0, 0, 0},
- {8134, 5574, 191, 14, 2, 0, 0, 0, 0, 0, 0, 35302},
- { 605, 677, 116, 9, 1, 0, 0, 0, 0, 0, 0, 611},
- },
- {
- // Coeff Band ( 4 )
- {10263, 15463, 283, 17, 0, 0, 0, 0, 0, 0, 0, 0},
- {2773, 2191, 128, 9, 2, 2, 0, 0, 0, 0, 0, 10073},
- { 134, 125, 32, 4, 0, 2, 0, 0, 0, 0, 0, 50},
- },
- {
- // Coeff Band ( 5 )
- {10483, 2663, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0},
- {2137, 1251, 27, 1, 1, 0, 0, 0, 0, 0, 0, 14362},
- { 116, 156, 14, 2, 1, 0, 0, 0, 0, 0, 0, 190},
- },
- {
- // Coeff Band ( 6 )
- {40977, 27614, 412, 28, 0, 0, 0, 0, 0, 0, 0, 0},
- {6113, 5213, 261, 22, 3, 0, 0, 0, 0, 0, 0, 26164},
- { 382, 312, 50, 14, 2, 0, 0, 0, 0, 0, 0, 345},
- },
- {
- // Coeff Band ( 7 )
- { 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- { 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 319},
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8},
- },
- },
- {
- // Block Type ( 1 )
- {
- // Coeff Band ( 0 )
- {3268, 19382, 1043, 250, 93, 82, 49, 26, 17, 8, 25, 82289},
- {8758, 32110, 5436, 1832, 827, 668, 420, 153, 24, 0, 3, 52914},
- {9337, 23725, 8487, 3954, 2107, 1836, 1069, 399, 59, 0, 0, 18620},
- },
- {
- // Coeff Band ( 1 )
- {12419, 8420, 452, 62, 9, 1, 0, 0, 0, 0, 0, 0},
- {11715, 8705, 693, 92, 15, 7, 2, 0, 0, 0, 0, 53988},
- {7603, 8585, 2306, 778, 270, 145, 39, 5, 0, 0, 0, 9136},
- },
- {
- // Coeff Band ( 2 )
- {15938, 14335, 1207, 184, 55, 13, 4, 1, 0, 0, 0, 0},
- {7415, 6829, 1138, 244, 71, 26, 7, 0, 0, 0, 0, 9980},
- {1580, 1824, 655, 241, 89, 46, 10, 2, 0, 0, 0, 429},
- },
- {
- // Coeff Band ( 3 )
- {19453, 5260, 201, 19, 0, 0, 0, 0, 0, 0, 0, 0},
- {9173, 3758, 213, 22, 1, 1, 0, 0, 0, 0, 0, 9820},
- {1689, 1277, 276, 51, 17, 4, 0, 0, 0, 0, 0, 679},
- },
- {
- // Coeff Band ( 4 )
- {12076, 10667, 620, 85, 19, 9, 5, 0, 0, 0, 0, 0},
- {4665, 3625, 423, 55, 19, 9, 0, 0, 0, 0, 0, 5127},
- { 415, 440, 143, 34, 20, 7, 2, 0, 0, 0, 0, 101},
- },
- {
- // Coeff Band ( 5 )
- {12183, 4846, 115, 11, 1, 0, 0, 0, 0, 0, 0, 0},
- {4226, 3149, 177, 21, 2, 0, 0, 0, 0, 0, 0, 7157},
- { 375, 621, 189, 51, 11, 4, 1, 0, 0, 0, 0, 198},
- },
- {
- // Coeff Band ( 6 )
- {61658, 37743, 1203, 94, 10, 3, 0, 0, 0, 0, 0, 0},
- {15514, 11563, 903, 111, 14, 5, 0, 0, 0, 0, 0, 25195},
- { 929, 1077, 291, 78, 14, 7, 1, 0, 0, 0, 0, 507},
- },
- {
- // Coeff Band ( 7 )
- { 0, 990, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0},
- { 0, 412, 13, 0, 0, 0, 0, 0, 0, 0, 0, 1641},
- { 0, 18, 7, 1, 0, 0, 0, 0, 0, 0, 0, 30},
- },
- },
- {
- // Block Type ( 2 )
- {
- // Coeff Band ( 0 )
- { 953, 24519, 628, 120, 28, 12, 4, 0, 0, 0, 0, 2248798},
- {1525, 25654, 2647, 617, 239, 143, 42, 5, 0, 0, 0, 66837},
- {1180, 11011, 3001, 1237, 532, 448, 239, 54, 5, 0, 0, 7122},
- },
- {
- // Coeff Band ( 1 )
- {1356, 2220, 67, 10, 4, 1, 0, 0, 0, 0, 0, 0},
- {1450, 2544, 102, 18, 4, 3, 0, 0, 0, 0, 0, 57063},
- {1182, 2110, 470, 130, 41, 21, 0, 0, 0, 0, 0, 6047},
- },
- {
- // Coeff Band ( 2 )
- { 370, 3378, 200, 30, 5, 4, 1, 0, 0, 0, 0, 0},
- { 293, 1006, 131, 29, 11, 0, 0, 0, 0, 0, 0, 5404},
- { 114, 387, 98, 23, 4, 8, 1, 0, 0, 0, 0, 236},
- },
- {
- // Coeff Band ( 3 )
- { 579, 194, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- { 395, 213, 5, 1, 0, 0, 0, 0, 0, 0, 0, 4157},
- { 119, 122, 4, 0, 0, 0, 0, 0, 0, 0, 0, 300},
- },
- {
- // Coeff Band ( 4 )
- { 38, 557, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- { 21, 114, 12, 1, 0, 0, 0, 0, 0, 0, 0, 427},
- { 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7},
- },
- {
- // Coeff Band ( 5 )
- { 52, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- { 18, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 652},
- { 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30},
- },
- {
- // Coeff Band ( 6 )
- { 640, 569, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- { 25, 77, 2, 0, 0, 0, 0, 0, 0, 0, 0, 517},
- { 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3},
- },
- {
- // Coeff Band ( 7 )
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- },
- },
- {
- // Block Type ( 3 )
- {
- // Coeff Band ( 0 )
- {2506, 20161, 2707, 767, 261, 178, 107, 30, 14, 3, 0, 100694},
- {8806, 36478, 8817, 3268, 1280, 850, 401, 114, 42, 0, 0, 58572},
- {11003, 27214, 11798, 5716, 2482, 2072, 1048, 175, 32, 0, 0, 19284},
- },
- {
- // Coeff Band ( 1 )
- {9738, 11313, 959, 205, 70, 18, 11, 1, 0, 0, 0, 0},
- {12628, 15085, 1507, 273, 52, 19, 9, 0, 0, 0, 0, 54280},
- {10701, 15846, 5561, 1926, 813, 570, 249, 36, 0, 0, 0, 6460},
- },
- {
- // Coeff Band ( 2 )
- {6781, 22539, 2784, 634, 182, 123, 20, 4, 0, 0, 0, 0},
- {6263, 11544, 2649, 790, 259, 168, 27, 5, 0, 0, 0, 20539},
- {3109, 4075, 2031, 896, 457, 386, 158, 29, 0, 0, 0, 1138},
- },
- {
- // Coeff Band ( 3 )
- {11515, 4079, 465, 73, 5, 14, 2, 0, 0, 0, 0, 0},
- {9361, 5834, 650, 96, 24, 8, 4, 0, 0, 0, 0, 22181},
- {4343, 3974, 1360, 415, 132, 96, 14, 1, 0, 0, 0, 1267},
- },
- {
- // Coeff Band ( 4 )
- {4787, 9297, 823, 168, 44, 12, 4, 0, 0, 0, 0, 0},
- {3619, 4472, 719, 198, 60, 31, 3, 0, 0, 0, 0, 8401},
- {1157, 1175, 483, 182, 88, 31, 8, 0, 0, 0, 0, 268},
- },
- {
- // Coeff Band ( 5 )
- {8299, 1226, 32, 5, 1, 0, 0, 0, 0, 0, 0, 0},
- {3502, 1568, 57, 4, 1, 1, 0, 0, 0, 0, 0, 9811},
- {1055, 1070, 166, 29, 6, 1, 0, 0, 0, 0, 0, 527},
- },
- {
- // Coeff Band ( 6 )
- {27414, 27927, 1989, 347, 69, 26, 0, 0, 0, 0, 0, 0},
- {5876, 10074, 1574, 341, 91, 24, 4, 0, 0, 0, 0, 21954},
- {1571, 2171, 778, 324, 124, 65, 16, 0, 0, 0, 0, 979},
- },
- {
- // Coeff Band ( 7 )
- { 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- { 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459},
- { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13},
- },
- },
-};
-
-// Update probabilities for the nodes in the token entropy tree.
-const vp9_prob tree_update_probs[vp9_coef_tree_dimen] = {
- {
- {
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
- {249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
- {234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255, },
- {250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255, },
- {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- },
- {
- {
- {217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255, },
- {234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255, },
- },
- {
- {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
- {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- },
- {
- {
- {186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255, },
- {234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255, },
- {251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255, },
- },
- {
- {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- },
- {
- {
- {248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255, },
- {248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
- {246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
- {252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
- {248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
- {253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
- {252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
- {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- {
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
- },
- },
-};
-#endif
diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c
index 500a278ff..5e6cba2ed 100644
--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -386,7 +386,7 @@ const vp9_tree_index vp9_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */
-DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */
};
-struct vp9_token_struct vp9_coef_encodings[MAX_ENTROPY_TOKENS];
+struct vp9_token vp9_coef_encodings[MAX_ENTROPY_TOKENS];
/* Trees for extra bits. Probabilities are constant and
do not depend on previously encoded bits */
@@ -408,7 +408,7 @@ const vp9_tree_index vp9_nzc4x4_tree[2 * NZC4X4_NODES] = {
-NZC_3TO4, 8,
-NZC_5TO8, -NZC_9TO16,
};
-struct vp9_token_struct vp9_nzc4x4_encodings[NZC4X4_TOKENS];
+struct vp9_token vp9_nzc4x4_encodings[NZC4X4_TOKENS];
const vp9_tree_index vp9_nzc8x8_tree[2 * NZC8X8_NODES] = {
-NZC_0, 2,
@@ -419,7 +419,7 @@ const vp9_tree_index vp9_nzc8x8_tree[2 * NZC8X8_NODES] = {
-NZC_9TO16, 12,
-NZC_17TO32, -NZC_33TO64,
};
-struct vp9_token_struct vp9_nzc8x8_encodings[NZC8X8_TOKENS];
+struct vp9_token vp9_nzc8x8_encodings[NZC8X8_TOKENS];
const vp9_tree_index vp9_nzc16x16_tree[2 * NZC16X16_NODES] = {
-NZC_0, 2,
@@ -432,7 +432,7 @@ const vp9_tree_index vp9_nzc16x16_tree[2 * NZC16X16_NODES] = {
-NZC_33TO64, 16,
-NZC_65TO128, -NZC_129TO256,
};
-struct vp9_token_struct vp9_nzc16x16_encodings[NZC16X16_TOKENS];
+struct vp9_token vp9_nzc16x16_encodings[NZC16X16_TOKENS];
const vp9_tree_index vp9_nzc32x32_tree[2 * NZC32X32_NODES] = {
-NZC_0, 2,
@@ -447,7 +447,7 @@ const vp9_tree_index vp9_nzc32x32_tree[2 * NZC32X32_NODES] = {
-NZC_129TO256, 20,
-NZC_257TO512, -NZC_513TO1024,
};
-struct vp9_token_struct vp9_nzc32x32_encodings[NZC32X32_TOKENS];
+struct vp9_token vp9_nzc32x32_encodings[NZC32X32_TOKENS];
const int vp9_extranzcbits[NZC32X32_TOKENS] = {
0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index 645faa2c6..db167420c 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -40,7 +40,7 @@ extern const int vp9_i8x8_block[4];
extern const vp9_tree_index vp9_coef_tree[];
-extern struct vp9_token_struct vp9_coef_encodings[MAX_ENTROPY_TOKENS];
+extern struct vp9_token vp9_coef_encodings[MAX_ENTROPY_TOKENS];
typedef struct {
vp9_tree_p tree;
@@ -215,10 +215,10 @@ extern const vp9_tree_index vp9_nzc16x16_tree[];
extern const vp9_tree_index vp9_nzc32x32_tree[];
/* nzc encodings */
-extern struct vp9_token_struct vp9_nzc4x4_encodings[NZC4X4_TOKENS];
-extern struct vp9_token_struct vp9_nzc8x8_encodings[NZC8X8_TOKENS];
-extern struct vp9_token_struct vp9_nzc16x16_encodings[NZC16X16_TOKENS];
-extern struct vp9_token_struct vp9_nzc32x32_encodings[NZC32X32_TOKENS];
+extern struct vp9_token vp9_nzc4x4_encodings[NZC4X4_TOKENS];
+extern struct vp9_token vp9_nzc8x8_encodings[NZC8X8_TOKENS];
+extern struct vp9_token vp9_nzc16x16_encodings[NZC16X16_TOKENS];
+extern struct vp9_token vp9_nzc32x32_encodings[NZC32X32_TOKENS];
#define codenzc(x) (\
(x) <= 3 ? (x) : (x) <= 4 ? 3 : (x) <= 8 ? 4 : \
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index 29855b633..f4182443a 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -72,18 +72,18 @@ static const unsigned int kf_uv_mode_cts [VP9_YMODES] [VP9_UV_MODES] = {
static const unsigned int bmode_cts[VP9_NKF_BINTRAMODES] = {
#if CONFIG_NEWBINTRAMODES
#if CONTEXT_PRED_REPLACEMENTS == 6
- /* DC TM VE HE CONTEXT */
+ /* DC TM V H CONTEXT */
43891, 17694, 10036, 3920, 20000
#elif CONTEXT_PRED_REPLACEMENTS == 4
- /* DC TM VE HE LD RD CONTEXT */
+ /* DC TM V H D45 D135 CONTEXT */
43891, 17694, 10036, 3920, 3363, 2546, 14000
#elif CONTEXT_PRED_REPLACEMENTS == 0
- /* DC TM VE HE LD RD VR VL HD HU CONTEXT */
- 43891, 17694, 10036, 3920, 3363, 2546, 5119, 3221, 2471, 1723, 50000
+ /* DC V H D45 D135 D117 D153 D27 D63 TM CONTEXT */
+ 43891, 10036, 3920, 3363, 2546, 5119, 2471, 1723, 3221, 17694, 50000
#endif
#else
- /* DC TM VE HE LD RD VR VL HD HU */
- 43891, 17694, 10036, 3920, 3363, 2546, 5119, 3221, 2471, 1723
+ /* DC V H D45 D135 D117 D153 D27 D63 TM */
+ 43891, 10036, 3920, 3363, 2546, 5119, 2471, 1723, 3221, 17694
#endif
};
@@ -151,18 +151,29 @@ const int vp9_mbsplit_count [VP9_NUMMBSPLITS] = { 2, 2, 4, 16};
const vp9_prob vp9_mbsplit_probs [VP9_NUMMBSPLITS - 1] = { 110, 111, 150};
+#if CONFIG_SBSEGMENT
+const vp9_prob vp9_partition_probs[PARTITION_PLANES][PARTITION_TYPES - 1] = {
+ {110, 111, 150},
+ {110, 111, 150},
+};
+#else
+const vp9_prob vp9_partition_probs[PARTITION_PLANES][PARTITION_TYPES - 1] = {
+ {200}, {200},
+};
+#endif
+
/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
const vp9_tree_index vp9_kf_bmode_tree[VP9_KF_BINTRAMODES * 2 - 2] = {
-B_DC_PRED, 2, /* 0 = DC_NODE */
-B_TM_PRED, 4, /* 1 = TM_NODE */
- -B_VE_PRED, 6, /* 2 = VE_NODE */
+ -B_V_PRED, 6, /* 2 = V_NODE */
8, 12, /* 3 = COM_NODE */
- -B_HE_PRED, 10, /* 4 = HE_NODE */
- -B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */
- -B_LD_PRED, 14, /* 6 = LD_NODE */
- -B_VL_PRED, 16, /* 7 = VL_NODE */
- -B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */
+ -B_H_PRED, 10, /* 4 = H_NODE */
+ -B_D135_PRED, -B_D117_PRED, /* 5 = D135_NODE */
+ -B_D45_PRED, 14, /* 6 = D45_NODE */
+ -B_D63_PRED, 16, /* 7 = D63_NODE */
+ -B_D153_PRED, -B_D27_PRED /* 8 = D153_NODE */
};
const vp9_tree_index vp9_bmode_tree[VP9_NKF_BINTRAMODES * 2 - 2] = {
@@ -171,36 +182,36 @@ const vp9_tree_index vp9_bmode_tree[VP9_NKF_BINTRAMODES * 2 - 2] = {
-B_DC_PRED, 2,
-B_TM_PRED, 4,
6, -(B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS),
- -B_VE_PRED, -B_HE_PRED
+ -B_V_PRED, -B_H_PRED
#elif CONTEXT_PRED_REPLACEMENTS == 4
-B_DC_PRED, 2,
-B_TM_PRED, 4,
6, 8,
- -B_VE_PRED, -B_HE_PRED,
+ -B_V_PRED, -B_H_PRED,
10, -(B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS),
- -B_RD_PRED, -B_LD_PRED,
+ -B_D135_PRED, -B_D45_PRED,
#elif CONTEXT_PRED_REPLACEMENTS == 0
-B_DC_PRED, 2, /* 0 = DC_NODE */
-B_TM_PRED, 4, /* 1 = TM_NODE */
- -B_VE_PRED, 6, /* 2 = VE_NODE */
+ -B_V_PRED, 6, /* 2 = V_NODE */
8, 12, /* 3 = COM_NODE */
- -B_HE_PRED, 10, /* 4 = HE_NODE */
- -B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */
- -B_LD_PRED, 14, /* 6 = LD_NODE */
- -B_VL_PRED, 16, /* 7 = VL_NODE */
- -B_HD_PRED, 18,
- -B_HU_PRED, -B_CONTEXT_PRED
+ -B_H_PRED, 10, /* 4 = H_NODE */
+ -B_D135_PRED, -B_D117_PRED, /* 5 = D135_NODE */
+ -B_D45_PRED, 14, /* 6 = D45_NODE */
+ -B_D63_PRED, 16, /* 7 = D63_NODE */
+ -B_D153_PRED, 18, /* 8 = D153_NODE */
+ -B_D27_PRED, -B_CONTEXT_PRED /* 9 = D27_NODE */
#endif
#else
-B_DC_PRED, 2, /* 0 = DC_NODE */
-B_TM_PRED, 4, /* 1 = TM_NODE */
- -B_VE_PRED, 6, /* 2 = VE_NODE */
+ -B_V_PRED, 6, /* 2 = V_NODE */
8, 12, /* 3 = COM_NODE */
- -B_HE_PRED, 10, /* 4 = HE_NODE */
- -B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */
- -B_LD_PRED, 14, /* 6 = LD_NODE */
- -B_VL_PRED, 16, /* 7 = VL_NODE */
- -B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */
+ -B_H_PRED, 10, /* 4 = H_NODE */
+ -B_D135_PRED, -B_D117_PRED, /* 5 = D135_NODE */
+ -B_D45_PRED, 14, /* 6 = D45_NODE */
+ -B_D63_PRED, 16, /* 7 = D63_NODE */
+ -B_D153_PRED, -B_D27_PRED /* 8 = D153_NODE */
#endif
};
@@ -283,19 +294,33 @@ const vp9_tree_index vp9_sub_mv_ref_tree[6] = {
-ZERO4X4, -NEW4X4
};
-struct vp9_token_struct vp9_bmode_encodings[VP9_NKF_BINTRAMODES];
-struct vp9_token_struct vp9_kf_bmode_encodings[VP9_KF_BINTRAMODES];
-struct vp9_token_struct vp9_ymode_encodings[VP9_YMODES];
-struct vp9_token_struct vp9_sb_ymode_encodings[VP9_I32X32_MODES];
-struct vp9_token_struct vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES];
-struct vp9_token_struct vp9_kf_ymode_encodings[VP9_YMODES];
-struct vp9_token_struct vp9_uv_mode_encodings[VP9_UV_MODES];
-struct vp9_token_struct vp9_i8x8_mode_encodings[VP9_I8X8_MODES];
-struct vp9_token_struct vp9_mbsplit_encodings[VP9_NUMMBSPLITS];
+#if CONFIG_SBSEGMENT
+const vp9_tree_index vp9_partition_tree[6] = {
+ -PARTITION_NONE, 2,
+ -PARTITION_HORZ, 4,
+ -PARTITION_VERT, -PARTITION_SPLIT
+};
+#else
+const vp9_tree_index vp9_partition_tree[2] = {
+ -PARTITION_NONE, -PARTITION_SPLIT
+};
+#endif
+
+struct vp9_token vp9_bmode_encodings[VP9_NKF_BINTRAMODES];
+struct vp9_token vp9_kf_bmode_encodings[VP9_KF_BINTRAMODES];
+struct vp9_token vp9_ymode_encodings[VP9_YMODES];
+struct vp9_token vp9_sb_ymode_encodings[VP9_I32X32_MODES];
+struct vp9_token vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES];
+struct vp9_token vp9_kf_ymode_encodings[VP9_YMODES];
+struct vp9_token vp9_uv_mode_encodings[VP9_UV_MODES];
+struct vp9_token vp9_i8x8_mode_encodings[VP9_I8X8_MODES];
+struct vp9_token vp9_mbsplit_encodings[VP9_NUMMBSPLITS];
-struct vp9_token_struct vp9_mv_ref_encoding_array[VP9_MVREFS];
-struct vp9_token_struct vp9_sb_mv_ref_encoding_array[VP9_MVREFS];
-struct vp9_token_struct vp9_sub_mv_ref_encoding_array[VP9_SUBMVREFS];
+struct vp9_token vp9_mv_ref_encoding_array[VP9_MVREFS];
+struct vp9_token vp9_sb_mv_ref_encoding_array[VP9_MVREFS];
+struct vp9_token vp9_sub_mv_ref_encoding_array[VP9_SUBMVREFS];
+
+struct vp9_token vp9_partition_encodings[PARTITION_TYPES];
void vp9_init_mbmode_probs(VP9_COMMON *x) {
unsigned int bct [VP9_YMODES] [2]; /* num Ymodes > num UV modes */
@@ -332,6 +357,10 @@ void vp9_init_mbmode_probs(VP9_COMMON *x) {
vpx_memcpy(x->fc.mbsplit_prob, vp9_mbsplit_probs, sizeof(vp9_mbsplit_probs));
vpx_memcpy(x->fc.switchable_interp_prob, vp9_switchable_interp_prob,
sizeof(vp9_switchable_interp_prob));
+
+ vpx_memcpy(x->fc.partition_prob, vp9_partition_probs,
+ sizeof(vp9_partition_probs));
+
#if CONFIG_COMP_INTERINTRA_PRED
x->fc.interintra_prob = VP9_DEF_INTERINTRA_PROB;
#endif
@@ -379,7 +408,7 @@ const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = {
-0, 2,
-1, -2
};
-struct vp9_token_struct vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
+struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
#if CONFIG_ENABLE_6TAP
const INTERPOLATIONFILTERTYPE vp9_switchable_interp[VP9_SWITCHABLE_FILTERS] = {
SIXTAP, EIGHTTAP, EIGHTTAP_SHARP};
@@ -397,7 +426,7 @@ const vp9_prob vp9_switchable_interp_prob [VP9_SWITCHABLE_FILTERS+1]
const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = {
-0, -1,
};
-struct vp9_token_struct vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
+struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
const vp9_prob vp9_switchable_interp_prob [VP9_SWITCHABLE_FILTERS+1]
[VP9_SWITCHABLE_FILTERS-1] = {
{248},
@@ -433,6 +462,7 @@ void vp9_entropy_mode_init() {
vp9_tokens_from_tree(vp9_mbsplit_encodings, vp9_mbsplit_tree);
vp9_tokens_from_tree(vp9_switchable_interp_encodings,
vp9_switchable_interp_tree);
+ vp9_tokens_from_tree(vp9_partition_encodings, vp9_partition_tree);
vp9_tokens_from_tree_offset(vp9_mv_ref_encoding_array,
vp9_mv_ref_tree, NEARESTMV);
@@ -631,6 +661,10 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
interintra_prob, factor);
}
#endif
+ for (i = 0; i < PARTITION_PLANES; i++)
+ update_mode_probs(PARTITION_TYPES, vp9_partition_tree,
+ cm->fc.partition_counts[i], cm->fc.pre_partition_prob[i],
+ cm->fc.partition_prob[i], 0);
}
static void set_default_lf_deltas(MACROBLOCKD *xd) {
diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h
index 8b0caf6eb..665569578 100644
--- a/vp9/common/vp9_entropymode.h
+++ b/vp9/common/vp9_entropymode.h
@@ -54,21 +54,25 @@ extern const vp9_tree_index vp9_mv_ref_tree[];
extern const vp9_tree_index vp9_sb_mv_ref_tree[];
extern const vp9_tree_index vp9_sub_mv_ref_tree[];
-extern struct vp9_token_struct vp9_bmode_encodings[VP9_NKF_BINTRAMODES];
-extern struct vp9_token_struct vp9_kf_bmode_encodings[VP9_KF_BINTRAMODES];
-extern struct vp9_token_struct vp9_ymode_encodings[VP9_YMODES];
-extern struct vp9_token_struct vp9_sb_ymode_encodings[VP9_I32X32_MODES];
-extern struct vp9_token_struct vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES];
-extern struct vp9_token_struct vp9_kf_ymode_encodings[VP9_YMODES];
-extern struct vp9_token_struct vp9_i8x8_mode_encodings[VP9_I8X8_MODES];
-extern struct vp9_token_struct vp9_uv_mode_encodings[VP9_UV_MODES];
-extern struct vp9_token_struct vp9_mbsplit_encodings[VP9_NUMMBSPLITS];
+extern struct vp9_token vp9_bmode_encodings[VP9_NKF_BINTRAMODES];
+extern struct vp9_token vp9_kf_bmode_encodings[VP9_KF_BINTRAMODES];
+extern struct vp9_token vp9_ymode_encodings[VP9_YMODES];
+extern struct vp9_token vp9_sb_ymode_encodings[VP9_I32X32_MODES];
+extern struct vp9_token vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES];
+extern struct vp9_token vp9_kf_ymode_encodings[VP9_YMODES];
+extern struct vp9_token vp9_i8x8_mode_encodings[VP9_I8X8_MODES];
+extern struct vp9_token vp9_uv_mode_encodings[VP9_UV_MODES];
+extern struct vp9_token vp9_mbsplit_encodings[VP9_NUMMBSPLITS];
/* Inter mode values do not start at zero */
-extern struct vp9_token_struct vp9_mv_ref_encoding_array[VP9_MVREFS];
-extern struct vp9_token_struct vp9_sb_mv_ref_encoding_array[VP9_MVREFS];
-extern struct vp9_token_struct vp9_sub_mv_ref_encoding_array[VP9_SUBMVREFS];
+extern struct vp9_token vp9_mv_ref_encoding_array[VP9_MVREFS];
+extern struct vp9_token vp9_sb_mv_ref_encoding_array[VP9_MVREFS];
+extern struct vp9_token vp9_sub_mv_ref_encoding_array[VP9_SUBMVREFS];
+
+// probability models for partition information
+extern const vp9_tree_index vp9_partition_tree[];
+extern struct vp9_token vp9_partition_encodings[PARTITION_TYPES];
void vp9_entropy_mode_init(void);
@@ -107,8 +111,7 @@ extern const int vp9_is_interpolating_filter[SWITCHABLE + 1];
extern const vp9_tree_index vp9_switchable_interp_tree
[2 * (VP9_SWITCHABLE_FILTERS - 1)];
-extern struct vp9_token_struct vp9_switchable_interp_encodings
- [VP9_SWITCHABLE_FILTERS];
+extern struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
extern const vp9_prob vp9_switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
[VP9_SWITCHABLE_FILTERS - 1];
diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c
index 8330befbe..fe3667725 100644
--- a/vp9/common/vp9_entropymv.c
+++ b/vp9/common/vp9_entropymv.c
@@ -33,7 +33,7 @@ const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2] = {
-MV_JOINT_HNZVZ, 4,
-MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ
};
-struct vp9_token_struct vp9_mv_joint_encodings[MV_JOINTS];
+struct vp9_token vp9_mv_joint_encodings[MV_JOINTS];
const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2] = {
-MV_CLASS_0, 2,
@@ -47,19 +47,19 @@ const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2] = {
-MV_CLASS_7, -MV_CLASS_8,
-MV_CLASS_9, -MV_CLASS_10,
};
-struct vp9_token_struct vp9_mv_class_encodings[MV_CLASSES];
+struct vp9_token vp9_mv_class_encodings[MV_CLASSES];
const vp9_tree_index vp9_mv_class0_tree [2 * CLASS0_SIZE - 2] = {
-0, -1,
};
-struct vp9_token_struct vp9_mv_class0_encodings[CLASS0_SIZE];
+struct vp9_token vp9_mv_class0_encodings[CLASS0_SIZE];
const vp9_tree_index vp9_mv_fp_tree [2 * 4 - 2] = {
-0, 2,
-1, 4,
-2, -3
};
-struct vp9_token_struct vp9_mv_fp_encodings[4];
+struct vp9_token vp9_mv_fp_encodings[4];
const nmv_context vp9_default_nmv_context = {
{32, 64, 96},
@@ -212,10 +212,10 @@ void vp9_increment_nmv(const MV *mv, const MV *ref, nmv_context_counts *mvctx,
const MV_JOINT_TYPE type = vp9_get_mv_joint(*mv);
mvctx->joints[type]++;
usehp = usehp && vp9_use_nmv_hp(ref);
- if (type == MV_JOINT_HZVNZ || type == MV_JOINT_HNZVNZ)
+ if (mv_joint_vertical(type))
increment_nmv_component_count(mv->row, &mvctx->comps[0], 1, usehp);
- if (type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ)
+ if (mv_joint_horizontal(type))
increment_nmv_component_count(mv->col, &mvctx->comps[1], 1, usehp);
}
diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h
index 162d2b44f..715b5bb2b 100644
--- a/vp9/common/vp9_entropymv.h
+++ b/vp9/common/vp9_entropymv.h
@@ -45,8 +45,16 @@ typedef enum {
MV_JOINT_HNZVNZ = 3, /* Both components nonzero */
} MV_JOINT_TYPE;
+static INLINE int mv_joint_vertical(MV_JOINT_TYPE type) {
+ return type == MV_JOINT_HZVNZ || type == MV_JOINT_HNZVNZ;
+}
+
+static INLINE int mv_joint_horizontal(MV_JOINT_TYPE type) {
+ return type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ;
+}
+
extern const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2];
-extern struct vp9_token_struct vp9_mv_joint_encodings [MV_JOINTS];
+extern struct vp9_token vp9_mv_joint_encodings[MV_JOINTS];
/* Symbols for coding magnitude class of nonzero components */
#define MV_CLASSES 11
@@ -65,7 +73,7 @@ typedef enum {
} MV_CLASS_TYPE;
extern const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2];
-extern struct vp9_token_struct vp9_mv_class_encodings [MV_CLASSES];
+extern struct vp9_token vp9_mv_class_encodings[MV_CLASSES];
#define CLASS0_BITS 1 /* bits at integer precision for class 0 */
#define CLASS0_SIZE (1 << CLASS0_BITS)
@@ -76,10 +84,10 @@ extern struct vp9_token_struct vp9_mv_class_encodings [MV_CLASSES];
#define MV_VALS ((MV_MAX << 1) + 1)
extern const vp9_tree_index vp9_mv_class0_tree[2 * CLASS0_SIZE - 2];
-extern struct vp9_token_struct vp9_mv_class0_encodings[CLASS0_SIZE];
+extern struct vp9_token vp9_mv_class0_encodings[CLASS0_SIZE];
extern const vp9_tree_index vp9_mv_fp_tree[2 * 4 - 2];
-extern struct vp9_token_struct vp9_mv_fp_encodings[4];
+extern struct vp9_token vp9_mv_fp_encodings[4];
typedef struct {
vp9_prob sign;
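The mv_joint_vertical()/mv_joint_horizontal() helpers introduced above replace the open-coded enum comparisons at their call sites in vp9_entropymv.c. Below is a self-contained sketch of the joint-type logic; get_joint() is a hypothetical stand-in for vp9_get_mv_joint(), which is not part of this diff:

/* Standalone sketch: the enum values mirror MV_JOINT_TYPE. */
#include <stdio.h>

typedef enum {
  JOINT_ZERO = 0,   /* MV_JOINT_ZERO:   both components zero    */
  JOINT_HNZVZ = 1,  /* MV_JOINT_HNZVZ:  only horizontal nonzero */
  JOINT_HZVNZ = 2,  /* MV_JOINT_HZVNZ:  only vertical nonzero   */
  JOINT_HNZVNZ = 3  /* MV_JOINT_HNZVNZ: both components nonzero */
} JOINT;

static JOINT get_joint(int row, int col) {
  return (JOINT)(((row != 0) << 1) | (col != 0));
}

static int joint_vertical(JOINT t) {
  return t == JOINT_HZVNZ || t == JOINT_HNZVNZ;
}

static int joint_horizontal(JOINT t) {
  return t == JOINT_HNZVZ || t == JOINT_HNZVNZ;
}

int main(void) {
  const JOINT t = get_joint(-3, 0);  /* row nonzero, col zero */
  /* Only the row (vertical) component would be counted/coded. */
  printf("vertical: %d, horizontal: %d\n",
         joint_vertical(t), joint_horizontal(t));
  return 0;
}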
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
index efa84c40f..930a5975f 100644
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -27,4 +27,16 @@ typedef enum BLOCK_SIZE_TYPE {
BLOCK_SIZE_SB64X64,
} BLOCK_SIZE_TYPE;
+typedef enum PARTITION_TYPE {
+ PARTITION_NONE,
+#if CONFIG_SBSEGMENT
+ PARTITION_HORZ,
+ PARTITION_VERT,
+#endif
+ PARTITION_SPLIT,
+ PARTITION_TYPES
+} PARTITION_TYPE;
+
+#define PARTITION_PLANES 2 // number of probability models
+
#endif // VP9_COMMON_VP9_ENUMS_H_
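A note on the sizing above: a full binary tree over N symbols has N - 1 internal nodes, which is why the partition probability tables added to the frame context below are dimensioned [PARTITION_PLANES][PARTITION_TYPES - 1] while the counters use the full [PARTITION_TYPES]. The sketch below is illustrative only; the real vp9_partition_tree is defined in vp9_entropymode.c and may differ in layout:

/* Illustrative only: a possible table shape for the four partition types.
 * Leaves are stored as negated symbol values, internal nodes as positive
 * offsets into the same array. */
#include <stdio.h>

typedef signed char tree_index;  /* stand-in for vp9_tree_index */

static const tree_index example_partition_tree[2 * 4 - 2] = {
  -0 /* PARTITION_NONE */,  2,
  -1 /* PARTITION_HORZ */,  4,
  -2 /* PARTITION_VERT */, -3 /* PARTITION_SPLIT */
};

/* Walk the tree with a bit source; three internal nodes means at most
 * PARTITION_TYPES - 1 = 3 probabilities/bits per symbol. */
static int read_symbol(const tree_index *tree, const int *bits) {
  tree_index i = 0;
  while ((i = tree[i + *bits++]) > 0)
    continue;
  return -i;
}

int main(void) {
  const int bits[] = { 1, 1, 0 };  /* decodes to PARTITION_VERT (2) */
  printf("%d\n", read_symbol(example_partition_tree, bits));
  return 0;
}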
diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c
index 053895840..bbbc2f62c 100644
--- a/vp9/common/vp9_findnearmv.c
+++ b/vp9/common/vp9_findnearmv.c
@@ -141,7 +141,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
unsigned int sse;
unsigned int ref_scores[MAX_MV_REF_CANDIDATES] = {0};
int_mv sorted_mvs[MAX_MV_REF_CANDIDATES];
- int zero_seen = FALSE;
+ int zero_seen = 0;
if (ref_y_buffer) {
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 9a5087326..a57f766db 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -16,24 +16,6 @@
#include "vp9/common/vp9_seg_common.h"
static void lf_init_lut(loop_filter_info_n *lfi) {
- int filt_lvl;
-
- for (filt_lvl = 0; filt_lvl <= MAX_LOOP_FILTER; filt_lvl++) {
- if (filt_lvl >= 40) {
- lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 2;
- lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 3;
- } else if (filt_lvl >= 20) {
- lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
- lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 2;
- } else if (filt_lvl >= 15) {
- lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
- lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 1;
- } else {
- lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 0;
- lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 0;
- }
- }
-
lfi->mode_lf_lut[DC_PRED] = 1;
lfi->mode_lf_lut[D45_PRED] = 1;
lfi->mode_lf_lut[D135_PRED] = 1;
@@ -194,25 +176,212 @@ static int mb_lf_skip(const MB_MODE_INFO *const mbmi) {
static int sb_mb_lf_skip(const MODE_INFO *const mip0,
const MODE_INFO *const mip1) {
const MB_MODE_INFO *mbmi0 = &mip0->mbmi;
- const MB_MODE_INFO *mbmi1 = &mip0->mbmi;
+ const MB_MODE_INFO *mbmi1 = &mip1->mbmi;
return mb_lf_skip(mbmi0) && mb_lf_skip(mbmi1) &&
- (mbmi0->ref_frame == mbmi1->ref_frame) &&
- (mbmi0->mv[mbmi0->ref_frame].as_int ==
- mbmi1->mv[mbmi1->ref_frame].as_int) &&
- mbmi0->ref_frame != INTRA_FRAME;
+ mbmi0->ref_frame != INTRA_FRAME &&
+ mbmi1->ref_frame != INTRA_FRAME;
+}
+
+static void lpf_mb(VP9_COMMON *cm, const MODE_INFO *mi,
+ int do_left_mb_v, int do_above_mb_h,
+ int do_left_mbuv_v, int do_above_mbuv_h,
+ uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr,
+ int y_stride, int uv_stride, int dering) {
+ loop_filter_info_n *lfi_n = &cm->lf_info;
+ struct loop_filter_info lfi;
+ int mode = mi->mbmi.mode;
+ int mode_index = lfi_n->mode_lf_lut[mode];
+ int seg = mi->mbmi.segment_id;
+ int ref_frame = mi->mbmi.ref_frame;
+ int filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
+
+ if (filter_level) {
+ const int skip_lf = mb_lf_skip(&mi->mbmi);
+ const int tx_size = mi->mbmi.txfm_size;
+ if (cm->filter_type == NORMAL_LOOPFILTER) {
+ const int hev_index = filter_level >> 4;
+ lfi.mblim = lfi_n->mblim[filter_level];
+ lfi.blim = lfi_n->blim[filter_level];
+ lfi.lim = lfi_n->lim[filter_level];
+ lfi.hev_thr = lfi_n->hev_thr[hev_index];
+
+ if (do_above_mb_h) {
+ if (tx_size >= TX_16X16)
+ vp9_lpf_mbh_w(y_ptr,
+ do_above_mbuv_h ? u_ptr : NULL,
+ do_above_mbuv_h ? v_ptr : NULL,
+ y_stride, uv_stride, &lfi);
+ else
+ vp9_loop_filter_mbh(y_ptr, u_ptr, v_ptr, y_stride, uv_stride, &lfi);
+ }
+
+ if (!skip_lf) {
+ if (tx_size >= TX_8X8) {
+ if (tx_size == TX_8X8 && (mode == I8X8_PRED || mode == SPLITMV))
+ vp9_loop_filter_bh8x8(y_ptr, u_ptr, v_ptr,
+ y_stride, uv_stride, &lfi);
+ else
+ vp9_loop_filter_bh8x8(y_ptr, NULL, NULL,
+ y_stride, uv_stride, &lfi);
+ } else {
+ vp9_loop_filter_bh(y_ptr, u_ptr, v_ptr,
+ y_stride, uv_stride, &lfi);
+ }
+ }
+
+ if (do_left_mb_v) {
+ if (tx_size >= TX_16X16)
+ vp9_lpf_mbv_w(y_ptr,
+ do_left_mbuv_v ? u_ptr : NULL,
+ do_left_mbuv_v ? v_ptr : NULL,
+ y_stride, uv_stride, &lfi);
+ else
+ vp9_loop_filter_mbv(y_ptr, u_ptr, v_ptr, y_stride, uv_stride, &lfi);
+ }
+
+ if (!skip_lf) {
+ if (tx_size >= TX_8X8) {
+ if (tx_size == TX_8X8 && (mode == I8X8_PRED || mode == SPLITMV))
+ vp9_loop_filter_bv8x8(y_ptr, u_ptr, v_ptr,
+ y_stride, uv_stride, &lfi);
+ else
+ vp9_loop_filter_bv8x8(y_ptr, NULL, NULL,
+ y_stride, uv_stride, &lfi);
+ } else {
+ vp9_loop_filter_bv(y_ptr, u_ptr, v_ptr,
+ y_stride, uv_stride, &lfi);
+ }
+ }
+ if (dering) {
+#if CONFIG_LOOP_DERING
+ vp9_post_proc_down_and_across(y_ptr, y_ptr,
+ y_stride, y_stride,
+ 16, 16, dering);
+ if (u_ptr && v_ptr) {
+ vp9_post_proc_down_and_across(u_ptr, u_ptr,
+ uv_stride, uv_stride,
+ 8, 8, dering);
+ vp9_post_proc_down_and_across(v_ptr, v_ptr,
+ uv_stride, uv_stride,
+ 8, 8, dering);
+ }
+#endif
+ }
+ } else {
+ // TODO(yaowu): simple loop filter
+ }
+ }
}
+static void lpf_sb32(VP9_COMMON *cm, const MODE_INFO *mode_info_context,
+ int mb_row, int mb_col,
+ uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr,
+ int y_stride, int uv_stride,
+ int y_only, int dering) {
+ BLOCK_SIZE_TYPE sb_type = mode_info_context->mbmi.sb_type;
+ TX_SIZE tx_size = mode_info_context->mbmi.txfm_size;
+ int do_left_v, do_above_h;
+ int do_left_v_mbuv, do_above_h_mbuv;
+ int mis = cm->mode_info_stride;
+ const MODE_INFO *mi;
+
+ // process 1st MB top-left
+ mi = mode_info_context;
+ do_left_v = (mb_col > 0);
+ do_above_h = (mb_row > 0);
+ do_left_v_mbuv = !(sb_type >= BLOCK_SIZE_SB64X64 &&
+ tx_size >= TX_32X32 && (mb_col & 2));
+ do_above_h_mbuv = !(sb_type >= BLOCK_SIZE_SB64X64 &&
+ tx_size >= TX_32X32 && (mb_row & 2));
+ lpf_mb(cm, mi, do_left_v, do_above_h,
+ do_left_v_mbuv, do_above_h_mbuv,
+ y_ptr,
+         y_only ? 0 : u_ptr,
+         y_only ? 0 : v_ptr,
+ y_stride, uv_stride, dering);
+ // process 2nd MB top-right
+ mi = mode_info_context + 1;
+ do_left_v = !(sb_type && (tx_size >= TX_32X32 ||
+ sb_mb_lf_skip(mode_info_context, mi)));
+ do_above_h = (mb_row > 0);
+ do_left_v_mbuv = do_left_v;
+ do_above_h_mbuv = !(sb_type >= BLOCK_SIZE_SB64X64 &&
+ tx_size >= TX_32X32 && (mb_row & 2));
+ lpf_mb(cm, mi, do_left_v, do_above_h,
+ do_left_v_mbuv, do_above_h_mbuv,
+ y_ptr + 16,
+ y_only ? 0 : (u_ptr + 8),
+ y_only ? 0 : (v_ptr + 8),
+ y_stride, uv_stride, dering);
+
+ // process 3rd MB bottom-left
+ mi = mode_info_context + mis;
+ do_left_v = (mb_col > 0);
+  do_above_h = !(sb_type && (tx_size >= TX_32X32 ||
+                             sb_mb_lf_skip(mode_info_context, mi)));
+ do_left_v_mbuv = !(sb_type >= BLOCK_SIZE_SB64X64 &&
+ tx_size >= TX_32X32 && (mb_col & 2));
+ do_above_h_mbuv = do_above_h;
+ lpf_mb(cm, mi, do_left_v, do_above_h,
+ do_left_v_mbuv, do_above_h_mbuv,
+ y_ptr + 16 * y_stride,
+ y_only ? 0 : (u_ptr + 8 * uv_stride),
+ y_only ? 0 : (v_ptr + 8 * uv_stride),
+ y_stride, uv_stride, dering);
+
+  // process 4th MB bottom-right
+ mi = mode_info_context + mis + 1;
+ do_left_v = !(sb_type && (tx_size >= TX_32X32 ||
+ sb_mb_lf_skip(mi - 1, mi)));
+  do_above_h = !(sb_type && (tx_size >= TX_32X32 ||
+                             sb_mb_lf_skip(mode_info_context + 1, mi)));
+ do_left_v_mbuv = do_left_v;
+ do_above_h_mbuv = do_above_h;
+ lpf_mb(cm, mi, do_left_v, do_above_h,
+ do_left_v_mbuv, do_above_h_mbuv,
+ y_ptr + 16 * y_stride + 16,
+ y_only ? 0 : (u_ptr + 8 * uv_stride + 8),
+ y_only ? 0 : (v_ptr + 8 * uv_stride + 8),
+ y_stride, uv_stride, dering);
+}
+
+static void lpf_sb64(VP9_COMMON *cm, const MODE_INFO *mode_info_context,
+ int mb_row, int mb_col,
+ uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr,
+ int y_stride, int uv_stride,
+ int y_only, int dering) {
+ lpf_sb32(cm, mode_info_context, mb_row, mb_col,
+ y_ptr, u_ptr, v_ptr,
+ y_stride, uv_stride, y_only, dering);
+ lpf_sb32(cm, mode_info_context + 2, mb_row, mb_col + 2,
+ y_ptr + 32, u_ptr + 16, v_ptr + 16,
+ y_stride, uv_stride, y_only, dering);
+ lpf_sb32(cm, mode_info_context + cm->mode_info_stride * 2,
+ mb_row + 2, mb_col,
+ y_ptr + 32 * y_stride,
+ u_ptr + 16 * uv_stride,
+ v_ptr + 16 * uv_stride,
+ y_stride, uv_stride, y_only, dering);
+ lpf_sb32(cm, mode_info_context + cm->mode_info_stride * 2 + 2,
+ mb_row + 2, mb_col + 2,
+ y_ptr + 32 * y_stride + 32,
+ u_ptr + 16 * uv_stride + 16,
+ v_ptr + 16 * uv_stride + 16,
+ y_stride, uv_stride, y_only, dering);
+}
void vp9_loop_filter_frame(VP9_COMMON *cm,
MACROBLOCKD *xd,
int frame_filter_level,
int y_only,
int dering) {
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
- loop_filter_info_n *lfi_n = &cm->lf_info;
- struct loop_filter_info lfi;
- const FRAME_TYPE frame_type = cm->frame_type;
int mb_row, mb_col;
-
+ const int sb64_rows = cm->mb_rows / 4;
+ const int sb64_cols = cm->mb_cols / 4;
+ const int extra_sb32_row = (cm->mb_rows & 2) != 0;
+ const int extra_sb32_col = (cm->mb_cols & 2) != 0;
+ const int extra_mb_col = cm->mb_cols & 1;
+ const int extra_mb_row = cm->mb_rows & 1;
// Set up the buffer pointers
uint8_t *y_ptr = post->y_buffer;
uint8_t *u_ptr = y_only ? 0 : post->u_buffer;
@@ -220,172 +389,197 @@ void vp9_loop_filter_frame(VP9_COMMON *cm,
// Point at base of Mb MODE_INFO list
const MODE_INFO *mode_info_context = cm->mi;
+ const MODE_INFO *mi;
const int mis = cm->mode_info_stride;
+ const int y_stride = post->y_stride;
+ const int uv_stride = post->uv_stride;
+  // These two flags signal whether the MB left edge and above edge
+  // should be filtered using the MB edge filter. Currently, MB
+  // edge filtering is not applied on MB edges internal to a
+  // 32x32 superblock if:
+  // 1) SB32 is using 32x32 prediction and 32x32 transform
+  // 2) SB32 is using 32x32 prediction and 16x16 transform
+  //    but all coefficients are zero.
+  // MB edges on a 32x32 superblock boundary are always
+  // filtered, except on the image frame boundary.
+ int do_left_v, do_above_h;
+  // These two flags signal whether the MB UV left edge and above edge
+  // should be filtered using the MB edge filter. Currently, MB
+  // edge filtering is not applied for MB edges internal to
+  // a 32x32 superblock if:
+  // 1) SB32 is using 32x32 prediction and 32x32 transform
+  // 2) SB32 is using 32x32 prediction and 16x16 transform
+  //    but all coefficients are zero.
+  // 3) the SB32 UV edge is internal to an SB64 and a 32x32 transform
+  //    is used, i.e. UV uses the 32x32 transform, hence no transform
+  //    boundary exists inside the SB64 for UV
+ int do_left_v_mbuv, do_above_h_mbuv;
// Initialize the loop filter for this frame.
vp9_loop_filter_frame_init(cm, xd, frame_filter_level);
- // vp9_filter each macro block
- for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
- for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
- const MB_PREDICTION_MODE mode = mode_info_context->mbmi.mode;
- const int mode_index = lfi_n->mode_lf_lut[mode];
- const int seg = mode_info_context->mbmi.segment_id;
- const int ref_frame = mode_info_context->mbmi.ref_frame;
- const int filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
- if (filter_level) {
- const int skip_lf = mb_lf_skip(&mode_info_context->mbmi);
- const int tx_size = mode_info_context->mbmi.txfm_size;
- if (cm->filter_type == NORMAL_LOOPFILTER) {
- const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
- lfi.mblim = lfi_n->mblim[filter_level];
- lfi.blim = lfi_n->blim[filter_level];
- lfi.lim = lfi_n->lim[filter_level];
- lfi.hev_thr = lfi_n->hev_thr[hev_index];
-
- if (mb_col > 0 &&
- !((mb_col & 1) && mode_info_context->mbmi.sb_type &&
- (sb_mb_lf_skip(mode_info_context - 1, mode_info_context) ||
- tx_size >= TX_32X32))
- ) {
- if (tx_size >= TX_16X16)
- vp9_lpf_mbv_w(y_ptr, u_ptr, v_ptr, post->y_stride,
- post->uv_stride, &lfi);
- else
- vp9_loop_filter_mbv(y_ptr, u_ptr, v_ptr, post->y_stride,
- post->uv_stride, &lfi);
- }
- if (!skip_lf) {
- if (tx_size >= TX_8X8) {
- if (tx_size == TX_8X8 && (mode == I8X8_PRED || mode == SPLITMV))
- vp9_loop_filter_bv8x8(y_ptr, u_ptr, v_ptr, post->y_stride,
- post->uv_stride, &lfi);
- else
- vp9_loop_filter_bv8x8(y_ptr, NULL, NULL, post->y_stride,
- post->uv_stride, &lfi);
- } else {
- vp9_loop_filter_bv(y_ptr, u_ptr, v_ptr, post->y_stride,
- post->uv_stride, &lfi);
- }
- }
- /* don't apply across umv border */
- if (mb_row > 0 &&
- !((mb_row & 1) && mode_info_context->mbmi.sb_type &&
- (sb_mb_lf_skip(mode_info_context - mis, mode_info_context) ||
- tx_size >= TX_32X32))
- ) {
- if (tx_size >= TX_16X16)
- vp9_lpf_mbh_w(y_ptr, u_ptr, v_ptr, post->y_stride,
- post->uv_stride, &lfi);
- else
- vp9_loop_filter_mbh(y_ptr, u_ptr, v_ptr, post->y_stride,
- post->uv_stride, &lfi);
- }
- if (!skip_lf) {
- if (tx_size >= TX_8X8) {
- if (tx_size == TX_8X8 && (mode == I8X8_PRED || mode == SPLITMV))
- vp9_loop_filter_bh8x8(y_ptr, u_ptr, v_ptr, post->y_stride,
- post->uv_stride, &lfi);
- else
- vp9_loop_filter_bh8x8(y_ptr, NULL, NULL, post->y_stride,
- post->uv_stride, &lfi);
- } else {
- vp9_loop_filter_bh(y_ptr, u_ptr, v_ptr, post->y_stride,
- post->uv_stride, &lfi);
- }
- }
-#if CONFIG_LOOP_DERING
- if (dering) {
- if (mb_row && mb_row < cm->mb_rows - 1 &&
- mb_col && mb_col < cm->mb_cols - 1) {
- vp9_post_proc_down_and_across(y_ptr, y_ptr,
- post->y_stride, post->y_stride,
- 16, 16, dering);
- if (!y_only) {
- vp9_post_proc_down_and_across(u_ptr, u_ptr,
- post->uv_stride, post->uv_stride,
- 8, 8, dering);
- vp9_post_proc_down_and_across(v_ptr, v_ptr,
- post->uv_stride, post->uv_stride,
- 8, 8, dering);
- }
- } else {
- // Adjust the filter so that no out-of-frame data is used.
- uint8_t *dr_y = y_ptr, *dr_u = u_ptr, *dr_v = v_ptr;
- int w_adjust = 0;
- int h_adjust = 0;
-
- if (mb_col == 0) {
- dr_y += 2;
- dr_u += 2;
- dr_v += 2;
- w_adjust += 2;
- }
- if (mb_col == cm->mb_cols - 1)
- w_adjust += 2;
- if (mb_row == 0) {
- dr_y += 2 * post->y_stride;
- dr_u += 2 * post->uv_stride;
- dr_v += 2 * post->uv_stride;
- h_adjust += 2;
- }
- if (mb_row == cm->mb_rows - 1)
- h_adjust += 2;
- vp9_post_proc_down_and_across_c(dr_y, dr_y,
- post->y_stride, post->y_stride,
- 16 - w_adjust, 16 - h_adjust,
- dering);
- if (!y_only) {
- vp9_post_proc_down_and_across_c(dr_u, dr_u,
- post->uv_stride,
- post->uv_stride,
- 8 - w_adjust, 8 - h_adjust,
- dering);
- vp9_post_proc_down_and_across_c(dr_v, dr_v,
- post->uv_stride,
- post->uv_stride,
- 8 - w_adjust, 8 - h_adjust,
- dering);
- }
- }
- }
-#endif
- } else {
- // FIXME: Not 8x8 aware
- if (mb_col > 0 &&
- !(skip_lf && mb_lf_skip(&mode_info_context[-1].mbmi)) &&
- !((mb_col & 1) && mode_info_context->mbmi.sb_type))
- vp9_loop_filter_simple_mbv(y_ptr, post->y_stride,
- lfi_n->mblim[filter_level]);
- if (!skip_lf)
- vp9_loop_filter_simple_bv(y_ptr, post->y_stride,
- lfi_n->blim[filter_level]);
-
- /* don't apply across umv border */
- if (mb_row > 0 &&
- !(skip_lf && mb_lf_skip(&mode_info_context[-mis].mbmi)) &&
- !((mb_row & 1) && mode_info_context->mbmi.sb_type))
- vp9_loop_filter_simple_mbh(y_ptr, post->y_stride,
- lfi_n->mblim[filter_level]);
- if (!skip_lf)
- vp9_loop_filter_simple_bh(y_ptr, post->y_stride,
- lfi_n->blim[filter_level]);
- }
- }
+  // Loop-filter each 64x64 SB:
+  // For each SB64: the 4 SB32s are filtered in raster-scan order.
+  // For each SB32: the 4 MBs are filtered in raster-scan order.
+  // For each MB: the left and above MB edges as well as the
+  // internal block edges are processed together.
+ for (mb_row = 0; mb_row < sb64_rows * 4; mb_row += 4) {
+ for (mb_col = 0; mb_col < sb64_cols * 4; mb_col += 4) {
+ lpf_sb64(cm, mode_info_context, mb_row, mb_col,
+ y_ptr, u_ptr, v_ptr,
+ y_stride, uv_stride, y_only, dering);
+ y_ptr += 64;
+      u_ptr = y_only ? 0 : u_ptr + 32;
+      v_ptr = y_only ? 0 : v_ptr + 32;
+ mode_info_context += 4; // step to next SB64
+ }
+ if (extra_sb32_col) {
+ // process 2 SB32s in the extra SB32 col
+ lpf_sb32(cm, mode_info_context, mb_row, mb_col,
+ y_ptr, u_ptr, v_ptr,
+ y_stride, uv_stride, y_only, dering);
+ lpf_sb32(cm, mode_info_context + mis * 2,
+ mb_row + 2, mb_col,
+ y_ptr + 32 * y_stride,
+ u_ptr + 16 * uv_stride,
+ v_ptr + 16 * uv_stride,
+ y_stride, uv_stride, y_only, dering);
+ y_ptr += 32;
+      u_ptr = y_only ? 0 : u_ptr + 16;
+      v_ptr = y_only ? 0 : v_ptr + 16;
+ mode_info_context += 2; // step to next SB32
+ mb_col += 2;
+ }
+ if (extra_mb_col) {
+      // process 4 MBs in the extra MB column
+ // process 1st MB
+ mi = mode_info_context;
+ do_left_v = (mb_col > 0);
+ do_above_h = (mb_row > 0);
+ do_left_v_mbuv = 1;
+ do_above_h_mbuv = 1;
+ lpf_mb(cm, mi, do_left_v, do_above_h,
+ do_left_v_mbuv, do_above_h_mbuv,
+ y_ptr,
+             y_only ? 0 : u_ptr,
+             y_only ? 0 : v_ptr,
+ y_stride, uv_stride, dering);
+ // process 2nd MB
+ mi = mode_info_context + mis;
+ do_left_v = (mb_col > 0);
+ do_above_h = 1;
+ do_left_v_mbuv = 1;
+ do_above_h_mbuv = 1;
+ lpf_mb(cm, mi, do_left_v, do_above_h,
+ do_left_v_mbuv, do_above_h_mbuv,
+ y_ptr + 16 * y_stride,
+ y_only ? 0 : (u_ptr + 8 * uv_stride),
+ y_only ? 0 : (v_ptr + 8 * uv_stride),
+ y_stride, uv_stride, dering);
+      // process 3rd MB
+ mi = mode_info_context + mis * 2;
+ do_left_v = (mb_col > 0);
+ do_above_h = 1;
+ do_left_v_mbuv = 1;
+ do_above_h_mbuv = 1;
+ lpf_mb(cm, mi, do_left_v, do_above_h,
+ do_left_v_mbuv, do_above_h_mbuv,
+ y_ptr + 32 * y_stride,
+ y_only ? 0 : (u_ptr + 16 * uv_stride),
+ y_only ? 0 : (v_ptr + 16 * uv_stride),
+ y_stride, uv_stride, dering);
+ // process 4th MB
+ mi = mode_info_context + mis * 3;
+ do_left_v = (mb_col > 0);
+ do_above_h = 1;
+ do_left_v_mbuv = 1;
+ do_above_h_mbuv = 1;
+ lpf_mb(cm, mi, do_left_v, do_above_h,
+ do_left_v_mbuv, do_above_h_mbuv,
+ y_ptr + 48 * y_stride,
+ y_only ? 0 : (u_ptr + 24 * uv_stride),
+ y_only ? 0 : (v_ptr + 24 * uv_stride),
+ y_stride, uv_stride, dering);
y_ptr += 16;
- if (!y_only) {
- u_ptr += 8;
- v_ptr += 8;
- }
- mode_info_context++; // step to next MB
+      u_ptr = y_only ? 0 : u_ptr + 8;
+      v_ptr = y_only ? 0 : v_ptr + 8;
+ mode_info_context++; // step to next MB
}
- y_ptr += post->y_stride * 16 - post->y_width;
+    // move pointers to the beginning of the next SB64 row
+ y_ptr += y_stride * 64 - post->y_width;
if (!y_only) {
- u_ptr += post->uv_stride * 8 - post->uv_width;
- v_ptr += post->uv_stride * 8 - post->uv_width;
+ u_ptr += uv_stride * 32 - post->uv_width;
+ v_ptr += uv_stride * 32 - post->uv_width;
+ }
+    // skip to the next SB64 row
+ mode_info_context += mis * 4 - cm->mb_cols;
+ }
+ if (extra_sb32_row) {
+ const int sb32_cols = sb64_cols * 2 + extra_sb32_col;
+ for (mb_col = 0; mb_col < sb32_cols * 2; mb_col += 2) {
+ lpf_sb32(cm, mode_info_context, mb_row, mb_col,
+ y_ptr, u_ptr, v_ptr,
+ y_stride, uv_stride, y_only, dering);
+ y_ptr += 32;
+      u_ptr = y_only ? 0 : u_ptr + 16;
+      v_ptr = y_only ? 0 : v_ptr + 16;
+ mode_info_context += 2; // step to next SB32
+ }
+ if (extra_mb_col) {
+ // process 1st MB
+ mi = mode_info_context;
+ do_left_v = (mb_col > 0);
+ do_above_h = (mb_row > 0);
+ do_left_v_mbuv = 1;
+ do_above_h_mbuv = 1;
+ lpf_mb(cm, mi, do_left_v, do_above_h,
+ do_left_v_mbuv, do_above_h_mbuv,
+ y_ptr,
+             y_only ? NULL : u_ptr,
+             y_only ? NULL : v_ptr,
+ y_stride, uv_stride, dering);
+ // process 2nd MB
+ mi = mode_info_context + mis;
+ do_left_v = (mb_col > 0);
+ do_above_h = 1;
+ do_left_v_mbuv = 1;
+ do_above_h_mbuv = 1;
+ lpf_mb(cm, mi, do_left_v, do_above_h,
+ do_left_v_mbuv, do_above_h_mbuv,
+ y_ptr + 16 * y_stride,
+ y_only ? NULL : (u_ptr + 8 * uv_stride),
+ y_only ? NULL : (v_ptr + 8 * uv_stride),
+ y_stride, uv_stride, dering);
+ y_ptr += 16;
+      u_ptr = y_only ? 0 : u_ptr + 8;
+      v_ptr = y_only ? 0 : v_ptr + 8;
+      mode_info_context++;  // step to next MB
+ }
+    // move pointers to the beginning of the next row of MBs
+    y_ptr += y_stride * 32 - post->y_width;
+    u_ptr += y_only ? 0 : uv_stride * 16 - post->uv_width;
+    v_ptr += y_only ? 0 : uv_stride * 16 - post->uv_width;
+    // skip to the next MB row, if it exists
+    mode_info_context += mis * 2 - cm->mb_cols;
+ mb_row += 2;
+ }
+ if (extra_mb_row) {
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
+ const MODE_INFO *mi = mode_info_context;
+ do_left_v = (mb_col > 0);
+ do_above_h = (mb_row > 0);
+ do_left_v_mbuv = 1;
+ do_above_h_mbuv = 1;
+ lpf_mb(cm, mi, do_left_v, do_above_h,
+ do_left_v_mbuv, do_above_h_mbuv,
+ y_ptr,
+             y_only ? 0 : u_ptr,
+             y_only ? 0 : v_ptr,
+ y_stride, uv_stride, dering);
+ y_ptr += 16;
+      u_ptr = y_only ? 0 : u_ptr + 8;
+      v_ptr = y_only ? 0 : v_ptr + 8;
+ mode_info_context++; // step to next MB
}
- mode_info_context++; // Skip border mb
}
}
-
-
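The restructured vp9_loop_filter_frame() walks the frame SB64 -> SB32 -> MB, so all pointer stepping is in fixed MB-sized increments: 16 luma pixels and 8 chroma pixels (4:2:0) per MB. A minimal sketch of the offset arithmetic used by lpf_sb32() above, with strides chosen arbitrarily for illustration:

/* Each MB covers 16x16 luma and 8x8 chroma, so the 4 MBs of an SB32 sit
 * at fixed offsets from the SB32 origin, visited in raster order. */
#include <stdio.h>

int main(void) {
  const int y_stride = 640, uv_stride = 320;    /* assumed buffer strides */
  static const int mb_row[4] = { 0, 0, 1, 1 };  /* raster order in SB32 */
  static const int mb_col[4] = { 0, 1, 0, 1 };
  int i;
  for (i = 0; i < 4; i++)
    printf("MB%d: y_ptr + %5d, uv_ptr + %5d\n", i,
           16 * (mb_row[i] * y_stride + mb_col[i]),
           8 * (mb_row[i] * uv_stride + mb_col[i]));
  return 0;
}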
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index bd9a35125..3b81146e2 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -37,7 +37,6 @@ typedef struct {
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
hev_thr[4][SIMD_WIDTH]);
unsigned char lvl[4][4][4];
- unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
unsigned char mode_lf_lut[MB_MODE_COUNT];
} loop_filter_info_n;
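Dropping hev_thr_lut works because lpf_mb() now derives the high-edge-variance threshold index directly as filter_level >> 4, bucketing levels into indices 0..3 regardless of frame type. Note this is a deliberate simplification rather than a bit-exact replacement of the old 15/20/40 breakpoints and key/inter distinction. A quick check of the new mapping, assuming MAX_LOOP_FILTER is 63 as elsewhere in the codebase:

/* The replacement for the removed LUT: hev index = filter_level >> 4.
 * Levels 0..15 -> 0, 16..31 -> 1, 32..47 -> 2, 48..63 -> 3. */
#include <assert.h>

#define MAX_LOOP_FILTER 63  /* assumed, matching the codebase */

int main(void) {
  int filt_lvl;
  for (filt_lvl = 0; filt_lvl <= MAX_LOOP_FILTER; filt_lvl++) {
    const int hev_index = filt_lvl >> 4;
    assert(hev_index >= 0 && hev_index <= 3);  /* fits hev_thr[4][...] */
  }
  return 0;
}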
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index e1a12e411..666197366 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -158,7 +158,7 @@ static void addmv_and_shuffle(
int i;
int insert_point;
- int duplicate_found = FALSE;
+ int duplicate_found = 0;
// Check for duplicates. If there is one increase its score.
// We only compare vs the current top candidates.
@@ -171,7 +171,7 @@ static void addmv_and_shuffle(
while (i > 0) {
i--;
if (candidate_mv.as_int == mv_list[i].as_int) {
- duplicate_found = TRUE;
+ duplicate_found = 1;
mv_scores[i] += weight;
break;
}
@@ -251,7 +251,7 @@ void vp9_find_mv_refs(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here,
int split_count = 0;
int (*mv_ref_search)[2];
int *ref_distance_weight;
- int zero_seen = FALSE;
+ int zero_seen = 0;
const int mb_col = (-xd->mb_to_left_edge) >> 7;
// Blank the reference vector lists and other local structures.
@@ -395,7 +395,7 @@ void vp9_find_mv_refs(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here,
// Scan for 0,0 case and clamp non zero choices
for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
if (candidate_mvs[i].as_int == 0) {
- zero_seen = TRUE;
+ zero_seen = 1;
} else {
clamp_mv_ref(xd, &candidate_mvs[i]);
}
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index c7ca67efe..66698f71a 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -68,6 +68,7 @@ typedef struct frame_contexts {
vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1];
vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1];
vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1];
+ vp9_prob partition_prob[PARTITION_PLANES][PARTITION_TYPES - 1];
vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES];
vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES];
@@ -95,6 +96,7 @@ typedef struct frame_contexts {
vp9_prob pre_i8x8_mode_prob[VP9_I8X8_MODES - 1];
vp9_prob pre_sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1];
vp9_prob pre_mbsplit_prob[VP9_NUMMBSPLITS - 1];
+ vp9_prob pre_partition_prob[PARTITION_PLANES][PARTITION_TYPES - 1];
unsigned int bmode_counts[VP9_NKF_BINTRAMODES];
unsigned int ymode_counts[VP9_YMODES]; /* interframe intra mode probs */
unsigned int sb_ymode_counts[VP9_I32X32_MODES];
@@ -102,6 +104,7 @@ typedef struct frame_contexts {
unsigned int i8x8_mode_counts[VP9_I8X8_MODES]; /* interframe intra probs */
unsigned int sub_mv_ref_counts[SUBMVREF_COUNT][VP9_SUBMVREFS];
unsigned int mbsplit_counts[VP9_NUMMBSPLITS];
+ unsigned int partition_counts[PARTITION_PLANES][PARTITION_TYPES];
vp9_coeff_probs pre_coef_probs_4x4[BLOCK_TYPES];
vp9_coeff_probs pre_coef_probs_8x8[BLOCK_TYPES];
@@ -220,7 +223,6 @@ typedef struct VP9Common {
/* profile settings */
int experimental;
- int mb_no_coeff_skip;
TXFM_MODE txfm_mode;
COMPPREDMODE_TYPE comp_pred_mode;
int no_lpf;
@@ -280,8 +282,6 @@ typedef struct VP9Common {
vp9_prob prob_intra_coded;
vp9_prob prob_last_coded;
vp9_prob prob_gf_coded;
- vp9_prob prob_sb32_coded;
- vp9_prob prob_sb64_coded;
// Context probabilities when using predictive coding of segment id
vp9_prob segment_pred_probs[PREDICTION_PROBS];
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index ee378d239..716781170 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -33,6 +33,24 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
scale->y_offset_q4 = 0; // calculated per-mb
scale->y_step_q4 = 16 * other_h / this_h;
+ if (scale->x_num == scale->x_den && scale->y_num == scale->y_den) {
+ scale->scale_value_x = unscaled_value;
+ scale->scale_value_y = unscaled_value;
+ scale->set_scaled_offsets = set_offsets_without_scaling;
+ scale->scale_motion_vector_q3_to_q4 =
+ motion_vector_q3_to_q4_without_scaling;
+ scale->scale_motion_vector_component_q4 =
+ motion_vector_component_q4_without_scaling;
+ } else {
+ scale->scale_value_x = scale_value_x_with_scaling;
+ scale->scale_value_y = scale_value_y_with_scaling;
+ scale->set_scaled_offsets = set_offsets_with_scaling;
+ scale->scale_motion_vector_q3_to_q4 =
+ motion_vector_q3_to_q4_with_scaling;
+ scale->scale_motion_vector_component_q4 =
+ motion_vector_component_q4_with_scaling;
+ }
+
// TODO(agrange): Investigate the best choice of functions to use here
// for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what
// to do at full-pel offsets. The current selection, where the filter is
@@ -325,60 +343,13 @@ void vp9_copy_mem8x4_c(const uint8_t *src,
}
}
-static void set_scaled_offsets(struct scale_factors *scale,
- int row, int col) {
- const int x_q4 = 16 * col;
- const int y_q4 = 16 * row;
-
- scale->x_offset_q4 = (x_q4 * scale->x_num / scale->x_den) & 0xf;
- scale->y_offset_q4 = (y_q4 * scale->y_num / scale->y_den) & 0xf;
-}
-
-static int32_t scale_motion_vector_component_q3(int mv_q3,
- int num,
- int den,
- int offset_q4) {
- // returns the scaled and offset value of the mv component.
- const int32_t mv_q4 = mv_q3 << 1;
-
- /* TODO(jkoleszar): make fixed point, or as a second multiply? */
- return mv_q4 * num / den + offset_q4;
-}
-
-static int32_t scale_motion_vector_component_q4(int mv_q4,
- int num,
- int den,
- int offset_q4) {
- // returns the scaled and offset value of the mv component.
-
- /* TODO(jkoleszar): make fixed point, or as a second multiply? */
- return mv_q4 * num / den + offset_q4;
-}
-
-static int_mv32 scale_motion_vector_q3_to_q4(
- const int_mv *src_mv,
- const struct scale_factors *scale) {
- // returns mv * scale + offset
- int_mv32 result;
-
- result.as_mv.row = scale_motion_vector_component_q3(src_mv->as_mv.row,
- scale->y_num,
- scale->y_den,
- scale->y_offset_q4);
- result.as_mv.col = scale_motion_vector_component_q3(src_mv->as_mv.col,
- scale->x_num,
- scale->x_den,
- scale->x_offset_q4);
- return result;
-}
-
void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int_mv *mv_q3,
const struct scale_factors *scale,
int w, int h, int weight,
const struct subpix_fn_table *subpix) {
- int_mv32 mv = scale_motion_vector_q3_to_q4(mv_q3, scale);
+ int_mv32 mv = scale->scale_motion_vector_q3_to_q4(mv_q3, scale);
src += (mv.as_mv.row >> 4) * src_stride + (mv.as_mv.col >> 4);
scale->predict[!!(mv.as_mv.col & 15)][!!(mv.as_mv.row & 15)][weight](
src, src_stride, dst, dst_stride,
@@ -402,11 +373,11 @@ void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride,
const int mv_col_q4 = ((fullpel_mv_q3->as_mv.col >> 3) << 4)
+ (frac_mv_q4->as_mv.col & 0xf);
const int scaled_mv_row_q4 =
- scale_motion_vector_component_q4(mv_row_q4, scale->y_num, scale->y_den,
- scale->y_offset_q4);
+ scale->scale_motion_vector_component_q4(mv_row_q4, scale->y_num,
+ scale->y_den, scale->y_offset_q4);
const int scaled_mv_col_q4 =
- scale_motion_vector_component_q4(mv_col_q4, scale->x_num, scale->x_den,
- scale->x_offset_q4);
+ scale->scale_motion_vector_component_q4(mv_col_q4, scale->x_num,
+ scale->x_den, scale->x_offset_q4);
const int subpel_x = scaled_mv_col_q4 & 15;
const int subpel_y = scaled_mv_row_q4 & 15;
@@ -419,17 +390,19 @@ void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride,
}
static void build_2x1_inter_predictor_wh(const BLOCKD *d0, const BLOCKD *d1,
- struct scale_factors *scale,
+ struct scale_factors *s,
uint8_t *predictor,
int block_size, int stride,
int which_mv, int weight,
int width, int height,
const struct subpix_fn_table *subpix,
int row, int col) {
+  struct scale_factors *scale = &s[which_mv];
+
assert(d1->predictor - d0->predictor == block_size);
assert(d1->pre == d0->pre + block_size);
- set_scaled_offsets(&scale[which_mv], row, col);
+ scale->set_scaled_offsets(scale, row, col);
if (d0->bmi.as_mv[which_mv].as_int == d1->bmi.as_mv[which_mv].as_int) {
uint8_t **base_pre = which_mv ? d0->base_second_pre : d0->base_pre;
@@ -438,7 +411,7 @@ static void build_2x1_inter_predictor_wh(const BLOCKD *d0, const BLOCKD *d1,
d0->pre_stride,
predictor, stride,
&d0->bmi.as_mv[which_mv],
- &scale[which_mv],
+ scale,
width, height,
weight, subpix);
@@ -450,37 +423,39 @@ static void build_2x1_inter_predictor_wh(const BLOCKD *d0, const BLOCKD *d1,
d0->pre_stride,
predictor, stride,
&d0->bmi.as_mv[which_mv],
- &scale[which_mv],
+ scale,
width > block_size ? block_size : width, height,
weight, subpix);
if (width <= block_size) return;
- set_scaled_offsets(&scale[which_mv], row, col + block_size);
+ scale->set_scaled_offsets(scale, row, col + block_size);
vp9_build_inter_predictor(*base_pre1 + d1->pre,
d1->pre_stride,
predictor + block_size, stride,
&d1->bmi.as_mv[which_mv],
- &scale[which_mv],
+ scale,
width - block_size, height,
weight, subpix);
}
}
static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1,
- struct scale_factors *scale,
+ struct scale_factors *s,
int block_size, int stride,
int which_mv, int weight,
const struct subpix_fn_table *subpix,
int row, int col, int use_dst) {
uint8_t *d0_predictor = use_dst ? *(d0->base_dst) + d0->dst : d0->predictor;
uint8_t *d1_predictor = use_dst ? *(d1->base_dst) + d1->dst : d1->predictor;
+  struct scale_factors *scale = &s[which_mv];
stride = use_dst ? d0->dst_stride : stride;
+
assert(d1_predictor - d0_predictor == block_size);
assert(d1->pre == d0->pre + block_size);
- set_scaled_offsets(&scale[which_mv], row, col);
+ scale->set_scaled_offsets(scale, row, col);
if (d0->bmi.as_mv[which_mv].as_int == d1->bmi.as_mv[which_mv].as_int) {
uint8_t **base_pre = which_mv ? d0->base_second_pre : d0->base_pre;
@@ -489,7 +464,7 @@ static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1,
d0->pre_stride,
d0_predictor, stride,
&d0->bmi.as_mv[which_mv],
- &scale[which_mv],
+ scale,
2 * block_size, block_size,
weight, subpix);
} else {
@@ -500,17 +475,17 @@ static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1,
d0->pre_stride,
d0_predictor, stride,
&d0->bmi.as_mv[which_mv],
- &scale[which_mv],
+ scale,
block_size, block_size,
weight, subpix);
- set_scaled_offsets(&scale[which_mv], row, col + block_size);
+ scale->set_scaled_offsets(scale, row, col + block_size);
vp9_build_inter_predictor(*base_pre1 + d1->pre,
d1->pre_stride,
d1_predictor, stride,
&d1->bmi.as_mv[which_mv],
- &scale[which_mv],
+ scale,
block_size, block_size,
weight, subpix);
}
@@ -774,6 +749,7 @@ static int get_implicit_compoundinter_weight(MACROBLOCKD *xd,
int weight;
int edge[4];
int block_size = 16 << xd->mode_info_context->mbmi.sb_type;
+ struct scale_factors *scale;
if (!use_second_ref) return 0;
if (!(xd->up_available || xd->left_available))
@@ -789,17 +765,17 @@ static int get_implicit_compoundinter_weight(MACROBLOCKD *xd,
pre_stride = xd->second_pre.y_stride;
ymv.as_int = xd->mode_info_context->mbmi.mv[1].as_int;
// First generate the second predictor
+ scale = &xd->scale_factor[1];
for (n = 0; n < block_size; n += 16) {
xd->mb_to_left_edge = edge[2] - (n << 3);
xd->mb_to_right_edge = edge[3] + ((16 - n) << 3);
if (clamp_mvs)
clamp_mv_to_umv_border(&ymv.as_mv, xd);
- set_scaled_offsets(&xd->scale_factor[1], mb_row * 16, mb_col * 16 + n);
+ scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16 + n);
// predict a single row of pixels
- vp9_build_inter_predictor(
- base_pre + scaled_buffer_offset(n, 0, pre_stride, &xd->scale_factor[1]),
- pre_stride, tmp_y + n, tmp_ystride, &ymv, &xd->scale_factor[1],
- 16, 1, 0, &xd->subpix);
+ vp9_build_inter_predictor(base_pre +
+ scaled_buffer_offset(n, 0, pre_stride, scale),
+ pre_stride, tmp_y + n, tmp_ystride, &ymv, scale, 16, 1, 0, &xd->subpix);
}
xd->mb_to_left_edge = edge[2];
xd->mb_to_right_edge = edge[3];
@@ -808,12 +784,12 @@ static int get_implicit_compoundinter_weight(MACROBLOCKD *xd,
xd->mb_to_bottom_edge = edge[1] + ((16 - n) << 3);
if (clamp_mvs)
clamp_mv_to_umv_border(&ymv.as_mv, xd);
- set_scaled_offsets(&xd->scale_factor[1], mb_row * 16 + n, mb_col * 16);
+ scale->set_scaled_offsets(scale, mb_row * 16 + n, mb_col * 16);
// predict a single col of pixels
- vp9_build_inter_predictor(
- base_pre + scaled_buffer_offset(0, n, pre_stride, &xd->scale_factor[1]),
+ vp9_build_inter_predictor(base_pre +
+ scaled_buffer_offset(0, n, pre_stride, scale),
pre_stride, tmp_y + n * tmp_ystride, tmp_ystride, &ymv,
- &xd->scale_factor[1], 1, 16, 0, &xd->subpix);
+ scale, 1, 16, 0, &xd->subpix);
}
xd->mb_to_top_edge = edge[0];
xd->mb_to_bottom_edge = edge[1];
@@ -825,17 +801,17 @@ static int get_implicit_compoundinter_weight(MACROBLOCKD *xd,
pre_stride = xd->pre.y_stride;
ymv.as_int = xd->mode_info_context->mbmi.mv[0].as_int;
// Now generate the first predictor
+ scale = &xd->scale_factor[0];
for (n = 0; n < block_size; n += 16) {
xd->mb_to_left_edge = edge[2] - (n << 3);
xd->mb_to_right_edge = edge[3] + ((16 - n) << 3);
if (clamp_mvs)
clamp_mv_to_umv_border(&ymv.as_mv, xd);
- set_scaled_offsets(&xd->scale_factor[0], mb_row * 16, mb_col * 16 + n);
+ scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16 + n);
// predict a single row of pixels
- vp9_build_inter_predictor(
- base_pre + scaled_buffer_offset(n, 0, pre_stride, &xd->scale_factor[0]),
- pre_stride, tmp_y + n, tmp_ystride, &ymv, &xd->scale_factor[0],
- 16, 1, 0, &xd->subpix);
+ vp9_build_inter_predictor(base_pre +
+ scaled_buffer_offset(n, 0, pre_stride, scale),
+ pre_stride, tmp_y + n, tmp_ystride, &ymv, scale, 16, 1, 0, &xd->subpix);
}
xd->mb_to_left_edge = edge[2];
xd->mb_to_right_edge = edge[3];
@@ -844,12 +820,12 @@ static int get_implicit_compoundinter_weight(MACROBLOCKD *xd,
xd->mb_to_bottom_edge = edge[1] + ((16 - n) << 3);
if (clamp_mvs)
clamp_mv_to_umv_border(&ymv.as_mv, xd);
- set_scaled_offsets(&xd->scale_factor[0], mb_row * 16 + n, mb_col * 16);
+ scale->set_scaled_offsets(scale, mb_row * 16 + n, mb_col * 16);
// predict a single col of pixels
- vp9_build_inter_predictor(
- base_pre + scaled_buffer_offset(0, n, pre_stride, &xd->scale_factor[0]),
+ vp9_build_inter_predictor(base_pre +
+ scaled_buffer_offset(0, n, pre_stride, scale),
pre_stride, tmp_y + n * tmp_ystride, tmp_ystride, &ymv,
- &xd->scale_factor[0], 1, 16, 0, &xd->subpix);
+ scale, 1, 16, 0, &xd->subpix);
}
xd->mb_to_top_edge = edge[0];
xd->mb_to_bottom_edge = edge[1];
@@ -877,17 +853,18 @@ static void build_inter16x16_predictors_mby_w(MACROBLOCKD *xd,
uint8_t *base_pre = which_mv ? xd->second_pre.y_buffer : xd->pre.y_buffer;
int pre_stride = which_mv ? xd->second_pre.y_stride : xd->pre.y_stride;
int_mv ymv;
+ struct scale_factors *scale = &xd->scale_factor[which_mv];
+
ymv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;
if (clamp_mvs)
clamp_mv_to_umv_border(&ymv.as_mv, xd);
- set_scaled_offsets(&xd->scale_factor[which_mv], mb_row * 16, mb_col * 16);
+ scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16);
- vp9_build_inter_predictor(base_pre, pre_stride,
- dst_y, dst_ystride,
- &ymv, &xd->scale_factor[which_mv],
- 16, 16, which_mv ? weight : 0, &xd->subpix);
+ vp9_build_inter_predictor(base_pre, pre_stride, dst_y, dst_ystride,
+ &ymv, scale, 16, 16,
+ which_mv ? weight : 0, &xd->subpix);
}
}
@@ -920,17 +897,17 @@ void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd,
uint8_t *base_pre = which_mv ? xd->second_pre.y_buffer : xd->pre.y_buffer;
int pre_stride = which_mv ? xd->second_pre.y_stride : xd->pre.y_stride;
int_mv ymv;
+ struct scale_factors *scale = &xd->scale_factor[which_mv];
+
ymv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;
if (clamp_mvs)
clamp_mv_to_umv_border(&ymv.as_mv, xd);
- set_scaled_offsets(&xd->scale_factor[which_mv], mb_row * 16, mb_col * 16);
+ scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16);
- vp9_build_inter_predictor(base_pre, pre_stride,
- dst_y, dst_ystride,
- &ymv, &xd->scale_factor[which_mv],
- 16, 16, which_mv, &xd->subpix);
+ vp9_build_inter_predictor(base_pre, pre_stride, dst_y, dst_ystride,
+ &ymv, scale, 16, 16, which_mv, &xd->subpix);
}
}
#endif
@@ -956,6 +933,8 @@ static void build_inter16x16_predictors_mbuv_w(MACROBLOCKD *xd,
int_mv _o16x16mv;
int_mv _16x16mv;
+ struct scale_factors *scale = &xd->scale_factor_uv[which_mv];
+
_16x16mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;
if (clamp_mvs)
@@ -979,18 +958,15 @@ static void build_inter16x16_predictors_mbuv_w(MACROBLOCKD *xd,
uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer);
vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer);
- set_scaled_offsets(&xd->scale_factor_uv[which_mv],
- mb_row * 16, mb_col * 16);
+ scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16);
vp9_build_inter_predictor_q4(
uptr, pre_stride, dst_u, dst_uvstride, &_16x16mv, &_o16x16mv,
- &xd->scale_factor_uv[which_mv], 8, 8,
- which_mv ? weight : 0, &xd->subpix);
+ scale, 8, 8, which_mv ? weight : 0, &xd->subpix);
vp9_build_inter_predictor_q4(
vptr, pre_stride, dst_v, dst_uvstride, &_16x16mv, &_o16x16mv,
- &xd->scale_factor_uv[which_mv], 8, 8,
- which_mv ? weight : 0, &xd->subpix);
+ scale, 8, 8, which_mv ? weight : 0, &xd->subpix);
}
}
@@ -1030,6 +1006,8 @@ void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
int_mv _o16x16mv;
int_mv _16x16mv;
+ struct scale_factors *scale = &xd->scale_factor_uv[which_mv];
+
_16x16mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;
if (clamp_mvs)
@@ -1053,17 +1031,16 @@ void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer);
vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer);
- set_scaled_offsets(&xd->scale_factor_uv[which_mv],
- mb_row * 16, mb_col * 16);
+ scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16);
vp9_build_inter_predictor_q4(
uptr, pre_stride, dst_u, dst_uvstride, &_16x16mv, &_o16x16mv,
- &xd->scale_factor_uv[which_mv], 8, 8,
+ scale, 8, 8,
which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix);
vp9_build_inter_predictor_q4(
vptr, pre_stride, dst_v, dst_uvstride, &_16x16mv, &_o16x16mv,
- &xd->scale_factor_uv[which_mv], 8, 8,
+ scale, 8, 8,
which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix);
}
}
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h
index 585fcfd6d..068853d1d 100644
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -77,20 +77,27 @@ void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride,
int w, int h, int do_avg,
const struct subpix_fn_table *subpix);
-static int scale_value_x(int val, const struct scale_factors *scale) {
+static int scale_value_x_with_scaling(int val,
+ const struct scale_factors *scale) {
return val * scale->x_num / scale->x_den;
}
-static int scale_value_y(int val, const struct scale_factors *scale) {
+static int scale_value_y_with_scaling(int val,
+ const struct scale_factors *scale) {
return val * scale->y_num / scale->y_den;
}
+static int unscaled_value(int val, const struct scale_factors *scale) {
+ (void) scale;
+ return val;
+}
+
static int scaled_buffer_offset(int x_offset,
int y_offset,
int stride,
const struct scale_factors *scale) {
- return scale_value_y(y_offset, scale) * stride +
- scale_value_x(x_offset, scale);
+ return scale->scale_value_y(y_offset, scale) * stride +
+ scale->scale_value_x(x_offset, scale);
}
static void setup_pred_block(YV12_BUFFER_CONFIG *dst,
@@ -112,6 +119,7 @@ static void setup_pred_block(YV12_BUFFER_CONFIG *dst,
recon_yoffset = 16 * mb_row * recon_y_stride + 16 * mb_col;
recon_uvoffset = 8 * mb_row * recon_uv_stride + 8 * mb_col;
}
+
*dst = *src;
dst->y_buffer += recon_yoffset;
dst->u_buffer += recon_uvoffset;
@@ -128,4 +136,66 @@ static void set_scale_factors(MACROBLOCKD *xd,
xd->scale_factor_uv[1] = xd->scale_factor[1];
}
+static void set_offsets_with_scaling(struct scale_factors *scale,
+ int row, int col) {
+ const int x_q4 = 16 * col;
+ const int y_q4 = 16 * row;
+
+ scale->x_offset_q4 = (x_q4 * scale->x_num / scale->x_den) & 0xf;
+ scale->y_offset_q4 = (y_q4 * scale->y_num / scale->y_den) & 0xf;
+}
+
+static void set_offsets_without_scaling(struct scale_factors *scale,
+ int row, int col) {
+ scale->x_offset_q4 = 0;
+ scale->y_offset_q4 = 0;
+}
+
+static int_mv32 motion_vector_q3_to_q4_with_scaling(
+ const int_mv *src_mv,
+ const struct scale_factors *scale) {
+ // returns mv * scale + offset
+ int_mv32 result;
+ const int32_t mv_row_q4 = src_mv->as_mv.row << 1;
+ const int32_t mv_col_q4 = src_mv->as_mv.col << 1;
+
+ /* TODO(jkoleszar): make fixed point, or as a second multiply? */
+ result.as_mv.row = mv_row_q4 * scale->y_num / scale->y_den
+ + scale->y_offset_q4;
+ result.as_mv.col = mv_col_q4 * scale->x_num / scale->x_den
+ + scale->x_offset_q4;
+ return result;
+}
+
+static int_mv32 motion_vector_q3_to_q4_without_scaling(
+ const int_mv *src_mv,
+ const struct scale_factors *scale) {
+ // returns mv * scale + offset
+ int_mv32 result;
+
+ result.as_mv.row = src_mv->as_mv.row << 1;
+ result.as_mv.col = src_mv->as_mv.col << 1;
+ return result;
+}
+
+static int32_t motion_vector_component_q4_with_scaling(int mv_q4,
+ int num,
+ int den,
+ int offset_q4) {
+ // returns the scaled and offset value of the mv component.
+
+ /* TODO(jkoleszar): make fixed point, or as a second multiply? */
+ return mv_q4 * num / den + offset_q4;
+}
+
+static int32_t motion_vector_component_q4_without_scaling(int mv_q4,
+ int num,
+ int den,
+ int offset_q4) {
+ // returns the scaled and offset value of the mv component.
+ (void)num;
+ (void)den;
+ (void)offset_q4;
+ return mv_q4;
+}
#endif // VP9_COMMON_VP9_RECONINTER_H_
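vp9_setup_scale_factors_for_frame() (in vp9_reconinter.c above) now binds either the *_with_scaling or the pass-through variants once per frame, so the hot prediction paths call through a pointer instead of re-testing num == den on every block. A minimal sketch of the same dispatch pattern, using a simplified stand-in struct (the real struct scale_factors also carries offsets and predictor tables):

/* Once-per-frame dispatch: pick the scaling or identity variant when the
 * factors are known, then call through the pointer in hot loops. */
#include <stdio.h>

struct sf {
  int num, den;
  int (*scale_value)(int v, const struct sf *sf);
};

static int scale_with_scaling(int v, const struct sf *sf) {
  return v * sf->num / sf->den;
}

static int unscaled(int v, const struct sf *sf) {
  (void)sf;
  return v;
}

static void setup(struct sf *sf, int num, int den) {
  sf->num = num;
  sf->den = den;
  sf->scale_value = (num == den) ? unscaled : scale_with_scaling;
}

int main(void) {
  struct sf sf;
  setup(&sf, 1, 2);                         /* 2x downscale */
  printf("%d\n", sf.scale_value(64, &sf));  /* prints 32 */
  setup(&sf, 1, 1);                         /* unscaled */
  printf("%d\n", sf.scale_value(64, &sf));  /* prints 64 */
  return 0;
}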
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index 34e95a252..632191183 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -31,7 +31,7 @@ static const unsigned int iscale[64] = {
};
static INLINE int iscale_round(int value, int i) {
- return ROUND_POWER_OF_TWO(value * iscale[i], 16);
+ return ROUND_POWER_OF_TWO(value * iscale[i], 16);
}
static void d27_predictor(uint8_t *ypred_ptr, int y_stride,
@@ -70,7 +70,7 @@ static void d27_predictor(uint8_t *ypred_ptr, int y_stride,
ypred_ptr[r * y_stride] = ROUND_POWER_OF_TWO(ypred_ptr[(r - 1) * y_stride] +
yleft_col[r], 1);
for (r = bh - 2; r >= bh / 2; --r) {
- int w = c + (bh - 1 - r) * 2;
+ const int w = c + (bh - 1 - r) * 2;
ypred_ptr[r * y_stride + w] =
ROUND_POWER_OF_TWO(ypred_ptr[(r - 1) * y_stride + w] +
ypred_ptr[r * y_stride + w - 1], 1);
@@ -78,7 +78,7 @@ static void d27_predictor(uint8_t *ypred_ptr, int y_stride,
for (c = 1; c < bw; c++) {
for (r = bh - 1; r >= bh / 2 + c / 2; --r) {
- int w = c + (bh - 1 - r) * 2;
+ const int w = c + (bh - 1 - r) * 2;
ypred_ptr[r * y_stride + w] =
ROUND_POWER_OF_TWO(ypred_ptr[(r - 1) * y_stride + w] +
ypred_ptr[r * y_stride + w - 1], 1);
@@ -121,7 +121,7 @@ static void d63_predictor(uint8_t *ypred_ptr, int y_stride,
c = bw - 1;
ypred_ptr[c] = ROUND_POWER_OF_TWO(ypred_ptr[(c - 1)] + yabove_row[c], 1);
for (c = bw - 2; c >= bw / 2; --c) {
- int h = r + (bw - 1 - c) * 2;
+ const int h = r + (bw - 1 - c) * 2;
ypred_ptr[h * y_stride + c] =
ROUND_POWER_OF_TWO(ypred_ptr[h * y_stride + c - 1] +
ypred_ptr[(h - 1) * y_stride + c], 1);
@@ -129,7 +129,7 @@ static void d63_predictor(uint8_t *ypred_ptr, int y_stride,
for (r = 1; r < bh; r++) {
for (c = bw - 1; c >= bw / 2 + r / 2; --c) {
- int h = r + (bw - 1 - c) * 2;
+ const int h = r + (bw - 1 - c) * 2;
ypred_ptr[h * y_stride + c] =
ROUND_POWER_OF_TWO(ypred_ptr[h * y_stride + c - 1] +
ypred_ptr[(h - 1) * y_stride + c], 1);
@@ -197,9 +197,8 @@ static void d135_predictor(uint8_t *ypred_ptr, int y_stride,
ypred_ptr += y_stride;
for (r = 1; r < bh; ++r) {
- for (c = 1; c < bw; c++) {
+ for (c = 1; c < bw; c++)
ypred_ptr[c] = ypred_ptr[-y_stride + c - 1];
- }
ypred_ptr += y_stride;
}
}
@@ -300,14 +299,13 @@ void vp9_build_intra_predictors(uint8_t *src, int src_stride,
int r, c, i;
uint8_t yleft_col[64], yabove_data[65], ytop_left;
uint8_t *yabove_row = yabove_data + 1;
- /*
- * 127 127 127 .. 127 127 127 127 127 127
- * 129 A B .. Y Z
- * 129 C D .. W X
- * 129 E F .. U V
- * 129 G H .. S T T T T T
- * ..
- */
+
+ // 127 127 127 .. 127 127 127 127 127 127
+ // 129 A B .. Y Z
+ // 129 C D .. W X
+ // 129 E F .. U V
+ // 129 G H .. S T T T T T
+ // ..
if (left_available) {
for (i = 0; i < bh; i++)
@@ -319,42 +317,34 @@ void vp9_build_intra_predictors(uint8_t *src, int src_stride,
if (up_available) {
uint8_t *yabove_ptr = src - src_stride;
vpx_memcpy(yabove_row, yabove_ptr, bw);
- if (left_available) {
- ytop_left = yabove_ptr[-1];
- } else {
- ytop_left = 127;
- }
+ ytop_left = left_available ? yabove_ptr[-1] : 127;
} else {
vpx_memset(yabove_row, 127, bw);
ytop_left = 127;
}
yabove_row[-1] = ytop_left;
- /* for Y */
+
switch (mode) {
case DC_PRED: {
- int expected_dc;
int i;
+ int expected_dc = 128;
int average = 0;
int count = 0;
if (up_available || left_available) {
if (up_available) {
- for (i = 0; i < bw; i++) {
+ for (i = 0; i < bw; i++)
average += yabove_row[i];
- }
count += bw;
}
if (left_available) {
- for (i = 0; i < bh; i++) {
+ for (i = 0; i < bh; i++)
average += yleft_col[i];
- }
count += bh;
}
expected_dc = (average + (count >> 1)) / count;
- } else {
- expected_dc = 128;
}
for (r = 0; r < bh; r++) {
@@ -377,39 +367,102 @@ void vp9_build_intra_predictors(uint8_t *src, int src_stride,
break;
case TM_PRED:
for (r = 0; r < bh; r++) {
- for (c = 0; c < bw; c++) {
+ for (c = 0; c < bw; c++)
ypred_ptr[c] = clip_pixel(yleft_col[r] + yabove_row[c] - ytop_left);
- }
ypred_ptr += y_stride;
}
break;
+#if CONFIG_SBSEGMENT
case D45_PRED:
- d45_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col);
- break;
case D135_PRED:
- d135_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col);
- break;
case D117_PRED:
- d117_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col);
- break;
case D153_PRED:
- d153_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col);
- break;
case D27_PRED:
- d27_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col);
- break;
case D63_PRED:
- d63_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col);
+ if (bw == bh) {
+ switch (mode) {
+#endif
+ case D45_PRED:
+ d45_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col);
+ break;
+ case D135_PRED:
+ d135_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col);
+ break;
+ case D117_PRED:
+ d117_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col);
+ break;
+ case D153_PRED:
+ d153_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col);
+ break;
+ case D27_PRED:
+ d27_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col);
+ break;
+ case D63_PRED:
+ d63_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col);
+ break;
+#if CONFIG_SBSEGMENT
+ default:
+ assert(0);
+ }
+ } else if (bw > bh) {
+        uint8_t pred[64 * 64];
+ memset(yleft_col + bh, yleft_col[bh - 1], bw - bh);
+ switch (mode) {
+ case D45_PRED:
+ d45_predictor(pred, 64, bw, bw, yabove_row, yleft_col);
+ break;
+ case D135_PRED:
+ d135_predictor(pred, 64, bw, bw, yabove_row, yleft_col);
+ break;
+ case D117_PRED:
+ d117_predictor(pred, 64, bw, bw, yabove_row, yleft_col);
+ break;
+ case D153_PRED:
+ d153_predictor(pred, 64, bw, bw, yabove_row, yleft_col);
+ break;
+ case D27_PRED:
+ d27_predictor(pred, 64, bw, bw, yabove_row, yleft_col);
+ break;
+ case D63_PRED:
+ d63_predictor(pred, 64, bw, bw, yabove_row, yleft_col);
+ break;
+ default:
+ assert(0);
+ }
+ for (i = 0; i < bh; i++)
+ memcpy(ypred_ptr + y_stride * i, pred + i * 64, bw);
+ } else {
+ uint8_t pred[64 * 64];
+ memset(yabove_row + bw, yabove_row[bw - 1], bh - bw);
+ switch (mode) {
+ case D45_PRED:
+ d45_predictor(pred, 64, bh, bh, yabove_row, yleft_col);
+ break;
+ case D135_PRED:
+ d135_predictor(pred, 64, bh, bh, yabove_row, yleft_col);
+ break;
+ case D117_PRED:
+ d117_predictor(pred, 64, bh, bh, yabove_row, yleft_col);
+ break;
+ case D153_PRED:
+ d153_predictor(pred, 64, bh, bh, yabove_row, yleft_col);
+ break;
+ case D27_PRED:
+ d27_predictor(pred, 64, bh, bh, yabove_row, yleft_col);
+ break;
+ case D63_PRED:
+ d63_predictor(pred, 64, bh, bh, yabove_row, yleft_col);
+ break;
+ default:
+ assert(0);
+ }
+ for (i = 0; i < bh; i++)
+ memcpy(ypred_ptr + y_stride * i, pred + i * 64, bw);
+ }
break;
- case I8X8_PRED:
- case I4X4_PRED:
- case NEARESTMV:
- case NEARMV:
- case ZEROMV:
- case NEWMV:
- case SPLITMV:
- case MB_MODE_COUNT:
+#endif
+ default:
break;
}
}
@@ -746,7 +799,7 @@ void vp9_intra8x8_predict(MACROBLOCKD *xd,
const int block4x4_idx = (b - xd->block);
const int block_idx = (block4x4_idx >> 2) | !!(block4x4_idx & 2);
const int have_top = (block_idx >> 1) || xd->up_available;
- const int have_left = (block_idx & 1) || xd->left_available;
+ const int have_left = (block_idx & 1) || xd->left_available;
const int have_right = !(block_idx & 1) || xd->right_available;
vp9_build_intra_predictors(*(b->base_dst) + b->dst,
@@ -761,7 +814,7 @@ void vp9_intra_uv4x4_predict(MACROBLOCKD *xd,
uint8_t *predictor, int pre_stride) {
const int block_idx = (b - xd->block) & 3;
const int have_top = (block_idx >> 1) || xd->up_available;
- const int have_left = (block_idx & 1) || xd->left_available;
+ const int have_left = (block_idx & 1) || xd->left_available;
const int have_right = !(block_idx & 1) || xd->right_available;
vp9_build_intra_predictors(*(b->base_dst) + b->dst,
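The CONFIG_SBSEGMENT path added above handles rectangular blocks by extending the shorter reference edge with its last sample, running the square directional predictor at side max(bw, bh) into a 64x64 scratch buffer, and copying out only the top-left bw x bh region. A minimal sketch of that pad-and-crop step; predict_square() is a hypothetical stand-in for the dNN_predictor() family:

#include <stdint.h>
#include <string.h>

#define MAX_SIDE 64

static void predict_square(uint8_t *dst, int stride, int side,
                           const uint8_t *above, const uint8_t *left) {
  int r;  /* placeholder fill; a real caller invokes e.g. d45_predictor() */
  for (r = 0; r < side; r++)
    memset(dst + r * stride, (above[0] + left[0]) / 2, side);
}

static void predict_rect(uint8_t *dst, int stride, int bw, int bh,
                         uint8_t *above, uint8_t *left) {
  uint8_t pred[MAX_SIDE * MAX_SIDE];
  const int side = bw > bh ? bw : bh;
  int i;
  if (bw > bh)
    memset(left + bh, left[bh - 1], bw - bh);    /* extend left column */
  else if (bh > bw)
    memset(above + bw, above[bw - 1], bh - bw);  /* extend above row */
  predict_square(pred, MAX_SIDE, side, above, left);
  for (i = 0; i < bh; i++)                       /* crop to bw x bh */
    memcpy(dst + stride * i, pred + i * MAX_SIDE, bw);
}

int main(void) {
  uint8_t dst[16 * MAX_SIDE];
  uint8_t above[MAX_SIDE] = { 100 }, left[MAX_SIDE] = { 50 };
  predict_rect(dst, MAX_SIDE, 32, 16, above, left);  /* 32x16 block */
  return 0;
}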
diff --git a/vp9/common/vp9_reconintra4x4.c b/vp9/common/vp9_reconintra4x4.c
index 08bfd1d8e..a0700010b 100644
--- a/vp9/common/vp9_reconintra4x4.c
+++ b/vp9/common/vp9_reconintra4x4.c
@@ -57,17 +57,17 @@ B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr,
*/
switch (imin) {
case 1:
- return B_HD_PRED;
+ return B_D153_PRED;
case 2:
- return B_RD_PRED;
+ return B_D135_PRED;
case 3:
- return B_VR_PRED;
+ return B_D117_PRED;
case 5:
- return B_VL_PRED;
+ return B_D63_PRED;
case 6:
- return B_LD_PRED;
+ return B_D45_PRED;
case 7:
- return B_HU_PRED;
+ return B_D27_PRED;
default:
assert(0);
}
@@ -93,13 +93,13 @@ B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr,
*/
switch (imin) {
case 1:
- return B_HD_PRED;
+ return B_D153_PRED;
case 3:
- return B_VR_PRED;
+ return B_D117_PRED;
case 5:
- return B_VL_PRED;
+ return B_D63_PRED;
case 7:
- return B_HU_PRED;
+ return B_D27_PRED;
default:
assert(0);
}
@@ -126,21 +126,21 @@ B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr,
switch (imin) {
case 0:
- return B_HE_PRED;
+ return B_H_PRED;
case 1:
- return B_HD_PRED;
+ return B_D153_PRED;
case 2:
- return B_RD_PRED;
+ return B_D135_PRED;
case 3:
- return B_VR_PRED;
+ return B_D117_PRED;
case 4:
- return B_VE_PRED;
+ return B_V_PRED;
case 5:
- return B_VL_PRED;
+ return B_D63_PRED;
case 6:
- return B_LD_PRED;
+ return B_D45_PRED;
case 7:
- return B_HU_PRED;
+ return B_D27_PRED;
default:
assert(0);
}
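For reference, the mode renames applied throughout this file map the VP8-era mnemonics onto the angle-based names used elsewhere in VP9: B_VE_PRED becomes B_V_PRED, B_HE_PRED becomes B_H_PRED, B_LD_PRED becomes B_D45_PRED, B_RD_PRED becomes B_D135_PRED, B_VR_PRED becomes B_D117_PRED, B_VL_PRED becomes B_D63_PRED, B_HD_PRED becomes B_D153_PRED, and B_HU_PRED becomes B_D27_PRED.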
@@ -195,12 +195,7 @@ void vp9_intra4x4_predict(MACROBLOCKD *xd,
if (have_top) {
uint8_t *above_ptr = *(x->base_dst) + x->dst - x->dst_stride;
-
- if (have_left) {
- top_left = above_ptr[-1];
- } else {
- top_left = 127;
- }
+ top_left = have_left ? above_ptr[-1] : 127;
above[0] = above_ptr[0];
above[1] = above_ptr[1];
@@ -270,13 +265,11 @@ void vp9_intra4x4_predict(MACROBLOCKD *xd,
expected_dc += left[i];
}
- expected_dc = (expected_dc + 4) >> 3;
+ expected_dc = ROUND_POWER_OF_TWO(expected_dc, 3);
for (r = 0; r < 4; r++) {
- for (c = 0; c < 4; c++) {
+ for (c = 0; c < 4; c++)
predictor[c] = expected_dc;
- }
-
predictor += ps;
}
}
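The (x + 4) >> 3 style expressions rewritten below as ROUND_POWER_OF_TWO(x, 3) are behavior-preserving renames of the same round-to-nearest arithmetic. Assuming the usual libvpx definition of the macro, the equivalence can be checked directly:

/* ROUND_POWER_OF_TWO as defined in the common headers (assumed); the
 * rewrites in this file keep the arithmetic identical. */
#include <assert.h>

#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

int main(void) {
  int x;
  for (x = 0; x < 1024; x++)
    assert(ROUND_POWER_OF_TWO(x, 3) == ((x + 4) >> 3));
  return 0;
}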
@@ -284,210 +277,160 @@ void vp9_intra4x4_predict(MACROBLOCKD *xd,
case B_TM_PRED: {
/* prediction similar to true_motion prediction */
for (r = 0; r < 4; r++) {
- for (c = 0; c < 4; c++) {
+ for (c = 0; c < 4; c++)
predictor[c] = clip_pixel(above[c] - top_left + left[r]);
- }
-
predictor += ps;
}
}
break;
-
- case B_VE_PRED: {
- unsigned int ap[4];
-
- ap[0] = above[0];
- ap[1] = above[1];
- ap[2] = above[2];
- ap[3] = above[3];
-
+ case B_V_PRED:
for (r = 0; r < 4; r++) {
- for (c = 0; c < 4; c++) {
- predictor[c] = ap[c];
- }
-
+ for (c = 0; c < 4; c++)
+ predictor[c] = above[c];
predictor += ps;
}
- }
- break;
-
- case B_HE_PRED: {
- unsigned int lp[4];
-
- lp[0] = left[0];
- lp[1] = left[1];
- lp[2] = left[2];
- lp[3] = left[3];
-
+ break;
+ case B_H_PRED:
for (r = 0; r < 4; r++) {
- for (c = 0; c < 4; c++) {
- predictor[c] = lp[r];
- }
-
+ for (c = 0; c < 4; c++)
+ predictor[c] = left[r];
predictor += ps;
}
- }
- break;
- case B_LD_PRED: {
- uint8_t *ptr = above;
+ break;
+ case B_D45_PRED: {
+ uint8_t *p = above;
- predictor[0 * ps + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2;
+ predictor[0 * ps + 0] = ROUND_POWER_OF_TWO(p[0] + p[1] * 2 + p[2], 2);
predictor[0 * ps + 1] =
- predictor[1 * ps + 0] = (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2;
+ predictor[1 * ps + 0] = ROUND_POWER_OF_TWO(p[1] + p[2] * 2 + p[3], 2);
predictor[0 * ps + 2] =
predictor[1 * ps + 1] =
- predictor[2 * ps + 0] = (ptr[2] + ptr[3] * 2 + ptr[4] + 2) >> 2;
+ predictor[2 * ps + 0] = ROUND_POWER_OF_TWO(p[2] + p[3] * 2 + p[4], 2);
predictor[0 * ps + 3] =
predictor[1 * ps + 2] =
predictor[2 * ps + 1] =
- predictor[3 * ps + 0] = (ptr[3] + ptr[4] * 2 + ptr[5] + 2) >> 2;
+ predictor[3 * ps + 0] =
+ ROUND_POWER_OF_TWO(p[3] + p[4] * 2 + p[5], 2);
predictor[1 * ps + 3] =
predictor[2 * ps + 2] =
- predictor[3 * ps + 1] = (ptr[4] + ptr[5] * 2 + ptr[6] + 2) >> 2;
+ predictor[3 * ps + 1] = ROUND_POWER_OF_TWO(p[4] + p[5] * 2 + p[6], 2);
predictor[2 * ps + 3] =
- predictor[3 * ps + 2] = (ptr[5] + ptr[6] * 2 + ptr[7] + 2) >> 2;
- predictor[3 * ps + 3] = (ptr[6] + ptr[7] * 2 + ptr[7] + 2) >> 2;
+ predictor[3 * ps + 2] = ROUND_POWER_OF_TWO(p[5] + p[6] * 2 + p[7], 2);
+ predictor[3 * ps + 3] = ROUND_POWER_OF_TWO(p[6] + p[7] * 2 + p[7], 2);
}
break;
- case B_RD_PRED: {
- uint8_t pp[9];
-
- pp[0] = left[3];
- pp[1] = left[2];
- pp[2] = left[1];
- pp[3] = left[0];
- pp[4] = top_left;
- pp[5] = above[0];
- pp[6] = above[1];
- pp[7] = above[2];
- pp[8] = above[3];
+ case B_D135_PRED: {
+ uint8_t p[9] = { left[3], left[2], left[1], left[0],
+ top_left,
+ above[0], above[1], above[2], above[3] };
- predictor[3 * ps + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+ predictor[3 * ps + 0] = ROUND_POWER_OF_TWO(p[0] + p[1] * 2 + p[2], 2);
predictor[3 * ps + 1] =
- predictor[2 * ps + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[2 * ps + 0] = ROUND_POWER_OF_TWO(p[1] + p[2] * 2 + p[3], 2);
predictor[3 * ps + 2] =
predictor[2 * ps + 1] =
- predictor[1 * ps + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+ predictor[1 * ps + 0] = ROUND_POWER_OF_TWO(p[2] + p[3] * 2 + p[4], 2);
predictor[3 * ps + 3] =
predictor[2 * ps + 2] =
predictor[1 * ps + 1] =
- predictor[0 * ps + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
+ predictor[0 * ps + 0] =
+ ROUND_POWER_OF_TWO(p[3] + p[4] * 2 + p[5], 2);
predictor[2 * ps + 3] =
predictor[1 * ps + 2] =
- predictor[0 * ps + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
+ predictor[0 * ps + 1] = ROUND_POWER_OF_TWO(p[4] + p[5] * 2 + p[6], 2);
predictor[1 * ps + 3] =
- predictor[0 * ps + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
- predictor[0 * ps + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
+ predictor[0 * ps + 2] = ROUND_POWER_OF_TWO(p[5] + p[6] * 2 + p[7], 2);
+ predictor[0 * ps + 3] = ROUND_POWER_OF_TWO(p[6] + p[7] * 2 + p[8], 2);
}
break;
- case B_VR_PRED: {
- uint8_t pp[9];
+ case B_D117_PRED: {
+ uint8_t p[9] = { left[3], left[2], left[1], left[0],
+ top_left,
+ above[0], above[1], above[2], above[3] };
- pp[0] = left[3];
- pp[1] = left[2];
- pp[2] = left[1];
- pp[3] = left[0];
- pp[4] = top_left;
- pp[5] = above[0];
- pp[6] = above[1];
- pp[7] = above[2];
- pp[8] = above[3];
-
- predictor[3 * ps + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
- predictor[2 * ps + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+ predictor[3 * ps + 0] = ROUND_POWER_OF_TWO(p[1] + p[2] * 2 + p[3], 2);
+ predictor[2 * ps + 0] = ROUND_POWER_OF_TWO(p[2] + p[3] * 2 + p[4], 2);
predictor[3 * ps + 1] =
- predictor[1 * ps + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
+ predictor[1 * ps + 0] = ROUND_POWER_OF_TWO(p[3] + p[4] * 2 + p[5], 2);
predictor[2 * ps + 1] =
- predictor[0 * ps + 0] = (pp[4] + pp[5] + 1) >> 1;
+ predictor[0 * ps + 0] = ROUND_POWER_OF_TWO(p[4] + p[5], 1);
predictor[3 * ps + 2] =
- predictor[1 * ps + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
+ predictor[1 * ps + 1] = ROUND_POWER_OF_TWO(p[4] + p[5] * 2 + p[6], 2);
predictor[2 * ps + 2] =
- predictor[0 * ps + 1] = (pp[5] + pp[6] + 1) >> 1;
+ predictor[0 * ps + 1] = ROUND_POWER_OF_TWO(p[5] + p[6], 1);
predictor[3 * ps + 3] =
- predictor[1 * ps + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+ predictor[1 * ps + 2] = ROUND_POWER_OF_TWO(p[5] + p[6] * 2 + p[7], 2);
predictor[2 * ps + 3] =
- predictor[0 * ps + 2] = (pp[6] + pp[7] + 1) >> 1;
- predictor[1 * ps + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
- predictor[0 * ps + 3] = (pp[7] + pp[8] + 1) >> 1;
+ predictor[0 * ps + 2] = ROUND_POWER_OF_TWO(p[6] + p[7], 1);
+ predictor[1 * ps + 3] = ROUND_POWER_OF_TWO(p[6] + p[7] * 2 + p[8], 2);
+ predictor[0 * ps + 3] = ROUND_POWER_OF_TWO(p[7] + p[8], 1);
}
break;
- case B_VL_PRED: {
- uint8_t *pp = above;
+ case B_D63_PRED: {
+ uint8_t *p = above;
- predictor[0 * ps + 0] = (pp[0] + pp[1] + 1) >> 1;
- predictor[1 * ps + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+ predictor[0 * ps + 0] = ROUND_POWER_OF_TWO(p[0] + p[1], 1);
+ predictor[1 * ps + 0] = ROUND_POWER_OF_TWO(p[0] + p[1] * 2 + p[2], 2);
predictor[2 * ps + 0] =
- predictor[0 * ps + 1] = (pp[1] + pp[2] + 1) >> 1;
+ predictor[0 * ps + 1] = ROUND_POWER_OF_TWO(p[1] + p[2], 1);
predictor[1 * ps + 1] =
- predictor[3 * ps + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[3 * ps + 0] = ROUND_POWER_OF_TWO(p[1] + p[2] * 2 + p[3], 2);
predictor[2 * ps + 1] =
- predictor[0 * ps + 2] = (pp[2] + pp[3] + 1) >> 1;
+ predictor[0 * ps + 2] = ROUND_POWER_OF_TWO(p[2] + p[3], 1);
predictor[3 * ps + 1] =
- predictor[1 * ps + 2] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+ predictor[1 * ps + 2] = ROUND_POWER_OF_TWO(p[2] + p[3] * 2 + p[4], 2);
predictor[0 * ps + 3] =
- predictor[2 * ps + 2] = (pp[3] + pp[4] + 1) >> 1;
+ predictor[2 * ps + 2] = ROUND_POWER_OF_TWO(p[3] + p[4], 1);
predictor[1 * ps + 3] =
- predictor[3 * ps + 2] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
- predictor[2 * ps + 3] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
- predictor[3 * ps + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+ predictor[3 * ps + 2] = ROUND_POWER_OF_TWO(p[3] + p[4] * 2 + p[5], 2);
+ predictor[2 * ps + 3] = ROUND_POWER_OF_TWO(p[4] + p[5] * 2 + p[6], 2);
+ predictor[3 * ps + 3] = ROUND_POWER_OF_TWO(p[5] + p[6] * 2 + p[7], 2);
}
break;
+ case B_D153_PRED: {
+ uint8_t p[9] = { left[3], left[2], left[1], left[0],
+ top_left,
+ above[0], above[1], above[2], above[3] };
- case B_HD_PRED: {
- uint8_t pp[9];
-
- pp[0] = left[3];
- pp[1] = left[2];
- pp[2] = left[1];
- pp[3] = left[0];
- pp[4] = top_left;
- pp[5] = above[0];
- pp[6] = above[1];
- pp[7] = above[2];
- pp[8] = above[3];
-
-
- predictor[3 * ps + 0] = (pp[0] + pp[1] + 1) >> 1;
- predictor[3 * ps + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+ predictor[3 * ps + 0] = ROUND_POWER_OF_TWO(p[0] + p[1], 1);
+ predictor[3 * ps + 1] = ROUND_POWER_OF_TWO(p[0] + p[1] * 2 + p[2], 2);
predictor[2 * ps + 0] =
- predictor[3 * ps + 2] = (pp[1] + pp[2] + 1) >> 1;
+ predictor[3 * ps + 2] = ROUND_POWER_OF_TWO(p[1] + p[2], 1);
predictor[2 * ps + 1] =
- predictor[3 * ps + 3] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[3 * ps + 3] = ROUND_POWER_OF_TWO(p[1] + p[2] * 2 + p[3], 2);
predictor[2 * ps + 2] =
- predictor[1 * ps + 0] = (pp[2] + pp[3] + 1) >> 1;
+ predictor[1 * ps + 0] = ROUND_POWER_OF_TWO(p[2] + p[3], 1);
predictor[2 * ps + 3] =
- predictor[1 * ps + 1] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+ predictor[1 * ps + 1] = ROUND_POWER_OF_TWO(p[2] + p[3] * 2 + p[4], 2);
predictor[1 * ps + 2] =
- predictor[0 * ps + 0] = (pp[3] + pp[4] + 1) >> 1;
+ predictor[0 * ps + 0] = ROUND_POWER_OF_TWO(p[3] + p[4], 1);
predictor[1 * ps + 3] =
- predictor[0 * ps + 1] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
- predictor[0 * ps + 2] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
- predictor[0 * ps + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+ predictor[0 * ps + 1] = ROUND_POWER_OF_TWO(p[3] + p[4] * 2 + p[5], 2);
+ predictor[0 * ps + 2] = ROUND_POWER_OF_TWO(p[4] + p[5] * 2 + p[6], 2);
+ predictor[0 * ps + 3] = ROUND_POWER_OF_TWO(p[5] + p[6] * 2 + p[7], 2);
}
break;
-
-
- case B_HU_PRED: {
- uint8_t *pp = left;
- predictor[0 * ps + 0] = (pp[0] + pp[1] + 1) >> 1;
- predictor[0 * ps + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+ case B_D27_PRED: {
+ uint8_t *p = left;
+ predictor[0 * ps + 0] = ROUND_POWER_OF_TWO(p[0] + p[1], 1);
+ predictor[0 * ps + 1] = ROUND_POWER_OF_TWO(p[0] + p[1] * 2 + p[2], 2);
predictor[0 * ps + 2] =
- predictor[1 * ps + 0] = (pp[1] + pp[2] + 1) >> 1;
+ predictor[1 * ps + 0] = ROUND_POWER_OF_TWO(p[1] + p[2], 1);
predictor[0 * ps + 3] =
- predictor[1 * ps + 1] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[1 * ps + 1] = ROUND_POWER_OF_TWO(p[1] + p[2] * 2 + p[3], 2);
predictor[1 * ps + 2] =
- predictor[2 * ps + 0] = (pp[2] + pp[3] + 1) >> 1;
+ predictor[2 * ps + 0] = ROUND_POWER_OF_TWO(p[2] + p[3], 1);
predictor[1 * ps + 3] =
- predictor[2 * ps + 1] = (pp[2] + pp[3] * 2 + pp[3] + 2) >> 2;
+ predictor[2 * ps + 1] = ROUND_POWER_OF_TWO(p[2] + p[3] * 2 + p[3], 2);
predictor[2 * ps + 2] =
predictor[2 * ps + 3] =
predictor[3 * ps + 0] =
predictor[3 * ps + 1] =
predictor[3 * ps + 2] =
- predictor[3 * ps + 3] = pp[3];
+ predictor[3 * ps + 3] = p[3];
}
break;
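The mode renames above (B_VE_PRED -> B_V_PRED, B_HE_PRED -> B_H_PRED, B_LD_PRED -> B_D45_PRED, B_RD_PRED -> B_D135_PRED, B_VR_PRED -> B_D117_PRED, B_HD_PRED -> B_D153_PRED, B_VL_PRED -> B_D63_PRED, B_HU_PRED -> B_D27_PRED) switch the 4x4 intra modes to angle-based names; the pixel arithmetic is untouched. Every directional case is built from the same two border filters, which a sketch makes plain (illustrative helpers, not part of the patch):

    // 2-tap average and 3-tap (1,2,1)/4 smoothing, as used by the B_D* modes.
    static INLINE uint8_t avg2(uint8_t a, uint8_t b) {
      return ROUND_POWER_OF_TWO(a + b, 1);
    }
    static INLINE uint8_t avg3(uint8_t a, uint8_t b, uint8_t c) {
      return ROUND_POWER_OF_TWO(a + 2 * b + c, 2);
    }

Each predictor cell samples avg2/avg3 of the border pixels along the prediction angle named by the mode (45, 63, 117, 135, 153 or 27 degrees).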
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 8ce9574c2..6db44a0a0 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -27,25 +27,25 @@ forward_decls vp9_common_forward_decls
#
# Dequant
#
-prototype void vp9_dequant_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, int pre_stride, uint8_t *dst, int stride, struct macroblockd *xd"
+prototype void vp9_dequant_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *dst, int stride, struct macroblockd *xd"
specialize vp9_dequant_idct_add_y_block_8x8
-prototype void vp9_dequant_idct_add_16x16 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob"
+prototype void vp9_dequant_idct_add_16x16 "int16_t *input, const int16_t *dq, uint8_t *dest, int stride, int eob"
specialize vp9_dequant_idct_add_16x16
-prototype void vp9_dequant_idct_add_8x8 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob"
+prototype void vp9_dequant_idct_add_8x8 "int16_t *input, const int16_t *dq, uint8_t *dest, int stride, int eob"
specialize vp9_dequant_idct_add_8x8
-prototype void vp9_dequant_idct_add "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob"
+prototype void vp9_dequant_idct_add "int16_t *input, const int16_t *dq, uint8_t *dest, int stride, int eob"
specialize vp9_dequant_idct_add
-prototype void vp9_dequant_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, int pre_stride, uint8_t *dst, int stride, struct macroblockd *xd"
+prototype void vp9_dequant_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *dst, int stride, struct macroblockd *xd"
specialize vp9_dequant_idct_add_y_block
-prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, int pre_stride, uint8_t *dst, int stride, uint16_t *eobs"
+prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *dst, int stride, uint16_t *eobs"
specialize vp9_dequant_idct_add_uv_block
-prototype void vp9_dequant_idct_add_32x32 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int pitch, int stride, int eob"
+prototype void vp9_dequant_idct_add_32x32 "int16_t *q, const int16_t *dq, uint8_t *dst, int stride, int eob"
specialize vp9_dequant_idct_add_32x32
#
@@ -108,25 +108,25 @@ prototype void vp9_intra_uv4x4_predict "struct macroblockd *xd, struct blockd *x
specialize vp9_intra_uv4x4_predict;
if [ "$CONFIG_VP9_DECODER" = "yes" ]; then
-prototype void vp9_add_residual_4x4 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+prototype void vp9_add_residual_4x4 "const int16_t *diff, uint8_t *dest, int stride"
specialize vp9_add_residual_4x4 sse2
-prototype void vp9_add_residual_8x8 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+prototype void vp9_add_residual_8x8 "const int16_t *diff, uint8_t *dest, int stride"
specialize vp9_add_residual_8x8 sse2
-prototype void vp9_add_residual_16x16 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+prototype void vp9_add_residual_16x16 "const int16_t *diff, uint8_t *dest, int stride"
specialize vp9_add_residual_16x16 sse2
-prototype void vp9_add_residual_32x32 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+prototype void vp9_add_residual_32x32 "const int16_t *diff, uint8_t *dest, int stride"
specialize vp9_add_residual_32x32 sse2
-prototype void vp9_add_constant_residual_8x8 "const int16_t diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+prototype void vp9_add_constant_residual_8x8 "const int16_t diff, uint8_t *dest, int stride"
specialize vp9_add_constant_residual_8x8 sse2
-prototype void vp9_add_constant_residual_16x16 "const int16_t diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+prototype void vp9_add_constant_residual_16x16 "const int16_t diff, uint8_t *dest, int stride"
specialize vp9_add_constant_residual_16x16 sse2
-prototype void vp9_add_constant_residual_32x32 "const int16_t diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+prototype void vp9_add_constant_residual_32x32 "const int16_t diff, uint8_t *dest, int stride"
specialize vp9_add_constant_residual_32x32 sse2
fi
@@ -376,6 +376,22 @@ if [ "$CONFIG_VP9_ENCODER" = "yes" ]; then
# variance
[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2
+if [ "$CONFIG_SBSEGMENT" = "yes" ]; then
+
+prototype unsigned int vp9_variance32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance32x16
+
+prototype unsigned int vp9_variance16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance16x32
+
+prototype unsigned int vp9_variance64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance64x32
+
+prototype unsigned int vp9_variance32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance32x64
+
+fi
+
prototype unsigned int vp9_variance32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_variance32x32
@@ -410,6 +426,22 @@ vp9_variance4x4_mmx=vp9_variance4x4_mmx
prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance64x64 sse2
+if [ "$CONFIG_SBSEGMENT" = "yes" ]; then
+
+prototype unsigned int vp9_sub_pixel_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_sub_pixel_variance32x64
+
+prototype unsigned int vp9_sub_pixel_variance64x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_sub_pixel_variance64x32
+
+prototype unsigned int vp9_sub_pixel_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_sub_pixel_variance32x16
+
+prototype unsigned int vp9_sub_pixel_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_sub_pixel_variance16x32
+
+fi
+
prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance32x32 sse2
@@ -436,6 +468,22 @@ vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt
prototype unsigned int vp9_sad64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad64x64 sse2
+if [ "$CONFIG_SBSEGMENT" = "yes" ]; then
+
+prototype unsigned int vp9_sad32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad32x64 sse2
+
+prototype unsigned int vp9_sad64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad64x32 sse2
+
+prototype unsigned int vp9_sad32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad32x16 sse2
+
+prototype unsigned int vp9_sad16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad16x32 sse2
+
+fi
+
prototype unsigned int vp9_sad32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad32x32 sse2
@@ -529,6 +577,22 @@ specialize vp9_sad4x4x8 sse4
prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad64x64x4d sse2
+if [ "$CONFIG_SBSEGMENT" = "yes" ]; then
+
+prototype void vp9_sad32x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad32x64x4d sse2
+
+prototype void vp9_sad64x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad64x32x4d sse2
+
+prototype void vp9_sad32x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad32x16x4d sse2
+
+prototype void vp9_sad16x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad16x32x4d sse2
+
+fi
+
prototype void vp9_sad32x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad32x32x4d sse2
@@ -606,10 +670,10 @@ prototype void vp9_short_fdct8x8 "int16_t *InputData, int16_t *OutputData, int p
specialize vp9_short_fdct8x8 sse2
prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int pitch"
-specialize vp9_short_fdct4x4
+specialize vp9_short_fdct4x4 sse2
prototype void vp9_short_fdct8x4 "int16_t *InputData, int16_t *OutputData, int pitch"
-specialize vp9_short_fdct8x4
+specialize vp9_short_fdct8x4 sse2
prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int pitch"
specialize vp9_short_fdct32x32
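The dequant/idct-add and add-residual prototypes above lose their pred/pitch arguments: the predictor is now expected to be written into dst before the inverse transform, so reconstruction becomes an in-place accumulate. A minimal sketch of the new contract for the C reference path (assumed shape, not taken from this diff):

    static void add_residual_4x4(const int16_t *diff, uint8_t *dest,
                                 int stride) {
      int r, c;
      for (r = 0; r < 4; r++) {
        for (c = 0; c < 4; c++)
          dest[c] = clip_pixel(dest[c] + diff[c]);  // accumulate onto prediction
        diff += 4;
        dest += stride;
      }
    }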
diff --git a/vp9/common/vp9_treecoder.c b/vp9/common/vp9_treecoder.c
index 6e2597954..3f049b5b3 100644
--- a/vp9/common/vp9_treecoder.c
+++ b/vp9/common/vp9_treecoder.c
@@ -18,32 +18,27 @@
#include "vp9/common/vp9_treecoder.h"
-static void tree2tok(
- struct vp9_token_struct *const p,
- vp9_tree t,
- int i,
- int v,
- int L
-) {
+static void tree2tok(struct vp9_token *const p, vp9_tree t,
+ int i, int v, int l) {
v += v;
- ++L;
+ ++l;
do {
const vp9_tree_index j = t[i++];
if (j <= 0) {
p[-j].value = v;
- p[-j].Len = L;
+ p[-j].len = l;
} else
- tree2tok(p, t, j, v, L);
+ tree2tok(p, t, j, v, l);
} while (++v & 1);
}
-void vp9_tokens_from_tree(struct vp9_token_struct *p, vp9_tree t) {
+void vp9_tokens_from_tree(struct vp9_token *p, vp9_tree t) {
tree2tok(p, t, 0, 0, 0);
}
-void vp9_tokens_from_tree_offset(struct vp9_token_struct *p, vp9_tree t,
+void vp9_tokens_from_tree_offset(struct vp9_token *p, vp9_tree t,
int offset) {
tree2tok(p - offset, t, 0, 0, 0);
}
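For reference, tree2tok() walks a vp9_tree depth-first, appending a 0 bit for each left branch and a 1 bit for each right branch, and records every leaf's codeword in value/len. A toy three-symbol tree (illustrative only):

    static const vp9_tree_index toy_tree[4] = {
      0, 2,    // root: left -> leaf 0 (leaves are negated indices), right -> index 2
      -1, -2   // inner node: left -> leaf 1, right -> leaf 2
    };
    // vp9_tokens_from_tree(tokens, toy_tree) yields:
    //   tokens[0] = { .value = 0, .len = 1 }   codeword "0"
    //   tokens[1] = { .value = 2, .len = 2 }   codeword "10"
    //   tokens[2] = { .value = 3, .len = 2 }   codeword "11"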
diff --git a/vp9/common/vp9_treecoder.h b/vp9/common/vp9_treecoder.h
index 9297d5280..ebcd4116f 100644
--- a/vp9/common/vp9_treecoder.h
+++ b/vp9/common/vp9_treecoder.h
@@ -13,6 +13,7 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_common.h"
typedef uint8_t vp9_prob;
@@ -31,16 +32,15 @@ typedef int8_t vp9_tree_index;
typedef const vp9_tree_index vp9_tree[], *vp9_tree_p;
-typedef const struct vp9_token_struct {
+struct vp9_token {
int value;
- int Len;
-} vp9_token;
+ int len;
+};
/* Construct encoding array from tree. */
-void vp9_tokens_from_tree(struct vp9_token_struct *, vp9_tree);
-void vp9_tokens_from_tree_offset(struct vp9_token_struct *, vp9_tree,
- int offset);
+void vp9_tokens_from_tree(struct vp9_token*, vp9_tree);
+void vp9_tokens_from_tree_offset(struct vp9_token*, vp9_tree, int offset);
/* Convert array of token occurrence counts into a table of probabilities
for the associated binary encoding tree. Also writes count of branches
@@ -76,7 +76,7 @@ static INLINE vp9_prob get_binary_prob(int n0, int n1) {
/* this function assumes prob1 and prob2 are already within [1,255] range */
static INLINE vp9_prob weighted_prob(int prob1, int prob2, int factor) {
- return (prob1 * (256 - factor) + prob2 * factor + 128) >> 8;
+ return ROUND_POWER_OF_TWO(prob1 * (256 - factor) + prob2 * factor, 8);
}
#endif // VP9_COMMON_VP9_TREECODER_H_
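With the rewrite, weighted_prob() remains an 8.8 fixed-point blend, (prob1 * (256 - factor) + prob2 * factor + 128) >> 8, now spelled via ROUND_POWER_OF_TWO. For example, factor = 64 weights prob1 at 3/4 and prob2 at 1/4: weighted_prob(200, 100, 64) = (200 * 192 + 100 * 64 + 128) >> 8 = 44928 >> 8 = 175.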
diff --git a/vp9/decoder/vp9_dboolhuff.c b/vp9/decoder/vp9_dboolhuff.c
index 7e3b4646b..dcd591642 100644
--- a/vp9/decoder/vp9_dboolhuff.c
+++ b/vp9/decoder/vp9_dboolhuff.c
@@ -13,34 +13,29 @@
#include "vp9/decoder/vp9_dboolhuff.h"
-int vp9_start_decode(BOOL_DECODER *br,
- const unsigned char *source,
- unsigned int source_sz) {
- br->user_buffer_end = source + source_sz;
- br->user_buffer = source;
+int vp9_start_decode(BOOL_DECODER *br, const uint8_t *buffer, size_t size) {
+ br->buffer_end = buffer + size;
+ br->buffer = buffer;
br->value = 0;
br->count = -8;
br->range = 255;
- if (source_sz && !source)
+ if (size && !buffer)
return 1;
- /* Populate the buffer */
- vp9_bool_decoder_fill(br);
-
+ vp9_reader_fill(br);
return 0;
}
-
-void vp9_bool_decoder_fill(BOOL_DECODER *br) {
- const unsigned char *bufptr = br->user_buffer;
- const unsigned char *bufend = br->user_buffer_end;
+void vp9_reader_fill(BOOL_DECODER *br) {
+ const uint8_t *const buffer_end = br->buffer_end;
+ const uint8_t *buffer = br->buffer;
VP9_BD_VALUE value = br->value;
int count = br->count;
int shift = VP9_BD_VALUE_SIZE - 8 - (count + 8);
int loop_end = 0;
- int bits_left = (int)((bufend - bufptr)*CHAR_BIT);
- int x = shift + CHAR_BIT - bits_left;
+ const int bits_left = (int)((buffer_end - buffer)*CHAR_BIT);
+ const int x = shift + CHAR_BIT - bits_left;
if (x >= 0) {
count += VP9_LOTS_OF_BITS;
@@ -50,18 +45,18 @@ void vp9_bool_decoder_fill(BOOL_DECODER *br) {
if (x < 0 || bits_left) {
while (shift >= loop_end) {
count += CHAR_BIT;
- value |= (VP9_BD_VALUE)*bufptr++ << shift;
+ value |= (VP9_BD_VALUE)*buffer++ << shift;
shift -= CHAR_BIT;
}
}
- br->user_buffer = bufptr;
+ br->buffer = buffer;
br->value = value;
br->count = count;
}
-static int get_unsigned_bits(unsigned num_values) {
+static int get_unsigned_bits(unsigned int num_values) {
int cat = 0;
if (num_values <= 1)
return 0;
@@ -84,30 +79,29 @@ int vp9_inv_recenter_nonneg(int v, int m) {
int vp9_decode_uniform(BOOL_DECODER *br, int n) {
int v;
- int l = get_unsigned_bits(n);
- int m = (1 << l) - n;
- if (!l) return 0;
- v = decode_value(br, l - 1);
- if (v < m)
- return v;
- else
- return (v << 1) - m + decode_value(br, 1);
+ const int l = get_unsigned_bits(n);
+ const int m = (1 << l) - n;
+ if (!l)
+ return 0;
+
+ v = vp9_read_literal(br, l - 1);
+ return v < m ? v : (v << 1) - m + vp9_read_bit(br);
}
int vp9_decode_term_subexp(BOOL_DECODER *br, int k, int num_syms) {
int i = 0, mk = 0, word;
while (1) {
- int b = (i ? k + i - 1 : k);
- int a = (1 << b);
+ const int b = i ? k + i - 1 : k;
+ const int a = 1 << b;
if (num_syms <= mk + 3 * a) {
word = vp9_decode_uniform(br, num_syms - mk) + mk;
break;
} else {
- if (decode_value(br, 1)) {
+ if (vp9_read_bit(br)) {
i++;
mk += a;
} else {
- word = decode_value(br, b) + mk;
+ word = vp9_read_literal(br, b) + mk;
break;
}
}
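vp9_decode_uniform() reads a quasi-uniform code for a symbol in [0, n). Worked example, assuming n = 5: l = get_unsigned_bits(5) = 3 and m = (1 << 3) - 5 = 3, so

    // v = vp9_read_literal(br, 2);                  first l-1 = 2 bits, v in [0, 3]
    // v < 3:  return v;                             values 0..2 cost 2 bits
    // v == 3: return 2 * 3 - 3 + vp9_read_bit(br);  yields 3 or 4, 3 bits each

i.e. the shorter codewords go to the smaller values and no codeword is wasted.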
@@ -119,10 +113,8 @@ int vp9_decode_unsigned_max(BOOL_DECODER *br, int max) {
int data = 0, bit = 0, lmax = max;
while (lmax) {
- data |= decode_bool(br, 128) << bit++;
+ data |= vp9_read_bit(br) << bit++;
lmax >>= 1;
}
- if (data > max)
- return max;
- return data;
+ return data > max ? max : data;
}
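vp9_decode_unsigned_max() simply reads one raw bit per binary digit of max, LSB first, then clamps: for max = 6 it reads 3 bits, and a decoded 7 is clamped to 6.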
diff --git a/vp9/decoder/vp9_dboolhuff.h b/vp9/decoder/vp9_dboolhuff.h
index 02ae1d3c8..10b7a1af3 100644
--- a/vp9/decoder/vp9_dboolhuff.h
+++ b/vp9/decoder/vp9_dboolhuff.h
@@ -21,32 +21,40 @@
typedef size_t VP9_BD_VALUE;
#define VP9_BD_VALUE_SIZE ((int)sizeof(VP9_BD_VALUE)*CHAR_BIT)
-/*This is meant to be a large, positive constant that can still be efficiently
- loaded as an immediate (on platforms like ARM, for example).
- Even relatively modest values like 100 would work fine.*/
-#define VP9_LOTS_OF_BITS (0x40000000)
+
+// This is meant to be a large, positive constant that can still be efficiently
+// loaded as an immediate (on platforms like ARM, for example).
+// Even relatively modest values like 100 would work fine.
+#define VP9_LOTS_OF_BITS 0x40000000
typedef struct {
- const unsigned char *user_buffer_end;
- const unsigned char *user_buffer;
- VP9_BD_VALUE value;
- int count;
- unsigned int range;
+ const uint8_t *buffer_end;
+ const uint8_t *buffer;
+ VP9_BD_VALUE value;
+ int count;
+ unsigned int range;
} BOOL_DECODER;
DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]);
-int vp9_start_decode(BOOL_DECODER *br,
- const unsigned char *source,
- unsigned int source_sz);
+int vp9_start_decode(BOOL_DECODER *br, const uint8_t *buffer, size_t size);
-void vp9_bool_decoder_fill(BOOL_DECODER *br);
+void vp9_reader_fill(BOOL_DECODER *br);
int vp9_decode_uniform(BOOL_DECODER *br, int n);
int vp9_decode_term_subexp(BOOL_DECODER *br, int k, int num_syms);
int vp9_inv_recenter_nonneg(int v, int m);
-static int decode_bool(BOOL_DECODER *br, int probability) {
+static INLINE const uint8_t *vp9_reader_find_end(BOOL_DECODER *br) {
+ // Find the end of the coded buffer
+ while (br->count > CHAR_BIT && br->count < VP9_BD_VALUE_SIZE) {
+ br->count -= CHAR_BIT;
+ br->buffer--;
+ }
+ return br->buffer;
+}
+
+static int vp9_read(BOOL_DECODER *br, int probability) {
unsigned int bit = 0;
VP9_BD_VALUE value;
VP9_BD_VALUE bigsplit;
@@ -55,7 +63,7 @@ static int decode_bool(BOOL_DECODER *br, int probability) {
unsigned int split = 1 + (((br->range - 1) * probability) >> 8);
if (br->count < 0)
- vp9_bool_decoder_fill(br);
+ vp9_reader_fill(br);
value = br->value;
count = br->count;
@@ -83,12 +91,15 @@ static int decode_bool(BOOL_DECODER *br, int probability) {
return bit;
}
-static int decode_value(BOOL_DECODER *br, int bits) {
- int z = 0;
- int bit;
+static int vp9_read_bit(BOOL_DECODER *r) {
+ return vp9_read(r, 128); // vp9_prob_half
+}
+
+static int vp9_read_literal(BOOL_DECODER *br, int bits) {
+ int z = 0, bit;
for (bit = bits - 1; bit >= 0; bit--) {
- z |= decode_bool(br, 0x80) << bit;
+ z |= vp9_read_bit(br) << bit;
}
return z;
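The renamed reader entry points form a small hierarchy: vp9_read() decodes one bool against an 8-bit probability, vp9_read_bit() is vp9_read(r, 128) (an even split), and vp9_read_literal() assembles `bits` raw bits MSB-first. The core arithmetic step is the split computation quoted from the patch:

    unsigned int split = 1 + (((br->range - 1) * probability) >> 8);
    // bit = 0 keeps the sub-range [0, split); bit = 1 takes [split, range)
    // and rebases value; both paths then renormalize via the vp9_norm[]
    // shift table, in the usual VP8/VP9-style bool decoder fashion.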
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 60ef89118..aaa9b2ef0 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -35,8 +35,8 @@ int dec_mvcount = 0;
extern int dec_debug;
#endif
-static B_PREDICTION_MODE read_bmode(vp9_reader *bc, const vp9_prob *p) {
- B_PREDICTION_MODE m = treed_read(bc, vp9_bmode_tree, p);
+static B_PREDICTION_MODE read_bmode(vp9_reader *r, const vp9_prob *p) {
+ B_PREDICTION_MODE m = treed_read(r, vp9_bmode_tree, p);
#if CONFIG_NEWBINTRAMODES
if (m == B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS)
m = B_CONTEXT_PRED;
@@ -45,32 +45,32 @@ static B_PREDICTION_MODE read_bmode(vp9_reader *bc, const vp9_prob *p) {
return m;
}
-static B_PREDICTION_MODE read_kf_bmode(vp9_reader *bc, const vp9_prob *p) {
- return (B_PREDICTION_MODE)treed_read(bc, vp9_kf_bmode_tree, p);
+static B_PREDICTION_MODE read_kf_bmode(vp9_reader *r, const vp9_prob *p) {
+ return (B_PREDICTION_MODE)treed_read(r, vp9_kf_bmode_tree, p);
}
-static MB_PREDICTION_MODE read_ymode(vp9_reader *bc, const vp9_prob *p) {
- return (MB_PREDICTION_MODE)treed_read(bc, vp9_ymode_tree, p);
+static MB_PREDICTION_MODE read_ymode(vp9_reader *r, const vp9_prob *p) {
+ return (MB_PREDICTION_MODE)treed_read(r, vp9_ymode_tree, p);
}
-static MB_PREDICTION_MODE read_sb_ymode(vp9_reader *bc, const vp9_prob *p) {
- return (MB_PREDICTION_MODE)treed_read(bc, vp9_sb_ymode_tree, p);
+static MB_PREDICTION_MODE read_sb_ymode(vp9_reader *r, const vp9_prob *p) {
+ return (MB_PREDICTION_MODE)treed_read(r, vp9_sb_ymode_tree, p);
}
-static MB_PREDICTION_MODE read_kf_sb_ymode(vp9_reader *bc, const vp9_prob *p) {
- return (MB_PREDICTION_MODE)treed_read(bc, vp9_uv_mode_tree, p);
+static MB_PREDICTION_MODE read_kf_sb_ymode(vp9_reader *r, const vp9_prob *p) {
+ return (MB_PREDICTION_MODE)treed_read(r, vp9_uv_mode_tree, p);
}
-static MB_PREDICTION_MODE read_kf_mb_ymode(vp9_reader *bc, const vp9_prob *p) {
- return (MB_PREDICTION_MODE)treed_read(bc, vp9_kf_ymode_tree, p);
+static MB_PREDICTION_MODE read_kf_mb_ymode(vp9_reader *r, const vp9_prob *p) {
+ return (MB_PREDICTION_MODE)treed_read(r, vp9_kf_ymode_tree, p);
}
-static int read_i8x8_mode(vp9_reader *bc, const vp9_prob *p) {
- return treed_read(bc, vp9_i8x8_mode_tree, p);
+static int read_i8x8_mode(vp9_reader *r, const vp9_prob *p) {
+ return treed_read(r, vp9_i8x8_mode_tree, p);
}
-static MB_PREDICTION_MODE read_uv_mode(vp9_reader *bc, const vp9_prob *p) {
- return (MB_PREDICTION_MODE)treed_read(bc, vp9_uv_mode_tree, p);
+static MB_PREDICTION_MODE read_uv_mode(vp9_reader *r, const vp9_prob *p) {
+ return (MB_PREDICTION_MODE)treed_read(r, vp9_uv_mode_tree, p);
}
// This function reads the current macroblock's segment id from the bitstream
@@ -117,24 +117,20 @@ int vp9_read_mv_ref_id(vp9_reader *r, vp9_prob *ref_id_probs) {
#endif
extern const int vp9_i8x8_block[4];
-static void kfread_modes(VP9D_COMP *pbi,
- MODE_INFO *m,
- int mb_row,
- int mb_col,
- BOOL_DECODER* const bc) {
+static void kfread_modes(VP9D_COMP *pbi, MODE_INFO *m,
+ int mb_row, int mb_col,
+ vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
- MACROBLOCKD *const xd = &pbi->mb;
- const int mis = pbi->common.mode_info_stride;
- int map_index = mb_row * pbi->common.mb_cols + mb_col;
- MB_PREDICTION_MODE y_mode;
-
+ MACROBLOCKD *const xd = &pbi->mb;
+ const int mis = cm->mode_info_stride;
+ const int map_index = mb_row * cm->mb_cols + mb_col;
m->mbmi.ref_frame = INTRA_FRAME;
  // Read the macroblock segmentation map if it is being updated explicitly
  // for this frame (reset to 0 by default).
m->mbmi.segment_id = 0;
- if (pbi->mb.update_mb_segmentation_map) {
- read_mb_segid(bc, &m->mbmi, &pbi->mb);
+ if (xd->update_mb_segmentation_map) {
+ read_mb_segid(r, &m->mbmi, xd);
if (m->mbmi.sb_type) {
const int bw = 1 << mb_width_log2(m->mbmi.sb_type);
const int bh = 1 << mb_height_log2(m->mbmi.sb_type);
@@ -144,8 +140,8 @@ static void kfread_modes(VP9D_COMP *pbi,
for (y = 0; y < ymbs; y++) {
for (x = 0; x < xmbs; x++) {
- cm->last_frame_seg_map[map_index + x + y * cm->mb_cols] =
- m->mbmi.segment_id;
+ const int index = y * cm->mb_cols + x;
+ cm->last_frame_seg_map[map_index + index] = m->mbmi.segment_id;
}
}
} else {
@@ -153,41 +149,33 @@ static void kfread_modes(VP9D_COMP *pbi,
}
}
- m->mbmi.mb_skip_coeff = 0;
- if (pbi->common.mb_no_coeff_skip &&
- (!vp9_segfeature_active(&pbi->mb, m->mbmi.segment_id, SEG_LVL_SKIP))) {
- m->mbmi.mb_skip_coeff = vp9_read(bc, vp9_get_pred_prob(cm, &pbi->mb,
- PRED_MBSKIP));
- } else {
- m->mbmi.mb_skip_coeff = vp9_segfeature_active(&pbi->mb, m->mbmi.segment_id,
- SEG_LVL_SKIP);
- }
+ m->mbmi.mb_skip_coeff = vp9_segfeature_active(&pbi->mb, m->mbmi.segment_id,
+ SEG_LVL_SKIP);
+ if (!m->mbmi.mb_skip_coeff)
+ m->mbmi.mb_skip_coeff = vp9_read(r, vp9_get_pred_prob(cm, xd, PRED_MBSKIP));
- y_mode = m->mbmi.sb_type ?
- read_kf_sb_ymode(bc,
- pbi->common.sb_kf_ymode_prob[pbi->common.kf_ymode_probs_index]):
- read_kf_mb_ymode(bc,
- pbi->common.kf_ymode_prob[pbi->common.kf_ymode_probs_index]);
+ m->mbmi.mode = m->mbmi.sb_type ?
+ read_kf_sb_ymode(r, cm->sb_kf_ymode_prob[cm->kf_ymode_probs_index]):
+ read_kf_mb_ymode(r, cm->kf_ymode_prob[cm->kf_ymode_probs_index]);
m->mbmi.ref_frame = INTRA_FRAME;
- if ((m->mbmi.mode = y_mode) == I4X4_PRED) {
+ if (m->mbmi.mode == I4X4_PRED) {
int i = 0;
do {
const B_PREDICTION_MODE a = above_block_mode(m, i, mis);
- const B_PREDICTION_MODE l = (xd->left_available || (i & 3)) ?
+ const B_PREDICTION_MODE l = xd->left_available || (i & 3) ?
left_block_mode(m, i) : B_DC_PRED;
- m->bmi[i].as_mode.first = read_kf_bmode(bc,
- pbi->common.kf_bmode_prob[a][l]);
+ m->bmi[i].as_mode.first = read_kf_bmode(r, cm->kf_bmode_prob[a][l]);
} while (++i < 16);
}
- if ((m->mbmi.mode = y_mode) == I8X8_PRED) {
+ if (m->mbmi.mode == I8X8_PRED) {
int i;
for (i = 0; i < 4; i++) {
const int ib = vp9_i8x8_block[i];
- const int mode8x8 = read_i8x8_mode(bc, pbi->common.fc.i8x8_mode_prob);
+ const int mode8x8 = read_i8x8_mode(r, cm->fc.i8x8_mode_prob);
m->bmi[ib + 0].as_mode.first = mode8x8;
m->bmi[ib + 1].as_mode.first = mode8x8;
@@ -195,19 +183,18 @@ static void kfread_modes(VP9D_COMP *pbi,
m->bmi[ib + 5].as_mode.first = mode8x8;
}
} else {
- m->mbmi.uv_mode = read_uv_mode(bc,
- pbi->common.kf_uv_mode_prob[m->mbmi.mode]);
+ m->mbmi.uv_mode = read_uv_mode(r, cm->kf_uv_mode_prob[m->mbmi.mode]);
}
if (cm->txfm_mode == TX_MODE_SELECT &&
m->mbmi.mb_skip_coeff == 0 &&
m->mbmi.mode <= I8X8_PRED) {
// FIXME(rbultje) code ternary symbol once all experiments are merged
- m->mbmi.txfm_size = vp9_read(bc, cm->prob_tx[0]);
+ m->mbmi.txfm_size = vp9_read(r, cm->prob_tx[0]);
if (m->mbmi.txfm_size != TX_4X4 && m->mbmi.mode != I8X8_PRED) {
- m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[1]);
+ m->mbmi.txfm_size += vp9_read(r, cm->prob_tx[1]);
if (m->mbmi.txfm_size != TX_8X8 && m->mbmi.sb_type >= BLOCK_SIZE_SB32X32)
- m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[2]);
+ m->mbmi.txfm_size += vp9_read(r, cm->prob_tx[2]);
}
} else if (cm->txfm_mode >= ALLOW_32X32 &&
m->mbmi.sb_type >= BLOCK_SIZE_SB32X32) {
@@ -271,104 +258,100 @@ static int read_nmv_component_fp(vp9_reader *r,
static void read_nmv(vp9_reader *r, MV *mv, const MV *ref,
const nmv_context *mvctx) {
const MV_JOINT_TYPE j = treed_read(r, vp9_mv_joint_tree, mvctx->joints);
- mv->row = mv-> col = 0;
- if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) {
+ mv->row = mv->col = 0;
+
+ if (mv_joint_vertical(j))
mv->row = read_nmv_component(r, ref->row, &mvctx->comps[0]);
- }
- if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) {
+ if (mv_joint_horizontal(j))
mv->col = read_nmv_component(r, ref->col, &mvctx->comps[1]);
- }
}
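mv_joint_vertical()/mv_joint_horizontal() replace the open-coded joint-type tests. The joint type records which motion-vector components are nonzero, so the helpers reduce to (assuming the vp9_entropymv.h definitions):

    // MV_JOINT_ZERO:  both zero          MV_JOINT_HNZVZ:  only col nonzero
    // MV_JOINT_HZVNZ: only row nonzero   MV_JOINT_HNZVNZ: both nonzero
    static INLINE int mv_joint_vertical(MV_JOINT_TYPE type) {
      return type == MV_JOINT_HZVNZ || type == MV_JOINT_HNZVNZ;
    }
    static INLINE int mv_joint_horizontal(MV_JOINT_TYPE type) {
      return type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ;
    }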
static void read_nmv_fp(vp9_reader *r, MV *mv, const MV *ref,
const nmv_context *mvctx, int usehp) {
const MV_JOINT_TYPE j = vp9_get_mv_joint(*mv);
usehp = usehp && vp9_use_nmv_hp(ref);
- if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) {
+ if (mv_joint_vertical(j))
mv->row = read_nmv_component_fp(r, mv->row, ref->row, &mvctx->comps[0],
usehp);
- }
- if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) {
+
+ if (mv_joint_horizontal(j))
mv->col = read_nmv_component_fp(r, mv->col, ref->col, &mvctx->comps[1],
usehp);
- }
- /*
- printf("MV: %d %d REF: %d %d\n", mv->row + ref->row, mv->col + ref->col,
- ref->row, ref->col);
- */
}
-static void update_nmv(vp9_reader *bc, vp9_prob *const p,
+static void update_nmv(vp9_reader *r, vp9_prob *const p,
const vp9_prob upd_p) {
- if (vp9_read(bc, upd_p)) {
+ if (vp9_read(r, upd_p)) {
#ifdef LOW_PRECISION_MV_UPDATE
- *p = (vp9_read_literal(bc, 7) << 1) | 1;
+ *p = (vp9_read_literal(r, 7) << 1) | 1;
#else
- *p = (vp9_read_literal(bc, 8));
+ *p = (vp9_read_literal(r, 8));
#endif
}
}
-static void read_nmvprobs(vp9_reader *bc, nmv_context *mvctx,
+static void read_nmvprobs(vp9_reader *r, nmv_context *mvctx,
int usehp) {
int i, j, k;
#ifdef MV_GROUP_UPDATE
- if (!vp9_read_bit(bc))
+ if (!vp9_read_bit(r))
return;
#endif
for (j = 0; j < MV_JOINTS - 1; ++j)
- update_nmv(bc, &mvctx->joints[j], VP9_NMV_UPDATE_PROB);
+ update_nmv(r, &mvctx->joints[j], VP9_NMV_UPDATE_PROB);
for (i = 0; i < 2; ++i) {
- update_nmv(bc, &mvctx->comps[i].sign, VP9_NMV_UPDATE_PROB);
+ update_nmv(r, &mvctx->comps[i].sign, VP9_NMV_UPDATE_PROB);
for (j = 0; j < MV_CLASSES - 1; ++j)
- update_nmv(bc, &mvctx->comps[i].classes[j], VP9_NMV_UPDATE_PROB);
+ update_nmv(r, &mvctx->comps[i].classes[j], VP9_NMV_UPDATE_PROB);
for (j = 0; j < CLASS0_SIZE - 1; ++j)
- update_nmv(bc, &mvctx->comps[i].class0[j], VP9_NMV_UPDATE_PROB);
+ update_nmv(r, &mvctx->comps[i].class0[j], VP9_NMV_UPDATE_PROB);
for (j = 0; j < MV_OFFSET_BITS; ++j)
- update_nmv(bc, &mvctx->comps[i].bits[j], VP9_NMV_UPDATE_PROB);
+ update_nmv(r, &mvctx->comps[i].bits[j], VP9_NMV_UPDATE_PROB);
}
for (i = 0; i < 2; ++i) {
for (j = 0; j < CLASS0_SIZE; ++j)
for (k = 0; k < 3; ++k)
- update_nmv(bc, &mvctx->comps[i].class0_fp[j][k], VP9_NMV_UPDATE_PROB);
+ update_nmv(r, &mvctx->comps[i].class0_fp[j][k], VP9_NMV_UPDATE_PROB);
for (j = 0; j < 3; ++j)
- update_nmv(bc, &mvctx->comps[i].fp[j], VP9_NMV_UPDATE_PROB);
+ update_nmv(r, &mvctx->comps[i].fp[j], VP9_NMV_UPDATE_PROB);
}
if (usehp) {
for (i = 0; i < 2; ++i) {
- update_nmv(bc, &mvctx->comps[i].class0_hp, VP9_NMV_UPDATE_PROB);
- update_nmv(bc, &mvctx->comps[i].hp, VP9_NMV_UPDATE_PROB);
+ update_nmv(r, &mvctx->comps[i].class0_hp, VP9_NMV_UPDATE_PROB);
+ update_nmv(r, &mvctx->comps[i].hp, VP9_NMV_UPDATE_PROB);
}
}
}
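Note the LOW_PRECISION_MV_UPDATE path writes (vp9_read_literal(r, 7) << 1) | 1: the updated probability is coded with 7 bits of precision and forced odd, e.g. a literal of 64 yields 129. That guarantees a value in [1, 255] (never zero) while saving one bit per updated probability.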
// Read the reference frame
static MV_REFERENCE_FRAME read_ref_frame(VP9D_COMP *pbi,
- vp9_reader *const bc,
+ vp9_reader *r,
unsigned char segment_id) {
MV_REFERENCE_FRAME ref_frame;
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
int seg_ref_count = 0;
- int seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME);
+ const int seg_ref_active = vp9_segfeature_active(xd, segment_id,
+ SEG_LVL_REF_FRAME);
+
+ const int intra = vp9_check_segref(xd, segment_id, INTRA_FRAME);
+ const int last = vp9_check_segref(xd, segment_id, LAST_FRAME);
+ const int golden = vp9_check_segref(xd, segment_id, GOLDEN_FRAME);
+ const int altref = vp9_check_segref(xd, segment_id, ALTREF_FRAME);
  // If segment coding is enabled, check whether the segment allows more
  // than one possible reference frame
- if (seg_ref_active) {
- seg_ref_count = vp9_check_segref(xd, segment_id, INTRA_FRAME) +
- vp9_check_segref(xd, segment_id, LAST_FRAME) +
- vp9_check_segref(xd, segment_id, GOLDEN_FRAME) +
- vp9_check_segref(xd, segment_id, ALTREF_FRAME);
- }
+ if (seg_ref_active)
+ seg_ref_count = intra + last + golden + altref;
  // Segment reference frame features are not available, or the segment
  // allows multiple reference frame options
@@ -380,7 +363,7 @@ static MV_REFERENCE_FRAME read_ref_frame(VP9D_COMP *pbi,
vp9_prob pred_prob = vp9_get_pred_prob(cm, xd, PRED_REF);
// Read the prediction status flag
- unsigned char prediction_flag = vp9_read(bc, pred_prob);
+ unsigned char prediction_flag = vp9_read(r, pred_prob);
// Store the prediction flag.
vp9_set_pred_flag(xd, PRED_REF, prediction_flag);
@@ -394,19 +377,15 @@ static MV_REFERENCE_FRAME read_ref_frame(VP9D_COMP *pbi,
} else {
// decode the explicitly coded value
vp9_prob mod_refprobs[PREDICTION_PROBS];
- vpx_memcpy(mod_refprobs,
- cm->mod_refprobs[pred_ref], sizeof(mod_refprobs));
+ vpx_memcpy(mod_refprobs, cm->mod_refprobs[pred_ref],
+ sizeof(mod_refprobs));
      // If segment coding is enabled, blank out options that can't occur
      // by setting the branch probability to 0.
if (seg_ref_active) {
- mod_refprobs[INTRA_FRAME] *=
- vp9_check_segref(xd, segment_id, INTRA_FRAME);
- mod_refprobs[LAST_FRAME] *=
- vp9_check_segref(xd, segment_id, LAST_FRAME);
- mod_refprobs[GOLDEN_FRAME] *=
- vp9_check_segref(xd, segment_id, GOLDEN_FRAME) *
- vp9_check_segref(xd, segment_id, ALTREF_FRAME);
+ mod_refprobs[INTRA_FRAME] *= intra;
+ mod_refprobs[LAST_FRAME] *= last;
+ mod_refprobs[GOLDEN_FRAME] *= golden * altref;
}
// Default to INTRA_FRAME (value 0)
@@ -414,32 +393,28 @@ static MV_REFERENCE_FRAME read_ref_frame(VP9D_COMP *pbi,
// Do we need to decode the Intra/Inter branch
if (mod_refprobs[0])
- ref_frame = vp9_read(bc, mod_refprobs[0]);
+ ref_frame = vp9_read(r, mod_refprobs[0]);
else
ref_frame++;
if (ref_frame) {
// Do we need to decode the Last/Gf_Arf branch
if (mod_refprobs[1])
- ref_frame += vp9_read(bc, mod_refprobs[1]);
+ ref_frame += vp9_read(r, mod_refprobs[1]);
else
ref_frame++;
if (ref_frame > 1) {
// Do we need to decode the GF/Arf branch
if (mod_refprobs[2]) {
- ref_frame += vp9_read(bc, mod_refprobs[2]);
+ ref_frame += vp9_read(r, mod_refprobs[2]);
} else {
- if (seg_ref_active) {
- ref_frame = pred_ref == GOLDEN_FRAME ||
- !vp9_check_segref(xd, segment_id, GOLDEN_FRAME)
- ? ALTREF_FRAME
- : GOLDEN_FRAME;
- } else {
- ref_frame = pred_ref == GOLDEN_FRAME
- ? ALTREF_FRAME
- : GOLDEN_FRAME;
- }
+ if (seg_ref_active)
+ ref_frame = pred_ref == GOLDEN_FRAME || !golden ? ALTREF_FRAME
+ : GOLDEN_FRAME;
+ else
+ ref_frame = pred_ref == GOLDEN_FRAME ? ALTREF_FRAME
+ : GOLDEN_FRAME;
}
}
}
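When the reference frame is coded explicitly, the three reads above walk a fixed binary tree over the four references, and any branch whose probability was zeroed by the segment mask is skipped deterministically (the ref_frame++ arms):

    // ref_frame starts at INTRA_FRAME (0):
    //   bit 0: intra (0) vs. inter          -> mod_refprobs[0]
    //   bit 1: LAST (1) vs. GOLDEN/ALTREF   -> mod_refprobs[1]
    //   bit 2: GOLDEN (2) vs. ALTREF (3)    -> mod_refprobs[2]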
@@ -456,16 +431,16 @@ static MV_REFERENCE_FRAME read_ref_frame(VP9D_COMP *pbi,
return ref_frame;
}
-static MB_PREDICTION_MODE read_sb_mv_ref(vp9_reader *bc, const vp9_prob *p) {
- return (MB_PREDICTION_MODE) treed_read(bc, vp9_sb_mv_ref_tree, p);
+static MB_PREDICTION_MODE read_sb_mv_ref(vp9_reader *r, const vp9_prob *p) {
+ return (MB_PREDICTION_MODE) treed_read(r, vp9_sb_mv_ref_tree, p);
}
-static MB_PREDICTION_MODE read_mv_ref(vp9_reader *bc, const vp9_prob *p) {
- return (MB_PREDICTION_MODE) treed_read(bc, vp9_mv_ref_tree, p);
+static MB_PREDICTION_MODE read_mv_ref(vp9_reader *r, const vp9_prob *p) {
+ return (MB_PREDICTION_MODE) treed_read(r, vp9_mv_ref_tree, p);
}
-static B_PREDICTION_MODE sub_mv_ref(vp9_reader *bc, const vp9_prob *p) {
- return (B_PREDICTION_MODE) treed_read(bc, vp9_sub_mv_ref_tree, p);
+static B_PREDICTION_MODE sub_mv_ref(vp9_reader *r, const vp9_prob *p) {
+ return (B_PREDICTION_MODE) treed_read(r, vp9_sub_mv_ref_tree, p);
}
#ifdef VPX_MODE_COUNT
@@ -486,68 +461,72 @@ static const unsigned char mbsplit_fill_offset[4][16] = {
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }
};
-static void read_switchable_interp_probs(VP9D_COMP* const pbi,
- BOOL_DECODER* const bc) {
+static void read_switchable_interp_probs(VP9D_COMP* const pbi, vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
int i, j;
- for (j = 0; j < VP9_SWITCHABLE_FILTERS + 1; ++j) {
- for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i) {
- cm->fc.switchable_interp_prob[j][i] = vp9_read_prob(bc);
- }
- }
- //printf("DECODER: %d %d\n", cm->fc.switchable_interp_prob[0],
- //cm->fc.switchable_interp_prob[1]);
+ for (j = 0; j < VP9_SWITCHABLE_FILTERS + 1; ++j)
+ for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i)
+ cm->fc.switchable_interp_prob[j][i] = vp9_read_prob(r);
+}
+
+static INLINE COMPPREDMODE_TYPE read_comp_pred_mode(vp9_reader *r) {
+ COMPPREDMODE_TYPE mode = vp9_read_bit(r);
+ if (mode)
+ mode += vp9_read_bit(r);
+ return mode;
}
-static void mb_mode_mv_init(VP9D_COMP *pbi, vp9_reader *bc) {
+static void mb_mode_mv_init(VP9D_COMP *pbi, vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
- nmv_context *const nmvc = &pbi->common.fc.nmvc;
- MACROBLOCKD *const xd = &pbi->mb;
if (cm->frame_type == KEY_FRAME) {
if (!cm->kf_ymode_probs_update)
- cm->kf_ymode_probs_index = vp9_read_literal(bc, 3);
+ cm->kf_ymode_probs_index = vp9_read_literal(r, 3);
} else {
+ nmv_context *const nmvc = &pbi->common.fc.nmvc;
+ MACROBLOCKD *const xd = &pbi->mb;
+ int i, j;
+
if (cm->mcomp_filter_type == SWITCHABLE)
- read_switchable_interp_probs(pbi, bc);
+ read_switchable_interp_probs(pbi, r);
#if CONFIG_COMP_INTERINTRA_PRED
if (cm->use_interintra) {
- if (vp9_read(bc, VP9_UPD_INTERINTRA_PROB))
- cm->fc.interintra_prob = vp9_read_prob(bc);
+ if (vp9_read(r, VP9_UPD_INTERINTRA_PROB))
+ cm->fc.interintra_prob = vp9_read_prob(r);
}
#endif
- // Decode the baseline probabilities for decoding reference frame
- cm->prob_intra_coded = vp9_read_prob(bc);
- cm->prob_last_coded = vp9_read_prob(bc);
- cm->prob_gf_coded = vp9_read_prob(bc);
+ // Baseline probabilities for decoding reference frame
+ cm->prob_intra_coded = vp9_read_prob(r);
+ cm->prob_last_coded = vp9_read_prob(r);
+ cm->prob_gf_coded = vp9_read_prob(r);
// Computes a modified set of probabilities for use when reference
// frame prediction fails.
vp9_compute_mod_refprobs(cm);
- cm->comp_pred_mode = vp9_read_bit(bc);
- if (cm->comp_pred_mode)
- cm->comp_pred_mode += vp9_read_bit(bc);
-
- if (cm->comp_pred_mode == HYBRID_PREDICTION) {
- int i;
+ cm->comp_pred_mode = read_comp_pred_mode(r);
+ if (cm->comp_pred_mode == HYBRID_PREDICTION)
for (i = 0; i < COMP_PRED_CONTEXTS; i++)
- cm->prob_comppred[i] = vp9_read_prob(bc);
- }
+ cm->prob_comppred[i] = vp9_read_prob(r);
- if (vp9_read_bit(bc)) {
- int i;
+ // VP9_YMODES
+ if (vp9_read_bit(r))
for (i = 0; i < VP9_YMODES - 1; ++i)
- cm->fc.ymode_prob[i] = vp9_read_prob(bc);
- }
+ cm->fc.ymode_prob[i] = vp9_read_prob(r);
- if (vp9_read_bit(bc)) {
- int i;
+ // VP9_I32X32_MODES
+ if (vp9_read_bit(r))
for (i = 0; i < VP9_I32X32_MODES - 1; ++i)
- cm->fc.sb_ymode_prob[i] = vp9_read_prob(bc);
+ cm->fc.sb_ymode_prob[i] = vp9_read_prob(r);
+
+ for (j = 0; j < PARTITION_PLANES; j++) {
+ if (vp9_read_bit(r)) {
+ for (i = 0; i < PARTITION_TYPES - 1; i++)
+ cm->fc.partition_prob[j][i] = vp9_read_prob(r);
+ }
}
- read_nmvprobs(bc, nmvc, xd->allow_high_precision_mv);
+ read_nmvprobs(r, nmvc, xd->allow_high_precision_mv);
}
}
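read_comp_pred_mode() factors out a two-bit unary read. Assuming the usual COMPPREDMODE_TYPE ordering (SINGLE_PREDICTION_ONLY = 0, COMP_PREDICTION_ONLY = 1, HYBRID_PREDICTION = 2), the bitstream mapping is:

    // "0"  -> SINGLE_PREDICTION_ONLY
    // "10" -> COMP_PREDICTION_ONLY
    // "11" -> HYBRID_PREDICTION  (per-MB flag read later via PRED_COMP)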
@@ -556,12 +535,12 @@ static void mb_mode_mv_init(VP9D_COMP *pbi, vp9_reader *bc) {
// value
static void read_mb_segment_id(VP9D_COMP *pbi,
int mb_row, int mb_col,
- BOOL_DECODER* const bc) {
+ vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
MODE_INFO *mi = xd->mode_info_context;
MB_MODE_INFO *mbmi = &mi->mbmi;
- int mb_index = mb_row * pbi->common.mb_cols + mb_col;
+ int mb_index = mb_row * cm->mb_cols + mb_col;
if (xd->segmentation_enabled) {
if (xd->update_mb_segmentation_map) {
@@ -572,7 +551,7 @@ static void read_mb_segment_id(VP9D_COMP *pbi,
vp9_prob pred_prob = vp9_get_pred_prob(cm, xd, PRED_SEG_ID);
// Read the prediction status flag
- unsigned char seg_pred_flag = vp9_read(bc, pred_prob);
+ unsigned char seg_pred_flag = vp9_read(r, pred_prob);
// Store the prediction flag.
vp9_set_pred_flag(xd, PRED_SEG_ID, seg_pred_flag);
@@ -583,11 +562,11 @@ static void read_mb_segment_id(VP9D_COMP *pbi,
mbmi->segment_id = vp9_get_pred_mb_segid(cm, xd, mb_index);
} else {
// Decode it explicitly
- read_mb_segid_except(cm, bc, mbmi, xd, mb_row, mb_col);
+ read_mb_segid_except(cm, r, mbmi, xd, mb_row, mb_col);
}
} else {
// Normal unpredicted coding mode
- read_mb_segid(bc, mbmi, xd);
+ read_mb_segid(r, mbmi, xd);
}
if (mbmi->sb_type) {
@@ -599,8 +578,8 @@ static void read_mb_segment_id(VP9D_COMP *pbi,
for (y = 0; y < ymbs; y++) {
for (x = 0; x < xmbs; x++) {
- cm->last_frame_seg_map[mb_index + x + y * cm->mb_cols] =
- mbmi->segment_id;
+ const int index = y * cm->mb_cols + x;
+ cm->last_frame_seg_map[mb_index + index] = mbmi->segment_id;
}
}
} else {
@@ -644,20 +623,19 @@ static INLINE void assign_and_clamp_mv(int_mv *dst, const int_mv *src,
mb_to_bottom_edge);
}
-static INLINE void process_mv(BOOL_DECODER* bc, MV *mv, MV *ref,
+static INLINE void process_mv(vp9_reader *r, MV *mv, MV *ref,
nmv_context *nmvc, nmv_context_counts *mvctx,
int usehp) {
- read_nmv(bc, mv, ref, nmvc);
- read_nmv_fp(bc, mv, ref, nmvc, usehp);
+ read_nmv(r, mv, ref, nmvc);
+ read_nmv_fp(r, mv, ref, nmvc, usehp);
vp9_increment_nmv(mv, ref, mvctx, usehp);
mv->row += ref->row;
mv->col += ref->col;
}
static INLINE INTERPOLATIONFILTERTYPE read_switchable_filter_type(
- VP9D_COMP *pbi, BOOL_DECODER* bc) {
- const int index = treed_read(bc,
- vp9_switchable_interp_tree,
+ VP9D_COMP *pbi, vp9_reader *r) {
+ const int index = treed_read(r, vp9_switchable_interp_tree,
vp9_get_pred_probs(&pbi->common, &pbi->mb,
PRED_SWITCHABLE_INTERP));
return vp9_switchable_interp[index];
@@ -666,10 +644,10 @@ static INLINE INTERPOLATIONFILTERTYPE read_switchable_filter_type(
static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
MODE_INFO *prev_mi,
int mb_row, int mb_col,
- BOOL_DECODER* const bc) {
+ vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
- nmv_context *const nmvc = &pbi->common.fc.nmvc;
- const int mis = pbi->common.mode_info_stride;
+ nmv_context *const nmvc = &cm->fc.nmvc;
+ const int mis = cm->mode_info_stride;
MACROBLOCKD *const xd = &pbi->mb;
int_mv *const mv = &mbmi->mv[0];
@@ -703,23 +681,15 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
mb_to_right_edge = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
// Read the macroblock segment id.
- read_mb_segment_id(pbi, mb_row, mb_col, bc);
+ read_mb_segment_id(pbi, mb_row, mb_col, r);
- if (pbi->common.mb_no_coeff_skip &&
- (!vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP))) {
- // Read the macroblock coeff skip flag if this feature is in use,
- // else default to 0
- mbmi->mb_skip_coeff = vp9_read(bc, vp9_get_pred_prob(cm, xd, PRED_MBSKIP));
- } else {
- mbmi->mb_skip_coeff = vp9_segfeature_active(xd, mbmi->segment_id,
- SEG_LVL_SKIP);
- }
+ mbmi->mb_skip_coeff = vp9_segfeature_active(xd, mbmi->segment_id,
+ SEG_LVL_SKIP);
+ if (!mbmi->mb_skip_coeff)
+ mbmi->mb_skip_coeff = vp9_read(r, vp9_get_pred_prob(cm, xd, PRED_MBSKIP));
// Read the reference frame
- mbmi->ref_frame = read_ref_frame(pbi, bc, mbmi->segment_id);
-
- // if (pbi->common.current_video_frame == 1)
- // printf("ref frame: %d [%d %d]\n", mbmi->ref_frame, mb_row, mb_col);
+ mbmi->ref_frame = read_ref_frame(pbi, r, mbmi->segment_id);
// If reference frame is an Inter frame
if (mbmi->ref_frame) {
@@ -759,8 +729,8 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP)) {
mbmi->mode = ZEROMV;
} else {
- mbmi->mode = mbmi->sb_type ? read_sb_mv_ref(bc, mv_ref_p)
- : read_mv_ref(bc, mv_ref_p);
+ mbmi->mode = mbmi->sb_type ? read_sb_mv_ref(r, mv_ref_p)
+ : read_mv_ref(r, mv_ref_p);
vp9_accum_mv_refs(cm, mbmi->mode, mbmi->mb_mode_context[ref_frame]);
}
@@ -785,13 +755,13 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
if (mbmi->mode >= NEARESTMV && mbmi->mode <= SPLITMV) {
mbmi->interp_filter = cm->mcomp_filter_type == SWITCHABLE
- ? read_switchable_filter_type(pbi, bc)
+ ? read_switchable_filter_type(pbi, r)
: cm->mcomp_filter_type;
}
if (cm->comp_pred_mode == COMP_PREDICTION_ONLY ||
(cm->comp_pred_mode == HYBRID_PREDICTION &&
- vp9_read(bc, vp9_get_pred_prob(cm, xd, PRED_COMP)))) {
+ vp9_read(r, vp9_get_pred_prob(cm, xd, PRED_COMP)))) {
/* Since we have 3 reference frames, we can only have 3 unique
       * combinations of 2 different reference frames
* (A-G, G-L or A-L). In the bitstream, we use this to simply
@@ -838,17 +808,17 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
if (pbi->common.use_interintra &&
mbmi->mode >= NEARESTMV && mbmi->mode < SPLITMV &&
mbmi->second_ref_frame == NONE) {
- mbmi->second_ref_frame = (vp9_read(bc, pbi->common.fc.interintra_prob) ?
+ mbmi->second_ref_frame = (vp9_read(r, pbi->common.fc.interintra_prob) ?
INTRA_FRAME : NONE);
// printf("-- %d (%d)\n", mbmi->second_ref_frame == INTRA_FRAME,
// pbi->common.fc.interintra_prob);
pbi->common.fc.interintra_counts[
mbmi->second_ref_frame == INTRA_FRAME]++;
if (mbmi->second_ref_frame == INTRA_FRAME) {
- mbmi->interintra_mode = read_ymode(bc, pbi->common.fc.ymode_prob);
+ mbmi->interintra_mode = read_ymode(r, pbi->common.fc.ymode_prob);
pbi->common.fc.ymode_counts[mbmi->interintra_mode]++;
#if SEPARATE_INTERINTRA_UV
- mbmi->interintra_uv_mode = read_uv_mode(bc,
+ mbmi->interintra_uv_mode = read_uv_mode(r,
pbi->common.fc.uv_mode_prob[mbmi->interintra_mode]);
pbi->common.fc.uv_mode_counts[mbmi->interintra_mode]
[mbmi->interintra_uv_mode]++;
@@ -870,7 +840,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
      // Decode the index of the choice.
best_index =
- vp9_read_mv_ref_id(bc, xd->mb_mv_ref_probs[ref_frame]);
+ vp9_read_mv_ref_id(r, xd->mb_mv_ref_probs[ref_frame]);
best_mv.as_int = mbmi->ref_mvs[ref_frame][best_index].as_int;
@@ -879,7 +849,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
      // Decode the index of the choice.
best_index =
- vp9_read_mv_ref_id(bc, xd->mb_mv_ref_probs[ref_frame]);
+ vp9_read_mv_ref_id(r, xd->mb_mv_ref_probs[ref_frame]);
best_mv_second.as_int = mbmi->ref_mvs[ref_frame][best_index].as_int;
}
}
@@ -888,7 +858,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
mbmi->uv_mode = DC_PRED;
switch (mbmi->mode) {
case SPLITMV: {
- const int s = treed_read(bc, vp9_mbsplit_tree, cm->fc.mbsplit_prob);
+ const int s = treed_read(r, vp9_mbsplit_tree, cm->fc.mbsplit_prob);
const int num_p = vp9_mbsplit_count[s];
int j = 0;
@@ -911,16 +881,16 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
second_abovemv.as_int = above_block_second_mv(mi, k, mis);
}
mv_contz = vp9_mv_cont(&leftmv, &abovemv);
- blockmode = sub_mv_ref(bc, cm->fc.sub_mv_ref_prob [mv_contz]);
+ blockmode = sub_mv_ref(r, cm->fc.sub_mv_ref_prob[mv_contz]);
cm->fc.sub_mv_ref_counts[mv_contz][blockmode - LEFT4X4]++;
switch (blockmode) {
case NEW4X4:
- process_mv(bc, &blockmv.as_mv, &best_mv.as_mv, nmvc,
+ process_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc,
&cm->fc.NMVcount, xd->allow_high_precision_mv);
if (mbmi->second_ref_frame > 0)
- process_mv(bc, &secondmv.as_mv, &best_mv_second.as_mv, nmvc,
+ process_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc,
&cm->fc.NMVcount, xd->allow_high_precision_mv);
#ifdef VPX_MODE_COUNT
@@ -1029,7 +999,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
break;
case NEWMV:
- process_mv(bc, &mv->as_mv, &best_mv.as_mv, nmvc, &cm->fc.NMVcount,
+ process_mv(r, &mv->as_mv, &best_mv.as_mv, nmvc, &cm->fc.NMVcount,
xd->allow_high_precision_mv);
mbmi->need_to_clamp_mvs = check_mv_bounds(mv,
mb_to_left_edge,
@@ -1038,7 +1008,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
mb_to_bottom_edge);
if (mbmi->second_ref_frame > 0) {
- process_mv(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc,
+ process_mv(r, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc,
&cm->fc.NMVcount, xd->allow_high_precision_mv);
mbmi->need_to_clamp_secondmv = check_mv_bounds(&mbmi->mv[1],
mb_to_left_edge,
@@ -1058,23 +1028,23 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
mbmi->mv[0].as_int = 0;
if (mbmi->sb_type) {
- mbmi->mode = read_sb_ymode(bc, pbi->common.fc.sb_ymode_prob);
- pbi->common.fc.sb_ymode_counts[mbmi->mode]++;
+ mbmi->mode = read_sb_ymode(r, cm->fc.sb_ymode_prob);
+ cm->fc.sb_ymode_counts[mbmi->mode]++;
} else {
- mbmi->mode = read_ymode(bc, pbi->common.fc.ymode_prob);
- pbi->common.fc.ymode_counts[mbmi->mode]++;
+ mbmi->mode = read_ymode(r, cm->fc.ymode_prob);
+ cm->fc.ymode_counts[mbmi->mode]++;
}
// If MB mode is I4X4_PRED read the block modes
if (mbmi->mode == I4X4_PRED) {
int j = 0;
do {
- int m = read_bmode(bc, pbi->common.fc.bmode_prob);
+ int m = read_bmode(r, cm->fc.bmode_prob);
mi->bmi[j].as_mode.first = m;
#if CONFIG_NEWBINTRAMODES
if (m == B_CONTEXT_PRED) m -= CONTEXT_PRED_REPLACEMENTS;
#endif
- pbi->common.fc.bmode_counts[m]++;
+ cm->fc.bmode_counts[m]++;
} while (++j < 16);
}
@@ -1082,21 +1052,21 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
int i;
for (i = 0; i < 4; i++) {
const int ib = vp9_i8x8_block[i];
- const int mode8x8 = read_i8x8_mode(bc, pbi->common.fc.i8x8_mode_prob);
+ const int mode8x8 = read_i8x8_mode(r, cm->fc.i8x8_mode_prob);
mi->bmi[ib + 0].as_mode.first = mode8x8;
mi->bmi[ib + 1].as_mode.first = mode8x8;
mi->bmi[ib + 4].as_mode.first = mode8x8;
mi->bmi[ib + 5].as_mode.first = mode8x8;
- pbi->common.fc.i8x8_mode_counts[mode8x8]++;
+ cm->fc.i8x8_mode_counts[mode8x8]++;
}
} else {
- mbmi->uv_mode = read_uv_mode(bc, pbi->common.fc.uv_mode_prob[mbmi->mode]);
- pbi->common.fc.uv_mode_counts[mbmi->mode][mbmi->uv_mode]++;
+ mbmi->uv_mode = read_uv_mode(r, cm->fc.uv_mode_prob[mbmi->mode]);
+ cm->fc.uv_mode_counts[mbmi->mode][mbmi->uv_mode]++;
}
}
/*
- if (pbi->common.current_video_frame == 1)
+ if (cm->current_video_frame == 1)
printf("mode: %d skip: %d\n", mbmi->mode, mbmi->mb_skip_coeff);
*/
@@ -1105,12 +1075,12 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
(mbmi->ref_frame != INTRA_FRAME && !(mbmi->mode == SPLITMV &&
mbmi->partitioning == PARTITIONING_4X4)))) {
// FIXME(rbultje) code ternary symbol once all experiments are merged
- mbmi->txfm_size = vp9_read(bc, cm->prob_tx[0]);
+ mbmi->txfm_size = vp9_read(r, cm->prob_tx[0]);
if (mbmi->txfm_size != TX_4X4 && mbmi->mode != I8X8_PRED &&
mbmi->mode != SPLITMV) {
- mbmi->txfm_size += vp9_read(bc, cm->prob_tx[1]);
+ mbmi->txfm_size += vp9_read(r, cm->prob_tx[1]);
if (mbmi->sb_type >= BLOCK_SIZE_SB32X32 && mbmi->txfm_size != TX_8X8)
- mbmi->txfm_size += vp9_read(bc, cm->prob_tx[2]);
+ mbmi->txfm_size += vp9_read(r, cm->prob_tx[2]);
}
} else if (mbmi->sb_type >= BLOCK_SIZE_SB32X32 &&
cm->txfm_mode >= ALLOW_32X32) {
@@ -1129,18 +1099,16 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
}
}
-void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, BOOL_DECODER* const bc) {
+void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, vp9_reader *r) {
VP9_COMMON *cm = &pbi->common;
+ int k;
+ // TODO(jkoleszar): does this clear more than MBSKIP_CONTEXTS? Maybe remove.
vpx_memset(cm->mbskip_pred_probs, 0, sizeof(cm->mbskip_pred_probs));
- if (pbi->common.mb_no_coeff_skip) {
- int k;
- for (k = 0; k < MBSKIP_CONTEXTS; ++k) {
- cm->mbskip_pred_probs[k] = vp9_read_prob(bc);
- }
- }
+ for (k = 0; k < MBSKIP_CONTEXTS; ++k)
+ cm->mbskip_pred_probs[k] = vp9_read_prob(r);
- mb_mode_mv_init(pbi, bc);
+ mb_mode_mv_init(pbi, r);
}
#if CONFIG_CODE_NONZEROCOUNT
@@ -1149,24 +1117,24 @@ static uint16_t read_nzc(VP9_COMMON *const cm,
TX_SIZE tx_size,
int ref,
int type,
- BOOL_DECODER* const bc) {
+ vp9_reader *r) {
int c, e;
uint16_t nzc;
if (!get_nzc_used(tx_size)) return 0;
if (tx_size == TX_32X32) {
- c = treed_read(bc, vp9_nzc32x32_tree,
+ c = treed_read(r, vp9_nzc32x32_tree,
cm->fc.nzc_probs_32x32[nzc_context][ref][type]);
cm->fc.nzc_counts_32x32[nzc_context][ref][type][c]++;
} else if (tx_size == TX_16X16) {
- c = treed_read(bc, vp9_nzc16x16_tree,
+ c = treed_read(r, vp9_nzc16x16_tree,
cm->fc.nzc_probs_16x16[nzc_context][ref][type]);
cm->fc.nzc_counts_16x16[nzc_context][ref][type][c]++;
} else if (tx_size == TX_8X8) {
- c = treed_read(bc, vp9_nzc8x8_tree,
+ c = treed_read(r, vp9_nzc8x8_tree,
cm->fc.nzc_probs_8x8[nzc_context][ref][type]);
cm->fc.nzc_counts_8x8[nzc_context][ref][type][c]++;
} else if (tx_size == TX_4X4) {
- c = treed_read(bc, vp9_nzc4x4_tree,
+ c = treed_read(r, vp9_nzc4x4_tree,
cm->fc.nzc_probs_4x4[nzc_context][ref][type]);
cm->fc.nzc_counts_4x4[nzc_context][ref][type][c]++;
} else {
@@ -1177,7 +1145,7 @@ static uint16_t read_nzc(VP9_COMMON *const cm,
int x = 0;
while (e--) {
int b = vp9_read(
- bc, cm->fc.nzc_pcat_probs[nzc_context][c - NZC_TOKENS_NOEXTRA][e]);
+ r, cm->fc.nzc_pcat_probs[nzc_context][c - NZC_TOKENS_NOEXTRA][e]);
x |= (b << e);
cm->fc.nzc_pcat_counts[nzc_context][c - NZC_TOKENS_NOEXTRA][e][b]++;
}
@@ -1198,7 +1166,7 @@ static void read_nzcs_sb64(VP9_COMMON *const cm,
MACROBLOCKD* xd,
int mb_row,
int mb_col,
- BOOL_DECODER* const bc) {
+ vp9_reader *r) {
MODE_INFO *m = xd->mode_info_context;
MB_MODE_INFO *const mi = &m->mbmi;
int j, nzc_context;
@@ -1216,44 +1184,44 @@ static void read_nzcs_sb64(VP9_COMMON *const cm,
case TX_32X32:
for (j = 0; j < 256; j += 64) {
nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 0, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 0, r);
}
for (j = 256; j < 384; j += 64) {
nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 1, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 1, r);
}
break;
case TX_16X16:
for (j = 0; j < 256; j += 16) {
nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, r);
}
for (j = 256; j < 384; j += 16) {
nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, r);
}
break;
case TX_8X8:
for (j = 0; j < 256; j += 4) {
nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, r);
}
for (j = 256; j < 384; j += 4) {
nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, r);
}
break;
case TX_4X4:
for (j = 0; j < 256; ++j) {
nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, r);
}
for (j = 256; j < 384; ++j) {
nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, r);
}
break;
@@ -1266,7 +1234,7 @@ static void read_nzcs_sb32(VP9_COMMON *const cm,
MACROBLOCKD* xd,
int mb_row,
int mb_col,
- BOOL_DECODER* const bc) {
+ vp9_reader *r) {
MODE_INFO *m = xd->mode_info_context;
MB_MODE_INFO *const mi = &m->mbmi;
int j, nzc_context;
@@ -1284,44 +1252,44 @@ static void read_nzcs_sb32(VP9_COMMON *const cm,
case TX_32X32:
for (j = 0; j < 64; j += 64) {
nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 0, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 0, r);
}
for (j = 64; j < 96; j += 16) {
nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, r);
}
break;
case TX_16X16:
for (j = 0; j < 64; j += 16) {
nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, r);
}
for (j = 64; j < 96; j += 16) {
nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, r);
}
break;
case TX_8X8:
for (j = 0; j < 64; j += 4) {
nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, r);
}
for (j = 64; j < 96; j += 4) {
nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, r);
}
break;
case TX_4X4:
for (j = 0; j < 64; ++j) {
nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, r);
}
for (j = 64; j < 96; ++j) {
nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, r);
}
break;
@@ -1334,7 +1302,7 @@ static void read_nzcs_mb16(VP9_COMMON *const cm,
MACROBLOCKD* xd,
int mb_row,
int mb_col,
- BOOL_DECODER* const bc) {
+ vp9_reader *r) {
MODE_INFO *m = xd->mode_info_context;
MB_MODE_INFO *const mi = &m->mbmi;
int j, nzc_context;
@@ -1352,28 +1320,28 @@ static void read_nzcs_mb16(VP9_COMMON *const cm,
case TX_16X16:
for (j = 0; j < 16; j += 16) {
nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, r);
}
for (j = 16; j < 24; j += 4) {
nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, r);
}
break;
case TX_8X8:
for (j = 0; j < 16; j += 4) {
nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, r);
}
if (mi->mode == I8X8_PRED || mi->mode == SPLITMV) {
for (j = 16; j < 24; ++j) {
nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, r);
}
} else {
for (j = 16; j < 24; j += 4) {
nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, r);
}
}
break;
@@ -1381,11 +1349,11 @@ static void read_nzcs_mb16(VP9_COMMON *const cm,
case TX_4X4:
for (j = 0; j < 16; ++j) {
nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, r);
}
for (j = 16; j < 24; ++j) {
nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
- m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, r);
}
break;
@@ -1399,27 +1367,27 @@ void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi,
MACROBLOCKD* const xd,
int mb_row,
int mb_col,
- BOOL_DECODER* const bc) {
+ vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
MODE_INFO *mi = xd->mode_info_context;
MODE_INFO *prev_mi = xd->prev_mode_info_context;
MB_MODE_INFO *const mbmi = &mi->mbmi;
- if (pbi->common.frame_type == KEY_FRAME) {
- kfread_modes(pbi, mi, mb_row, mb_col, bc);
+ if (cm->frame_type == KEY_FRAME) {
+ kfread_modes(pbi, mi, mb_row, mb_col, r);
} else {
- read_mb_modes_mv(pbi, mi, &mi->mbmi, prev_mi, mb_row, mb_col, bc);
+ read_mb_modes_mv(pbi, mi, &mi->mbmi, prev_mi, mb_row, mb_col, r);
set_scale_factors(xd,
mi->mbmi.ref_frame - 1, mi->mbmi.second_ref_frame - 1,
- pbi->common.active_ref_scale);
+ cm->active_ref_scale);
}
#if CONFIG_CODE_NONZEROCOUNT
if (mbmi->sb_type == BLOCK_SIZE_SB64X64)
- read_nzcs_sb64(cm, xd, mb_row, mb_col, bc);
+ read_nzcs_sb64(cm, xd, mb_row, mb_col, r);
else if (mbmi->sb_type == BLOCK_SIZE_SB32X32)
- read_nzcs_sb32(cm, xd, mb_row, mb_col, bc);
+ read_nzcs_sb32(cm, xd, mb_row, mb_col, r);
else
- read_nzcs_mb16(cm, xd, mb_row, mb_col, bc);
+ read_nzcs_mb16(cm, xd, mb_row, mb_col, r);
#endif // CONFIG_CODE_NONZEROCOUNT
if (mbmi->sb_type) {
diff --git a/vp9/decoder/vp9_decodemv.h b/vp9/decoder/vp9_decodemv.h
index 5cd935760..bf5e83c77 100644
--- a/vp9/decoder/vp9_decodemv.h
+++ b/vp9/decoder/vp9_decodemv.h
@@ -17,7 +17,7 @@ void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi,
MACROBLOCKD* const xd,
int mb_row,
int mb_col,
- BOOL_DECODER* const bc);
-void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, BOOL_DECODER* const bc);
+ vp9_reader *r);
+void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, vp9_reader *r);
#endif // VP9_DECODER_VP9_DECODEMV_H_
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 6376090ed..47498c1ca 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -204,26 +204,16 @@ static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd,
BOOL_DECODER* const bc) {
const TX_TYPE tx_type = get_tx_type_16x16(xd, 0);
- if (tx_type != DCT_DCT) {
- vp9_dequant_iht_add_16x16_c(tx_type, xd->plane[0].qcoeff,
- xd->block[0].dequant, xd->dst.y_buffer,
- xd->dst.y_buffer, xd->dst.y_stride,
- xd->dst.y_stride, xd->plane[0].eobs[0]);
- } else {
- vp9_dequant_idct_add_16x16(xd->plane[0].qcoeff, xd->block[0].dequant,
- xd->dst.y_buffer, xd->dst.y_buffer,
- xd->dst.y_stride, xd->dst.y_stride,
- xd->plane[0].eobs[0]);
- }
+ vp9_dequant_iht_add_16x16_c(tx_type, xd->plane[0].qcoeff,
+ xd->block[0].dequant, xd->dst.y_buffer,
+ xd->dst.y_stride, xd->plane[0].eobs[0]);
vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant,
- xd->dst.u_buffer, xd->dst.u_buffer,
- xd->dst.uv_stride, xd->dst.uv_stride,
+ xd->dst.u_buffer, xd->dst.uv_stride,
xd->plane[1].eobs[0]);
vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[20].dequant,
- xd->dst.v_buffer, xd->dst.v_buffer,
- xd->dst.uv_stride, xd->dst.uv_stride,
+ xd->dst.v_buffer, xd->dst.uv_stride,
xd->plane[2].eobs[0]);
}
@@ -248,22 +238,13 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
vp9_intra8x8_predict(xd, b, i8x8mode, dst, stride);
}
tx_type = get_tx_type_8x8(xd, ib);
- if (tx_type != DCT_DCT) {
- vp9_dequant_iht_add_8x8_c(tx_type, q, dq, dst, dst, stride, stride,
- xd->plane[0].eobs[idx]);
- } else {
- vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride,
- xd->plane[0].eobs[idx]);
- }
+ vp9_dequant_iht_add_8x8_c(tx_type, q, dq, dst, stride,
+ xd->plane[0].eobs[idx]);
}
} else {
vp9_dequant_idct_add_y_block_8x8(xd->plane[0].qcoeff,
- xd->block[0].dequant,
- xd->dst.y_buffer,
- xd->dst.y_stride,
- xd->dst.y_buffer,
- xd->dst.y_stride,
- xd);
+ xd->block[0].dequant, xd->dst.y_buffer,
+ xd->dst.y_stride, xd);
}
// chroma
@@ -278,38 +259,48 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
vp9_intra_uv4x4_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst,
b->dst_stride);
xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16),
- b->dequant, *(b->base_dst) + b->dst,
- *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride,
+ b->dequant, *(b->base_dst) + b->dst, b->dst_stride,
xd->plane[1].eobs[i]);
b = &xd->block[20 + i];
vp9_intra_uv4x4_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst,
b->dst_stride);
xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16),
- b->dequant, *(b->base_dst) + b->dst,
- *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride,
+ b->dequant, *(b->base_dst) + b->dst, b->dst_stride,
xd->plane[2].eobs[i]);
}
} else if (mode == SPLITMV) {
xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
- xd->dst.u_buffer, xd->dst.uv_stride, xd->dst.u_buffer,
- xd->dst.uv_stride, xd->plane[1].eobs);
+ xd->dst.u_buffer, xd->dst.uv_stride, xd->plane[1].eobs);
xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
- xd->dst.v_buffer, xd->dst.uv_stride, xd->dst.v_buffer,
- xd->dst.uv_stride, xd->plane[2].eobs);
+ xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs);
} else {
vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant,
- xd->dst.u_buffer, xd->dst.u_buffer,
- xd->dst.uv_stride, xd->dst.uv_stride,
+ xd->dst.u_buffer, xd->dst.uv_stride,
xd->plane[1].eobs[0]);
vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[16].dequant,
- xd->dst.v_buffer, xd->dst.v_buffer,
- xd->dst.uv_stride, xd->dst.uv_stride,
+ xd->dst.v_buffer, xd->dst.uv_stride,
xd->plane[2].eobs[0]);
}
}
+static INLINE void dequant_add_y(MACROBLOCKD *xd, TX_TYPE tx_type, int idx) {
+ BLOCKD *const b = &xd->block[idx];
+ struct mb_plane *const y = &xd->plane[0];
+ if (tx_type != DCT_DCT) {
+ vp9_dequant_iht_add_c(tx_type,
+ BLOCK_OFFSET(y->qcoeff, idx, 16),
+ b->dequant, *(b->base_dst) + b->dst,
+ b->dst_stride, y->eobs[idx]);
+ } else {
+ xd->itxm_add(BLOCK_OFFSET(y->qcoeff, idx, 16),
+ b->dequant, *(b->base_dst) + b->dst,
+ b->dst_stride, y->eobs[idx]);
+ }
+}
+
static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
BOOL_DECODER* const bc) {
TX_TYPE tx_type;
@@ -325,35 +316,20 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
vp9_intra8x8_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst,
b->dst_stride);
for (j = 0; j < 4; j++) {
- b = &xd->block[ib + iblock[j]];
tx_type = get_tx_type_4x4(xd, ib + iblock[j]);
- if (tx_type != DCT_DCT) {
- vp9_dequant_iht_add_c(tx_type,
- BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16),
- b->dequant, *(b->base_dst) + b->dst,
- *(b->base_dst) + b->dst, b->dst_stride,
- b->dst_stride,
- xd->plane[0].eobs[ib + iblock[j]]);
- } else {
- xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16),
- b->dequant, *(b->base_dst) + b->dst,
- *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride,
- xd->plane[0].eobs[ib + iblock[j]]);
- }
+ dequant_add_y(xd, tx_type, ib + iblock[j]);
}
b = &xd->block[16 + i];
vp9_intra_uv4x4_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst,
b->dst_stride);
xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16),
- b->dequant, *(b->base_dst) + b->dst,
- *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride,
+ b->dequant, *(b->base_dst) + b->dst, b->dst_stride,
xd->plane[1].eobs[i]);
b = &xd->block[20 + i];
vp9_intra_uv4x4_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst,
b->dst_stride);
xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16),
- b->dequant, *(b->base_dst) + b->dst,
- *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride,
+ b->dequant, *(b->base_dst) + b->dst, b->dst_stride,
xd->plane[2].eobs[i]);
}
} else if (mode == I4X4_PRED) {
@@ -369,18 +345,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
vp9_intra4x4_predict(xd, b, b_mode, *(b->base_dst) + b->dst,
b->dst_stride);
tx_type = get_tx_type_4x4(xd, i);
- if (tx_type != DCT_DCT) {
- vp9_dequant_iht_add_c(tx_type,
- BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
- b->dequant, *(b->base_dst) + b->dst,
- *(b->base_dst) + b->dst, b->dst_stride,
- b->dst_stride, xd->plane[0].eobs[i]);
- } else {
- xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
- b->dequant, *(b->base_dst) + b->dst,
- *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride,
- xd->plane[0].eobs[i]);
- }
+ dequant_add_y(xd, tx_type, i);
}
#if CONFIG_NEWBINTRAMODES
if (!xd->mode_info_context->mbmi.mb_skip_coeff)
@@ -388,47 +353,28 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
#endif
vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_MB16X16);
xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
- xd->dst.u_buffer, xd->dst.uv_stride, xd->dst.u_buffer,
- xd->dst.uv_stride, xd->plane[1].eobs);
+ xd->dst.u_buffer, xd->dst.uv_stride, xd->plane[1].eobs);
xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
- xd->dst.v_buffer, xd->dst.uv_stride, xd->dst.v_buffer,
- xd->dst.uv_stride, xd->plane[2].eobs);
+ xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs);
} else if (mode == SPLITMV || get_tx_type_4x4(xd, 0) == DCT_DCT) {
xd->itxm_add_y_block(xd->plane[0].qcoeff,
xd->block[0].dequant,
- xd->dst.y_buffer, xd->dst.y_stride,
- xd->dst.y_buffer,
- xd->dst.y_stride,
- xd);
+ xd->dst.y_buffer, xd->dst.y_stride, xd);
xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
- xd->dst.u_buffer, xd->dst.uv_stride, xd->dst.u_buffer,
- xd->dst.uv_stride, xd->plane[1].eobs);
+ xd->dst.u_buffer, xd->dst.uv_stride, xd->plane[1].eobs);
xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
- xd->dst.v_buffer, xd->dst.uv_stride, xd->dst.v_buffer,
- xd->dst.uv_stride, xd->plane[2].eobs);
+ xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs);
} else {
for (i = 0; i < 16; i++) {
- BLOCKD *b = &xd->block[i];
tx_type = get_tx_type_4x4(xd, i);
- if (tx_type != DCT_DCT) {
- vp9_dequant_iht_add_c(tx_type,
- BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
- b->dequant, *(b->base_dst) + b->dst,
- *(b->base_dst) + b->dst, b->dst_stride,
- b->dst_stride, xd->plane[0].eobs[i]);
- } else {
- xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
- b->dequant, *(b->base_dst) + b->dst,
- *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride,
- xd->plane[0].eobs[i]);
- }
+ dequant_add_y(xd, tx_type, i);
}
xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
- xd->dst.u_buffer, xd->dst.uv_stride, xd->dst.u_buffer,
- xd->dst.uv_stride, xd->plane[1].eobs);
+ xd->dst.u_buffer, xd->dst.uv_stride,
+ xd->plane[1].eobs);
xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
- xd->dst.v_buffer, xd->dst.uv_stride, xd->dst.v_buffer,
- xd->dst.uv_stride, xd->plane[2].eobs);
+ xd->dst.v_buffer, xd->dst.uv_stride,
+ xd->plane[2].eobs);
}
}
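
dequant_add_y() folds the tx_type branch that was previously copy-pasted at
three call sites in decode_4x4(). The pattern it centralizes, as a standalone
sketch (iht_add/idct_add are illustrative stubs, not the libvpx entry points):

    typedef enum { DCT_DCT, ADST_DCT, DCT_ADST, ADST_ADST } TX_TYPE;

    extern void iht_add(TX_TYPE t, const short *q, unsigned char *dst, int stride);
    extern void idct_add(const short *q, unsigned char *dst, int stride);

    static void dequant_add(TX_TYPE tx_type, const short *q,
                            unsigned char *dst, int stride) {
      if (tx_type != DCT_DCT)
        iht_add(tx_type, q, dst, stride);   /* hybrid transform path */
      else
        idct_add(q, dst, stride);           /* plain DCT; may be SIMD via itxm_add */
    }
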
@@ -444,9 +390,7 @@ static INLINE void decode_sby_32x32(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
const int y_offset = (y_idx * 32) * mb->dst.y_stride + (x_idx * 32);
vp9_dequant_idct_add_32x32(BLOCK_OFFSET(mb->plane[0].qcoeff, n, 1024),
mb->block[0].dequant ,
- mb->dst.y_buffer + y_offset,
- mb->dst.y_buffer + y_offset,
- mb->dst.y_stride, mb->dst.y_stride,
+ mb->dst.y_buffer + y_offset, mb->dst.y_stride,
mb->plane[0].eobs[n * 64]);
}
}
@@ -463,15 +407,11 @@ static INLINE void decode_sbuv_32x32(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
vp9_dequant_idct_add_32x32(BLOCK_OFFSET(mb->plane[1].qcoeff, n, 1024),
mb->block[16].dequant,
mb->dst.u_buffer + uv_offset,
- mb->dst.u_buffer + uv_offset,
- mb->dst.uv_stride, mb->dst.uv_stride,
- mb->plane[1].eobs[n * 64]);
+ mb->dst.uv_stride, mb->plane[1].eobs[n * 64]);
vp9_dequant_idct_add_32x32(BLOCK_OFFSET(mb->plane[2].qcoeff, n, 1024),
mb->block[20].dequant,
mb->dst.v_buffer + uv_offset,
- mb->dst.v_buffer + uv_offset,
- mb->dst.uv_stride, mb->dst.uv_stride,
- mb->plane[2].eobs[n * 64]);
+ mb->dst.uv_stride, mb->plane[2].eobs[n * 64]);
}
}
@@ -487,22 +427,12 @@ static INLINE void decode_sby_16x16(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
const int y_offset = (y_idx * 16) * mb->dst.y_stride + (x_idx * 16);
const TX_TYPE tx_type = get_tx_type_16x16(mb,
(y_idx * (4 * bw) + x_idx) * 4);
- if (tx_type == DCT_DCT) {
- vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256),
- mb->block[0].dequant ,
- mb->dst.y_buffer + y_offset,
- mb->dst.y_buffer + y_offset,
- mb->dst.y_stride, mb->dst.y_stride,
- mb->plane[0].eobs[n * 16]);
- } else {
- vp9_dequant_iht_add_16x16_c(tx_type,
- BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256),
- mb->block[0].dequant,
- mb->dst.y_buffer + y_offset,
- mb->dst.y_buffer + y_offset,
- mb->dst.y_stride, mb->dst.y_stride,
- mb->plane[0].eobs[n * 16]);
- }
+ vp9_dequant_iht_add_16x16_c(tx_type,
+ BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256),
+ mb->block[0].dequant,
+ mb->dst.y_buffer + y_offset,
+ mb->dst.y_stride,
+ mb->plane[0].eobs[n * 16]);
}
}
@@ -520,15 +450,11 @@ static INLINE void decode_sbuv_16x16(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
const int uv_offset = (y_idx * 16) * mb->dst.uv_stride + (x_idx * 16);
vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[1].qcoeff, n, 256),
mb->block[16].dequant,
- mb->dst.u_buffer + uv_offset,
- mb->dst.u_buffer + uv_offset,
- mb->dst.uv_stride, mb->dst.uv_stride,
+ mb->dst.u_buffer + uv_offset, mb->dst.uv_stride,
mb->plane[1].eobs[n * 16]);
vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[2].qcoeff, n, 256),
mb->block[20].dequant,
- mb->dst.v_buffer + uv_offset,
- mb->dst.v_buffer + uv_offset,
- mb->dst.uv_stride, mb->dst.uv_stride,
+ mb->dst.v_buffer + uv_offset, mb->dst.uv_stride,
mb->plane[2].eobs[n * 16]);
}
}
@@ -546,22 +472,12 @@ static INLINE void decode_sby_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int y_offset = (y_idx * 8) * xd->dst.y_stride + (x_idx * 8);
const TX_TYPE tx_type = get_tx_type_8x8(xd,
(y_idx * (2 * bw) + x_idx) * 2);
- if (tx_type == DCT_DCT) {
- vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64),
- xd->block[0].dequant,
- xd->dst.y_buffer + y_offset,
- xd->dst.y_buffer + y_offset,
- xd->dst.y_stride, xd->dst.y_stride,
- xd->plane[0].eobs[n * 4]);
- } else {
- vp9_dequant_iht_add_8x8_c(tx_type,
- BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64),
- xd->block[0].dequant,
- xd->dst.y_buffer + y_offset,
- xd->dst.y_buffer + y_offset,
- xd->dst.y_stride, xd->dst.y_stride,
- xd->plane[0].eobs[n * 4]);
- }
+
+ vp9_dequant_iht_add_8x8_c(tx_type,
+ BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64),
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_offset, xd->dst.y_stride,
+ xd->plane[0].eobs[n * 4]);
}
}
@@ -576,18 +492,14 @@ static INLINE void decode_sbuv_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int x_idx = n & (bw - 1);
const int y_idx = n >> (bwl - 1);
const int uv_offset = (y_idx * 8) * xd->dst.uv_stride + (x_idx * 8);
- vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 64),
- xd->block[16].dequant,
- xd->dst.u_buffer + uv_offset,
- xd->dst.u_buffer + uv_offset,
- xd->dst.uv_stride, xd->dst.uv_stride,
- xd->plane[1].eobs[n * 4]);
- vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 64),
- xd->block[20].dequant,
- xd->dst.v_buffer + uv_offset,
- xd->dst.v_buffer + uv_offset,
- xd->dst.uv_stride, xd->dst.uv_stride,
- xd->plane[2].eobs[n * 4]);
+ vp9_dequant_idct_add_8x8(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 64),
+ xd->block[16].dequant,
+ xd->dst.u_buffer + uv_offset, xd->dst.uv_stride,
+ xd->plane[1].eobs[n * 4]);
+ vp9_dequant_idct_add_8x8(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 64),
+ xd->block[20].dequant,
+ xd->dst.v_buffer + uv_offset, xd->dst.uv_stride,
+ xd->plane[2].eobs[n * 4]);
}
}
@@ -605,19 +517,13 @@ static INLINE void decode_sby_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
if (tx_type == DCT_DCT) {
xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16),
xd->block[0].dequant,
- xd->dst.y_buffer + y_offset,
- xd->dst.y_buffer + y_offset,
- xd->dst.y_stride, xd->dst.y_stride,
+ xd->dst.y_buffer + y_offset, xd->dst.y_stride,
xd->plane[0].eobs[n]);
} else {
vp9_dequant_iht_add_c(tx_type,
BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16),
- xd->block[0].dequant,
- xd->dst.y_buffer + y_offset,
- xd->dst.y_buffer + y_offset,
- xd->dst.y_stride,
- xd->dst.y_stride,
- xd->plane[0].eobs[n]);
+ xd->block[0].dequant, xd->dst.y_buffer + y_offset,
+ xd->dst.y_stride, xd->plane[0].eobs[n]);
}
}
}
@@ -634,14 +540,10 @@ static INLINE void decode_sbuv_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int uv_offset = (y_idx * 4) * xd->dst.uv_stride + (x_idx * 4);
xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 16),
xd->block[16].dequant,
- xd->dst.u_buffer + uv_offset,
- xd->dst.u_buffer + uv_offset,
- xd->dst.uv_stride, xd->dst.uv_stride, xd->plane[1].eobs[n]);
+ xd->dst.u_buffer + uv_offset, xd->dst.uv_stride, xd->plane[1].eobs[n]);
xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 16),
xd->block[20].dequant,
- xd->dst.v_buffer + uv_offset,
- xd->dst.v_buffer + uv_offset,
- xd->dst.uv_stride, xd->dst.uv_stride, xd->plane[2].eobs[n]);
+ xd->dst.v_buffer + uv_offset, xd->dst.uv_stride, xd->plane[2].eobs[n]);
}
}
@@ -844,10 +746,8 @@ static int get_delta_q(vp9_reader *r, int *dq) {
const int old_value = *dq;
if (vp9_read_bit(r)) { // Update bit
- int value = vp9_read_literal(r, 4);
- if (vp9_read_bit(r)) // Sign bit
- value = -value;
- *dq = value;
+ const int value = vp9_read_literal(r, 4);
+ *dq = vp9_read_and_apply_sign(r, value);
}
// Trigger a quantizer update if the delta-q value has changed
@@ -915,10 +815,83 @@ static void set_refs(VP9D_COMP *pbi, int mb_row, int mb_col) {
}
}
-/* Decode a row of Superblocks (2x2 region of MBs) */
-static void decode_sb_row(VP9D_COMP *pbi, int mb_row, vp9_reader* r) {
+static void decode_modes_b(VP9D_COMP *pbi, int mb_row, int mb_col,
+ vp9_reader *r, BLOCK_SIZE_TYPE bsize) {
+ MACROBLOCKD *const xd = &pbi->mb;
+
+ set_offsets(pbi, bsize, mb_row, mb_col);
+ vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, r);
+ set_refs(pbi, mb_row, mb_col);
+
+ // TODO(jingning): merge decode_sb_ and decode_mb_
+ if (bsize > BLOCK_SIZE_MB16X16)
+ decode_sb(pbi, xd, mb_row, mb_col, r, bsize);
+ else
+ decode_mb(pbi, xd, mb_row, mb_col, r);
+
+ xd->corrupted |= bool_error(r);
+}
+
+static void decode_modes_sb(VP9D_COMP *pbi, int mb_row, int mb_col,
+ vp9_reader* r, BLOCK_SIZE_TYPE bsize) {
VP9_COMMON *const pc = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
+ int bsl = mb_width_log2(bsize), bs = (1 << bsl) / 2;
+ int n;
+ PARTITION_TYPE partition = PARTITION_NONE;
+ BLOCK_SIZE_TYPE subsize;
+
+ if (mb_row >= pc->mb_rows || mb_col >= pc->mb_cols)
+ return;
+
+ if (bsize > BLOCK_SIZE_MB16X16) {
+ // read the partition information
+ partition = treed_read(r, vp9_partition_tree,
+ pc->fc.partition_prob[bsl - 1]);
+ pc->fc.partition_counts[bsl - 1][partition]++;
+ }
+
+ switch (partition) {
+ case PARTITION_NONE:
+ subsize = bsize;
+ decode_modes_b(pbi, mb_row, mb_col, r, subsize);
+ break;
+#if CONFIG_SBSEGMENT
+ case PARTITION_HORZ:
+ subsize = (bsize == BLOCK_SIZE_SB64X64) ? BLOCK_SIZE_SB64X32 :
+ BLOCK_SIZE_SB32X16;
+ decode_modes_b(pbi, mb_row, mb_col, r, subsize);
+ if ((mb_row + bs) < pc->mb_rows)
+ decode_modes_b(pbi, mb_row + bs, mb_col, r, subsize);
+ break;
+ case PARTITION_VERT:
+ subsize = (bsize == BLOCK_SIZE_SB64X64) ? BLOCK_SIZE_SB32X64 :
+ BLOCK_SIZE_SB16X32;
+ decode_modes_b(pbi, mb_row, mb_col, r, subsize);
+ if ((mb_col + bs) < pc->mb_cols)
+ decode_modes_b(pbi, mb_row, mb_col + bs, r, subsize);
+ break;
+#endif
+ case PARTITION_SPLIT:
+ subsize = (bsize == BLOCK_SIZE_SB64X64) ? BLOCK_SIZE_SB32X32 :
+ BLOCK_SIZE_MB16X16;
+ for (n = 0; n < 4; n++) {
+ int j = n >> 1, i = n & 0x01;
+ if (subsize == BLOCK_SIZE_SB32X32)
+ xd->sb_index = n;
+ else
+ xd->mb_index = n;
+ decode_modes_sb(pbi, mb_row + j * bs, mb_col + i * bs, r, subsize);
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+
+/* Decode a row of Superblocks (4x4 region of MBs) */
+static void decode_sb_row(VP9D_COMP *pbi, int mb_row, vp9_reader* r) {
+ VP9_COMMON *const pc = &pbi->common;
int mb_col;
// For a SB there are 2 left contexts, each pertaining to a MB row within
@@ -926,60 +899,10 @@ static void decode_sb_row(VP9D_COMP *pbi, int mb_row, vp9_reader* r) {
for (mb_col = pc->cur_tile_mb_col_start;
mb_col < pc->cur_tile_mb_col_end; mb_col += 4) {
- if (vp9_read(r, pc->prob_sb64_coded)) {
- // SB64 decoding
- set_offsets(pbi, BLOCK_SIZE_SB64X64, mb_row, mb_col);
- vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, r);
- set_refs(pbi, mb_row, mb_col);
- decode_sb(pbi, xd, mb_row, mb_col, r, BLOCK_SIZE_SB64X64);
- xd->corrupted |= bool_error(r);
- } else {
- // not SB64
- int j;
- for (j = 0; j < 4; j++) {
- const int x_idx_sb = mb_col + 2 * (j % 2);
- const int y_idx_sb = mb_row + 2 * (j / 2);
-
- if (y_idx_sb >= pc->mb_rows || x_idx_sb >= pc->mb_cols)
- continue; // MB lies outside frame, skip on to next
-
- xd->sb_index = j;
-
- if (vp9_read(r, pc->prob_sb32_coded)) {
- // SB32 decoding
- set_offsets(pbi, BLOCK_SIZE_SB32X32, y_idx_sb, x_idx_sb);
- vp9_decode_mb_mode_mv(pbi, xd, y_idx_sb, x_idx_sb, r);
- set_refs(pbi, y_idx_sb, x_idx_sb);
- decode_sb(pbi, xd, y_idx_sb, x_idx_sb, r, BLOCK_SIZE_SB32X32);
- xd->corrupted |= bool_error(r);
- } else {
- // not SB32
- // Process the 4 MBs within the SB in the order:
- // top-left, top-right, bottom-left, bottom-right
- int i;
- for (i = 0; i < 4; i++) {
- const int x_idx_mb = x_idx_sb + (i % 2);
- const int y_idx_mb = y_idx_sb + (i / 2);
-
- if (y_idx_mb >= pc->mb_rows || x_idx_mb >= pc->mb_cols)
- continue; // MB lies outside frame, skip on to next
-
- xd->mb_index = i;
-
- // MB decoding
- set_offsets(pbi, BLOCK_SIZE_MB16X16, y_idx_mb, x_idx_mb);
- vp9_decode_mb_mode_mv(pbi, xd, y_idx_mb, x_idx_mb, r);
- set_refs(pbi, y_idx_mb, x_idx_mb);
- decode_mb(pbi, xd, y_idx_mb, x_idx_mb, r);
- xd->corrupted |= bool_error(r);
- }
- }
- }
- }
+ decode_modes_sb(pbi, mb_row, mb_col, r, BLOCK_SIZE_SB64X64);
}
}
-
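
decode_modes_sb() replaces the hand-rolled SB64/SB32/MB16 ladder above with a
recursive partition walk: one tree symbol per node picks NONE or SPLIT (HORZ
and VERT exist behind CONFIG_SBSEGMENT), and nodes outside the frame return
early. A toy model of the recursion, with read_partition as a stand-in for
treed_read():

    enum { P_NONE, P_SPLIT };

    extern int read_partition(int size);     /* stand-in for treed_read() */

    /* size is the node's width in MBs: 4 (SB64), 2 (SB32), 1 (MB16). */
    static void walk(int row, int col, int size, int rows, int cols) {
      if (row >= rows || col >= cols)
        return;                              /* node lies outside the frame */
      if (size > 1 && read_partition(size) == P_SPLIT) {
        const int half = size / 2;
        /* top-left, top-right, bottom-left, bottom-right */
        walk(row,        col,        half, rows, cols);
        walk(row,        col + half, half, rows, cols);
        walk(row + half, col,        half, rows, cols);
        walk(row + half, col + half, half, rows, cols);
      } else {
        /* decode_modes_b(): modes, MVs and residual for one leaf block */
      }
    }
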
static void setup_token_decoder(VP9D_COMP *pbi,
const uint8_t *data,
vp9_reader *r) {
@@ -1232,9 +1155,8 @@ static void setup_segmentation(VP9_COMMON *pc, MACROBLOCKD *xd, vp9_reader *r) {
if (feature_enabled) {
vp9_enable_segfeature(xd, i, j);
data = vp9_decode_unsigned_max(r, vp9_seg_feature_data_max(j));
- if (vp9_is_segfeature_signed(j) && vp9_read_bit(r)) {
- data = -data;
- }
+ if (vp9_is_segfeature_signed(j))
+ data = vp9_read_and_apply_sign(r, data);
}
vp9_set_segdata(xd, i, j, data);
}
@@ -1283,19 +1205,15 @@ static void setup_loopfilter(VP9_COMMON *pc, MACROBLOCKD *xd, vp9_reader *r) {
for (i = 0; i < MAX_REF_LF_DELTAS; i++) {
if (vp9_read_bit(r)) {
- int value = vp9_read_literal(r, 6);
- if (vp9_read_bit(r))
- value = -value;
- xd->ref_lf_deltas[i] = value;
+ const int value = vp9_read_literal(r, 6);
+ xd->ref_lf_deltas[i] = vp9_read_and_apply_sign(r, value);
}
}
for (i = 0; i < MAX_MODE_LF_DELTAS; i++) {
if (vp9_read_bit(r)) {
- int value = vp9_read_literal(r, 6);
- if (vp9_read_bit(r))
- value = -value;
- xd->mode_lf_deltas[i] = value;
+ const int value = vp9_read_literal(r, 6);
+ xd->mode_lf_deltas[i] = vp9_read_and_apply_sign(r, value);
}
}
}
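
Three copies of the read-magnitude-then-sign idiom (delta-q, segment feature
data, and both loop-filter delta arrays) now go through
vp9_read_and_apply_sign, defined in the vp9_treereader.h hunk below. Its
expansion is equivalent to:

    static int read_signed(vp9_reader *r, int bits) {
      const int value = vp9_read_literal(r, bits);  /* magnitude first */
      return vp9_read_bit(r) ? -value : value;      /* then the sign bit */
    }
    /* e.g. xd->ref_lf_deltas[i] = read_signed(r, 6); */
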
@@ -1395,6 +1313,7 @@ static void update_frame_context(VP9D_COMP *pbi, vp9_reader *r) {
vp9_copy(fc->pre_i8x8_mode_prob, fc->i8x8_mode_prob);
vp9_copy(fc->pre_sub_mv_ref_prob, fc->sub_mv_ref_prob);
vp9_copy(fc->pre_mbsplit_prob, fc->mbsplit_prob);
+ vp9_copy(fc->pre_partition_prob, fc->partition_prob);
fc->pre_nmvc = fc->nmvc;
vp9_zero(fc->coef_counts_4x4);
@@ -1411,6 +1330,7 @@ static void update_frame_context(VP9D_COMP *pbi, vp9_reader *r) {
vp9_zero(fc->mbsplit_counts);
vp9_zero(fc->NMVcount);
vp9_zero(fc->mv_ref_ct);
+ vp9_zero(fc->partition_counts);
#if CONFIG_COMP_INTERINTRA_PRED
fc->pre_interintra_prob = fc->interintra_prob;
@@ -1602,8 +1522,6 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
setup_pred_probs(pc, &header_bc);
- pc->prob_sb64_coded = vp9_read_prob(&header_bc);
- pc->prob_sb32_coded = vp9_read_prob(&header_bc);
xd->lossless = vp9_read_bit(&header_bc);
pc->txfm_mode = xd->lossless ? ONLY_4X4 : read_txfm_mode(&header_bc);
if (pc->txfm_mode == TX_MODE_SELECT) {
@@ -1743,8 +1661,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
vpx_memset(xd->plane[1].qcoeff, 0, sizeof(xd->plane[1].qcoeff));
vpx_memset(xd->plane[2].qcoeff, 0, sizeof(xd->plane[2].qcoeff));
- // Read the mb_no_coeff_skip flag
- pc->mb_no_coeff_skip = vp9_read_bit(&header_bc);
+ vp9_read_bit(&header_bc); // unused
vp9_decode_mode_mvs_init(pbi, &header_bc);
@@ -1799,12 +1716,6 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
}
#endif
- // Find the end of the coded buffer
- while (residual_bc.count > CHAR_BIT &&
- residual_bc.count < VP9_BD_VALUE_SIZE) {
- residual_bc.count -= CHAR_BIT;
- residual_bc.user_buffer--;
- }
- *p_data_end = residual_bc.user_buffer;
+ *p_data_end = vp9_reader_find_end(&residual_bc);
return 0;
}
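
vp9_reader_find_end() packages the open-coded end-of-buffer search that used
to live here. Assuming it mirrors the loop it replaces, it backs the user
pointer up by whole bytes still cached in the decoder's value window:

    #include <limits.h>  /* CHAR_BIT */

    static const unsigned char *reader_find_end_sketch(BOOL_DECODER *bc) {
      while (bc->count > CHAR_BIT && bc->count < VP9_BD_VALUE_SIZE) {
        bc->count -= CHAR_BIT;  /* one buffered byte not yet consumed */
        bc->user_buffer--;
      }
      return bc->user_buffer;
    }
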
diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c
index ade216a0c..09302014e 100644
--- a/vp9/decoder/vp9_dequantize.c
+++ b/vp9/decoder/vp9_dequantize.c
@@ -16,105 +16,104 @@
#include "vp9/common/vp9_common.h"
-static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch,
- uint8_t *dest, int stride, int width, int height) {
+static void add_residual(const int16_t *diff, uint8_t *dest, int stride,
+ int width, int height) {
int r, c;
for (r = 0; r < height; r++) {
for (c = 0; c < width; c++)
- dest[c] = clip_pixel(diff[c] + pred[c]);
+ dest[c] = clip_pixel(diff[c] + dest[c]);
dest += stride;
diff += width;
- pred += pitch;
}
}
-void vp9_add_residual_4x4_c(const int16_t *diff, const uint8_t *pred, int pitch,
- uint8_t *dest, int stride) {
- add_residual(diff, pred, pitch, dest, stride, 4, 4);
+void vp9_add_residual_4x4_c(const int16_t *diff, uint8_t *dest, int stride) {
+ add_residual(diff, dest, stride, 4, 4);
}
-void vp9_add_residual_8x8_c(const int16_t *diff, const uint8_t *pred, int pitch,
- uint8_t *dest, int stride) {
- add_residual(diff, pred, pitch, dest, stride, 8, 8);
+void vp9_add_residual_8x8_c(const int16_t *diff, uint8_t *dest, int stride) {
+ add_residual(diff, dest, stride, 8, 8);
}
-void vp9_add_residual_16x16_c(const int16_t *diff, const uint8_t *pred,
- int pitch, uint8_t *dest, int stride) {
- add_residual(diff, pred, pitch, dest, stride, 16, 16);
+void vp9_add_residual_16x16_c(const int16_t *diff, uint8_t *dest, int stride) {
+ add_residual(diff, dest, stride, 16, 16);
}
-void vp9_add_residual_32x32_c(const int16_t *diff, const uint8_t *pred,
- int pitch, uint8_t *dest, int stride) {
- add_residual(diff, pred, pitch, dest, stride, 32, 32);
+void vp9_add_residual_32x32_c(const int16_t *diff, uint8_t *dest, int stride) {
+ add_residual(diff, dest, stride, 32, 32);
}
-static void add_constant_residual(const int16_t diff, const uint8_t *pred,
- int pitch, uint8_t *dest, int stride,
+static void add_constant_residual(const int16_t diff, uint8_t *dest, int stride,
int width, int height) {
int r, c;
for (r = 0; r < height; r++) {
for (c = 0; c < width; c++)
- dest[c] = clip_pixel(diff + pred[c]);
+ dest[c] = clip_pixel(diff + dest[c]);
dest += stride;
- pred += pitch;
}
}
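
Dropping the pred/pitch arguments works because the predictor is now written
into the destination buffer before the inverse transform runs, so the add
reads it back in place. The resulting call pattern, with build_predictor() as
a hypothetical stand-in for the intra/inter prediction step:

    #include <stdint.h>

    extern void build_predictor(uint8_t *dst, int stride);  /* hypothetical */
    extern void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq,
                                       uint8_t *dest, int stride, int eob);

    static void reconstruct_block(int16_t *qcoeff, const int16_t *dq,
                                  uint8_t *dst, int stride, int eob) {
      build_predictor(dst, stride);  /* prediction lands in dst first */
      vp9_dequant_idct_add_c(qcoeff, dq, dst, stride, eob);  /* add in place */
    }
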
-void vp9_add_constant_residual_8x8_c(const int16_t diff, const uint8_t *pred,
- int pitch, uint8_t *dest, int stride) {
- add_constant_residual(diff, pred, pitch, dest, stride, 8, 8);
+void vp9_add_constant_residual_8x8_c(const int16_t diff, uint8_t *dest,
+ int stride) {
+ add_constant_residual(diff, dest, stride, 8, 8);
}
-void vp9_add_constant_residual_16x16_c(const int16_t diff, const uint8_t *pred,
- int pitch, uint8_t *dest, int stride) {
- add_constant_residual(diff, pred, pitch, dest, stride, 16, 16);
+void vp9_add_constant_residual_16x16_c(const int16_t diff, uint8_t *dest,
+ int stride) {
+ add_constant_residual(diff, dest, stride, 16, 16);
}
-void vp9_add_constant_residual_32x32_c(const int16_t diff, const uint8_t *pred,
- int pitch, uint8_t *dest, int stride) {
- add_constant_residual(diff, pred, pitch, dest, stride, 32, 32);
+void vp9_add_constant_residual_32x32_c(const int16_t diff, uint8_t *dest,
+ int stride) {
+ add_constant_residual(diff, dest, stride, 32, 32);
}
void vp9_dequant_iht_add_c(TX_TYPE tx_type, int16_t *input,
const int16_t *dq,
- uint8_t *pred, uint8_t *dest,
- int pitch, int stride, int eob) {
- int i;
- DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
+ uint8_t *dest, int stride, int eob) {
- for (i = 0; i < 16; i++)
- input[i] *= dq[i];
+ if (tx_type == DCT_DCT) {
+ vp9_dequant_idct_add(input, dq, dest, stride, eob);
+ } else {
+ int i;
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
- vp9_short_iht4x4(input, output, 4, tx_type);
- vpx_memset(input, 0, 32);
- vp9_add_residual_4x4(output, pred, pitch, dest, stride);
+ for (i = 0; i < 16; i++)
+ input[i] *= dq[i];
+
+ vp9_short_iht4x4(input, output, 4, tx_type);
+ vpx_memset(input, 0, 32);
+ vp9_add_residual_4x4(output, dest, stride);
+ }
}
void vp9_dequant_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input,
- const int16_t *dq,
- uint8_t *pred, uint8_t *dest,
- int pitch, int stride, int eob) {
- DECLARE_ALIGNED_ARRAY(16, int16_t, output, 64);
-
- if (eob > 0) {
- int i;
+ const int16_t *dq, uint8_t *dest,
+ int stride, int eob) {
+ if (tx_type == DCT_DCT) {
+ vp9_dequant_idct_add_8x8(input, dq, dest, stride, eob);
+ } else {
+ if (eob > 0) {
+ int i;
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 64);
- input[0] *= dq[0];
- for (i = 1; i < 64; i++)
- input[i] *= dq[1];
+ input[0] *= dq[0];
+ for (i = 1; i < 64; i++)
+ input[i] *= dq[1];
- vp9_short_iht8x8(input, output, 8, tx_type);
- vpx_memset(input, 0, 128);
- vp9_add_residual_8x8(output, pred, pitch, dest, stride);
+ vp9_short_iht8x8(input, output, 8, tx_type);
+ vpx_memset(input, 0, 128);
+ vp9_add_residual_8x8(output, dest, stride);
+ }
}
}
-void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred,
- uint8_t *dest, int pitch, int stride, int eob) {
+void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *dest,
+ int stride, int eob) {
int i;
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
@@ -125,15 +124,15 @@ void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred,
// the idct halves ( >> 1) the pitch
vp9_short_idct4x4(input, output, 4 << 1);
vpx_memset(input, 0, 32);
- vp9_add_residual_4x4(output, pred, pitch, dest, stride);
+ vp9_add_residual_4x4(output, dest, stride);
} else {
- vp9_dc_only_idct_add(input[0]*dq[0], pred, dest, pitch, stride);
+ vp9_dc_only_idct_add(input[0]*dq[0], dest, dest, stride, stride);
((int *)input)[0] = 0;
}
}
-void vp9_dequant_dc_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred,
- uint8_t *dest, int pitch, int stride, int dc) {
+void vp9_dequant_dc_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *dest,
+ int stride, int dc) {
int i;
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
@@ -145,12 +144,11 @@ void vp9_dequant_dc_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred,
// the idct halves ( >> 1) the pitch
vp9_short_idct4x4(input, output, 4 << 1);
vpx_memset(input, 0, 32);
- vp9_add_residual_4x4(output, pred, pitch, dest, stride);
+ vp9_add_residual_4x4(output, dest, stride);
}
void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq,
- uint8_t *pred, uint8_t *dest,
- int pitch, int stride, int eob) {
+ uint8_t *dest, int stride, int eob) {
int i;
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
@@ -160,17 +158,15 @@ void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq,
vp9_short_iwalsh4x4_c(input, output, 4 << 1);
vpx_memset(input, 0, 32);
- vp9_add_residual_4x4(output, pred, pitch, dest, stride);
+ vp9_add_residual_4x4(output, dest, stride);
} else {
- vp9_dc_only_inv_walsh_add(input[0]*dq[0], pred, dest, pitch, stride);
+ vp9_dc_only_inv_walsh_add(input[0]*dq[0], dest, dest, stride, stride);
((int *)input)[0] = 0;
}
}
void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq,
- uint8_t *pred,
- uint8_t *dest,
- int pitch, int stride, int dc) {
+ uint8_t *dest, int stride, int dc) {
int i;
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
@@ -181,12 +177,11 @@ void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq,
vp9_short_iwalsh4x4_c(input, output, 4 << 1);
vpx_memset(input, 0, 32);
- vp9_add_residual_4x4(output, pred, pitch, dest, stride);
+ vp9_add_residual_4x4(output, dest, stride);
}
void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq,
- uint8_t *pred, uint8_t *dest, int pitch,
- int stride, int eob) {
+ uint8_t *dest, int stride, int eob) {
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 64);
// If dc is 1, then input[0] is the reconstructed value, do not need
@@ -208,7 +203,7 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq,
vp9_short_idct1_8x8_c(&in, &out);
input[0] = 0;
- vp9_add_constant_residual_8x8(out, pred, pitch, dest, stride);
+ vp9_add_constant_residual_8x8(out, dest, stride);
#if !CONFIG_SCATTERSCAN
} else if (eob <= 10) {
input[1] *= dq[1];
@@ -228,7 +223,7 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq,
input[16] = input[17] = 0;
input[24] = 0;
- vp9_add_residual_8x8(output, pred, pitch, dest, stride);
+ vp9_add_residual_8x8(output, dest, stride);
#endif
} else {
int i;
@@ -240,41 +235,36 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq,
// the idct halves ( >> 1) the pitch
vp9_short_idct8x8(input, output, 8 << 1);
vpx_memset(input, 0, 128);
- vp9_add_residual_8x8(output, pred, pitch, dest, stride);
+ vp9_add_residual_8x8(output, dest, stride);
}
}
}
void vp9_dequant_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input,
- const int16_t *dq, uint8_t *pred,
- uint8_t *dest, int pitch, int stride,
+ const int16_t *dq,
+ uint8_t *dest, int stride,
int eob) {
- DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256);
-
- if (eob > 0) {
- int i;
-
- input[0] *= dq[0];
-
- // recover quantizer for 4 4x4 blocks
- for (i = 1; i < 256; i++)
- input[i] *= dq[1];
-
- // inverse hybrid transform
- vp9_short_iht16x16(input, output, 16, tx_type);
+ if (tx_type == DCT_DCT) {
+ vp9_dequant_idct_add_16x16(input, dq, dest, stride, eob);
+ } else {
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256);
- // the idct halves ( >> 1) the pitch
- // vp9_short_idct16x16(input, output, 32);
+ if (eob > 0) {
+ int i;
- vpx_memset(input, 0, 512);
+ input[0] *= dq[0];
+ for (i = 1; i < 256; i++)
+ input[i] *= dq[1];
- vp9_add_residual_16x16(output, pred, pitch, dest, stride);
+ vp9_short_iht16x16(input, output, 16, tx_type);
+ vpx_memset(input, 0, 512);
+ vp9_add_residual_16x16(output, dest, stride);
+ }
}
}
void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq,
- uint8_t *pred, uint8_t *dest, int pitch,
- int stride, int eob) {
+ uint8_t *dest, int stride, int eob) {
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256);
/* The calculation can be simplified if there are not many non-zero dct
@@ -289,7 +279,7 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq,
vp9_short_idct1_16x16_c(&in, &out);
input[0] = 0;
- vp9_add_constant_residual_16x16(out, pred, pitch, dest, stride);
+ vp9_add_constant_residual_16x16(out, dest, stride);
#if !CONFIG_SCATTERSCAN
} else if (eob <= 10) {
input[0] *= dq[0];
@@ -312,7 +302,7 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq,
input[32] = input[33] = 0;
input[48] = 0;
- vp9_add_residual_16x16(output, pred, pitch, dest, stride);
+ vp9_add_residual_16x16(output, dest, stride);
#endif
} else {
int i;
@@ -326,21 +316,20 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq,
// the idct halves ( >> 1) the pitch
vp9_short_idct16x16(input, output, 16 << 1);
vpx_memset(input, 0, 512);
- vp9_add_residual_16x16(output, pred, pitch, dest, stride);
+ vp9_add_residual_16x16(output, dest, stride);
}
}
}
void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq,
- uint8_t *pred, uint8_t *dest, int pitch,
- int stride, int eob) {
+ uint8_t *dest, int stride, int eob) {
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 1024);
if (eob) {
input[0] = input[0] * dq[0] / 2;
if (eob == 1) {
vp9_short_idct1_32x32(input, output);
- vp9_add_constant_residual_32x32(output[0], pred, pitch, dest, stride);
+ vp9_add_constant_residual_32x32(output[0], dest, stride);
input[0] = 0;
#if !CONFIG_SCATTERSCAN
} else if (eob <= 10) {
@@ -362,7 +351,7 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq,
input[64] = input[65] = 0;
input[96] = 0;
- vp9_add_residual_32x32(output, pred, pitch, dest, stride);
+ vp9_add_residual_32x32(output, dest, stride);
#endif
} else {
int i;
@@ -370,7 +359,7 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq,
input[i] = input[i] * dq[1] / 2;
vp9_short_idct32x32(input, output, 64);
vpx_memset(input, 0, 2048);
- vp9_add_residual_32x32(output, pred, pitch, dest, stride);
+ vp9_add_residual_32x32(output, dest, stride);
}
}
}
diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h
index 8b53dd9cb..a635a3b17 100644
--- a/vp9/decoder/vp9_dequantize.h
+++ b/vp9/decoder/vp9_dequantize.h
@@ -16,14 +16,11 @@
void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq,
- unsigned char *pred,
- unsigned char *output,
- int pitch, int stride, int eob);
+ unsigned char *dest, int stride, int eob);
void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq,
- unsigned char *pred,
- unsigned char *output,
- int pitch, int stride, int dc);
+ unsigned char *output, int stride,
+ int dc);
void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q,
const int16_t *dq,
@@ -33,30 +30,23 @@ void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q,
const int16_t *dc);
void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq,
- unsigned char *pre, int pre_stride,
- unsigned char *dst,
- int stride,
+ unsigned char *dst, int stride,
struct macroblockd *xd);
void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq,
- unsigned char *pre,
- int pre_stride,
unsigned char *dst,
int stride,
uint16_t *eobs);
void vp9_dequant_iht_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq,
- unsigned char *pred, unsigned char *dest,
- int pitch, int stride, int eob);
+ unsigned char *dest, int stride, int eob);
void vp9_dequant_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input,
- const int16_t *dq, unsigned char *pred,
- unsigned char *dest, int pitch, int stride,
- int eob);
+ const int16_t *dq, unsigned char *dest,
+ int stride, int eob);
void vp9_dequant_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input,
- const int16_t *dq, unsigned char *pred,
- unsigned char *dest,
- int pitch, int stride, int eob);
+ const int16_t *dq, unsigned char *dest,
+ int stride, int eob);
#endif // VP9_DECODER_VP9_DEQUANTIZE_H_
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index 3df841b88..acb3710e4 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -60,11 +60,6 @@ static const vp9_prob cat6_prob[15] = {
DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]);
-static int16_t get_signed(BOOL_DECODER *br, int16_t value_to_sign) {
- return decode_bool(br, 128) ? -value_to_sign : value_to_sign;
-}
-
-
#define INCREMENT_COUNT(token) \
do { \
coef_counts[type][ref][get_coef_band(scan, txfm_size, c)] \
@@ -77,7 +72,7 @@ static int16_t get_signed(BOOL_DECODER *br, int16_t value_to_sign) {
#if CONFIG_CODE_NONZEROCOUNT
#define WRITE_COEF_CONTINUE(val, token) \
{ \
- qcoeff_ptr[scan[c]] = get_signed(br, val); \
+ qcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(br, val); \
INCREMENT_COUNT(token); \
c++; \
nzc++; \
@@ -86,7 +81,7 @@ static int16_t get_signed(BOOL_DECODER *br, int16_t value_to_sign) {
#else
#define WRITE_COEF_CONTINUE(val, token) \
{ \
- qcoeff_ptr[scan[c]] = get_signed(br, val); \
+ qcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(br, val); \
INCREMENT_COUNT(token); \
c++; \
continue; \
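
The swap is behavior-preserving: get_signed() read the sign as
decode_bool(br, 128), and vp9_read_bit() is vp9_read(r, vp9_prob_half) with
vp9_prob_half equal to 128, so both paths consume the same bit with the same
probability:

    /* old: */ val = decode_bool(br, 128) ? -val : val;
    /* new: */ val = vp9_read_and_apply_sign(br, val);  /* expands to a vp9_read_bit() */
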
@@ -125,9 +120,25 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
aidx = vp9_block2above_sb64[txfm_size][block_idx];
lidx = vp9_block2left_sb64[txfm_size][block_idx];
+#if CONFIG_SBSEGMENT
+ } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X32) {
+ aidx = vp9_block2above_sb64x32[txfm_size][block_idx];
+ lidx = vp9_block2left_sb64x32[txfm_size][block_idx];
+ } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X64) {
+ aidx = vp9_block2above_sb32x64[txfm_size][block_idx];
+ lidx = vp9_block2left_sb32x64[txfm_size][block_idx];
+#endif
} else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) {
aidx = vp9_block2above_sb[txfm_size][block_idx];
lidx = vp9_block2left_sb[txfm_size][block_idx];
+#if CONFIG_SBSEGMENT
+ } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X16) {
+ aidx = vp9_block2above_sb32x16[txfm_size][block_idx];
+ lidx = vp9_block2left_sb32x16[txfm_size][block_idx];
+ } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB16X32) {
+ aidx = vp9_block2above_sb16x32[txfm_size][block_idx];
+ lidx = vp9_block2left_sb16x32[txfm_size][block_idx];
+#endif
} else {
aidx = vp9_block2above[txfm_size][block_idx];
lidx = vp9_block2left[txfm_size][block_idx];
diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c
index 0e3560189..7dd503baa 100644
--- a/vp9/decoder/vp9_idct_blk.c
+++ b/vp9/decoder/vp9_idct_blk.c
@@ -13,101 +13,78 @@
#include "vp9/decoder/vp9_dequantize.h"
void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq,
- uint8_t *pre, int pre_stride,
- uint8_t *dst,
- int stride, MACROBLOCKD *xd) {
+ uint8_t *dst, int stride, MACROBLOCKD *xd) {
int i, j;
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
- vp9_dequant_idct_add(q, dq, pre, dst, pre_stride, stride,
- xd->plane[0].eobs[i * 4 + j]);
+ vp9_dequant_idct_add(q, dq, dst, stride, xd->plane[0].eobs[i * 4 + j]);
q += 16;
- pre += 4;
dst += 4;
}
- pre += 4 * pre_stride - 16;
dst += 4 * stride - 16;
}
}
void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq,
- uint8_t *pre, int pre_stride, uint8_t *dst,
- int stride, uint16_t *eobs) {
+ uint8_t *dst, int stride, uint16_t *eobs) {
int i, j;
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
- vp9_dequant_idct_add(q, dq, pre, dst, pre_stride, stride,
- eobs[i * 2 + j]);
+ vp9_dequant_idct_add(q, dq, dst, stride, eobs[i * 2 + j]);
q += 16;
- pre += 4;
dst += 4;
}
- pre += 4 * pre_stride - 8;
dst += 4 * stride - 8;
}
}
void vp9_dequant_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq,
- uint8_t *pre, int pre_stride,
- uint8_t *dst,
- int stride, MACROBLOCKD *xd) {
+ uint8_t *dst, int stride,
+ MACROBLOCKD *xd) {
uint8_t *origdest = dst;
- uint8_t *origpred = pre;
- vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, pre_stride, stride,
- xd->plane[0].eobs[0]);
- vp9_dequant_idct_add_8x8_c(&q[64], dq, origpred + 8,
- origdest + 8, pre_stride, stride,
+ vp9_dequant_idct_add_8x8_c(q, dq, dst, stride, xd->plane[0].eobs[0]);
+ vp9_dequant_idct_add_8x8_c(&q[64], dq, origdest + 8, stride,
xd->plane[0].eobs[4]);
- vp9_dequant_idct_add_8x8_c(&q[128], dq, origpred + 8 * pre_stride,
- origdest + 8 * stride, pre_stride, stride,
+ vp9_dequant_idct_add_8x8_c(&q[128], dq, origdest + 8 * stride, stride,
xd->plane[0].eobs[8]);
- vp9_dequant_idct_add_8x8_c(&q[192], dq, origpred + 8 * pre_stride + 8,
- origdest + 8 * stride + 8, pre_stride, stride,
+ vp9_dequant_idct_add_8x8_c(&q[192], dq, origdest + 8 * stride + 8, stride,
xd->plane[0].eobs[12]);
}
void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq,
- uint8_t *pre, int pre_stride,
- uint8_t *dst,
- int stride, MACROBLOCKD *xd) {
+ uint8_t *dst, int stride,
+ MACROBLOCKD *xd) {
int i, j;
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
- vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, pre_stride, stride,
+ vp9_dequant_idct_add_lossless_c(q, dq, dst, stride,
xd->plane[0].eobs[i * 4 + j]);
q += 16;
- pre += 4;
dst += 4;
}
- pre += 4 * pre_stride - 16;
dst += 4 * stride - 16;
}
}
void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq,
- uint8_t *pre, int pre_stride,
- uint8_t *dst,
- int stride,
+ uint8_t *dst, int stride,
uint16_t *eobs) {
int i, j;
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
- vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, pre_stride, stride,
- eobs[i * 2 + j]);
+ vp9_dequant_idct_add_lossless_c(q, dq, dst, stride, eobs[i * 2 + j]);
q += 16;
- pre += 4;
dst += 4;
}
- pre += 4 * pre_stride - 8;
dst += 4 * stride - 8;
}
}
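
In the 8x8 luma path the four quadrants are now addressed off dst alone, and
the offsets follow directly from the quadrant index. A compact form of the
same walk, equivalent to the four explicit calls in
vp9_dequant_idct_add_y_block_8x8_c above (q, dq, dst, stride, xd as in that
scope):

    int i;
    for (i = 0; i < 4; i++) {
      /* 2x2 quadrant grid: dst + 0, +8, +8*stride, +8*stride+8 */
      uint8_t *quad = dst + (i >> 1) * 8 * stride + (i & 1) * 8;
      vp9_dequant_idct_add_8x8_c(&q[i * 64], dq, quad, stride,
                                 xd->plane[0].eobs[i * 4]);
    }
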
diff --git a/vp9/decoder/vp9_treereader.h b/vp9/decoder/vp9_treereader.h
index 4ec6de99d..c9832e11d 100644
--- a/vp9/decoder/vp9_treereader.h
+++ b/vp9/decoder/vp9_treereader.h
@@ -17,10 +17,8 @@
typedef BOOL_DECODER vp9_reader;
-#define vp9_read decode_bool
-#define vp9_read_literal decode_value
-#define vp9_read_bit(r) vp9_read(r, vp9_prob_half)
#define vp9_read_prob(r) ((vp9_prob)vp9_read_literal(r, 8))
+#define vp9_read_and_apply_sign(r, value) (vp9_read_bit(r) ? -(value) : (value))
// Intent of tree data structure is to make decoding trivial.
static int treed_read(vp9_reader *const r, /* !!! must return a 0 or 1 !!! */
diff --git a/vp9/decoder/x86/vp9_dequantize_x86.c b/vp9/decoder/x86/vp9_dequantize_x86.c
index acfae2a27..cbe818143 100644
--- a/vp9/decoder/x86/vp9_dequantize_x86.c
+++ b/vp9/decoder/x86/vp9_dequantize_x86.c
@@ -17,8 +17,7 @@
#if HAVE_SSE2
-void vp9_add_residual_4x4_sse2(const int16_t *diff, const uint8_t *pred,
- int pitch, uint8_t *dest, int stride) {
+void vp9_add_residual_4x4_sse2(const int16_t *diff, uint8_t *dest, int stride) {
const int width = 4;
const __m128i zero = _mm_setzero_si128();
@@ -29,10 +28,10 @@ void vp9_add_residual_4x4_sse2(const int16_t *diff, const uint8_t *pred,
const __m128i d3 = _mm_loadl_epi64((const __m128i *)(diff + 3 * width));
// Prediction data.
- __m128i p0 = _mm_cvtsi32_si128(*(const int *)(pred + 0 * pitch));
- __m128i p1 = _mm_cvtsi32_si128(*(const int *)(pred + 1 * pitch));
- __m128i p2 = _mm_cvtsi32_si128(*(const int *)(pred + 2 * pitch));
- __m128i p3 = _mm_cvtsi32_si128(*(const int *)(pred + 3 * pitch));
+ __m128i p0 = _mm_cvtsi32_si128(*(const int *)(dest + 0 * stride));
+ __m128i p1 = _mm_cvtsi32_si128(*(const int *)(dest + 1 * stride));
+ __m128i p2 = _mm_cvtsi32_si128(*(const int *)(dest + 2 * stride));
+ __m128i p3 = _mm_cvtsi32_si128(*(const int *)(dest + 3 * stride));
p0 = _mm_unpacklo_epi8(p0, zero);
p1 = _mm_unpacklo_epi8(p1, zero);
@@ -61,8 +60,7 @@ void vp9_add_residual_4x4_sse2(const int16_t *diff, const uint8_t *pred,
*(int *)dest = _mm_cvtsi128_si32(p2);
}
-void vp9_add_residual_8x8_sse2(const int16_t *diff, const uint8_t *pred,
- int pitch, uint8_t *dest, int stride) {
+void vp9_add_residual_8x8_sse2(const int16_t *diff, uint8_t *dest, int stride) {
const int width = 8;
const __m128i zero = _mm_setzero_si128();
@@ -77,14 +75,14 @@ void vp9_add_residual_8x8_sse2(const int16_t *diff, const uint8_t *pred,
const __m128i d7 = _mm_load_si128((const __m128i *)(diff + 7 * width));
// Prediction data.
- __m128i p0 = _mm_loadl_epi64((const __m128i *)(pred + 0 * pitch));
- __m128i p1 = _mm_loadl_epi64((const __m128i *)(pred + 1 * pitch));
- __m128i p2 = _mm_loadl_epi64((const __m128i *)(pred + 2 * pitch));
- __m128i p3 = _mm_loadl_epi64((const __m128i *)(pred + 3 * pitch));
- __m128i p4 = _mm_loadl_epi64((const __m128i *)(pred + 4 * pitch));
- __m128i p5 = _mm_loadl_epi64((const __m128i *)(pred + 5 * pitch));
- __m128i p6 = _mm_loadl_epi64((const __m128i *)(pred + 6 * pitch));
- __m128i p7 = _mm_loadl_epi64((const __m128i *)(pred + 7 * pitch));
+ __m128i p0 = _mm_loadl_epi64((const __m128i *)(dest + 0 * stride));
+ __m128i p1 = _mm_loadl_epi64((const __m128i *)(dest + 1 * stride));
+ __m128i p2 = _mm_loadl_epi64((const __m128i *)(dest + 2 * stride));
+ __m128i p3 = _mm_loadl_epi64((const __m128i *)(dest + 3 * stride));
+ __m128i p4 = _mm_loadl_epi64((const __m128i *)(dest + 4 * stride));
+ __m128i p5 = _mm_loadl_epi64((const __m128i *)(dest + 5 * stride));
+ __m128i p6 = _mm_loadl_epi64((const __m128i *)(dest + 6 * stride));
+ __m128i p7 = _mm_loadl_epi64((const __m128i *)(dest + 7 * stride));
p0 = _mm_unpacklo_epi8(p0, zero);
p1 = _mm_unpacklo_epi8(p1, zero);
@@ -126,8 +124,8 @@ void vp9_add_residual_8x8_sse2(const int16_t *diff, const uint8_t *pred,
_mm_storel_epi64((__m128i *)(dest + 7 * stride), p6);
}
-void vp9_add_residual_16x16_sse2(const int16_t *diff, const uint8_t *pred,
- int pitch, uint8_t *dest, int stride) {
+void vp9_add_residual_16x16_sse2(const int16_t *diff, uint8_t *dest,
+ int stride) {
const int width = 16;
int i = 4;
const __m128i zero = _mm_setzero_si128();
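
All of these SSE2 routines share one idiom: widen the destination pixels to
16 bits, add the residual, and saturating-pack back to bytes, which performs
the 0..255 clip for free. A minimal self-contained 8-pixel version, assuming
diff is 16-byte aligned (as the decoder's coefficient buffers are):

    #include <emmintrin.h>

    static void add_residual8_sse2(const short *diff, unsigned char *dest) {
      const __m128i zero = _mm_setzero_si128();
      __m128i d = _mm_load_si128((const __m128i *)diff);   /* 8 x int16 residual */
      __m128i p = _mm_loadl_epi64((const __m128i *)dest);  /* 8 x uint8 pixels */
      p = _mm_unpacklo_epi8(p, zero);                      /* widen to int16 */
      p = _mm_add_epi16(p, d);                             /* add the residual */
      p = _mm_packus_epi16(p, p);                          /* saturate to 0..255 */
      _mm_storel_epi64((__m128i *)dest, p);
    }
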
@@ -147,10 +145,10 @@ void vp9_add_residual_16x16_sse2(const int16_t *diff, const uint8_t *pred,
d7 = _mm_load_si128((const __m128i *)(diff + 3 * width + 8));
// Prediction data.
- p1 = _mm_load_si128((const __m128i *)(pred + 0 * pitch));
- p3 = _mm_load_si128((const __m128i *)(pred + 1 * pitch));
- p5 = _mm_load_si128((const __m128i *)(pred + 2 * pitch));
- p7 = _mm_load_si128((const __m128i *)(pred + 3 * pitch));
+ p1 = _mm_load_si128((const __m128i *)(dest + 0 * stride));
+ p3 = _mm_load_si128((const __m128i *)(dest + 1 * stride));
+ p5 = _mm_load_si128((const __m128i *)(dest + 2 * stride));
+ p7 = _mm_load_si128((const __m128i *)(dest + 3 * stride));
p0 = _mm_unpacklo_epi8(p1, zero);
p1 = _mm_unpackhi_epi8(p1, zero);
@@ -181,13 +179,12 @@ void vp9_add_residual_16x16_sse2(const int16_t *diff, const uint8_t *pred,
_mm_store_si128((__m128i *)(dest + 3 * stride), p3);
diff += 4 * width;
- pred += 4 * pitch;
dest += 4 * stride;
} while (--i);
}
-void vp9_add_residual_32x32_sse2(const int16_t *diff, const uint8_t *pred,
- int pitch, uint8_t *dest, int stride) {
+void vp9_add_residual_32x32_sse2(const int16_t *diff, uint8_t *dest,
+ int stride) {
const int width = 32;
int i = 16;
const __m128i zero = _mm_setzero_si128();
@@ -207,10 +204,10 @@ void vp9_add_residual_32x32_sse2(const int16_t *diff, const uint8_t *pred,
d7 = _mm_load_si128((const __m128i *)(diff + 1 * width + 24));
// Prediction data.
- p1 = _mm_load_si128((const __m128i *)(pred + 0 * pitch));
- p3 = _mm_load_si128((const __m128i *)(pred + 0 * pitch + 16));
- p5 = _mm_load_si128((const __m128i *)(pred + 1 * pitch));
- p7 = _mm_load_si128((const __m128i *)(pred + 1 * pitch + 16));
+ p1 = _mm_load_si128((const __m128i *)(dest + 0 * stride));
+ p3 = _mm_load_si128((const __m128i *)(dest + 0 * stride + 16));
+ p5 = _mm_load_si128((const __m128i *)(dest + 1 * stride));
+ p7 = _mm_load_si128((const __m128i *)(dest + 1 * stride + 16));
p0 = _mm_unpacklo_epi8(p1, zero);
p1 = _mm_unpackhi_epi8(p1, zero);
@@ -241,25 +238,24 @@ void vp9_add_residual_32x32_sse2(const int16_t *diff, const uint8_t *pred,
_mm_store_si128((__m128i *)(dest + 1 * stride + 16), p3);
diff += 2 * width;
- pred += 2 * pitch;
dest += 2 * stride;
} while (--i);
}
-void vp9_add_constant_residual_8x8_sse2(const int16_t diff, const uint8_t *pred,
- int pitch, uint8_t *dest, int stride) {
+void vp9_add_constant_residual_8x8_sse2(const int16_t diff, uint8_t *dest,
+ int stride) {
uint8_t abs_diff;
__m128i d;
// Prediction data.
- __m128i p0 = _mm_loadl_epi64((const __m128i *)(pred + 0 * pitch));
- __m128i p1 = _mm_loadl_epi64((const __m128i *)(pred + 1 * pitch));
- __m128i p2 = _mm_loadl_epi64((const __m128i *)(pred + 2 * pitch));
- __m128i p3 = _mm_loadl_epi64((const __m128i *)(pred + 3 * pitch));
- __m128i p4 = _mm_loadl_epi64((const __m128i *)(pred + 4 * pitch));
- __m128i p5 = _mm_loadl_epi64((const __m128i *)(pred + 5 * pitch));
- __m128i p6 = _mm_loadl_epi64((const __m128i *)(pred + 6 * pitch));
- __m128i p7 = _mm_loadl_epi64((const __m128i *)(pred + 7 * pitch));
+ __m128i p0 = _mm_loadl_epi64((const __m128i *)(dest + 0 * stride));
+ __m128i p1 = _mm_loadl_epi64((const __m128i *)(dest + 1 * stride));
+ __m128i p2 = _mm_loadl_epi64((const __m128i *)(dest + 2 * stride));
+ __m128i p3 = _mm_loadl_epi64((const __m128i *)(dest + 3 * stride));
+ __m128i p4 = _mm_loadl_epi64((const __m128i *)(dest + 4 * stride));
+ __m128i p5 = _mm_loadl_epi64((const __m128i *)(dest + 5 * stride));
+ __m128i p6 = _mm_loadl_epi64((const __m128i *)(dest + 6 * stride));
+ __m128i p7 = _mm_loadl_epi64((const __m128i *)(dest + 7 * stride));
p0 = _mm_unpacklo_epi64(p0, p1);
p2 = _mm_unpacklo_epi64(p2, p3);
@@ -303,29 +299,28 @@ void vp9_add_constant_residual_8x8_sse2(const int16_t diff, const uint8_t *pred,
_mm_storel_epi64((__m128i *)(dest + 7 * stride), p6);
}
-void vp9_add_constant_residual_16x16_sse2(const int16_t diff,
- const uint8_t *pred, int pitch,
- uint8_t *dest, int stride) {
+void vp9_add_constant_residual_16x16_sse2(const int16_t diff, uint8_t *dest,
+ int stride) {
uint8_t abs_diff;
__m128i d;
// Prediction data.
- __m128i p0 = _mm_load_si128((const __m128i *)(pred + 0 * pitch));
- __m128i p1 = _mm_load_si128((const __m128i *)(pred + 1 * pitch));
- __m128i p2 = _mm_load_si128((const __m128i *)(pred + 2 * pitch));
- __m128i p3 = _mm_load_si128((const __m128i *)(pred + 3 * pitch));
- __m128i p4 = _mm_load_si128((const __m128i *)(pred + 4 * pitch));
- __m128i p5 = _mm_load_si128((const __m128i *)(pred + 5 * pitch));
- __m128i p6 = _mm_load_si128((const __m128i *)(pred + 6 * pitch));
- __m128i p7 = _mm_load_si128((const __m128i *)(pred + 7 * pitch));
- __m128i p8 = _mm_load_si128((const __m128i *)(pred + 8 * pitch));
- __m128i p9 = _mm_load_si128((const __m128i *)(pred + 9 * pitch));
- __m128i p10 = _mm_load_si128((const __m128i *)(pred + 10 * pitch));
- __m128i p11 = _mm_load_si128((const __m128i *)(pred + 11 * pitch));
- __m128i p12 = _mm_load_si128((const __m128i *)(pred + 12 * pitch));
- __m128i p13 = _mm_load_si128((const __m128i *)(pred + 13 * pitch));
- __m128i p14 = _mm_load_si128((const __m128i *)(pred + 14 * pitch));
- __m128i p15 = _mm_load_si128((const __m128i *)(pred + 15 * pitch));
+ __m128i p0 = _mm_load_si128((const __m128i *)(dest + 0 * stride));
+ __m128i p1 = _mm_load_si128((const __m128i *)(dest + 1 * stride));
+ __m128i p2 = _mm_load_si128((const __m128i *)(dest + 2 * stride));
+ __m128i p3 = _mm_load_si128((const __m128i *)(dest + 3 * stride));
+ __m128i p4 = _mm_load_si128((const __m128i *)(dest + 4 * stride));
+ __m128i p5 = _mm_load_si128((const __m128i *)(dest + 5 * stride));
+ __m128i p6 = _mm_load_si128((const __m128i *)(dest + 6 * stride));
+ __m128i p7 = _mm_load_si128((const __m128i *)(dest + 7 * stride));
+ __m128i p8 = _mm_load_si128((const __m128i *)(dest + 8 * stride));
+ __m128i p9 = _mm_load_si128((const __m128i *)(dest + 9 * stride));
+ __m128i p10 = _mm_load_si128((const __m128i *)(dest + 10 * stride));
+ __m128i p11 = _mm_load_si128((const __m128i *)(dest + 11 * stride));
+ __m128i p12 = _mm_load_si128((const __m128i *)(dest + 12 * stride));
+ __m128i p13 = _mm_load_si128((const __m128i *)(dest + 13 * stride));
+ __m128i p14 = _mm_load_si128((const __m128i *)(dest + 14 * stride));
+ __m128i p15 = _mm_load_si128((const __m128i *)(dest + 15 * stride));
// Clip diff value to [0, 255] range. Then, do addition or subtraction
// according to its sign.
@@ -390,9 +385,8 @@ void vp9_add_constant_residual_16x16_sse2(const int16_t diff,
_mm_store_si128((__m128i *)(dest + 15 * stride), p15);
}
-void vp9_add_constant_residual_32x32_sse2(const int16_t diff,
- const uint8_t *pred, int pitch,
- uint8_t *dest, int stride) {
+void vp9_add_constant_residual_32x32_sse2(const int16_t diff, uint8_t *dest,
+ int stride) {
uint8_t abs_diff;
__m128i d;
int i = 8;
@@ -407,14 +401,14 @@ void vp9_add_constant_residual_32x32_sse2(const int16_t diff,
do {
// Prediction data.
- __m128i p0 = _mm_load_si128((const __m128i *)(pred + 0 * pitch));
- __m128i p1 = _mm_load_si128((const __m128i *)(pred + 0 * pitch + 16));
- __m128i p2 = _mm_load_si128((const __m128i *)(pred + 1 * pitch));
- __m128i p3 = _mm_load_si128((const __m128i *)(pred + 1 * pitch + 16));
- __m128i p4 = _mm_load_si128((const __m128i *)(pred + 2 * pitch));
- __m128i p5 = _mm_load_si128((const __m128i *)(pred + 2 * pitch + 16));
- __m128i p6 = _mm_load_si128((const __m128i *)(pred + 3 * pitch));
- __m128i p7 = _mm_load_si128((const __m128i *)(pred + 3 * pitch + 16));
+ __m128i p0 = _mm_load_si128((const __m128i *)(dest + 0 * stride));
+ __m128i p1 = _mm_load_si128((const __m128i *)(dest + 0 * stride + 16));
+ __m128i p2 = _mm_load_si128((const __m128i *)(dest + 1 * stride));
+ __m128i p3 = _mm_load_si128((const __m128i *)(dest + 1 * stride + 16));
+ __m128i p4 = _mm_load_si128((const __m128i *)(dest + 2 * stride));
+ __m128i p5 = _mm_load_si128((const __m128i *)(dest + 2 * stride + 16));
+ __m128i p6 = _mm_load_si128((const __m128i *)(dest + 3 * stride));
+ __m128i p7 = _mm_load_si128((const __m128i *)(dest + 3 * stride + 16));
// Clip diff value to [0, 255] range. Then, do addition or subtraction
// according to its sign.
@@ -448,7 +442,6 @@ void vp9_add_constant_residual_32x32_sse2(const int16_t diff,
_mm_store_si128((__m128i *)(dest + 3 * stride), p6);
_mm_store_si128((__m128i *)(dest + 3 * stride + 16), p7);
- pred += 4 * pitch;
dest += 4 * stride;
} while (--i);
}
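
With the prediction argument removed, these SSE2 kernels now read the current contents of dest and write the reconstruction back in place. Each one is a widened form of the scalar loop below; a minimal sketch, assuming clip_pixel() from vp9_common.h (the helper name add_residual is hypothetical):

static void add_residual(const int16_t *diff, uint8_t *dest,
                         int stride, int width, int height) {
  int r, c;
  for (r = 0; r < height; ++r) {
    for (c = 0; c < width; ++c)
      dest[c] = clip_pixel(dest[c] + diff[c]);  // in-place read-modify-write
    diff += width;   // residual rows are packed at the block width
    dest += stride;  // frame rows are a full stride apart
  }
}

The vp9_add_constant_residual_* variants are the same idea with a single clamped diff value added to (or subtracted from) every pixel, which is why they only need saturating byte adds/subs.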
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 8da17a6e2..8a644d556 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -78,6 +78,18 @@ int count_mb_seg[4] = { 0, 0, 0, 0 };
#define SEARCH_NEWP
static int update_bits[255];
+static INLINE void write_le16(uint8_t *p, int value) {
+ p[0] = value;
+ p[1] = value >> 8;
+}
+
+static INLINE void write_le32(uint8_t *p, int value) {
+ p[0] = value;
+ p[1] = value >> 8;
+ p[2] = value >> 16;
+ p[3] = value >> 24;
+}
+
static void compute_update_table() {
int i;
for (i = 0; i < 255; i++)
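
The two helpers above make the byte order explicit: values go out lowest byte first regardless of host endianness. The matching reads on the decoder side would presumably look like this (hypothetical sketch, not part of this patch):

static INLINE int read_le16(const uint8_t *p) {
  return p[0] | (p[1] << 8);
}

static INLINE int read_le32(const uint8_t *p) {
  return p[0] | (p[1] << 8) | (p[2] << 16) | ((int)p[3] << 24);
}

They replace the open-coded byte stores for the frame dimensions and tile sizes later in this file.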
@@ -118,7 +130,7 @@ static int prob_diff_update_cost(vp9_prob newp, vp9_prob oldp) {
static void update_mode(
vp9_writer *const bc,
int n,
- vp9_token tok [/* n */],
+ const struct vp9_token tok[/* n */],
vp9_tree tree,
vp9_prob Pnew [/* n-1 */],
vp9_prob Pcur [/* n-1 */],
@@ -191,7 +203,7 @@ static void update_switchable_interp_probs(VP9_COMP *cpi,
for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i) {
if (pc->fc.switchable_interp_prob[j][i] < 1)
pc->fc.switchable_interp_prob[j][i] = 1;
- vp9_write_literal(bc, pc->fc.switchable_interp_prob[j][i], 8);
+ vp9_write_prob(bc, pc->fc.switchable_interp_prob[j][i]);
}
}
}
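
vp9_write_prob() is presumably just the 8-bit literal write under a self-documenting name, i.e. something equivalent to:

static INLINE void vp9_write_prob(vp9_writer *w, vp9_prob p) {
  vp9_write_literal(w, p, 8);  // probabilities are always 8-bit values
}

which is why every vp9_write_literal(..., 8) call on a probability in this file can be replaced mechanically.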
@@ -444,7 +456,7 @@ static void vp9_cond_prob_update(vp9_writer *bc, vp9_prob *oldp, vp9_prob upd,
savings = prob_update_savings(ct, *oldp, newp, upd);
if (savings > 0) {
vp9_write(bc, 1, upd);
- vp9_write_literal(bc, newp, 8);
+ vp9_write_prob(bc, newp);
*oldp = newp;
} else {
vp9_write(bc, 0, upd);
@@ -458,12 +470,12 @@ static void pack_mb_tokens(vp9_writer* const bc,
while (p < stop) {
const int t = p->Token;
- vp9_token *const a = vp9_coef_encodings + t;
+ const struct vp9_token *const a = vp9_coef_encodings + t;
const vp9_extra_bit_struct *const b = vp9_extra_bits + t;
int i = 0;
const unsigned char *pp = p->context_tree;
int v = a->value;
- int n = a->Len;
+ int n = a->len;
if (t == EOSB_TOKEN)
{
@@ -508,18 +520,6 @@ static void pack_mb_tokens(vp9_writer* const bc,
*tp = p;
}
-static void write_partition_size(unsigned char *cx_data, int size) {
- signed char csize;
-
- csize = size & 0xff;
- *cx_data = csize;
- csize = (size >> 8) & 0xff;
- *(cx_data + 1) = csize;
- csize = (size >> 16) & 0xff;
- *(cx_data + 2) = csize;
-
-}
-
static void write_mv_ref
(
vp9_writer *bc, MB_PREDICTION_MODE m, const vp9_prob *p
@@ -801,9 +801,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
}
}
- if (!pc->mb_no_coeff_skip) {
- skip_coeff = 0;
- } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
+ if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
skip_coeff = 1;
} else {
skip_coeff = m->mbmi.mb_skip_coeff;
@@ -996,8 +994,8 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
(rf != INTRA_FRAME && !(mode == SPLITMV &&
mi->partitioning == PARTITIONING_4X4))) &&
pc->txfm_mode == TX_MODE_SELECT &&
- !((pc->mb_no_coeff_skip && skip_coeff) ||
- (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
+ !(skip_coeff || vp9_segfeature_active(xd, segment_id,
+ SEG_LVL_SKIP))) {
TX_SIZE sz = mi->txfm_size;
// FIXME(rbultje) code ternary symbol once all experiments are merged
vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]);
@@ -1024,9 +1022,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
write_mb_segid(bc, &m->mbmi, xd);
}
- if (!c->mb_no_coeff_skip) {
- skip_coeff = 0;
- } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
+ if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
skip_coeff = 1;
} else {
skip_coeff = m->mbmi.mb_skip_coeff;
@@ -1074,8 +1070,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]);
if (ym <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT &&
- !((c->mb_no_coeff_skip && skip_coeff) ||
- (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
+ !(skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) {
TX_SIZE sz = m->mbmi.txfm_size;
// FIXME(rbultje) code ternary symbol once all experiments are merged
vp9_write(bc, sz != TX_4X4, c->prob_tx[0]);
@@ -1678,64 +1673,104 @@ static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
pack_mb_tokens(bc, tok, tok_end);
}
+static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
+ TOKENEXTRA **tok, TOKENEXTRA *tok_end,
+ int mb_row, int mb_col,
+ BLOCK_SIZE_TYPE bsize) {
+ VP9_COMMON *const cm = &cpi->common;
+ const int mis = cm->mode_info_stride;
+ int bwl, bhl;
+#if CONFIG_SBSEGMENT
+ int bw, bh;
+#endif
+ int bsl = mb_width_log2(bsize), bs = (1 << bsl) / 2;
+ int n;
+ PARTITION_TYPE partition;
+ BLOCK_SIZE_TYPE subsize;
+
+ if (mb_row >= cm->mb_rows || mb_col >= cm->mb_cols)
+ return;
+
+ bwl = mb_width_log2(m->mbmi.sb_type);
+ bhl = mb_height_log2(m->mbmi.sb_type);
+#if CONFIG_SBSEGMENT
+ bw = 1 << bwl;
+ bh = 1 << bhl;
+#endif
+
+ // determine the partition type
+ if ((bwl == bsl) && (bhl == bsl))
+ partition = PARTITION_NONE;
+#if CONFIG_SBSEGMENT
+ else if ((bwl == bsl) && (bhl < bsl))
+ partition = PARTITION_HORZ;
+ else if ((bwl < bsl) && (bhl == bsl))
+ partition = PARTITION_VERT;
+#endif
+ else if ((bwl < bsl) && (bhl < bsl))
+ partition = PARTITION_SPLIT;
+ else
+ assert(0);
+
+ if (bsize > BLOCK_SIZE_MB16X16)
+ // encode the partition information
+ write_token(bc, vp9_partition_tree, cm->fc.partition_prob[bsl - 1],
+ vp9_partition_encodings + partition);
+
+ switch (partition) {
+ case PARTITION_NONE:
+ write_modes_b(cpi, m, bc, tok, tok_end, mb_row, mb_col);
+ break;
+#if CONFIG_SBSEGMENT
+ case PARTITION_HORZ:
+ write_modes_b(cpi, m, bc, tok, tok_end, mb_row, mb_col);
+ if ((mb_row + bh) < cm->mb_rows)
+ write_modes_b(cpi, m + bh * mis, bc, tok, tok_end, mb_row + bh, mb_col);
+ break;
+ case PARTITION_VERT:
+ write_modes_b(cpi, m, bc, tok, tok_end, mb_row, mb_col);
+ if ((mb_col + bw) < cm->mb_cols)
+ write_modes_b(cpi, m + bw, bc, tok, tok_end, mb_row, mb_col + bw);
+ break;
+#endif
+ case PARTITION_SPLIT:
+ // TODO(jingning): support recursive partitioning down to 16x16 for
+ // now; need to merge in 16x8, 8x16, 8x8, and smaller partitions.
+ if (bsize == BLOCK_SIZE_SB64X64)
+ subsize = BLOCK_SIZE_SB32X32;
+ else if (bsize == BLOCK_SIZE_SB32X32)
+ subsize = BLOCK_SIZE_MB16X16;
+ else
+ assert(0);
+ for (n = 0; n < 4; n++) {
+ int j = n >> 1, i = n & 0x01;
+ write_modes_sb(cpi, m + j * bs * mis + i * bs, bc, tok, tok_end,
+ mb_row + j * bs, mb_col + i * bs, subsize);
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+
static void write_modes(VP9_COMP *cpi, vp9_writer* const bc,
TOKENEXTRA **tok, TOKENEXTRA *tok_end) {
VP9_COMMON *const c = &cpi->common;
const int mis = c->mode_info_stride;
MODE_INFO *m, *m_ptr = c->mi;
- int i, mb_row, mb_col;
+ int mb_row, mb_col;
m_ptr += c->cur_tile_mb_col_start + c->cur_tile_mb_row_start * mis;
for (mb_row = c->cur_tile_mb_row_start;
mb_row < c->cur_tile_mb_row_end; mb_row += 4, m_ptr += 4 * mis) {
m = m_ptr;
for (mb_col = c->cur_tile_mb_col_start;
- mb_col < c->cur_tile_mb_col_end; mb_col += 4, m += 4) {
- vp9_write(bc, m->mbmi.sb_type == BLOCK_SIZE_SB64X64, c->prob_sb64_coded);
- if (m->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
- write_modes_b(cpi, m, bc, tok, tok_end, mb_row, mb_col);
- } else {
- int j;
-
- for (j = 0; j < 4; j++) {
- const int x_idx_sb = (j & 1) << 1, y_idx_sb = j & 2;
- MODE_INFO *sb_m = m + y_idx_sb * mis + x_idx_sb;
-
- if (mb_col + x_idx_sb >= c->mb_cols ||
- mb_row + y_idx_sb >= c->mb_rows)
- continue;
-
- vp9_write(bc, sb_m->mbmi.sb_type == BLOCK_SIZE_SB32X32,
- c->prob_sb32_coded);
- if (sb_m->mbmi.sb_type) {
- assert(sb_m->mbmi.sb_type == BLOCK_SIZE_SB32X32);
- write_modes_b(cpi, sb_m, bc, tok, tok_end,
- mb_row + y_idx_sb, mb_col + x_idx_sb);
- } else {
- // Process the 4 MBs in the order:
- // top-left, top-right, bottom-left, bottom-right
- for (i = 0; i < 4; i++) {
- const int x_idx = x_idx_sb + (i & 1), y_idx = y_idx_sb + (i >> 1);
- MODE_INFO *mb_m = m + x_idx + y_idx * mis;
-
- if (mb_row + y_idx >= c->mb_rows ||
- mb_col + x_idx >= c->mb_cols) {
- // MB lies outside frame, move on
- continue;
- }
-
- assert(mb_m->mbmi.sb_type == BLOCK_SIZE_MB16X16);
- write_modes_b(cpi, mb_m, bc, tok, tok_end,
- mb_row + y_idx, mb_col + x_idx);
- }
- }
- }
- }
- }
+ mb_col < c->cur_tile_mb_col_end; mb_col += 4, m += 4)
+ write_modes_sb(cpi, m, bc, tok, tok_end, mb_row, mb_col,
+ BLOCK_SIZE_SB64X64);
}
}
-
/* This function is used for debugging probability trees. */
static void print_prob_tree(vp9_coeff_probs *coef_probs, int block_types) {
/* print coef probability tree */
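
For reference, the classification at the top of write_modes_sb above maps the stored sb_type against the containing block size. A worked example for bsize == BLOCK_SIZE_SB64X64 (so bsl == 2 and bs == 2 in macroblock units), assuming the usual mb_width_log2/mb_height_log2 values:

/* sb_type == SB64X64: bwl == 2, bhl == 2  ->  PARTITION_NONE
   sb_type == SB64X32: bwl == 2, bhl == 1  ->  PARTITION_HORZ (CONFIG_SBSEGMENT)
   sb_type == SB32X64: bwl == 1, bhl == 2  ->  PARTITION_VERT (CONFIG_SBSEGMENT)
   sb_type <= SB32X32: bwl < 2,  bhl < 2   ->  PARTITION_SPLIT, recursing into
                                               four 32x32 quadrants */

The partition token itself is only signalled for bsize > BLOCK_SIZE_MB16X16, since a 16x16 unit cannot be split further here.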
@@ -2358,33 +2393,19 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
extra_bytes_packed = 3;
cx_data += extra_bytes_packed;
}
- {
- int v;
-
- if (pc->width != pc->display_width || pc->height != pc->display_height) {
- v = pc->display_width;
- cx_data[0] = v;
- cx_data[1] = v >> 8;
-
- v = pc->display_height;
- cx_data[2] = v;
- cx_data[3] = v >> 8;
- cx_data += 4;
- extra_bytes_packed += 4;
- }
-
- v = pc->width;
- cx_data[0] = v;
- cx_data[1] = v >> 8;
-
- v = pc->height;
- cx_data[2] = v;
- cx_data[3] = v >> 8;
- extra_bytes_packed += 4;
+ if (pc->width != pc->display_width || pc->height != pc->display_height) {
+ write_le16(cx_data, pc->display_width);
+ write_le16(cx_data + 2, pc->display_height);
cx_data += 4;
+ extra_bytes_packed += 4;
}
+ write_le16(cx_data, pc->width);
+ write_le16(cx_data + 2, pc->height);
+ extra_bytes_packed += 4;
+ cx_data += 4;
+
vp9_start_encode(&header_bc, cx_data);
// TODO(jkoleszar): remove these two unused bits?
@@ -2412,20 +2433,20 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
const int prob = xd->mb_segment_tree_probs[i];
if (prob != 255) {
vp9_write_bit(&header_bc, 1);
- vp9_write_literal(&header_bc, prob, 8);
+ vp9_write_prob(&header_bc, prob);
} else {
vp9_write_bit(&header_bc, 0);
}
}
// Write out the chosen coding method.
- vp9_write_bit(&header_bc, (pc->temporal_update) ? 1 : 0);
+ vp9_write_bit(&header_bc, pc->temporal_update);
if (pc->temporal_update) {
for (i = 0; i < PREDICTION_PROBS; i++) {
const int prob = pc->segment_pred_probs[i];
if (prob != 255) {
vp9_write_bit(&header_bc, 1);
- vp9_write_literal(&header_bc, prob, 8);
+ vp9_write_prob(&header_bc, prob);
} else {
vp9_write_bit(&header_bc, 0);
}
@@ -2480,18 +2501,13 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
for (i = 0; i < PREDICTION_PROBS; i++) {
if (cpi->ref_pred_probs_update[i]) {
vp9_write_bit(&header_bc, 1);
- vp9_write_literal(&header_bc, pc->ref_pred_probs[i], 8);
+ vp9_write_prob(&header_bc, pc->ref_pred_probs[i]);
} else {
vp9_write_bit(&header_bc, 0);
}
}
}
- pc->prob_sb64_coded = get_binary_prob(cpi->sb64_count[0], cpi->sb64_count[1]);
- vp9_write_literal(&header_bc, pc->prob_sb64_coded, 8);
- pc->prob_sb32_coded = get_binary_prob(cpi->sb32_count[0], cpi->sb32_count[1]);
- vp9_write_literal(&header_bc, pc->prob_sb32_coded, 8);
-
vp9_write_bit(&header_bc, cpi->mb.e_mbd.lossless);
if (cpi->mb.e_mbd.lossless) {
pc->txfm_mode = ONLY_4X4;
@@ -2529,9 +2545,9 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
vp9_write_bit(&header_bc, pc->txfm_mode == TX_MODE_SELECT);
}
if (pc->txfm_mode == TX_MODE_SELECT) {
- vp9_write_literal(&header_bc, pc->prob_tx[0], 8);
- vp9_write_literal(&header_bc, pc->prob_tx[1], 8);
- vp9_write_literal(&header_bc, pc->prob_tx[2], 8);
+ vp9_write_prob(&header_bc, pc->prob_tx[0]);
+ vp9_write_prob(&header_bc, pc->prob_tx[1]);
+ vp9_write_prob(&header_bc, pc->prob_tx[2]);
}
}
@@ -2600,8 +2616,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
}
}
- // signal here is multi token partition is enabled
- // vp9_write_literal(&header_bc, pc->multi_token_partition, 2);
+ // TODO(jkoleszar): remove these unused bits
vp9_write_literal(&header_bc, 0, 2);
// Frame Q baseline quantizer index
@@ -2729,7 +2744,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
for (j = 0; j < 4; j++) {
if (new_context[i][j] != pc->fc.vp9_mode_contexts[i][j]) {
vp9_write(&header_bc, 1, 252);
- vp9_write_literal(&header_bc, new_context[i][j], 8);
+ vp9_write_prob(&header_bc, new_context[i][j]);
// Only update the persistent copy if this is the "real pack"
if (!cpi->dummy_packing) {
@@ -2759,7 +2774,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
for (j = 0; j < MAX_MV_REF_CANDIDATES - 1; ++j) {
if (new_mvref_probs[i][j] != xd->mb_mv_ref_probs[i][j]) {
vp9_write(&header_bc, 1, VP9_MVREF_UPDATE_PROB);
- vp9_write_literal(&header_bc, new_mvref_probs[i][j], 8);
+ vp9_write_prob(&header_bc, new_mvref_probs[i][j]);
// Only update the persistent copy if this is the "real pack"
if (!cpi->dummy_packing) {
@@ -2811,13 +2826,14 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
vp9_copy(cpi->common.fc.pre_sub_mv_ref_prob, cpi->common.fc.sub_mv_ref_prob);
vp9_copy(cpi->common.fc.pre_mbsplit_prob, cpi->common.fc.mbsplit_prob);
vp9_copy(cpi->common.fc.pre_i8x8_mode_prob, cpi->common.fc.i8x8_mode_prob);
+ vp9_copy(cpi->common.fc.pre_partition_prob, cpi->common.fc.partition_prob);
cpi->common.fc.pre_nmvc = cpi->common.fc.nmvc;
#if CONFIG_COMP_INTERINTRA_PRED
cpi->common.fc.pre_interintra_prob = cpi->common.fc.interintra_prob;
#endif
vp9_zero(cpi->sub_mv_ref_count);
vp9_zero(cpi->mbsplit_count);
- vp9_zero(cpi->common.fc.mv_ref_ct)
+ vp9_zero(cpi->common.fc.mv_ref_ct);
update_coef_probs(cpi, &header_bc);
#if CONFIG_CODE_NONZEROCOUNT
@@ -2828,15 +2844,12 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
active_section = 2;
#endif
- // Write out the mb_no_coeff_skip flag
- vp9_write_bit(&header_bc, pc->mb_no_coeff_skip);
- if (pc->mb_no_coeff_skip) {
- int k;
+ // TODO(jkoleszar): remove this unused bit
+ vp9_write_bit(&header_bc, 1);
- vp9_update_skip_probs(cpi);
- for (k = 0; k < MBSKIP_CONTEXTS; ++k) {
- vp9_write_literal(&header_bc, pc->mbskip_pred_probs[k], 8);
- }
+ vp9_update_skip_probs(cpi);
+ for (i = 0; i < MBSKIP_CONTEXTS; ++i) {
+ vp9_write_prob(&header_bc, pc->mbskip_pred_probs[i]);
}
if (pc->frame_type == KEY_FRAME) {
@@ -2863,9 +2876,9 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
}
#endif
- vp9_write_literal(&header_bc, pc->prob_intra_coded, 8);
- vp9_write_literal(&header_bc, pc->prob_last_coded, 8);
- vp9_write_literal(&header_bc, pc->prob_gf_coded, 8);
+ vp9_write_prob(&header_bc, pc->prob_intra_coded);
+ vp9_write_prob(&header_bc, pc->prob_last_coded);
+ vp9_write_prob(&header_bc, pc->prob_gf_coded);
{
const int comp_pred_mode = cpi->common.comp_pred_mode;
@@ -2879,13 +2892,21 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
for (i = 0; i < COMP_PRED_CONTEXTS; i++) {
pc->prob_comppred[i] = get_binary_prob(cpi->single_pred_count[i],
cpi->comp_pred_count[i]);
- vp9_write_literal(&header_bc, pc->prob_comppred[i], 8);
+ vp9_write_prob(&header_bc, pc->prob_comppred[i]);
}
}
}
}
update_mbintra_mode_probs(cpi, &header_bc);
+ for (i = 0; i < PARTITION_PLANES; i++) {
+ vp9_prob Pnew[PARTITION_TYPES - 1];
+ unsigned int bct[PARTITION_TYPES - 1][2];
+ update_mode(&header_bc, PARTITION_TYPES, vp9_partition_encodings,
+ vp9_partition_tree, Pnew, pc->fc.partition_prob[i], bct,
+ (unsigned int *)cpi->partition_count[i]);
+ }
+
vp9_write_nmv_probs(cpi, xd->allow_high_precision_mv, &header_bc);
}
@@ -2961,11 +2982,8 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
write_modes(cpi, &residual_bc, &tok[tile_col], tok_end);
vp9_stop_encode(&residual_bc);
if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) {
- /* size of this tile */
- data_ptr[total_size + 0] = residual_bc.pos;
- data_ptr[total_size + 1] = residual_bc.pos >> 8;
- data_ptr[total_size + 2] = residual_bc.pos >> 16;
- data_ptr[total_size + 3] = residual_bc.pos >> 24;
+ // size of this tile
+ write_le32(data_ptr + total_size, residual_bc.pos);
total_size += 4;
}
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 7c50756a7..f4e3c2eab 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -174,6 +174,7 @@ struct macroblock {
PICK_MODE_CONTEXT sb64x32_context[2];
#endif
PICK_MODE_CONTEXT sb64_context;
+ int partition_cost[PARTITION_PLANES][PARTITION_TYPES];
void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch);
void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch);
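
The new partition_cost table presumably gets refreshed once per frame from the frame-context partition probabilities; a hedged sketch of that initialization (the location and loop are assumptions; vp9_cost_tokens is the existing tree-cost helper and vp9_partition_tree appears elsewhere in this patch):

for (i = 0; i < PARTITION_PLANES; i++)
  vp9_cost_tokens(x->partition_cost[i], cm->fc.partition_prob[i],
                  vp9_partition_tree);

This is what lets the encoder charge PARTITION_NONE/HORZ/VERT/SPLIT decisions in bits rather than the old prob_sb32/prob_sb64 flags.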
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index 6365ed9a2..ecd3e2dd3 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -37,30 +37,68 @@ static void fdct4_1d(int16_t *input, int16_t *output) {
}
void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) {
- int16_t out[4 * 4];
- int16_t *outptr = &out[0];
- const int short_pitch = pitch >> 1;
- int i, j;
- int16_t temp_in[4], temp_out[4];
-
- // Columns
- for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = input[j * short_pitch + i] << 4;
- if (i == 0 && temp_in[0])
- temp_in[0] += 1;
- fdct4_1d(temp_in, temp_out);
- for (j = 0; j < 4; ++j)
- outptr[j * 4 + i] = temp_out[j];
+ // The 2D transform is done with two passes which are actually pretty
+ // similar. In the first one, we transform the columns and transpose
+ // the results. In the second one, we transform the rows. To achieve that,
+ // as the first pass results are transposed, we transpose the columns (that
+ // is the transposed rows) and transpose the results (so that it goes back
+ // in normal/row positions).
+ const int stride = pitch >> 1;
+ int pass;
+ // We need an intermediate buffer between passes.
+ int16_t intermediate[4 * 4];
+ int16_t *in = input;
+ int16_t *out = intermediate;
+ // Do the two transform/transpose passes
+ for (pass = 0; pass < 2; ++pass) {
+ /*canbe16*/ int input[4];
+ /*canbe16*/ int step[4];
+ /*needs32*/ int temp1, temp2;
+ int i;
+ for (i = 0; i < 4; ++i) {
+ // Load inputs.
+ if (0 == pass) {
+ input[0] = in[0 * stride] << 4;
+ input[1] = in[1 * stride] << 4;
+ input[2] = in[2 * stride] << 4;
+ input[3] = in[3 * stride] << 4;
+ if (i == 0 && input[0]) {
+ input[0] += 1;
+ }
+ } else {
+ input[0] = in[0 * 4];
+ input[1] = in[1 * 4];
+ input[2] = in[2 * 4];
+ input[3] = in[3 * 4];
+ }
+ // Transform.
+ step[0] = input[0] + input[3];
+ step[1] = input[1] + input[2];
+ step[2] = input[1] - input[2];
+ step[3] = input[0] - input[3];
+ temp1 = (step[0] + step[1]) * cospi_16_64;
+ temp2 = (step[0] - step[1]) * cospi_16_64;
+ out[0] = dct_const_round_shift(temp1);
+ out[2] = dct_const_round_shift(temp2);
+ temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
+ temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
+ out[1] = dct_const_round_shift(temp1);
+ out[3] = dct_const_round_shift(temp2);
+ // Do next column (which is a transposed row in second/horizontal pass)
+ in++;
+ out += 4;
+ }
+ // Setup in/out for next pass.
+ in = intermediate;
+ out = output;
}
- // Rows
- for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = out[j + i * 4];
- fdct4_1d(temp_in, temp_out);
- for (j = 0; j < 4; ++j)
- output[j + i * 4] = (temp_out[j] + 1) >> 2;
+ {
+ int i, j;
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ output[j + i * 4] = (output[j + i * 4] + 1) >> 2;
+ }
}
}
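
The butterfly outputs above rely on the fixed-point constants from vp9/common/vp9_idct.h, where cospi_N_64 is presumably round(16384 * cos(N * pi / 64)) (e.g. cospi_16_64 == 11585) and DCT_CONST_BITS == 14, so the rounding step reduces to:

/* sketch of dct_const_round_shift() under the usual DCT_CONST_BITS == 14 */
static INLINE int dct_const_round_shift(int input) {
  return (input + (1 << 13)) >> 14;  // round to nearest, drop 14 fraction bits
}

The << 4 on the first-pass inputs and the final (x + 1) >> 2 pass keep the intermediate precision high and then undo the scaling once both 1-D passes are done.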
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index b4ba8dc1f..6f0e8c7f4 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -386,7 +386,7 @@ static unsigned int pick_best_mv_ref(MACROBLOCK *x,
int i;
int best_index = 0;
int cost, cost2;
- int zero_seen = (mv_ref_list[0].as_int) ? FALSE : TRUE;
+ int zero_seen = (mv_ref_list[0].as_int) ? 0 : 1;
MACROBLOCKD *xd = &x->e_mbd;
int max_mv = MV_MAX;
@@ -401,7 +401,7 @@ static unsigned int pick_best_mv_ref(MACROBLOCK *x,
if (zero_seen)
break;
else
- zero_seen = TRUE;
+ zero_seen = 1;
}
// Check for cases where the reference choice would give rise to an
@@ -853,29 +853,68 @@ static void encode_sb(VP9_COMP *cpi,
int mb_row,
int mb_col,
int output_enabled,
- TOKENEXTRA **tp, int is_sb) {
+ TOKENEXTRA **tp, BLOCK_SIZE_TYPE is_sb) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
+ BLOCK_SIZE_TYPE bsize = BLOCK_SIZE_SB32X32;
- cpi->sb32_count[is_sb]++;
- if (is_sb) {
- set_offsets(cpi, mb_row, mb_col, BLOCK_SIZE_SB32X32);
+ if (is_sb == BLOCK_SIZE_SB32X32) {
+ set_offsets(cpi, mb_row, mb_col, bsize);
update_state(cpi, &x->sb32_context[xd->sb_index],
- BLOCK_SIZE_SB32X32, output_enabled);
+ bsize, output_enabled);
encode_superblock(cpi, tp,
- output_enabled, mb_row, mb_col, BLOCK_SIZE_SB32X32);
+ output_enabled, mb_row, mb_col, bsize);
if (output_enabled) {
update_stats(cpi, mb_row, mb_col);
- }
+ cpi->partition_count[partition_plane(bsize)][PARTITION_NONE]++;
- if (output_enabled) {
(*tp)->Token = EOSB_TOKEN;
(*tp)++;
}
+#if CONFIG_SBSEGMENT
+ } else if (is_sb == BLOCK_SIZE_SB16X32) {
+ int i;
+
+ if (output_enabled)
+ cpi->partition_count[partition_plane(bsize)][PARTITION_VERT]++;
+ for (i = 0; i < 2 && mb_col + i != cm->mb_cols; i++) {
+ set_offsets(cpi, mb_row, mb_col + i, BLOCK_SIZE_SB16X32);
+ update_state(cpi, &x->sb16x32_context[xd->sb_index][i],
+ BLOCK_SIZE_SB16X32, output_enabled);
+ encode_superblock(cpi, tp,
+ output_enabled, mb_row, mb_col + i, BLOCK_SIZE_SB16X32);
+ if (output_enabled) {
+ update_stats(cpi, mb_row, mb_col + i);
+
+ (*tp)->Token = EOSB_TOKEN;
+ (*tp)++;
+ }
+ }
+ } else if (is_sb == BLOCK_SIZE_SB32X16) {
+ int i;
+
+ if (output_enabled)
+ cpi->partition_count[partition_plane(bsize)][PARTITION_HORZ]++;
+ for (i = 0; i < 2 && mb_row + i != cm->mb_rows; i++) {
+ set_offsets(cpi, mb_row + i, mb_col, BLOCK_SIZE_SB32X16);
+ update_state(cpi, &x->sb32x16_context[xd->sb_index][i],
+ BLOCK_SIZE_SB32X16, output_enabled);
+ encode_superblock(cpi, tp,
+ output_enabled, mb_row + i, mb_col, BLOCK_SIZE_SB32X16);
+ if (output_enabled) {
+ update_stats(cpi, mb_row + i, mb_col);
+
+ (*tp)->Token = EOSB_TOKEN;
+ (*tp)++;
+ }
+ }
+#endif
} else {
int i;
+ if (output_enabled)
+ cpi->partition_count[partition_plane(bsize)][PARTITION_SPLIT]++;
for (i = 0; i < 4; i++) {
const int x_idx = i & 1, y_idx = i >> 1;
@@ -897,11 +936,9 @@ static void encode_sb(VP9_COMP *cpi,
output_enabled, mb_row + y_idx, mb_col + x_idx);
if (output_enabled) {
update_stats(cpi, mb_row + y_idx, mb_col + x_idx);
- }
- if (output_enabled) {
(*tp)->Token = EOSB_TOKEN;
- (*tp)++;
+ (*tp)++;
}
}
}
@@ -920,24 +957,55 @@ static void encode_sb(VP9_COMP *cpi,
static void encode_sb64(VP9_COMP *cpi,
int mb_row,
int mb_col,
- TOKENEXTRA **tp, int is_sb[4]) {
+ TOKENEXTRA **tp, BLOCK_SIZE_TYPE is_sb[4]) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
+ BLOCK_SIZE_TYPE bsize = BLOCK_SIZE_SB64X64;
- cpi->sb64_count[is_sb[0] == 2]++;
- if (is_sb[0] == 2) {
- set_offsets(cpi, mb_row, mb_col, BLOCK_SIZE_SB64X64);
- update_state(cpi, &x->sb64_context, BLOCK_SIZE_SB64X64, 1);
+ if (is_sb[0] == BLOCK_SIZE_SB64X64) {
+ set_offsets(cpi, mb_row, mb_col, bsize);
+ update_state(cpi, &x->sb64_context, bsize, 1);
encode_superblock(cpi, tp,
- 1, mb_row, mb_col, BLOCK_SIZE_SB64X64);
+ 1, mb_row, mb_col, bsize);
update_stats(cpi, mb_row, mb_col);
(*tp)->Token = EOSB_TOKEN;
(*tp)++;
- } else {
+ cpi->partition_count[partition_plane(bsize)][PARTITION_NONE]++;
+#if CONFIG_SBSEGMENT
+ } else if (is_sb[0] == BLOCK_SIZE_SB32X64) {
int i;
+ cpi->partition_count[partition_plane(bsize)][PARTITION_VERT]++;
+ for (i = 0; i < 2 && mb_col + i * 2 != cm->mb_cols; i++) {
+ set_offsets(cpi, mb_row, mb_col + i * 2, BLOCK_SIZE_SB32X64);
+ update_state(cpi, &x->sb32x64_context[i], BLOCK_SIZE_SB32X64, 1);
+ encode_superblock(cpi, tp,
+ 1, mb_row, mb_col + i * 2, BLOCK_SIZE_SB32X64);
+ update_stats(cpi, mb_row, mb_col + i * 2);
+
+ (*tp)->Token = EOSB_TOKEN;
+ (*tp)++;
+ }
+ } else if (is_sb[0] == BLOCK_SIZE_SB64X32) {
+ int i;
+
+ cpi->partition_count[partition_plane(bsize)][PARTITION_HORZ]++;
+ for (i = 0; i < 2 && mb_row + i * 2 != cm->mb_rows; i++) {
+ set_offsets(cpi, mb_row + i * 2, mb_col, BLOCK_SIZE_SB64X32);
+ update_state(cpi, &x->sb64x32_context[i], BLOCK_SIZE_SB64X32, 1);
+ encode_superblock(cpi, tp,
+ 1, mb_row + i * 2, mb_col, BLOCK_SIZE_SB64X32);
+ update_stats(cpi, mb_row + i * 2, mb_col);
+
+ (*tp)->Token = EOSB_TOKEN;
+ (*tp)++;
+ }
+#endif
+ } else {
+ int i;
+ cpi->partition_count[partition_plane(bsize)][PARTITION_SPLIT]++;
for (i = 0; i < 4; i++) {
const int x_idx = i & 1, y_idx = i >> 1;
@@ -969,9 +1037,8 @@ static void encode_sb_row(VP9_COMP *cpi,
for (mb_col = cm->cur_tile_mb_col_start;
mb_col < cm->cur_tile_mb_col_end; mb_col += 4) {
int i;
- int sb32_rate = 0, sb32_dist = 0;
- int is_sb[4];
- int sb64_rate = INT_MAX, sb64_dist;
+ BLOCK_SIZE_TYPE sb_partitioning[4];
+ int sb64_rate = 0, sb64_dist = 0;
int sb64_skip = 0;
ENTROPY_CONTEXT_PLANES l[4], a[4];
TOKENEXTRA *tp_orig = *tp;
@@ -980,8 +1047,7 @@ static void encode_sb_row(VP9_COMP *cpi,
memcpy(&l, cm->left_context, sizeof(l));
for (i = 0; i < 4; i++) {
const int x_idx = (i & 1) << 1, y_idx = i & 2;
- int mb_rate = 0, mb_dist = 0;
- int sb_rate = INT_MAX, sb_dist;
+ int sb32_rate = 0, sb32_dist = 0;
int splitmodes_used = 0;
int sb32_skip = 0;
int j;
@@ -997,6 +1063,7 @@ static void encode_sb_row(VP9_COMP *cpi,
vpx_memcpy(a2, cm->above_context + mb_col + x_idx, sizeof(a2));
/* Encode MBs in raster order within the SB */
+ sb_partitioning[i] = BLOCK_SIZE_MB16X16;
for (j = 0; j < 4; j++) {
const int x_idx_m = x_idx + (j & 1), y_idx_m = y_idx + (j >> 1);
int r, d;
@@ -1012,8 +1079,8 @@ static void encode_sb_row(VP9_COMP *cpi,
splitmodes_used += pick_mb_mode(cpi, mb_row + y_idx_m,
mb_col + x_idx_m, tp, &r, &d);
- mb_rate += r;
- mb_dist += d;
+ sb32_rate += r;
+ sb32_dist += d;
// Dummy encode, do not do the tokenization
encode_macroblock(cpi, tp, 0, mb_row + y_idx_m,
@@ -1024,72 +1091,234 @@ static void encode_sb_row(VP9_COMP *cpi,
vpx_memcpy(cm->left_context + y_idx, l2, sizeof(l2));
vpx_memcpy(cm->above_context + mb_col + x_idx, a2, sizeof(a2));
- mb_rate += vp9_cost_bit(cm->prob_sb32_coded, 0);
+ sb32_rate += x->partition_cost[partition_plane(BLOCK_SIZE_SB32X32)]
+ [PARTITION_SPLIT];
if (cpi->sf.splitmode_breakout) {
sb32_skip = splitmodes_used;
sb64_skip += splitmodes_used;
}
+#if CONFIG_SBSEGMENT
+ // check 32x16
+ if (mb_col + x_idx + 1 < cm->mb_cols) {
+ int r, d;
+
+ xd->mb_index = 0;
+ pick_sb_modes(cpi, mb_row + y_idx, mb_col + x_idx,
+ tp, &r, &d, BLOCK_SIZE_SB32X16,
+ &x->sb32x16_context[xd->sb_index][xd->mb_index]);
+ if (mb_row + y_idx + 1 < cm->mb_rows) {
+ int r2, d2;
+
+ update_state(cpi, &x->sb32x16_context[xd->sb_index][xd->mb_index],
+ BLOCK_SIZE_SB32X16, 0);
+ encode_superblock(cpi, tp,
+ 0, mb_row + y_idx, mb_col + x_idx,
+ BLOCK_SIZE_SB32X16);
+ xd->mb_index = 1;
+ pick_sb_modes(cpi, mb_row + y_idx + 1, mb_col + x_idx,
+ tp, &r2, &d2, BLOCK_SIZE_SB32X16,
+ &x->sb32x16_context[xd->sb_index][xd->mb_index]);
+ r += r2;
+ d += d2;
+ }
+
+ r += x->partition_cost[partition_plane(BLOCK_SIZE_SB32X32)]
+ [PARTITION_HORZ];
+
+ /* is this better than MB coding? */
+ if (RDCOST(x->rdmult, x->rddiv, r, d) <
+ RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) {
+ sb32_rate = r;
+ sb32_dist = d;
+ sb_partitioning[i] = BLOCK_SIZE_SB32X16;
+ }
+
+ vpx_memcpy(cm->left_context + y_idx, l2, sizeof(l2));
+ vpx_memcpy(cm->above_context + mb_col + x_idx, a2, sizeof(a2));
+ }
+
+ // check 16x32
+ if (mb_row + y_idx + 1 < cm->mb_rows) {
+ int r, d;
+
+ xd->mb_index = 0;
+ pick_sb_modes(cpi, mb_row + y_idx, mb_col + x_idx,
+ tp, &r, &d, BLOCK_SIZE_SB16X32,
+ &x->sb16x32_context[xd->sb_index][xd->mb_index]);
+ if (mb_col + x_idx + 1 < cm->mb_cols) {
+ int r2, d2;
+
+ update_state(cpi, &x->sb16x32_context[xd->sb_index][xd->mb_index],
+ BLOCK_SIZE_SB16X32, 0);
+ encode_superblock(cpi, tp,
+ 0, mb_row + y_idx, mb_col + x_idx,
+ BLOCK_SIZE_SB16X32);
+ xd->mb_index = 1;
+ pick_sb_modes(cpi, mb_row + y_idx, mb_col + x_idx + 1,
+ tp, &r2, &d2, BLOCK_SIZE_SB16X32,
+ &x->sb16x32_context[xd->sb_index][xd->mb_index]);
+ r += r2;
+ d += d2;
+ }
+
+ r += x->partition_cost[partition_plane(BLOCK_SIZE_SB32X32)]
+ [PARTITION_VERT];
+
+ /* is this better than the current best? */
+ if (RDCOST(x->rdmult, x->rddiv, r, d) <
+ RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) {
+ sb32_rate = r;
+ sb32_dist = d;
+ sb_partitioning[i] = BLOCK_SIZE_SB16X32;
+ }
+
+ vpx_memcpy(cm->left_context + y_idx, l2, sizeof(l2));
+ vpx_memcpy(cm->above_context + mb_col + x_idx, a2, sizeof(a2));
+ }
+#endif
+
if (!sb32_skip && !(mb_col + x_idx + 1 >= cm->mb_cols ||
mb_row + y_idx + 1 >= cm->mb_rows)) {
+ int r, d;
+
/* Pick a mode assuming that it applies to all 4 of the MBs in the SB */
pick_sb_modes(cpi, mb_row + y_idx, mb_col + x_idx,
- tp, &sb_rate, &sb_dist, BLOCK_SIZE_SB32X32,
+ tp, &r, &d, BLOCK_SIZE_SB32X32,
&x->sb32_context[xd->sb_index]);
- sb_rate += vp9_cost_bit(cm->prob_sb32_coded, 1);
+ r += x->partition_cost[partition_plane(BLOCK_SIZE_SB32X32)]
+ [PARTITION_NONE];
+
+ if (RDCOST(x->rdmult, x->rddiv, r, d) <
+ RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) {
+ sb32_rate = r;
+ sb32_dist = d;
+ sb_partitioning[i] = BLOCK_SIZE_SB32X32;
+ }
}
- /* Decide whether to encode as a SB or 4xMBs */
- if (sb_rate < INT_MAX &&
- RDCOST(x->rdmult, x->rddiv, sb_rate, sb_dist) <
- RDCOST(x->rdmult, x->rddiv, mb_rate, mb_dist)) {
- is_sb[i] = 1;
- sb32_rate += sb_rate;
- sb32_dist += sb_dist;
- } else {
- is_sb[i] = 0;
- sb32_rate += mb_rate;
- sb32_dist += mb_dist;
-
- // If we used 16x16 instead of 32x32 then skip 64x64 (if enabled).
- if (cpi->sf.mb16_breakout) {
- ++sb64_skip;
- }
+ // If we used 16x16 instead of 32x32 then skip 64x64 (if enabled).
+ if (cpi->sf.mb16_breakout && sb_partitioning[i] != BLOCK_SIZE_SB32X32) {
+ ++sb64_skip;
}
+ sb64_rate += sb32_rate;
+ sb64_dist += sb32_dist;
+
/* Encode SB using best computed mode(s) */
// FIXME(rbultje): there really shouldn't be any need to encode_mb/sb
// for each level that we go up, we can just keep tokens and recon
// pixels of the lower level; also, inverting SB/MB order (big->small
// instead of small->big) means we can use as threshold for small, which
// may enable breakouts if RD is not good enough (i.e. faster)
- encode_sb(cpi, mb_row + y_idx, mb_col + x_idx, 0, tp, is_sb[i]);
+ encode_sb(cpi, mb_row + y_idx, mb_col + x_idx, 0, tp,
+ sb_partitioning[i]);
}
memcpy(cm->above_context + mb_col, &a, sizeof(a));
memcpy(cm->left_context, &l, sizeof(l));
- sb32_rate += vp9_cost_bit(cm->prob_sb64_coded, 0);
+
+ sb64_rate += x->partition_cost[partition_plane(BLOCK_SIZE_SB64X64)]
+ [PARTITION_SPLIT];
+
+#if CONFIG_SBSEGMENT
+ // check 64x32
+ if (mb_col + 3 < cm->mb_cols && !(cm->mb_rows & 1)) {
+ int r, d;
+
+ xd->sb_index = 0;
+ pick_sb_modes(cpi, mb_row, mb_col,
+ tp, &r, &d, BLOCK_SIZE_SB64X32,
+ &x->sb64x32_context[xd->sb_index]);
+ if (mb_row + 2 != cm->mb_rows) {
+ int r2, d2;
+
+ update_state(cpi, &x->sb64x32_context[xd->sb_index],
+ BLOCK_SIZE_SB64X32, 0);
+ encode_superblock(cpi, tp,
+ 0, mb_row, mb_col, BLOCK_SIZE_SB64X32);
+ xd->sb_index = 1;
+ pick_sb_modes(cpi, mb_row + 2, mb_col,
+ tp, &r2, &d2, BLOCK_SIZE_SB64X32,
+ &x->sb64x32_context[xd->sb_index]);
+ r += r2;
+ d += d2;
+ }
+
+ r += x->partition_cost[partition_plane(BLOCK_SIZE_SB64X64)]
+ [PARTITION_HORZ];
+
+ /* is this better than the current best? */
+ if (RDCOST(x->rdmult, x->rddiv, r, d) <
+ RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) {
+ sb64_rate = r;
+ sb64_dist = d;
+ sb_partitioning[0] = BLOCK_SIZE_SB64X32;
+ }
+
+ vpx_memcpy(cm->left_context, l, sizeof(l));
+ vpx_memcpy(cm->above_context + mb_col, a, sizeof(a));
+ }
+
+ // check 32x64
+ if (mb_row + 3 < cm->mb_rows && !(cm->mb_cols & 1)) {
+ int r, d;
+
+ xd->sb_index = 0;
+ pick_sb_modes(cpi, mb_row, mb_col,
+ tp, &r, &d, BLOCK_SIZE_SB32X64,
+ &x->sb32x64_context[xd->sb_index]);
+ if (mb_col + 2 != cm->mb_cols) {
+ int r2, d2;
+
+ update_state(cpi, &x->sb32x64_context[xd->sb_index],
+ BLOCK_SIZE_SB32X64, 0);
+ encode_superblock(cpi, tp,
+ 0, mb_row, mb_col, BLOCK_SIZE_SB32X64);
+ xd->sb_index = 1;
+ pick_sb_modes(cpi, mb_row, mb_col + 2,
+ tp, &r2, &d2, BLOCK_SIZE_SB32X64,
+ &x->sb32x64_context[xd->sb_index]);
+ r += r2;
+ d += d2;
+ }
+
+ r += x->partition_cost[partition_plane(BLOCK_SIZE_SB64X64)]
+ [PARTITION_VERT];
+
+ /* is this better than the current best? */
+ if (RDCOST(x->rdmult, x->rddiv, r, d) <
+ RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) {
+ sb64_rate = r;
+ sb64_dist = d;
+ sb_partitioning[0] = BLOCK_SIZE_SB32X64;
+ }
+
+ vpx_memcpy(cm->left_context, l, sizeof(l));
+ vpx_memcpy(cm->above_context + mb_col, a, sizeof(a));
+ }
+#endif
if (!sb64_skip && !(mb_col + 3 >= cm->mb_cols ||
mb_row + 3 >= cm->mb_rows)) {
- pick_sb_modes(cpi, mb_row, mb_col, tp, &sb64_rate, &sb64_dist,
- BLOCK_SIZE_SB64X64, &x->sb64_context);
- sb64_rate += vp9_cost_bit(cm->prob_sb64_coded, 1);
- }
+ int r, d;
- /* Decide whether to encode as a SB or 4xMBs */
- if (sb64_rate < INT_MAX &&
- RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist) <
- RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) {
- is_sb[0] = 2;
- *totalrate += sb64_rate;
- } else {
- *totalrate += sb32_rate;
+ pick_sb_modes(cpi, mb_row, mb_col, tp, &r, &d,
+ BLOCK_SIZE_SB64X64, &x->sb64_context);
+ r += x->partition_cost[partition_plane(BLOCK_SIZE_SB64X64)]
+ [PARTITION_NONE];
+
+ if (RDCOST(x->rdmult, x->rddiv, r, d) <
+ RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) {
+ sb64_rate = r;
+ sb64_dist = d;
+ sb_partitioning[0] = BLOCK_SIZE_SB64X64;
+ }
}
assert(tp_orig == *tp);
- encode_sb64(cpi, mb_row, mb_col, tp, is_sb);
+ encode_sb64(cpi, mb_row, mb_col, tp, sb_partitioning);
assert(tp_orig < *tp);
}
}
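
Every candidate partitioning in this loop is accepted with the same pattern: take it only if its rate/distortion cost beats the running best, with the partition signalling cost folded into the rate. As a hypothetical helper (rd_better is not in the patch; RDCOST is the encoder's existing macro):

static INLINE int rd_better(const MACROBLOCK *x, int r, int d,
                            int best_r, int best_d) {
  return RDCOST(x->rdmult, x->rddiv, r, d) <
         RDCOST(x->rdmult, x->rddiv, best_r, best_d);
}

Starting sb64_rate/sb64_dist at the split cost (instead of the old INT_MAX sentinel) is what lets the 64x32, 32x64 and 64x64 checks all compare against the best answer so far.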
@@ -1139,8 +1368,8 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
vp9_zero(cpi->mbsplit_count)
vp9_zero(cpi->common.fc.mv_ref_ct)
vp9_zero(cpi->sb_ymode_count)
- vp9_zero(cpi->sb32_count);
- vp9_zero(cpi->sb64_count);
+ vp9_zero(cpi->partition_count);
+
#if CONFIG_COMP_INTERINTRA_PRED
vp9_zero(cpi->interintra_count);
vp9_zero(cpi->interintra_select_count);
@@ -1158,7 +1387,7 @@ static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
cpi->mb.e_mbd.inv_txm4x4 = vp9_short_iwalsh4x4;
cpi->mb.optimize = 0;
cpi->common.filter_level = 0;
- cpi->zbin_mode_boost_enabled = FALSE;
+ cpi->zbin_mode_boost_enabled = 0;
cpi->common.txfm_mode = ONLY_4X4;
} else {
cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4;
@@ -1348,7 +1577,6 @@ static void reset_skip_txfm_size_sb(VP9_COMP *cpi, MODE_INFO *mi,
MB_MODE_INFO *const mbmi = &mi->mbmi;
if (mbmi->txfm_size > txfm_max) {
- VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
const int segment_id = mbmi->segment_id;
@@ -1357,8 +1585,8 @@ static void reset_skip_txfm_size_sb(VP9_COMP *cpi, MODE_INFO *mi,
const int xmbs = MIN(bw, mb_cols_left);
xd->mode_info_context = mi;
- assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) ||
- (cm->mb_no_coeff_skip && get_skip_flag(mi, mis, ymbs, xmbs)));
+ assert(vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) ||
+ get_skip_flag(mi, mis, ymbs, xmbs));
set_txfm_flag(mi, mis, ymbs, xmbs, txfm_max);
}
}
@@ -1376,6 +1604,26 @@ static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) {
reset_skip_txfm_size_sb(cpi, mi, mis, txfm_max,
cm->mb_rows - mb_row, cm->mb_cols - mb_col,
BLOCK_SIZE_SB64X64);
+#if CONFIG_SBSEGMENT
+ } else if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X32) {
+ reset_skip_txfm_size_sb(cpi, mi, mis, txfm_max,
+ cm->mb_rows - mb_row, cm->mb_cols - mb_col,
+ BLOCK_SIZE_SB64X32);
+ if (mb_row + 2 != cm->mb_rows)
+ reset_skip_txfm_size_sb(cpi, mi + 2 * mis, mis, txfm_max,
+ cm->mb_rows - mb_row - 2,
+ cm->mb_cols - mb_col,
+ BLOCK_SIZE_SB64X32);
+ } else if (mi->mbmi.sb_type == BLOCK_SIZE_SB32X64) {
+ reset_skip_txfm_size_sb(cpi, mi, mis, txfm_max,
+ cm->mb_rows - mb_row, cm->mb_cols - mb_col,
+ BLOCK_SIZE_SB32X64);
+ if (mb_col + 2 != cm->mb_cols)
+ reset_skip_txfm_size_sb(cpi, mi + 2, mis, txfm_max,
+ cm->mb_rows - mb_row,
+ cm->mb_cols - mb_col - 2,
+ BLOCK_SIZE_SB32X64);
+#endif
} else {
int i;
@@ -1387,11 +1635,33 @@ static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) {
mb_col + x_idx_sb >= cm->mb_cols)
continue;
- if (sb_mi->mbmi.sb_type) {
+ if (sb_mi->mbmi.sb_type == BLOCK_SIZE_SB32X32) {
reset_skip_txfm_size_sb(cpi, sb_mi, mis, txfm_max,
cm->mb_rows - mb_row - y_idx_sb,
cm->mb_cols - mb_col - x_idx_sb,
BLOCK_SIZE_SB32X32);
+#if CONFIG_SBSEGMENT
+ } else if (sb_mi->mbmi.sb_type == BLOCK_SIZE_SB32X16) {
+ reset_skip_txfm_size_sb(cpi, sb_mi, mis, txfm_max,
+ cm->mb_rows - mb_row - y_idx_sb,
+ cm->mb_cols - mb_col - x_idx_sb,
+ BLOCK_SIZE_SB32X16);
+ if (mb_row + y_idx_sb + 1 != cm->mb_rows)
+ reset_skip_txfm_size_sb(cpi, sb_mi + mis, mis, txfm_max,
+ cm->mb_rows - mb_row - y_idx_sb - 1,
+ cm->mb_cols - mb_col - x_idx_sb,
+ BLOCK_SIZE_SB32X16);
+ } else if (sb_mi->mbmi.sb_type == BLOCK_SIZE_SB16X32) {
+ reset_skip_txfm_size_sb(cpi, sb_mi, mis, txfm_max,
+ cm->mb_rows - mb_row - y_idx_sb,
+ cm->mb_cols - mb_col - x_idx_sb,
+ BLOCK_SIZE_SB16X32);
+ if (mb_col + x_idx_sb + 1 != cm->mb_cols)
+ reset_skip_txfm_size_sb(cpi, sb_mi + 1, mis, txfm_max,
+ cm->mb_rows - mb_row - y_idx_sb,
+ cm->mb_cols - mb_col - x_idx_sb - 1,
+ BLOCK_SIZE_SB16X32);
+#endif
} else {
int m;
@@ -1961,10 +2231,6 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
if (!x->skip) {
vp9_encode_inter16x16(cm, x, mb_row, mb_col);
- // Clear mb_skip_coeff if mb_no_coeff_skip is not set
- if (!cpi->common.mb_no_coeff_skip)
- mbmi->mb_skip_coeff = 0;
-
} else {
vp9_build_inter16x16_predictors_mb(xd,
xd->dst.y_buffer,
@@ -2046,27 +2312,20 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
} else {
// FIXME(rbultje): not tile-aware (mi - 1)
- int mb_skip_context = cpi->common.mb_no_coeff_skip ?
- (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0;
-
- if (cm->mb_no_coeff_skip) {
- mbmi->mb_skip_coeff = 1;
- if (output_enabled)
- cpi->skip_true_count[mb_skip_context]++;
- vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_MB16X16);
- } else {
- vp9_stuff_mb(cpi, xd, t, !output_enabled);
- mbmi->mb_skip_coeff = 0;
- if (output_enabled)
- cpi->skip_false_count[mb_skip_context]++;
- }
+ int mb_skip_context =
+ (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff;
+
+ mbmi->mb_skip_coeff = 1;
+ if (output_enabled)
+ cpi->skip_true_count[mb_skip_context]++;
+ vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_MB16X16);
}
if (output_enabled) {
int segment_id = mbmi->segment_id;
if (cpi->common.txfm_mode == TX_MODE_SELECT &&
- !((cpi->common.mb_no_coeff_skip && mbmi->mb_skip_coeff) ||
- (vp9_segfeature_active(&x->e_mbd, segment_id, SEG_LVL_SKIP)))) {
+ !(mbmi->mb_skip_coeff ||
+ vp9_segfeature_active(&x->e_mbd, segment_id, SEG_LVL_SKIP))) {
assert(mbmi->txfm_size <= TX_16X16);
if (mbmi->mode != I4X4_PRED && mbmi->mode != I8X8_PRED &&
mbmi->mode != SPLITMV) {
@@ -2090,6 +2349,8 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
}
}
+void __attribute__((noinline)) hi(void) { }
+
static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
int output_enabled, int mb_row, int mb_col,
BLOCK_SIZE_TYPE bsize) {
@@ -2279,19 +2540,13 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled, bsize);
} else {
// FIXME(rbultje): not tile-aware (mi - 1)
- int mb_skip_context = cpi->common.mb_no_coeff_skip ?
- (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0;
+ int mb_skip_context =
+ (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff;
xd->mode_info_context->mbmi.mb_skip_coeff = 1;
- if (cm->mb_no_coeff_skip) {
- if (output_enabled)
- cpi->skip_true_count[mb_skip_context]++;
- vp9_reset_sb_tokens_context(xd, bsize);
- } else {
- vp9_stuff_sb(cpi, xd, t, !output_enabled, bsize);
- if (output_enabled)
- cpi->skip_false_count[mb_skip_context]++;
- }
+ if (output_enabled)
+ cpi->skip_true_count[mb_skip_context]++;
+ vp9_reset_sb_tokens_context(xd, bsize);
}
// copy skip flag on all mb_mode_info contexts in this SB
@@ -2304,8 +2559,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
if (output_enabled) {
if (cm->txfm_mode == TX_MODE_SELECT &&
- !((cm->mb_no_coeff_skip && mi->mbmi.mb_skip_coeff) ||
- (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
+ !(mi->mbmi.mb_skip_coeff ||
+ vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) {
if (bsize >= BLOCK_SIZE_SB32X32) {
cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++;
} else {
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index 9431f0781..7c0b3ddeb 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -561,10 +561,10 @@ void vp9_encode_nmv(vp9_writer* const bc, const MV* const mv,
MV_JOINT_TYPE j = vp9_get_mv_joint(*mv);
write_token(bc, vp9_mv_joint_tree, mvctx->joints,
vp9_mv_joint_encodings + j);
- if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) {
+ if (mv_joint_vertical(j)) {
encode_nmv_component(bc, mv->row, ref->col, &mvctx->comps[0]);
}
- if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) {
+ if (mv_joint_horizontal(j)) {
encode_nmv_component(bc, mv->col, ref->col, &mvctx->comps[1]);
}
}
@@ -574,10 +574,10 @@ void vp9_encode_nmv_fp(vp9_writer* const bc, const MV* const mv,
int usehp) {
MV_JOINT_TYPE j = vp9_get_mv_joint(*mv);
usehp = usehp && vp9_use_nmv_hp(ref);
- if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) {
+ if (mv_joint_vertical(j)) {
encode_nmv_component_fp(bc, mv->row, ref->row, &mvctx->comps[0], usehp);
}
- if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) {
+ if (mv_joint_horizontal(j)) {
encode_nmv_component_fp(bc, mv->col, ref->col, &mvctx->comps[1], usehp);
}
}
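
mv_joint_vertical() and mv_joint_horizontal() replace the open-coded joint comparisons; they are presumably defined along these lines in vp9/common/vp9_entropymv.h:

static INLINE int mv_joint_vertical(MV_JOINT_TYPE type) {
  return type == MV_JOINT_HZVNZ || type == MV_JOINT_HNZVNZ;
}

static INLINE int mv_joint_horizontal(MV_JOINT_TYPE type) {
  return type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ;
}

so a motion-vector component is coded only when its half of the joint is nonzero.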
@@ -603,59 +603,33 @@ void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x,
if (mbmi->mode == SPLITMV) {
int i;
-
- for (i = 0; i < x->partition_info->count; i++) {
- if (x->partition_info->bmi[i].mode == NEW4X4) {
- if (x->e_mbd.allow_high_precision_mv) {
- mv.row = (x->partition_info->bmi[i].mv.as_mv.row
- - best_ref_mv->as_mv.row);
- mv.col = (x->partition_info->bmi[i].mv.as_mv.col
- - best_ref_mv->as_mv.col);
- vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 1);
- if (x->e_mbd.mode_info_context->mbmi.second_ref_frame > 0) {
- mv.row = (x->partition_info->bmi[i].second_mv.as_mv.row
- - second_best_ref_mv->as_mv.row);
- mv.col = (x->partition_info->bmi[i].second_mv.as_mv.col
- - second_best_ref_mv->as_mv.col);
- vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv,
- &cpi->NMVcount, 1);
- }
- } else {
- mv.row = (x->partition_info->bmi[i].mv.as_mv.row
- - best_ref_mv->as_mv.row);
- mv.col = (x->partition_info->bmi[i].mv.as_mv.col
- - best_ref_mv->as_mv.col);
- vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 0);
- if (x->e_mbd.mode_info_context->mbmi.second_ref_frame > 0) {
- mv.row = (x->partition_info->bmi[i].second_mv.as_mv.row
- - second_best_ref_mv->as_mv.row);
- mv.col = (x->partition_info->bmi[i].second_mv.as_mv.col
- - second_best_ref_mv->as_mv.col);
- vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv,
- &cpi->NMVcount, 0);
- }
+ PARTITION_INFO *pi = x->partition_info;
+ for (i = 0; i < pi->count; i++) {
+ if (pi->bmi[i].mode == NEW4X4) {
+ mv.row = (pi->bmi[i].mv.as_mv.row - best_ref_mv->as_mv.row);
+ mv.col = (pi->bmi[i].mv.as_mv.col - best_ref_mv->as_mv.col);
+ vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount,
+ x->e_mbd.allow_high_precision_mv);
+ if (x->e_mbd.mode_info_context->mbmi.second_ref_frame > 0) {
+ mv.row = pi->bmi[i].second_mv.as_mv.row -
+ second_best_ref_mv->as_mv.row;
+ mv.col = pi->bmi[i].second_mv.as_mv.col -
+ second_best_ref_mv->as_mv.col;
+ vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount,
+ x->e_mbd.allow_high_precision_mv);
}
}
}
} else if (mbmi->mode == NEWMV) {
- if (x->e_mbd.allow_high_precision_mv) {
- mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row);
- mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col);
- vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 1);
- if (mbmi->second_ref_frame > 0) {
- mv.row = (mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row);
- mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col);
- vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 1);
- }
- } else {
- mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row);
- mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col);
- vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 0);
- if (mbmi->second_ref_frame > 0) {
- mv.row = (mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row);
- mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col);
- vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 0);
- }
+ mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row);
+ mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col);
+ vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount,
+ x->e_mbd.allow_high_precision_mv);
+ if (mbmi->second_ref_frame > 0) {
+ mv.row = (mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row);
+ mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col);
+ vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount,
+ x->e_mbd.allow_high_precision_mv);
}
}
}
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index d333e8e31..04ef55513 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -1219,7 +1219,7 @@ static int detect_transition_to_still(
int still_interval,
double loop_decay_rate,
double last_decay_rate) {
- int trans_to_still = FALSE;
+ int trans_to_still = 0;
// Break clause to detect very still sections after motion
// For example a static image after a fade or other transition
@@ -1248,7 +1248,7 @@ static int detect_transition_to_still(
// Only if it does do we signal a transition to still
if (j == still_interval)
- trans_to_still = TRUE;
+ trans_to_still = 1;
}
return trans_to_still;
@@ -1260,7 +1260,7 @@ static int detect_transition_to_still(
static int detect_flash(VP9_COMP *cpi, int offset) {
FIRSTPASS_STATS next_frame;
- int flash_detected = FALSE;
+ int flash_detected = 0;
// Read the frame data.
// The return is FALSE (no flash detected) if not a valid frame
@@ -1272,7 +1272,7 @@ static int detect_flash(VP9_COMP *cpi, int offset) {
// compared to pcnt_inter.
if ((next_frame.pcnt_second_ref > next_frame.pcnt_inter) &&
(next_frame.pcnt_second_ref >= 0.5)) {
- flash_detected = TRUE;
+ flash_detected = 1;
}
}
@@ -1372,7 +1372,7 @@ static int calc_arf_boost(
double mv_in_out_accumulator = 0.0;
double abs_mv_in_out_accumulator = 0.0;
int arf_boost;
- int flash_detected = FALSE;
+ int flash_detected = 0;
// Search forward from the proposed arf/next gf position
for (i = 0; i < f_frames; i++) {
@@ -1541,12 +1541,12 @@ void define_fixed_arf_period(VP9_COMP *cpi) {
if (cpi->twopass.frames_to_key <= (FIXED_ARF_GROUP_SIZE + 8)) {
// Setup a GF group close to the keyframe.
- cpi->source_alt_ref_pending = FALSE;
+ cpi->source_alt_ref_pending = 0;
cpi->baseline_gf_interval = cpi->twopass.frames_to_key;
schedule_frames(cpi, 0, (cpi->baseline_gf_interval - 1), 2, 0, 0);
} else {
// Setup a fixed period ARF group.
- cpi->source_alt_ref_pending = TRUE;
+ cpi->source_alt_ref_pending = 1;
cpi->baseline_gf_interval = FIXED_ARF_GROUP_SIZE;
schedule_frames(cpi, 0, -(cpi->baseline_gf_interval - 1), 2, 1, 0);
}
@@ -1691,7 +1691,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// (for example a static image after a fade or other transition).
if (detect_transition_to_still(cpi, i, 5, loop_decay_rate,
last_loop_decay_rate)) {
- allow_alt_ref = FALSE;
+ allow_alt_ref = 0;
break;
}
}
@@ -1769,7 +1769,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
(boost_score > 100)) {
// Alternative boost calculation for alt ref
cpi->gfu_boost = calc_arf_boost(cpi, 0, (i - 1), (i - 1), &f_boost, &b_boost);
- cpi->source_alt_ref_pending = TRUE;
+ cpi->source_alt_ref_pending = 1;
#if CONFIG_MULTIPLE_ARF
// Set the ARF schedule.
@@ -1779,7 +1779,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
#endif
} else {
cpi->gfu_boost = (int)boost_score;
- cpi->source_alt_ref_pending = FALSE;
+ cpi->source_alt_ref_pending = 0;
#if CONFIG_MULTIPLE_ARF
// Set the GF schedule.
if (cpi->multi_arf_enabled) {
@@ -2257,7 +2257,7 @@ static int test_candidate_kf(VP9_COMP *cpi,
FIRSTPASS_STATS *last_frame,
FIRSTPASS_STATS *this_frame,
FIRSTPASS_STATS *next_frame) {
- int is_viable_kf = FALSE;
+ int is_viable_kf = 0;
// Does the frame satisfy the primary criteria of a key frame
// If so, then examine how well it predicts subsequent frames
@@ -2329,12 +2329,12 @@ static int test_candidate_kf(VP9_COMP *cpi,
// If there is tolerable prediction for at least the next 3 frames then
// break out else discard this potential key frame and move on
if (boost_score > 30.0 && (i > 3))
- is_viable_kf = TRUE;
+ is_viable_kf = 1;
else {
// Reset the file position
reset_fpf_position(cpi, start_pos);
- is_viable_kf = FALSE;
+ is_viable_kf = 0;
}
}
@@ -2370,7 +2370,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
cpi->this_key_frame_forced = cpi->next_key_frame_forced;
// Clear the alt ref active flag as this can never be active on a key frame
- cpi->source_alt_ref_active = FALSE;
+ cpi->source_alt_ref_active = 0;
// Kf is always a gf so clear frames till next gf counter
cpi->frames_till_gf_update_due = 0;
@@ -2478,9 +2478,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Reset to the start of the group
reset_fpf_position(cpi, current_pos);
- cpi->next_key_frame_forced = TRUE;
+ cpi->next_key_frame_forced = 1;
} else
- cpi->next_key_frame_forced = FALSE;
+ cpi->next_key_frame_forced = 0;
// Special case for the last frame of the file
if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end) {
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index e642b7487..1649ccade 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -79,9 +79,10 @@ static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvjsadcost,
MV v;
v.row = mv->as_mv.row - ref->as_mv.row;
v.col = mv->as_mv.col - ref->as_mv.col;
- return ((mvjsadcost[vp9_get_mv_joint(v)] +
- mvsadcost[0][v.row] + mvsadcost[1][v.col]) *
- error_per_bit + 128) >> 8;
+
+ return ROUND_POWER_OF_TWO((mvjsadcost[vp9_get_mv_joint(v)] +
+ mvsadcost[0][v.row] + mvsadcost[1][v.col]) *
+ error_per_bit, 8);
}
return 0;
}
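The vp9_mcomp.c change above replaces an open-coded rounded shift with
ROUND_POWER_OF_TWO. A minimal sketch of the equivalence, assuming the macro
carries libvpx's usual round-half-up definition:

/* Rounded division by 2^n, rounding half up. */
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

/* With cost = (mvjsadcost[...] + mvsadcost[...]) * error_per_bit:
 *   (cost + 128) >> 8  ==  ROUND_POWER_OF_TWO(cost, 8)  */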
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index ed21044bb..dc02f146e 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -1376,8 +1376,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
cm->prob_last_coded = 128;
cm->prob_gf_coded = 128;
cm->prob_intra_coded = 63;
- cm->prob_sb32_coded = 200;
- cm->prob_sb64_coded = 200;
for (i = 0; i < COMP_PRED_CONTEXTS; i++)
cm->prob_comppred[i] = 128;
for (i = 0; i < TX_SIZE_MAX_SB - 1; i++)
@@ -1456,11 +1454,11 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
cpi->frames_since_key = 8; // Give a sensible default for the first frame.
cpi->key_frame_frequency = cpi->oxcf.key_freq;
- cpi->this_key_frame_forced = FALSE;
- cpi->next_key_frame_forced = FALSE;
+ cpi->this_key_frame_forced = 0;
+ cpi->next_key_frame_forced = 0;
- cpi->source_alt_ref_pending = FALSE;
- cpi->source_alt_ref_active = FALSE;
+ cpi->source_alt_ref_pending = 0;
+ cpi->source_alt_ref_active = 0;
cpi->refresh_alt_ref_frame = 0;
#if CONFIG_MULTIPLE_ARF
@@ -1587,6 +1585,27 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
cpi->fn_ptr[BT].sdx8f = SDX8F; \
cpi->fn_ptr[BT].sdx4df = SDX4DF;
+#if CONFIG_SBSEGMENT
+ BFP(BLOCK_32X16, vp9_sad32x16, vp9_variance32x16, vp9_sub_pixel_variance32x16,
+ NULL, NULL,
+ NULL, NULL, NULL,
+ vp9_sad32x16x4d)
+
+ BFP(BLOCK_16X32, vp9_sad16x32, vp9_variance16x32, vp9_sub_pixel_variance16x32,
+ NULL, NULL,
+ NULL, NULL, NULL,
+ vp9_sad16x32x4d)
+
+ BFP(BLOCK_64X32, vp9_sad64x32, vp9_variance64x32, vp9_sub_pixel_variance64x32,
+ NULL, NULL,
+ NULL, NULL, NULL,
+ vp9_sad64x32x4d)
+
+ BFP(BLOCK_32X64, vp9_sad32x64, vp9_variance32x64, vp9_sub_pixel_variance32x64,
+ NULL, NULL,
+ NULL, NULL, NULL,
+ vp9_sad32x64x4d)
+#endif
BFP(BLOCK_32X32, vp9_sad32x32, vp9_variance32x32, vp9_sub_pixel_variance32x32,
vp9_variance_halfpixvar32x32_h, vp9_variance_halfpixvar32x32_v,
@@ -2224,10 +2243,10 @@ static void update_alt_ref_frame_stats(VP9_COMP *cpi) {
if (!cpi->multi_arf_enabled)
#endif
// Clear the alternate reference update pending flag.
- cpi->source_alt_ref_pending = FALSE;
+ cpi->source_alt_ref_pending = 0;
// Set the alternate reference frame active flag
- cpi->source_alt_ref_active = TRUE;
+ cpi->source_alt_ref_active = 1;
}
@@ -2264,12 +2283,12 @@ static void update_golden_frame_stats(VP9_COMP *cpi) {
// If we are going to use the ALT reference for the next group of frames, set a flag to say so.
if (cpi->oxcf.fixed_q >= 0 &&
cpi->oxcf.play_alternate && !cpi->refresh_alt_ref_frame) {
- cpi->source_alt_ref_pending = TRUE;
+ cpi->source_alt_ref_pending = 1;
cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
}
if (!cpi->source_alt_ref_pending)
- cpi->source_alt_ref_active = FALSE;
+ cpi->source_alt_ref_active = 0;
// Decrement the countdown until the next gf
if (cpi->frames_till_gf_update_due > 0)
@@ -2388,7 +2407,7 @@ static double compute_edge_pixel_proportion(YV12_BUFFER_CONFIG *frame) {
static int recode_loop_test(VP9_COMP *cpi,
int high_limit, int low_limit,
int q, int maxq, int minq) {
- int force_recode = FALSE;
+ int force_recode = 0;
VP9_COMMON *cm = &cpi->common;
// Is frame recode allowed at all
@@ -2402,19 +2421,19 @@ static int recode_loop_test(VP9_COMP *cpi,
// General overshoot and undershoot tests
if (((cpi->projected_frame_size > high_limit) && (q < maxq)) ||
((cpi->projected_frame_size < low_limit) && (q > minq))) {
- force_recode = TRUE;
+ force_recode = 1;
}
// Special Constrained quality tests
else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) {
// Undershoot and below auto cq level
if (q > cpi->cq_target_quality &&
cpi->projected_frame_size < ((cpi->this_frame_target * 7) >> 3)) {
- force_recode = TRUE;
+ force_recode = 1;
} else if (q > cpi->oxcf.cq_level &&
cpi->projected_frame_size < cpi->min_frame_bandwidth &&
cpi->active_best_quality > cpi->oxcf.cq_level) {
// Severe undershoot and between auto and user cq level
- force_recode = TRUE;
+ force_recode = 1;
cpi->active_best_quality = cpi->oxcf.cq_level;
}
}
@@ -2602,7 +2621,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
int frame_over_shoot_limit;
int frame_under_shoot_limit;
- int loop = FALSE;
+ int loop = 0;
int loop_count;
int q_low;
@@ -2610,10 +2629,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
int top_index;
int bottom_index;
- int active_worst_qchanged = FALSE;
+ int active_worst_qchanged = 0;
- int overshoot_seen = FALSE;
- int undershoot_seen = FALSE;
+ int overshoot_seen = 0;
+ int undershoot_seen = 0;
SPEED_FEATURES *sf = &cpi->sf;
#if RESET_FOREACH_FILTER
@@ -2673,9 +2692,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
cpi->zbin_mode_boost = 0;
// if (cpi->oxcf.lossless)
- cpi->zbin_mode_boost_enabled = FALSE;
+ cpi->zbin_mode_boost_enabled = 0;
// else
- // cpi->zbin_mode_boost_enabled = TRUE;
+ // cpi->zbin_mode_boost_enabled = 1;
// Current default encoder behaviour for the altref sign bias
cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = cpi->source_alt_ref_active;
@@ -2707,7 +2726,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
// The alternate reference frame cannot be active for a key frame
- cpi->source_alt_ref_active = FALSE;
+ cpi->source_alt_ref_active = 0;
// Reset the RD threshold multipliers to default of * 1 (128)
for (i = 0; i < MAX_MODES; i++)
@@ -2964,43 +2983,41 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
vp9_set_quantizer(cpi, q);
if (loop_count == 0) {
+ int k;
// Set up the skip probabilities used for costing in mode/mv decisions
- if (cpi->common.mb_no_coeff_skip) {
- int k;
- for (k = 0; k < MBSKIP_CONTEXTS; k++)
- cm->mbskip_pred_probs[k] = cpi->base_skip_false_prob[q][k];
-
- if (cm->frame_type != KEY_FRAME) {
- if (cpi->refresh_alt_ref_frame) {
- for (k = 0; k < MBSKIP_CONTEXTS; k++) {
- if (cpi->last_skip_false_probs[2][k] != 0)
- cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[2][k];
- }
- } else if (cpi->refresh_golden_frame) {
- for (k = 0; k < MBSKIP_CONTEXTS; k++) {
- if (cpi->last_skip_false_probs[1][k] != 0)
- cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[1][k];
- }
- } else {
- int k;
- for (k = 0; k < MBSKIP_CONTEXTS; k++) {
- if (cpi->last_skip_false_probs[0][k] != 0)
- cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[0][k];
- }
+ for (k = 0; k < MBSKIP_CONTEXTS; k++)
+ cm->mbskip_pred_probs[k] = cpi->base_skip_false_prob[q][k];
+
+ if (cm->frame_type != KEY_FRAME) {
+ if (cpi->refresh_alt_ref_frame) {
+ for (k = 0; k < MBSKIP_CONTEXTS; k++) {
+ if (cpi->last_skip_false_probs[2][k] != 0)
+ cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[2][k];
+ }
+ } else if (cpi->refresh_golden_frame) {
+ for (k = 0; k < MBSKIP_CONTEXTS; k++) {
+ if (cpi->last_skip_false_probs[1][k] != 0)
+ cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[1][k];
+ }
+ } else {
+ int k;
+ for (k = 0; k < MBSKIP_CONTEXTS; k++) {
+ if (cpi->last_skip_false_probs[0][k] != 0)
+ cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[0][k];
}
+ }
- // as this is for cost estimate, let's make sure it does not
- // get extreme either way
- {
- int k;
- for (k = 0; k < MBSKIP_CONTEXTS; ++k) {
- cm->mbskip_pred_probs[k] = clamp(cm->mbskip_pred_probs[k],
- 5, 250);
+ // As this is only used for cost estimation, make sure it does not
+ // get extreme either way.
+ {
+ int k;
+ for (k = 0; k < MBSKIP_CONTEXTS; ++k) {
+ cm->mbskip_pred_probs[k] = clamp(cm->mbskip_pred_probs[k],
+ 5, 250);
- if (cpi->is_src_frame_alt_ref)
- cm->mbskip_pred_probs[k] = 1;
- }
+ if (cpi->is_src_frame_alt_ref)
+ cm->mbskip_pred_probs[k] = 1;
}
}
}
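The clamp in the hunk above keeps the skip probabilities used for cost
estimation away from the extremes. A sketch of the equivalent expression,
assuming clamp() is the usual min/max helper:

static int clamp_sketch(int value, int low, int high) {
  /* cm->mbskip_pred_probs[k] is held within [5, 250]. */
  return value < low ? low : (value > high ? high : value);
}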
@@ -3049,7 +3066,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
if (frame_over_shoot_limit == 0)
frame_over_shoot_limit = 1;
- active_worst_qchanged = FALSE;
+ active_worst_qchanged = 0;
// Special case handling for forced key frames
if ((cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced) {
@@ -3128,7 +3145,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
}
- overshoot_seen = TRUE;
+ overshoot_seen = 1;
} else {
// Frame is too small
q_high = q > q_low ? q - 1 : q_low;
@@ -3161,7 +3178,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
}
- undershoot_seen = TRUE;
+ undershoot_seen = 1;
}
// Clamp Q to upper and lower limits:
@@ -3169,11 +3186,11 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
loop = q != last_q;
} else {
- loop = FALSE;
+ loop = 0;
}
if (cpi->is_src_frame_alt_ref)
- loop = FALSE;
+ loop = 0;
if (!loop && cm->frame_type != KEY_FRAME && sf->search_best_filter) {
if (mcomp_filter_index < mcomp_filters) {
@@ -3186,7 +3203,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
if (mcomp_filter_index < mcomp_filters) {
cm->mcomp_filter_type = mcomp_filters_to_search[mcomp_filter_index];
loop_count = -1;
- loop = TRUE;
+ loop = 1;
} else {
int f;
int64_t best_cost = mcomp_filter_cost[0];
@@ -3199,7 +3216,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
if (mcomp_best_filter != mcomp_filters_to_search[mcomp_filters - 1]) {
loop_count = -1;
- loop = TRUE;
+ loop = 1;
cm->mcomp_filter_type = mcomp_best_filter;
}
/*
@@ -3210,8 +3227,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
#if RESET_FOREACH_FILTER
if (loop) {
- overshoot_seen = FALSE;
- undershoot_seen = FALSE;
+ overshoot_seen = 0;
+ undershoot_seen = 0;
q_low = q_low0;
q_high = q_high0;
q = Q0;
@@ -3328,6 +3345,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
vp9_copy(cpi->common.fc.i8x8_mode_counts, cpi->i8x8_mode_count);
vp9_copy(cpi->common.fc.sub_mv_ref_counts, cpi->sub_mv_ref_count);
vp9_copy(cpi->common.fc.mbsplit_counts, cpi->mbsplit_count);
+ vp9_copy(cpi->common.fc.partition_counts, cpi->partition_count);
#if CONFIG_COMP_INTERINTRA_PRED
vp9_copy(cpi->common.fc.interintra_counts, cpi->interintra_count);
#endif
@@ -3791,7 +3809,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
#if CONFIG_MULTIPLE_ARF
if (!cpi->multi_arf_enabled)
#endif
- cpi->source_alt_ref_pending = FALSE; // Clear Pending altf Ref flag.
+ cpi->source_alt_ref_pending = 0; // Clear the pending alt ref flag.
}
}
@@ -3804,11 +3822,11 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
#if CONFIG_MULTIPLE_ARF
// Is this frame the ARF overlay?
- cpi->is_src_frame_alt_ref = FALSE;
+ cpi->is_src_frame_alt_ref = 0;
for (i = 0; i < cpi->arf_buffered; ++i) {
if (cpi->source == cpi->alt_ref_source[i]) {
- cpi->is_src_frame_alt_ref = TRUE;
- cpi->refresh_golden_frame = TRUE;
+ cpi->is_src_frame_alt_ref = 1;
+ cpi->refresh_golden_frame = 1;
break;
}
}
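The CONFIG_SBSEGMENT BFP entries above register per-block-size function
pointers, so later code can dispatch by block size rather than hard-coding a
vp9_variance32x32()-style call for each size (the rdopt hunks below do exactly
this via cpi->fn_ptr[block_size].vf). A hypothetical caller, as a sketch:

/* Sketch only: dispatch a variance computation through the fn_ptr table. */
static unsigned int block_variance(VP9_COMP *cpi, enum BlockSize bs,
                                   const uint8_t *src, int src_stride,
                                   const uint8_t *ref, int ref_stride) {
  unsigned int sse;
  return cpi->fn_ptr[bs].vf(src, src_stride, ref, ref_stride, &sse);
}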
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index f3209a3f7..197ede20d 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -103,6 +103,7 @@ typedef struct {
vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1];
vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1];
vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1];
+ vp9_prob partition_prob[PARTITION_PLANES][PARTITION_TYPES - 1];
vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
[VP9_SWITCHABLE_FILTERS - 1];
@@ -277,26 +278,6 @@ typedef struct {
int static_segmentation;
} SPEED_FEATURES;
-typedef struct {
- MACROBLOCK mb;
- int totalrate;
-} MB_ROW_COMP;
-
-typedef struct {
- TOKENEXTRA *start;
- TOKENEXTRA *stop;
-} TOKENLIST;
-
-typedef struct {
- int ithread;
- void *ptr1;
- void *ptr2;
-} ENCODETHREAD_DATA;
-typedef struct {
- int ithread;
- void *ptr1;
-} LPFTHREAD_DATA;
-
enum BlockSize {
BLOCK_16X8 = PARTITIONING_16X8,
BLOCK_8X16 = PARTITIONING_8X16,
@@ -305,6 +286,12 @@ enum BlockSize {
BLOCK_16X16,
BLOCK_MAX_SEGMENTS,
BLOCK_32X32 = BLOCK_MAX_SEGMENTS,
+#if CONFIG_SBSEGMENT
+ BLOCK_32X16,
+ BLOCK_16X32,
+ BLOCK_64X32,
+ BLOCK_32X64,
+#endif
BLOCK_64X64,
BLOCK_MAX_SB_SEGMENTS,
};
@@ -469,8 +456,6 @@ typedef struct VP9_COMP {
int cq_target_quality;
- int sb32_count[2];
- int sb64_count[2];
int sb_ymode_count [VP9_I32X32_MODES];
int ymode_count[VP9_YMODES]; /* intra MB type cts this frame */
int bmode_count[VP9_NKF_BINTRAMODES];
@@ -478,6 +463,7 @@ typedef struct VP9_COMP {
int sub_mv_ref_count[SUBMVREF_COUNT][VP9_SUBMVREFS];
int mbsplit_count[VP9_NUMMBSPLITS];
int y_uv_mode_count[VP9_YMODES][VP9_UV_MODES];
+ unsigned int partition_count[PARTITION_PLANES][PARTITION_TYPES];
#if CONFIG_COMP_INTERINTRA_PRED
unsigned int interintra_count[2];
unsigned int interintra_select_count[2];
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 83fa1dafd..1401bd64e 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -477,7 +477,7 @@ void vp9_quantize_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
void vp9_quantize_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize);
- const int bhl = mb_width_log2(bsize);
+ const int bhl = mb_height_log2(bsize);
const int uoff = 16 << (bhl + bwl);
int i;
@@ -487,7 +487,7 @@ void vp9_quantize_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
void vp9_quantize_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize);
- const int bhl = mb_width_log2(bsize);
+ const int bhl = mb_height_log2(bsize);
const int uoff = 16 << (bhl + bwl);
int i;
@@ -497,7 +497,7 @@ void vp9_quantize_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
void vp9_quantize_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize);
- const int bhl = mb_width_log2(bsize);
+ const int bhl = mb_height_log2(bsize);
const int uoff = 16 << (bhl + bwl);
int i;
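The vp9_quantize.c fixes above correct a copy/paste bug in which the block
height was also derived from mb_width_log2(). A worked check, assuming
mb_width_log2()/mb_height_log2() return the block's width/height in
macroblocks as a log2:

#include <assert.h>

/* For a 64x32 superblock: bwl = 2 (4 MBs wide), bhl = 1 (2 MBs tall). */
static void check_uoff(void) {
  const int bwl = 2, bhl = 1;
  assert((16 << (bhl + bwl)) == 128);  /* fixed: 128 luma 4x4 blocks  */
  assert((16 << (bwl + bwl)) == 256);  /* old code: width used twice  */
}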
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index fc22146c3..96d857fe7 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -141,6 +141,7 @@ void vp9_save_coding_context(VP9_COMP *cpi) {
vp9_copy(cc->i8x8_mode_prob, cm->fc.i8x8_mode_prob);
vp9_copy(cc->sub_mv_ref_prob, cm->fc.sub_mv_ref_prob);
vp9_copy(cc->mbsplit_prob, cm->fc.mbsplit_prob);
+ vp9_copy(cc->partition_prob, cm->fc.partition_prob);
// Stats
#ifdef MODE_STATS
@@ -202,6 +203,7 @@ void vp9_restore_coding_context(VP9_COMP *cpi) {
vp9_copy(cm->fc.uv_mode_prob, cc->uv_mode_prob);
vp9_copy(cm->fc.sub_mv_ref_prob, cc->sub_mv_ref_prob);
vp9_copy(cm->fc.mbsplit_prob, cc->mbsplit_prob);
+ vp9_copy(cm->fc.partition_prob, cc->partition_prob);
// Stats
#ifdef MODE_STATS
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 1d0715499..2f29b1dc3 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -322,6 +322,11 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
fill_nzc_costs(cpi, TX_32X32);
#endif
+ for (i = 0; i < 2; i++)
+ vp9_cost_tokens(cpi->mb.partition_cost[i],
+ cpi->common.fc.partition_prob[i],
+ vp9_partition_tree);
+
/*rough estimate for costing*/
cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
vp9_init_mode_costs(cpi);
@@ -347,50 +352,6 @@ int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
return error;
}
-int vp9_uvsse(MACROBLOCK *x) {
- uint8_t *uptr, *vptr;
- uint8_t *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
- uint8_t *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
- int uv_stride = x->block[16].src_stride;
-
- unsigned int sse1 = 0;
- unsigned int sse2 = 0;
- int mv_row = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.row;
- int mv_col = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.col;
- int offset;
- int pre_stride = x->e_mbd.block[16].pre_stride;
-
- if (mv_row < 0)
- mv_row -= 1;
- else
- mv_row += 1;
-
- if (mv_col < 0)
- mv_col -= 1;
- else
- mv_col += 1;
-
- mv_row /= 2;
- mv_col /= 2;
-
- offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
- uptr = x->e_mbd.pre.u_buffer + offset;
- vptr = x->e_mbd.pre.v_buffer + offset;
-
- if ((mv_row | mv_col) & 7) {
- vp9_sub_pixel_variance8x8(uptr, pre_stride, (mv_col & 7) << 1,
- (mv_row & 7) << 1, upred_ptr, uv_stride, &sse2);
- vp9_sub_pixel_variance8x8(vptr, pre_stride, (mv_col & 7) << 1,
- (mv_row & 7) << 1, vpred_ptr, uv_stride, &sse1);
- sse2 += sse1;
- } else {
- vp9_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2);
- vp9_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1);
- sse2 += sse1;
- }
- return sse2;
-}
-
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
int ib, PLANE_TYPE type,
ENTROPY_CONTEXT *a,
@@ -610,10 +571,10 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
- vp9_prob skip_prob = cm->mb_no_coeff_skip ?
- vp9_get_pred_prob(cm, xd, PRED_MBSKIP) : 128;
+ vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
int64_t rd[TX_SIZE_MAX_SB][2];
int n, m;
+ int s0, s1;
for (n = TX_4X4; n <= max_txfm_size; n++) {
r[n][1] = r[n][0];
@@ -625,25 +586,16 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
}
}
- if (cm->mb_no_coeff_skip) {
- int s0, s1;
+ assert(skip_prob > 0);
+ s0 = vp9_cost_bit(skip_prob, 0);
+ s1 = vp9_cost_bit(skip_prob, 1);
- assert(skip_prob > 0);
- s0 = vp9_cost_bit(skip_prob, 0);
- s1 = vp9_cost_bit(skip_prob, 1);
-
- for (n = TX_4X4; n <= max_txfm_size; n++) {
- if (s[n]) {
- rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
- } else {
- rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
- rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
- }
- }
- } else {
- for (n = TX_4X4; n <= max_txfm_size; n++) {
- rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0], d[n]);
- rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1], d[n]);
+ for (n = TX_4X4; n <= max_txfm_size; n++) {
+ if (s[n]) {
+ rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
+ } else {
+ rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
+ rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
}
}
@@ -2537,7 +2489,7 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
int_mv this_mv;
int i;
- int zero_seen = FALSE;
+ int zero_seen = 0;
int best_index = 0;
int best_sad = INT_MAX;
int this_sad = INT_MAX;
@@ -2753,7 +2705,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
int idx, MV_REFERENCE_FRAME frame_type,
- int block_size,
+ enum BlockSize block_size,
int mb_row, int mb_col,
int_mv frame_nearest_mv[MAX_REF_FRAMES],
int_mv frame_near_mv[MAX_REF_FRAMES],
@@ -2854,6 +2806,44 @@ static void model_rd_from_var_lapndz(int var, int n, int qstep,
vp9_clear_system_state();
}
+static enum BlockSize y_to_uv_block_size(enum BlockSize bs) {
+ switch (bs) {
+ case BLOCK_64X64: return BLOCK_32X32;
+#if CONFIG_SBSEGMENT
+ case BLOCK_64X32: return BLOCK_32X16;
+ case BLOCK_32X64: return BLOCK_16X32;
+#endif
+ case BLOCK_32X32: return BLOCK_16X16;
+#if CONFIG_SBSEGMENT
+ case BLOCK_32X16: return BLOCK_16X8;
+ case BLOCK_16X32: return BLOCK_8X16;
+#endif
+ case BLOCK_16X16: return BLOCK_8X8;
+ default:
+ assert(0);
+ return -1;
+ }
+}
+
+static enum BlockSize y_bsizet_to_block_size(BLOCK_SIZE_TYPE bs) {
+ switch (bs) {
+ case BLOCK_SIZE_SB64X64: return BLOCK_64X64;
+#if CONFIG_SBSEGMENT
+ case BLOCK_SIZE_SB64X32: return BLOCK_64X32;
+ case BLOCK_SIZE_SB32X64: return BLOCK_32X64;
+#endif
+ case BLOCK_SIZE_SB32X32: return BLOCK_32X32;
+#if CONFIG_SBSEGMENT
+ case BLOCK_SIZE_SB32X16: return BLOCK_32X16;
+ case BLOCK_SIZE_SB16X32: return BLOCK_16X32;
+#endif
+ case BLOCK_SIZE_MB16X16: return BLOCK_16X16;
+ default:
+ assert(0);
+ return -1;
+ }
+}
+
static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE_TYPE bsize,
int *saddone, int near_sadidx[],
@@ -2872,9 +2862,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
[MAX_REF_FRAMES],
YV12_BUFFER_CONFIG *scaled_ref_frame,
int mb_row, int mb_col) {
- const enum BlockSize block_size =
- (bsize == BLOCK_SIZE_MB16X16) ? BLOCK_16X16 :
- (bsize == BLOCK_SIZE_SB32X32) ? BLOCK_32X32 : BLOCK_64X64;
+ const int bw = 1 << mb_width_log2(bsize), bh = 1 << mb_height_log2(bsize);
+ const enum BlockSize block_size = y_bsizet_to_block_size(bsize);
+ const enum BlockSize uv_block_size = y_to_uv_block_size(block_size);
VP9_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
@@ -3044,7 +3034,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
(mbmi->mv[1].as_mv.col & 15) == 0;
// Search for best switchable filter by checking the variance of
// pred error irrespective of whether the filter will be used
- if (bsize == BLOCK_SIZE_SB64X64) {
+ if (bsize != BLOCK_SIZE_MB16X16) {
int switchable_filter_index, newbest;
int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0;
int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0;
@@ -3070,20 +3060,26 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int tmp_rate_y, tmp_rate_u, tmp_rate_v;
int tmp_dist_y, tmp_dist_u, tmp_dist_v;
vp9_build_inter_predictors_sb(xd, mb_row, mb_col, bsize);
- var = vp9_variance64x64(*(b->base_src), b->src_stride,
- xd->dst.y_buffer, xd->dst.y_stride, &sse);
+ var = cpi->fn_ptr[block_size].vf(*(b->base_src), b->src_stride,
+ xd->dst.y_buffer, xd->dst.y_stride,
+ &sse);
// Note: our transform coefficients are 8x those of an orthonormal
// transform. Hence the quantizer step is also 8x. To get the effective
// quantizer we need to divide by 8 before calling the modeling function.
- model_rd_from_var_lapndz(var, 64 * 64, xd->block[0].dequant[1] >> 3,
+ model_rd_from_var_lapndz(var, 16 * bw * 16 * bh,
+ xd->block[0].dequant[1] >> 3,
&tmp_rate_y, &tmp_dist_y);
- var = vp9_variance32x32(x->src.u_buffer, x->src.uv_stride,
- xd->dst.u_buffer, xd->dst.uv_stride, &sse);
- model_rd_from_var_lapndz(var, 32 * 32, xd->block[16].dequant[1] >> 3,
+ var = cpi->fn_ptr[uv_block_size].vf(x->src.u_buffer, x->src.uv_stride,
+ xd->dst.u_buffer, xd->dst.uv_stride,
+ &sse);
+ model_rd_from_var_lapndz(var, 8 * bw * 8 * bh,
+ xd->block[16].dequant[1] >> 3,
&tmp_rate_u, &tmp_dist_u);
- var = vp9_variance32x32(x->src.v_buffer, x->src.uv_stride,
- xd->dst.v_buffer, xd->dst.uv_stride, &sse);
- model_rd_from_var_lapndz(var, 32 * 32, xd->block[20].dequant[1] >> 3,
+ var = cpi->fn_ptr[uv_block_size].vf(x->src.v_buffer, x->src.uv_stride,
+ xd->dst.v_buffer, xd->dst.uv_stride,
+ &sse);
+ model_rd_from_var_lapndz(var, 8 * bw * 8 * bh,
+ xd->block[20].dequant[1] >> 3,
&tmp_rate_v, &tmp_dist_v);
rd = RDCOST(x->rdmult, x->rddiv,
rs + tmp_rate_y + tmp_rate_u + tmp_rate_v,
@@ -3107,97 +3103,18 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
(cm->mcomp_filter_type != SWITCHABLE &&
cm->mcomp_filter_type == mbmi->interp_filter)) {
int i;
- for (i = 0; i < 64; ++i)
- vpx_memcpy(tmp_ybuf + i * 64,
+ for (i = 0; i < 16 * bh; ++i)
+ vpx_memcpy(tmp_ybuf + i * 16 * bw,
xd->dst.y_buffer + i * xd->dst.y_stride,
- sizeof(unsigned char) * 64);
- for (i = 0; i < 32; ++i)
- vpx_memcpy(tmp_ubuf + i * 32,
+ sizeof(unsigned char) * 16 * bw);
+ for (i = 0; i < 8 * bh; ++i)
+ vpx_memcpy(tmp_ubuf + i * 8 * bw,
xd->dst.u_buffer + i * xd->dst.uv_stride,
- sizeof(unsigned char) * 32);
- for (i = 0; i < 32; ++i)
- vpx_memcpy(tmp_vbuf + i * 32,
+ sizeof(unsigned char) * 8 * bw);
+ for (i = 0; i < 8 * bh; ++i)
+ vpx_memcpy(tmp_vbuf + i * 8 * bw,
xd->dst.v_buffer + i * xd->dst.uv_stride,
- sizeof(unsigned char) * 32);
- pred_exists = 1;
- }
- interpolating_intpel_seen |=
- intpel_mv && vp9_is_interpolating_filter[mbmi->interp_filter];
- }
- } else if (bsize == BLOCK_SIZE_SB32X32) {
- int switchable_filter_index, newbest;
- int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0;
- int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0;
- for (switchable_filter_index = 0;
- switchable_filter_index < VP9_SWITCHABLE_FILTERS;
- ++switchable_filter_index) {
- int rs = 0;
- mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index];
- vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
- if (cpi->common.mcomp_filter_type == SWITCHABLE) {
- const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
- const int m = vp9_switchable_interp_map[mbmi->interp_filter];
- rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
- }
- if (interpolating_intpel_seen && intpel_mv &&
- vp9_is_interpolating_filter[mbmi->interp_filter]) {
- rd = RDCOST(x->rdmult, x->rddiv,
- rs + tmp_rate_y_i + tmp_rate_u_i + tmp_rate_v_i,
- tmp_dist_y_i + tmp_dist_u_i + tmp_dist_v_i);
- } else {
- unsigned int sse, var;
- int tmp_rate_y, tmp_rate_u, tmp_rate_v;
- int tmp_dist_y, tmp_dist_u, tmp_dist_v;
- vp9_build_inter_predictors_sb(xd, mb_row, mb_col, bsize);
- var = vp9_variance32x32(*(b->base_src), b->src_stride,
- xd->dst.y_buffer, xd->dst.y_stride, &sse);
- // Note our transform coeffs are 8 times an orthogonal transform.
- // Hence quantizer step is also 8 times. To get effective quantizer
- // we need to divide by 8 before sending to modeling function.
- model_rd_from_var_lapndz(var, 32 * 32, xd->block[0].dequant[1] >> 3,
- &tmp_rate_y, &tmp_dist_y);
- var = vp9_variance16x16(x->src.u_buffer, x->src.uv_stride,
- xd->dst.u_buffer, xd->dst.uv_stride, &sse);
- model_rd_from_var_lapndz(var, 16 * 16, xd->block[16].dequant[1] >> 3,
- &tmp_rate_u, &tmp_dist_u);
- var = vp9_variance16x16(x->src.v_buffer, x->src.uv_stride,
- xd->dst.v_buffer, xd->dst.uv_stride, &sse);
- model_rd_from_var_lapndz(var, 16 * 16, xd->block[20].dequant[1] >> 3,
- &tmp_rate_v, &tmp_dist_v);
- rd = RDCOST(x->rdmult, x->rddiv,
- rs + tmp_rate_y + tmp_rate_u + tmp_rate_v,
- tmp_dist_y + tmp_dist_u + tmp_dist_v);
- if (!interpolating_intpel_seen && intpel_mv &&
- vp9_is_interpolating_filter[mbmi->interp_filter]) {
- tmp_rate_y_i = tmp_rate_y;
- tmp_rate_u_i = tmp_rate_u;
- tmp_rate_v_i = tmp_rate_v;
- tmp_dist_y_i = tmp_dist_y;
- tmp_dist_u_i = tmp_dist_u;
- tmp_dist_v_i = tmp_dist_v;
- }
- }
- newbest = (switchable_filter_index == 0 || rd < best_rd);
- if (newbest) {
- best_rd = rd;
- *best_filter = mbmi->interp_filter;
- }
- if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
- (cm->mcomp_filter_type != SWITCHABLE &&
- cm->mcomp_filter_type == mbmi->interp_filter)) {
- int i;
- for (i = 0; i < 32; ++i)
- vpx_memcpy(tmp_ybuf + i * 64,
- xd->dst.y_buffer + i * xd->dst.y_stride,
- sizeof(unsigned char) * 32);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(tmp_ubuf + i * 32,
- xd->dst.u_buffer + i * xd->dst.uv_stride,
- sizeof(unsigned char) * 16);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(tmp_vbuf + i * 32,
- xd->dst.v_buffer + i * xd->dst.uv_stride,
- sizeof(unsigned char) * 16);
+ sizeof(unsigned char) * 8 * bw);
pred_exists = 1;
}
interpolating_intpel_seen |=
@@ -3207,7 +3124,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int switchable_filter_index, newbest;
int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0;
int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0;
- assert(bsize == BLOCK_SIZE_MB16X16);
for (switchable_filter_index = 0;
switchable_filter_index < VP9_SWITCHABLE_FILTERS;
++switchable_filter_index) {
@@ -3286,26 +3202,17 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
if (pred_exists) {
- if (bsize == BLOCK_SIZE_SB64X64) {
- for (i = 0; i < 64; ++i)
- vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride, tmp_ybuf + i * 64,
- sizeof(unsigned char) * 64);
- for (i = 0; i < 32; ++i)
- vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride, tmp_ubuf + i * 32,
- sizeof(unsigned char) * 32);
- for (i = 0; i < 32; ++i)
- vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride, tmp_vbuf + i * 32,
- sizeof(unsigned char) * 32);
- } else if (bsize == BLOCK_SIZE_SB32X32) {
- for (i = 0; i < 32; ++i)
- vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride, tmp_ybuf + i * 64,
- sizeof(unsigned char) * 32);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride, tmp_ubuf + i * 32,
- sizeof(unsigned char) * 16);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride, tmp_vbuf + i * 32,
- sizeof(unsigned char) * 16);
+ // FIXME(rbultje): mb code still predicts into xd->predictor
+ if (bsize != BLOCK_SIZE_MB16X16) {
+ for (i = 0; i < bh * 16; ++i)
+ vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride,
+ tmp_ybuf + i * bw * 16, sizeof(unsigned char) * bw * 16);
+ for (i = 0; i < bh * 8; ++i)
+ vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride,
+ tmp_ubuf + i * bw * 8, sizeof(unsigned char) * bw * 8);
+ for (i = 0; i < bh * 8; ++i)
+ vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride,
+ tmp_vbuf + i * bw * 8, sizeof(unsigned char) * bw * 8);
} else {
vpx_memcpy(xd->predictor, tmp_ybuf, sizeof(unsigned char) * 256);
vpx_memcpy(xd->predictor + 256, tmp_ubuf, sizeof(unsigned char) * 64);
@@ -3340,14 +3247,11 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (threshold < x->encode_breakout)
threshold = x->encode_breakout;
- if (bsize == BLOCK_SIZE_SB64X64) {
- var = vp9_variance64x64(*(b->base_src), b->src_stride,
- xd->dst.y_buffer, xd->dst.y_stride, &sse);
- } else if (bsize == BLOCK_SIZE_SB32X32) {
- var = vp9_variance32x32(*(b->base_src), b->src_stride,
- xd->dst.y_buffer, xd->dst.y_stride, &sse);
+ if (bsize != BLOCK_SIZE_MB16X16) {
+ var = cpi->fn_ptr[block_size].vf(*(b->base_src), b->src_stride,
+ xd->dst.y_buffer, xd->dst.y_stride,
+ &sse);
} else {
- assert(bsize == BLOCK_SIZE_MB16X16);
var = vp9_variance16x16(*(b->base_src), b->src_stride,
xd->predictor, 16, &sse);
}
@@ -3361,23 +3265,23 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// Check u and v to make sure skip is ok
int sse2;
- if (bsize == BLOCK_SIZE_SB64X64) {
+ if (bsize != BLOCK_SIZE_MB16X16) {
unsigned int sse2u, sse2v;
- var = vp9_variance32x32(x->src.u_buffer, x->src.uv_stride,
- xd->dst.u_buffer, xd->dst.uv_stride, &sse2u);
- var = vp9_variance32x32(x->src.v_buffer, x->src.uv_stride,
- xd->dst.v_buffer, xd->dst.uv_stride, &sse2v);
+ // FIXME(rbultje): mb predictors predict into xd->predictor
+ var = cpi->fn_ptr[uv_block_size].vf(x->src.u_buffer, x->src.uv_stride,
+ xd->dst.u_buffer,
+ xd->dst.uv_stride, &sse2u);
+ var = cpi->fn_ptr[uv_block_size].vf(x->src.v_buffer, x->src.uv_stride,
+ xd->dst.v_buffer,
+ xd->dst.uv_stride, &sse2v);
sse2 = sse2u + sse2v;
- } else if (bsize == BLOCK_SIZE_SB32X32) {
+ } else {
unsigned int sse2u, sse2v;
- var = vp9_variance16x16(x->src.u_buffer, x->src.uv_stride,
- xd->dst.u_buffer, xd->dst.uv_stride, &sse2u);
- var = vp9_variance16x16(x->src.v_buffer, x->src.uv_stride,
- xd->dst.v_buffer, xd->dst.uv_stride, &sse2v);
+ var = vp9_variance8x8(x->src.u_buffer, x->src.uv_stride,
+ xd->predictor + 256, 8, &sse2u);
+ var = vp9_variance8x8(x->src.v_buffer, x->src.uv_stride,
+ xd->predictor + 320, 8, &sse2v);
sse2 = sse2u + sse2v;
- } else {
- assert(bsize == BLOCK_SIZE_MB16X16);
- sse2 = vp9_uvsse(x);
}
if (sse2 * 2 < threshold) {
@@ -3954,44 +3858,41 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// because there are no non-zero coefficients, and make any
// necessary adjustment for rate. Ignore if skip is coded at
// segment level, as the cost won't have been added in.
- if (cpi->common.mb_no_coeff_skip) {
- int mb_skip_allowed;
+ int mb_skip_allowed;
- // Is Mb level skip allowed (i.e. not coded at segment level).
- mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
+ // Is Mb level skip allowed (i.e. not coded at segment level).
+ mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
- if (skippable) {
- mbmi->mb_skip_coeff = 1;
+ if (skippable) {
+ mbmi->mb_skip_coeff = 1;
- // Back out the coefficient coding costs
- rate2 -= (rate_y + rate_uv);
- // for best_yrd calculation
- rate_uv = 0;
+ // Back out the coefficient coding costs
+ rate2 -= (rate_y + rate_uv);
+ // for best_yrd calculation
+ rate_uv = 0;
- if (mb_skip_allowed) {
- int prob_skip_cost;
+ if (mb_skip_allowed) {
+ int prob_skip_cost;
- // Cost the skip mb case
- vp9_prob skip_prob =
- vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP);
+ // Cost the skip mb case
+ vp9_prob skip_prob =
+ vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP);
- if (skip_prob) {
- prob_skip_cost = vp9_cost_bit(skip_prob, 1);
- rate2 += prob_skip_cost;
- other_cost += prob_skip_cost;
- }
- }
- }
- // Add in the cost of the no skip flag.
- else {
- mbmi->mb_skip_coeff = 0;
- if (mb_skip_allowed) {
- int prob_skip_cost = vp9_cost_bit(
- vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP), 0);
+ if (skip_prob) {
+ prob_skip_cost = vp9_cost_bit(skip_prob, 1);
rate2 += prob_skip_cost;
other_cost += prob_skip_cost;
}
}
+ } else {
+ // Add in the cost of the no skip flag.
+ mbmi->mb_skip_coeff = 0;
+ if (mb_skip_allowed) {
+ int prob_skip_cost = vp9_cost_bit(
+ vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP), 0);
+ rate2 += prob_skip_cost;
+ other_cost += prob_skip_cost;
+ }
}
// Calculate the final RD estimate for this mode.
@@ -4181,8 +4082,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->ref_frame = ALTREF_FRAME;
mbmi->mv[0].as_int = 0;
mbmi->uv_mode = DC_PRED;
- mbmi->mb_skip_coeff =
- (cpi->common.mb_no_coeff_skip) ? 1 : 0;
+ mbmi->mb_skip_coeff = 1;
mbmi->partitioning = 0;
set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
scale_factor);
@@ -4261,22 +4161,22 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int64_t txfm_cache[NB_TXFM_MODES], err;
int i;
+ ctx->skip = 0;
xd->mode_info_context->mbmi.mode = DC_PRED;
err = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
&dist_y, &y_skip, bsize, txfm_cache);
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
&dist_uv, &uv_skip, bsize);
- if (cpi->common.mb_no_coeff_skip && y_skip && uv_skip) {
+ if (y_skip && uv_skip) {
*returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
*returndist = dist_y + (dist_uv >> 2);
memset(ctx->txfm_rd_diff, 0,
sizeof(x->sb32_context[xd->sb_index].txfm_rd_diff));
} else {
- *returnrate = rate_y + rate_uv;
- if (cpi->common.mb_no_coeff_skip)
- *returnrate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
+ *returnrate = rate_y + rate_uv +
+ vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
*returndist = dist_y + (dist_uv >> 2);
for (i = 0; i < NB_TXFM_MODES; i++) {
ctx->txfm_rd_diff[i] = err - txfm_cache[i];
@@ -4310,6 +4210,7 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
TX_SIZE txfm_size_16x16, txfm_size_8x8;
int i;
+ x->mb_context[xd->sb_index][xd->mb_index].skip = 0;
mbmi->ref_frame = INTRA_FRAME;
mbmi->mode = DC_PRED;
for (i = 0; i <= TX_8X8; i++) {
@@ -4327,7 +4228,7 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE_MB16X16, txfm_cache[1]);
mode16x16 = mbmi->mode;
txfm_size_16x16 = mbmi->txfm_size;
- if (cpi->common.mb_no_coeff_skip && y_intra16x16_skippable &&
+ if (y_intra16x16_skippable &&
((cm->txfm_mode == ONLY_4X4 && uv_intra_skippable[TX_4X4]) ||
(cm->txfm_mode != ONLY_4X4 && uv_intra_skippable[TX_8X8]))) {
error16x16 -= RDCOST(x->rdmult, x->rddiv, rate16x16_tokenonly, 0);
@@ -4359,7 +4260,7 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
mbmi->mb_skip_coeff = 0;
- if (cpi->common.mb_no_coeff_skip && y_intra16x16_skippable &&
+ if (y_intra16x16_skippable &&
((cm->txfm_mode == ONLY_4X4 && uv_intra_skippable[TX_4X4]) ||
(cm->txfm_mode != ONLY_4X4 && uv_intra_skippable[TX_8X8]))) {
mbmi->mb_skip_coeff = 1;
@@ -4385,8 +4286,7 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
dist = dist16x16 + (distuv[mbmi->txfm_size != TX_4X4] >> 2);
mbmi->uv_mode = modeuv[mbmi->txfm_size != TX_4X4];
}
- if (cpi->common.mb_no_coeff_skip)
- rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
+ rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
} else {
if (error4x4 < error8x8) {
rate = rateuv[TX_4X4] + rate4x4;
@@ -4401,8 +4301,7 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
rate = rate8x8 + rateuv[TX_4X4];
dist = dist8x8 + (distuv[TX_4X4] >> 2);
}
- if (cpi->common.mb_no_coeff_skip)
- rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
+ rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
}
for (i = 0; i < NB_TXFM_MODES; i++) {
@@ -4420,8 +4319,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int *returndistortion,
BLOCK_SIZE_TYPE bsize,
PICK_MODE_CONTEXT *ctx) {
- const int block_size = (bsize == BLOCK_SIZE_SB64X64) ?
- BLOCK_64X64 : BLOCK_32X32;
+ const enum BlockSize block_size = y_bsizet_to_block_size(bsize);
VP9_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
@@ -4686,39 +4584,36 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// because there are no non-zero coefficients, and make any
// necessary adjustment for rate. Ignore if skip is coded at
// segment level, as the cost won't have been added in.
- if (cpi->common.mb_no_coeff_skip) {
- int mb_skip_allowed;
+ int mb_skip_allowed;
- // Is Mb level skip allowed (i.e. not coded at segment level).
- mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
+ // Is Mb level skip allowed (i.e. not coded at segment level).
+ mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
- if (skippable) {
- // Back out the coefficient coding costs
- rate2 -= (rate_y + rate_uv);
- // for best_yrd calculation
- rate_uv = 0;
+ if (skippable) {
+ // Back out the coefficient coding costs
+ rate2 -= (rate_y + rate_uv);
+ // for best_yrd calculation
+ rate_uv = 0;
- if (mb_skip_allowed) {
- int prob_skip_cost;
+ if (mb_skip_allowed) {
+ int prob_skip_cost;
- // Cost the skip mb case
- vp9_prob skip_prob =
- vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
+ // Cost the skip mb case
+ vp9_prob skip_prob =
+ vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
- if (skip_prob) {
- prob_skip_cost = vp9_cost_bit(skip_prob, 1);
- rate2 += prob_skip_cost;
- other_cost += prob_skip_cost;
- }
+ if (skip_prob) {
+ prob_skip_cost = vp9_cost_bit(skip_prob, 1);
+ rate2 += prob_skip_cost;
+ other_cost += prob_skip_cost;
}
}
+ } else if (mb_skip_allowed) {
// Add in the cost of the no skip flag.
- else if (mb_skip_allowed) {
- int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd,
- PRED_MBSKIP), 0);
- rate2 += prob_skip_cost;
- other_cost += prob_skip_cost;
- }
+ int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd,
+ PRED_MBSKIP), 0);
+ rate2 += prob_skip_cost;
+ other_cost += prob_skip_cost;
}
// Calculate the final RD estimate for this mode.
@@ -4886,7 +4781,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->second_ref_frame = INTRA_FRAME;
mbmi->mv[0].as_int = 0;
mbmi->uv_mode = DC_PRED;
- mbmi->mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
+ mbmi->mb_skip_coeff = 1;
mbmi->partitioning = 0;
mbmi->txfm_size = cm->txfm_mode == TX_MODE_SELECT ?
TX_32X32 : cm->txfm_mode;
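The handle_inter_mode() hunks above replace the 32x32/64x64 special cases
with arithmetic over the block's dimensions in macroblocks. A sketch of the
pixel counts fed to model_rd_from_var_lapndz(), assuming bw/bh are macroblock
counts and 4:2:0 chroma subsampling:

/* Y covers (16*bw) x (16*bh) pixels; each 4:2:0 chroma plane covers half
 * that per dimension, hence the 16*bw*16*bh and 8*bw*8*bh arguments. */
static int y_pixels(int bw, int bh)  { return (16 * bw) * (16 * bh); }
static int uv_pixels(int bw, int bh) { return (8 * bw) * (8 * bh); }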
diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad_c.c
index af5526dce..dbadaea74 100644
--- a/vp9/encoder/vp9_sad_c.c
+++ b/vp9/encoder/vp9_sad_c.c
@@ -23,6 +23,54 @@ unsigned int vp9_sad64x64_c(const uint8_t *src_ptr,
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 64, 64);
}
+#if CONFIG_SBSEGMENT
+unsigned int vp9_sad64x32_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t *ref_ptr,
+ int ref_stride,
+ unsigned int max_sad) {
+ return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 64, 32);
+}
+
+void vp9_sad64x32x4d_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[],
+ int ref_stride,
+ unsigned int *sad_array) {
+ sad_array[0] = vp9_sad64x32(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad64x32(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad64x32(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad64x32(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
+}
+
+unsigned int vp9_sad32x64_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t *ref_ptr,
+ int ref_stride,
+ unsigned int max_sad) {
+ return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 64);
+}
+
+void vp9_sad32x64x4d_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[],
+ int ref_stride,
+ unsigned int *sad_array) {
+ sad_array[0] = vp9_sad32x64(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad32x64(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad32x64(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad32x64(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
+}
+#endif
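The new *x4d wrappers above run the same SAD against four candidate
references. Their common shape, as a sketch (sad_fn_t mirrors the
vp9_sadMxN() signature used in this file; the typedef name is ours):

#include <stdint.h>

typedef unsigned int (*sad_fn_t)(const uint8_t *src, int src_stride,
                                 const uint8_t *ref, int ref_stride,
                                 unsigned int max_sad);

static void sad_x4d(sad_fn_t sad, const uint8_t *src, int src_stride,
                    const uint8_t *const ref[4], int ref_stride,
                    unsigned int sad_array[4]) {
  int i;
  for (i = 0; i < 4; ++i)  /* 0x7fffffff disables the early-exit bound */
    sad_array[i] = sad(src, src_stride, ref[i], ref_stride, 0x7fffffff);
}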
+
unsigned int vp9_sad32x32_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
@@ -31,6 +79,54 @@ unsigned int vp9_sad32x32_c(const uint8_t *src_ptr,
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 32);
}
+#if CONFIG_SBSEGMENT
+unsigned int vp9_sad32x16_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t *ref_ptr,
+ int ref_stride,
+ unsigned int max_sad) {
+ return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 16);
+}
+
+void vp9_sad32x16x4d_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[],
+ int ref_stride,
+ unsigned int *sad_array) {
+ sad_array[0] = vp9_sad32x16(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad32x16(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad32x16(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad32x16(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
+}
+
+unsigned int vp9_sad16x32_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t *ref_ptr,
+ int ref_stride,
+ unsigned int max_sad) {
+ return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 32);
+}
+
+void vp9_sad16x32x4d_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[],
+ int ref_stride,
+ unsigned int *sad_array) {
+ sad_array[0] = vp9_sad16x32(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad16x32(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad16x32(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad16x32(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
+}
+#endif
+
unsigned int vp9_sad16x16_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index a04a20c29..aac42f738 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -212,15 +212,15 @@ static void count_segs(VP9_COMP *cpi,
int *no_pred_segcounts,
int (*temporal_predictor_count)[2],
int (*t_unpred_seg_counts)[MAX_MB_SEGMENTS],
- int mb_size, int mb_row, int mb_col) {
+ int bw, int bh, int mb_row, int mb_col) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
const int segmap_index = mb_row * cm->mb_cols + mb_col;
const int segment_id = mi->mbmi.segment_id;
xd->mode_info_context = mi;
- set_mb_row(cm, xd, mb_row, mb_size);
- set_mb_col(cm, xd, mb_col, mb_size);
+ set_mb_row(cm, xd, mb_row, bh);
+ set_mb_col(cm, xd, mb_col, bw);
// Count the number of hits on each segment with no prediction
no_pred_segcounts[segment_id]++;
@@ -290,7 +290,22 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
mb_col < cm->cur_tile_mb_col_end; mb_col += 4, mi += 4) {
if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count,
- t_unpred_seg_counts, 4, mb_row, mb_col);
+ t_unpred_seg_counts, 4, 4, mb_row, mb_col);
+#if CONFIG_SBSEGMENT
+ } else if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X32) {
+ count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, 4, 2, mb_row, mb_col);
+ if (mb_row + 2 != cm->mb_rows)
+ count_segs(cpi, mi + 2 * mis, no_pred_segcounts,
+ temporal_predictor_count,
+ t_unpred_seg_counts, 4, 2, mb_row + 2, mb_col);
+ } else if (mi->mbmi.sb_type == BLOCK_SIZE_SB32X64) {
+ count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, 2, 4, mb_row, mb_col);
+ if (mb_col + 2 != cm->mb_cols)
+ count_segs(cpi, mi + 2, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, 2, 4, mb_row, mb_col + 2);
+#endif
} else {
for (i = 0; i < 4; i++) {
int x_idx = (i & 1) << 1, y_idx = i & 2;
@@ -301,11 +316,32 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
continue;
}
- if (sb_mi->mbmi.sb_type) {
- assert(sb_mi->mbmi.sb_type == BLOCK_SIZE_SB32X32);
+ if (sb_mi->mbmi.sb_type == BLOCK_SIZE_SB32X32) {
+ count_segs(cpi, sb_mi, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, 2, 2,
+ mb_row + y_idx, mb_col + x_idx);
+#if CONFIG_SBSEGMENT
+ } else if (sb_mi->mbmi.sb_type == BLOCK_SIZE_SB32X16) {
+ count_segs(cpi, sb_mi, no_pred_segcounts,
+ temporal_predictor_count,
+ t_unpred_seg_counts, 2, 1,
+ mb_row + y_idx, mb_col + x_idx);
+ if (mb_row + y_idx + 1 != cm->mb_rows)
+ count_segs(cpi, sb_mi + mis, no_pred_segcounts,
+ temporal_predictor_count,
+ t_unpred_seg_counts, 2, 1,
+ mb_row + y_idx + 1, mb_col + x_idx);
+ } else if (sb_mi->mbmi.sb_type == BLOCK_SIZE_SB16X32) {
count_segs(cpi, sb_mi, no_pred_segcounts,
- temporal_predictor_count, t_unpred_seg_counts, 2,
+ temporal_predictor_count,
+ t_unpred_seg_counts, 1, 2,
mb_row + y_idx, mb_col + x_idx);
+ if (mb_col + x_idx + 1 != cm->mb_cols)
+ count_segs(cpi, sb_mi + 1, no_pred_segcounts,
+ temporal_predictor_count,
+ t_unpred_seg_counts, 1, 2,
+ mb_row + y_idx, mb_col + x_idx + 1);
+#endif
} else {
int j;
@@ -322,7 +358,7 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
assert(mb_mi->mbmi.sb_type == BLOCK_SIZE_MB16X16);
count_segs(cpi, mb_mi, no_pred_segcounts,
temporal_predictor_count, t_unpred_seg_counts,
- 1, mb_row + y_idx_mb, mb_col + x_idx_mb);
+ 1, 1, mb_row + y_idx_mb, mb_col + x_idx_mb);
}
}
}
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 891eb25fd..c97f21e31 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -154,13 +154,50 @@ static void tokenize_b(VP9_COMP *cpi,
l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+#if CONFIG_SBSEGMENT
+ } else if (sb_type == BLOCK_SIZE_SB32X64) {
+ a = (ENTROPY_CONTEXT *)xd->above_context +
+ vp9_block2above_sb32x64[tx_size][ib];
+ l = (ENTROPY_CONTEXT *)xd->left_context +
+ vp9_block2left_sb32x64[tx_size][ib];
+ a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a2 = a3 = l2 = l3 = NULL;
+ } else if (sb_type == BLOCK_SIZE_SB64X32) {
+ a = (ENTROPY_CONTEXT *)xd->above_context +
+ vp9_block2above_sb64x32[tx_size][ib];
+ l = (ENTROPY_CONTEXT *)xd->left_context +
+ vp9_block2left_sb64x32[tx_size][ib];
+ a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a2 = a3 = l2 = l3 = NULL;
+#endif
} else if (sb_type == BLOCK_SIZE_SB32X32) {
a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above_sb[tx_size][ib];
l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb[tx_size][ib];
a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
a2 = a3 = l2 = l3 = NULL;
+#if CONFIG_SBSEGMENT
+ } else if (sb_type == BLOCK_SIZE_SB16X32) {
+ a = (ENTROPY_CONTEXT *)xd->above_context +
+ vp9_block2above_sb16x32[tx_size][ib];
+ l = (ENTROPY_CONTEXT *)xd->left_context +
+ vp9_block2left_sb16x32[tx_size][ib];
+ a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a1 = l1 = a2 = l2 = a3 = l3 = NULL;
+ } else if (sb_type == BLOCK_SIZE_SB32X16) {
+ a = (ENTROPY_CONTEXT *)xd->above_context +
+ vp9_block2above_sb32x16[tx_size][ib];
+ l = (ENTROPY_CONTEXT *)xd->left_context +
+ vp9_block2left_sb32x16[tx_size][ib];
+ a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a1 = l1 = a2 = l2 = a3 = l3 = NULL;
+#endif
} else {
+ assert(sb_type == BLOCK_SIZE_MB16X16);
a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above[tx_size][ib];
l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left[tx_size][ib];
a1 = l1 = a2 = l2 = a3 = l3 = NULL;
@@ -290,7 +327,7 @@ static void tokenize_b(VP9_COMP *cpi,
else
#endif
t->skip_eob_node = (c > 0) && (token_cache[c - 1] == 0);
- assert(vp9_coef_encodings[t->Token].Len - t->skip_eob_node > 0);
+ assert(vp9_coef_encodings[t->Token].len - t->skip_eob_node > 0);
if (!dry_run) {
++counts[type][ref][band][pt][token];
if (!t->skip_eob_node)
@@ -397,11 +434,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi,
if (mbmi->mb_skip_coeff) {
if (!dry_run)
cpi->skip_true_count[mb_skip_context] += skip_inc;
- if (!cm->mb_no_coeff_skip) {
- vp9_stuff_sb(cpi, xd, t, dry_run, bsize);
- } else {
- vp9_reset_sb_tokens_context(xd, bsize);
- }
+ vp9_reset_sb_tokens_context(xd, bsize);
if (dry_run)
*t = t_backup;
return;
@@ -490,11 +523,7 @@ void vp9_tokenize_mb(VP9_COMP *cpi,
if (xd->mode_info_context->mbmi.mb_skip_coeff) {
if (!dry_run)
cpi->skip_true_count[mb_skip_context] += skip_inc;
- if (!cpi->common.mb_no_coeff_skip) {
- vp9_stuff_mb(cpi, xd, t, dry_run);
- } else {
- vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_MB16X16);
- }
+ vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_MB16X16);
if (dry_run)
*t = t_backup;
diff --git a/vp9/encoder/vp9_treewriter.h b/vp9/encoder/vp9_treewriter.h
index 832471aa8..af2c122e0 100644
--- a/vp9/encoder/vp9_treewriter.h
+++ b/vp9/encoder/vp9_treewriter.h
@@ -23,7 +23,8 @@ typedef BOOL_CODER vp9_writer;
#define vp9_write encode_bool
#define vp9_write_literal vp9_encode_value
-#define vp9_write_bit(W, V) vp9_write(W, V, vp9_prob_half)
+#define vp9_write_bit(w, v) vp9_write((w), (v), vp9_prob_half)
+#define vp9_write_prob(w, v) vp9_write_literal((w), (v), 8)
/* Approximate length of an encoded bool in 256ths of a bit at given prob */
@@ -67,11 +68,9 @@ static INLINE void treed_write(vp9_writer *const w,
} while (n);
}
-static INLINE void write_token(vp9_writer *const w,
- vp9_tree t,
- const vp9_prob *const p,
- vp9_token *const x) {
- treed_write(w, t, p, x->value, x->Len);
+static INLINE void write_token(vp9_writer *w, vp9_tree t, const vp9_prob *p,
+ const struct vp9_token *x) {
+ treed_write(w, t, p, x->value, x->len);
}
static INLINE int treed_cost(vp9_tree t,
@@ -91,10 +90,9 @@ static INLINE int treed_cost(vp9_tree t,
return c;
}
-static INLINE int cost_token(vp9_tree t,
- const vp9_prob *const p,
- vp9_token *const x) {
- return treed_cost(t, p, x->value, x->Len);
+static INLINE int cost_token(vp9_tree t, const vp9_prob *p,
+ const struct vp9_token *x) {
+ return treed_cost(t, p, x->value, x->len);
}
/* Fill array of costs for all possible token values. */
diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c
index d07a65b45..fb66f4f0b 100644
--- a/vp9/encoder/vp9_variance_c.c
+++ b/vp9/encoder/vp9_variance_c.c
@@ -24,6 +24,144 @@ unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
return sum;
}
+#if CONFIG_SBSEGMENT
+unsigned int vp9_variance64x32_c(const uint8_t *src_ptr,
+ int source_stride,
+ const uint8_t *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+
+ variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, &var, &avg);
+ *sse = var;
+ return (var - (((int64_t)avg * avg) >> 11));
+}
+
+unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
+ uint8_t temp2[68 * 64];
+ const int16_t *hfilter, *vfilter;
+
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 33, 64, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
+
+ return vp9_variance64x32_c(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
+}
+
+unsigned int vp9_variance32x64_c(const uint8_t *src_ptr,
+ int source_stride,
+ const uint8_t *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+
+ variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, &var, &avg);
+ *sse = var;
+ return (var - (((int64_t)avg * avg) >> 11));
+}
+
+unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
+ uint8_t temp2[68 * 64];
+ const int16_t *hfilter, *vfilter;
+
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 65, 32, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 32, vfilter);
+
+ return vp9_variance32x64_c(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
+}
+
+unsigned int vp9_variance32x16_c(const uint8_t *src_ptr,
+ int source_stride,
+ const uint8_t *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+
+ variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, &var, &avg);
+ *sse = var;
+ return (var - (((int64_t)avg * avg) >> 9));
+}
+
+unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
+ uint8_t temp2[36 * 32];
+ const int16_t *hfilter, *vfilter;
+
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 17, 32, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
+
+ return vp9_variance32x16_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
+}
+
+unsigned int vp9_variance16x32_c(const uint8_t *src_ptr,
+ int source_stride,
+ const uint8_t *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+
+ variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, &var, &avg);
+ *sse = var;
+ return (var - (((int64_t)avg * avg) >> 9));
+}
+
+unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
+ uint8_t temp2[36 * 32];
+ const int16_t *hfilter, *vfilter;
+
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 33, 16, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 16, vfilter);
+
+ return vp9_variance16x32_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
+}
+#endif
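The return expressions in the variance functions above use the single-pass
shortcut SSD - sum^2/N (assuming variance() accumulates the raw difference
sum in the 'avg' local, as the scaling implies); N is a power of two, so the
division becomes a shift. A sanity check for the shift amounts used:

#include <assert.h>

static void check_variance_shifts(void) {
  assert((64 * 32) == (1 << 11));  /* 64x32 and 32x64 variants use >> 11 */
  assert((32 * 16) == (1 << 9));   /* 32x16 and 16x32 variants use >> 9  */
}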
+
unsigned int vp9_variance64x64_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
@@ -139,17 +277,18 @@ unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
int dst_pixels_per_line,
unsigned int *sse) {
uint8_t temp2[20 * 16];
- const int16_t *HFilter, *VFilter;
- uint16_t FData3[5 * 4]; // Temp data bufffer used in filtering
+ const int16_t *hfilter, *vfilter;
+ uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering
- HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
// First filter 1-D horizontally
- var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter);
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 5, 4, hfilter);
// Now filter vertically
- var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter);
return vp9_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
}
@@ -162,15 +301,16 @@ unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse) {
- uint16_t FData3[9 * 8]; // Temp data bufffer used in filtering
+ uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering
uint8_t temp2[20 * 16];
- const int16_t *HFilter, *VFilter;
+ const int16_t *hfilter, *vfilter;
- HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
- var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
- var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 9, 8, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
return vp9_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}
@@ -182,15 +322,16 @@ unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse) {
- uint16_t FData3[17 * 16]; // Temp data bufffer used in filtering
+ uint16_t fdata3[17 * 16]; // Temp data buffer used in filtering
uint8_t temp2[20 * 16];
- const int16_t *HFilter, *VFilter;
+ const int16_t *hfilter, *vfilter;
- HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
- var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
- var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 17, 16, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);
return vp9_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}
@@ -202,16 +343,16 @@ unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse) {
- uint16_t FData3[65 * 64]; // Temp data bufffer used in filtering
+ uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
uint8_t temp2[68 * 64];
- const int16_t *HFilter, *VFilter;
+ const int16_t *hfilter, *vfilter;
- HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
- var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
- 1, 65, 64, HFilter);
- var_filter_block2d_bil_second_pass(FData3, temp2, 64, 64, 64, 64, VFilter);
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 65, 64, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
return vp9_variance64x64_c(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
}
@@ -223,15 +364,16 @@ unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse) {
- uint16_t FData3[33 * 32]; // Temp data bufffer used in filtering
+ uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
uint8_t temp2[36 * 32];
- const int16_t *HFilter, *VFilter;
+ const int16_t *hfilter, *vfilter;
- HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
- var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 33, 32, HFilter);
- var_filter_block2d_bil_second_pass(FData3, temp2, 32, 32, 32, 32, VFilter);
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 33, 32, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
return vp9_variance32x32_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
}
@@ -363,15 +505,16 @@ unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse) {
- uint16_t FData3[16 * 9]; // Temp data bufffer used in filtering
+ uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering
uint8_t temp2[20 * 16];
- const int16_t *HFilter, *VFilter;
+ const int16_t *hfilter, *vfilter;
- HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
- var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
- var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 9, 16, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
return vp9_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}
@@ -383,16 +526,16 @@ unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse) {
- uint16_t FData3[9 * 16]; // Temp data bufffer used in filtering
+ uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering
uint8_t temp2[20 * 16];
- const int16_t *HFilter, *VFilter;
+ const int16_t *hfilter, *vfilter;
- HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
- var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
- 1, 17, 8, HFilter);
- var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 17, 8, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
return vp9_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}
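For reference, callers pass xoffset and yoffset in eighth-pel units (0..7); offset 0 selects the identity filter pair, so a full-pel call reduces to the plain variance. A hypothetical call site (the wrapper name is illustrative):

#include <stdint.h>

// Variance of a 16x16 block predicted at half-pel x, quarter-pel y.
static unsigned int subpel_var_example(const uint8_t *src, int src_stride,
                                       const uint8_t *ref, int ref_stride) {
  unsigned int sse;
  return vp9_sub_pixel_variance16x16_c(src, src_stride,
                                       4 /* x = 4/8 pel */,
                                       2 /* y = 2/8 pel */,
                                       ref, ref_stride, &sse);
}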
diff --git a/vp9/encoder/x86/vp9_dct_sse2.asm b/vp9/encoder/x86/vp9_dct_sse2.asm
deleted file mode 100644
index bbd6086da..000000000
--- a/vp9/encoder/x86/vp9_dct_sse2.asm
+++ /dev/null
@@ -1,432 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-%macro STACK_FRAME_CREATE 0
-%if ABI_IS_32BIT
- %define input rsi
- %define output rdi
- %define pitch rax
- push rbp
- mov rbp, rsp
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0)
- mov rdi, arg(1)
-
- movsxd rax, dword ptr arg(2)
- lea rcx, [rsi + rax*2]
-%else
- %if LIBVPX_YASM_WIN64
- %define input rcx
- %define output rdx
- %define pitch r8
- SAVE_XMM 7, u
- %else
- %define input rdi
- %define output rsi
- %define pitch rdx
- %endif
-%endif
-%endmacro
-
-%macro STACK_FRAME_DESTROY 0
- %define input
- %define output
- %define pitch
-
-%if ABI_IS_32BIT
- pop rdi
- pop rsi
- RESTORE_GOT
- pop rbp
-%else
- %if LIBVPX_YASM_WIN64
- RESTORE_XMM
- %endif
-%endif
- ret
-%endmacro
-
-;void vp9_short_fdct4x4_sse2(short *input, short *output, int pitch)
-global sym(vp9_short_fdct4x4_sse2) PRIVATE
-sym(vp9_short_fdct4x4_sse2):
-
- STACK_FRAME_CREATE
-
- movq xmm0, MMWORD PTR[input ] ;03 02 01 00
- movq xmm2, MMWORD PTR[input+ pitch] ;13 12 11 10
- lea input, [input+2*pitch]
- movq xmm1, MMWORD PTR[input ] ;23 22 21 20
- movq xmm3, MMWORD PTR[input+ pitch] ;33 32 31 30
-
- punpcklqdq xmm0, xmm2 ;13 12 11 10 03 02 01 00
- punpcklqdq xmm1, xmm3 ;33 32 31 30 23 22 21 20
-
- movdqa xmm2, xmm0
- punpckldq xmm0, xmm1 ;23 22 03 02 21 20 01 00
- punpckhdq xmm2, xmm1 ;33 32 13 12 31 30 11 10
- movdqa xmm1, xmm0
- punpckldq xmm0, xmm2 ;31 21 30 20 11 10 01 00
- pshufhw xmm1, xmm1, 0b1h ;22 23 02 03 xx xx xx xx
- pshufhw xmm2, xmm2, 0b1h ;32 33 12 13 xx xx xx xx
-
- punpckhdq xmm1, xmm2 ;32 33 22 23 12 13 02 03
- movdqa xmm3, xmm0
- paddw xmm0, xmm1 ;b1 a1 b1 a1 b1 a1 b1 a1
- psubw xmm3, xmm1 ;c1 d1 c1 d1 c1 d1 c1 d1
- psllw xmm0, 3 ;b1 <<= 3 a1 <<= 3
- psllw xmm3, 3 ;c1 <<= 3 d1 <<= 3
-
- movdqa xmm1, xmm0
- pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1
- pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1
- movdqa xmm4, xmm3
- pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352
- pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)];d1*2217 - c1*5352
-
- paddd xmm3, XMMWORD PTR[GLOBAL(_14500)]
- paddd xmm4, XMMWORD PTR[GLOBAL(_7500)]
- psrad xmm3, 12 ;(c1 * 2217 + d1 * 5352 + 14500)>>12
- psrad xmm4, 12 ;(d1 * 2217 - c1 * 5352 + 7500)>>12
-
- packssdw xmm0, xmm1 ;op[2] op[0]
- packssdw xmm3, xmm4 ;op[3] op[1]
- ; 23 22 21 20 03 02 01 00
- ;
- ; 33 32 31 30 13 12 11 10
- ;
- movdqa xmm2, xmm0
- punpcklqdq xmm0, xmm3 ;13 12 11 10 03 02 01 00
- punpckhqdq xmm2, xmm3 ;23 22 21 20 33 32 31 30
-
- movdqa xmm3, xmm0
- punpcklwd xmm0, xmm2 ;32 30 22 20 12 10 02 00
- punpckhwd xmm3, xmm2 ;33 31 23 21 13 11 03 01
- movdqa xmm2, xmm0
- punpcklwd xmm0, xmm3 ;13 12 11 10 03 02 01 00
- punpckhwd xmm2, xmm3 ;33 32 31 30 23 22 21 20
-
- movdqa xmm5, XMMWORD PTR[GLOBAL(_7)]
- pshufd xmm2, xmm2, 04eh
- movdqa xmm3, xmm0
- paddw xmm0, xmm2 ;b1 b1 b1 b1 a1 a1 a1 a1
- psubw xmm3, xmm2 ;c1 c1 c1 c1 d1 d1 d1 d1
-
- pshufd xmm0, xmm0, 0d8h ;b1 b1 a1 a1 b1 b1 a1 a1
- movdqa xmm2, xmm3 ;save d1 for compare
- pshufd xmm3, xmm3, 0d8h ;c1 c1 d1 d1 c1 c1 d1 d1
- pshuflw xmm0, xmm0, 0d8h ;b1 b1 a1 a1 b1 a1 b1 a1
- pshuflw xmm3, xmm3, 0d8h ;c1 c1 d1 d1 c1 d1 c1 d1
- pshufhw xmm0, xmm0, 0d8h ;b1 a1 b1 a1 b1 a1 b1 a1
- pshufhw xmm3, xmm3, 0d8h ;c1 d1 c1 d1 c1 d1 c1 d1
- movdqa xmm1, xmm0
- pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1
- pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1
-
- pxor xmm4, xmm4 ;zero out for compare
- paddd xmm0, xmm5
- paddd xmm1, xmm5
- pcmpeqw xmm2, xmm4
- psrad xmm0, 4 ;(a1 + b1 + 7)>>4
- psrad xmm1, 4 ;(a1 - b1 + 7)>>4
- pandn xmm2, XMMWORD PTR[GLOBAL(_cmp_mask)] ;clear upper,
- ;and keep bit 0 of lower
-
- movdqa xmm4, xmm3
- pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352
- pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)] ;d1*2217 - c1*5352
- paddd xmm3, XMMWORD PTR[GLOBAL(_12000)]
- paddd xmm4, XMMWORD PTR[GLOBAL(_51000)]
- packssdw xmm0, xmm1 ;op[8] op[0]
- psrad xmm3, 16 ;(c1 * 2217 + d1 * 5352 + 12000)>>16
- psrad xmm4, 16 ;(d1 * 2217 - c1 * 5352 + 51000)>>16
-
- packssdw xmm3, xmm4 ;op[12] op[4]
- movdqa xmm1, xmm0
- paddw xmm3, xmm2 ;op[4] += (d1!=0)
- punpcklqdq xmm0, xmm3 ;op[4] op[0]
- punpckhqdq xmm1, xmm3 ;op[12] op[8]
-
- movdqa XMMWORD PTR[output + 0], xmm0
- movdqa XMMWORD PTR[output + 16], xmm1
-
- STACK_FRAME_DESTROY
-
-;void vp9_short_fdct8x4_sse2(short *input, short *output, int pitch)
-global sym(vp9_short_fdct8x4_sse2) PRIVATE
-sym(vp9_short_fdct8x4_sse2):
-
- STACK_FRAME_CREATE
-
- ; read the input data
- movdqa xmm0, [input ]
- movdqa xmm2, [input+ pitch]
- lea input, [input+2*pitch]
- movdqa xmm4, [input ]
- movdqa xmm3, [input+ pitch]
-
- ; transpose for the first stage
- movdqa xmm1, xmm0 ; 00 01 02 03 04 05 06 07
- movdqa xmm5, xmm4 ; 20 21 22 23 24 25 26 27
-
- punpcklwd xmm0, xmm2 ; 00 10 01 11 02 12 03 13
- punpckhwd xmm1, xmm2 ; 04 14 05 15 06 16 07 17
-
- punpcklwd xmm4, xmm3 ; 20 30 21 31 22 32 23 33
- punpckhwd xmm5, xmm3 ; 24 34 25 35 26 36 27 37
-
- movdqa xmm2, xmm0 ; 00 10 01 11 02 12 03 13
- punpckldq xmm0, xmm4 ; 00 10 20 30 01 11 21 31
-
- punpckhdq xmm2, xmm4 ; 02 12 22 32 03 13 23 33
-
- movdqa xmm4, xmm1 ; 04 14 05 15 06 16 07 17
- punpckldq xmm4, xmm5 ; 04 14 24 34 05 15 25 35
-
- punpckhdq xmm1, xmm5 ; 06 16 26 36 07 17 27 37
- movdqa xmm3, xmm2 ; 02 12 22 32 03 13 23 33
-
- punpckhqdq xmm3, xmm1 ; 03 13 23 33 07 17 27 37
- punpcklqdq xmm2, xmm1 ; 02 12 22 32 06 16 26 36
-
- movdqa xmm1, xmm0 ; 00 10 20 30 01 11 21 31
- punpcklqdq xmm0, xmm4 ; 00 10 20 30 04 14 24 34
-
- punpckhqdq xmm1, xmm4 ; 01 11 21 32 05 15 25 35
-
- ; xmm0 0
- ; xmm1 1
- ; xmm2 2
- ; xmm3 3
-
- ; first stage
- movdqa xmm5, xmm0
- movdqa xmm4, xmm1
-
- paddw xmm0, xmm3 ; a1 = 0 + 3
- paddw xmm1, xmm2 ; b1 = 1 + 2
-
- psubw xmm4, xmm2 ; c1 = 1 - 2
- psubw xmm5, xmm3 ; d1 = 0 - 3
-
- psllw xmm5, 3
- psllw xmm4, 3
-
- psllw xmm0, 3
- psllw xmm1, 3
-
- ; output 0 and 2
- movdqa xmm2, xmm0 ; a1
-
- paddw xmm0, xmm1 ; op[0] = a1 + b1
- psubw xmm2, xmm1 ; op[2] = a1 - b1
-
- ; output 1 and 3
- ; interleave c1, d1
- movdqa xmm1, xmm5 ; d1
- punpcklwd xmm1, xmm4 ; c1 d1
- punpckhwd xmm5, xmm4 ; c1 d1
-
- movdqa xmm3, xmm1
- movdqa xmm4, xmm5
-
- pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
- pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
-
- pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
- pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
-
- paddd xmm1, XMMWORD PTR[GLOBAL(_14500)]
- paddd xmm4, XMMWORD PTR[GLOBAL(_14500)]
- paddd xmm3, XMMWORD PTR[GLOBAL(_7500)]
- paddd xmm5, XMMWORD PTR[GLOBAL(_7500)]
-
- psrad xmm1, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12
- psrad xmm4, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12
- psrad xmm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12
- psrad xmm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12
-
- packssdw xmm1, xmm4 ; op[1]
- packssdw xmm3, xmm5 ; op[3]
-
- ; done with vertical
- ; transpose for the second stage
- movdqa xmm4, xmm0 ; 00 10 20 30 04 14 24 34
- movdqa xmm5, xmm2 ; 02 12 22 32 06 16 26 36
-
- punpcklwd xmm0, xmm1 ; 00 01 10 11 20 21 30 31
- punpckhwd xmm4, xmm1 ; 04 05 14 15 24 25 34 35
-
- punpcklwd xmm2, xmm3 ; 02 03 12 13 22 23 32 33
- punpckhwd xmm5, xmm3 ; 06 07 16 17 26 27 36 37
-
- movdqa xmm1, xmm0 ; 00 01 10 11 20 21 30 31
- punpckldq xmm0, xmm2 ; 00 01 02 03 10 11 12 13
-
- punpckhdq xmm1, xmm2 ; 20 21 22 23 30 31 32 33
-
- movdqa xmm2, xmm4 ; 04 05 14 15 24 25 34 35
- punpckldq xmm2, xmm5 ; 04 05 06 07 14 15 16 17
-
- punpckhdq xmm4, xmm5 ; 24 25 26 27 34 35 36 37
- movdqa xmm3, xmm1 ; 20 21 22 23 30 31 32 33
-
- punpckhqdq xmm3, xmm4 ; 30 31 32 33 34 35 36 37
- punpcklqdq xmm1, xmm4 ; 20 21 22 23 24 25 26 27
-
- movdqa xmm4, xmm0 ; 00 01 02 03 10 11 12 13
- punpcklqdq xmm0, xmm2 ; 00 01 02 03 04 05 06 07
-
- punpckhqdq xmm4, xmm2 ; 10 11 12 13 14 15 16 17
-
- ; xmm0 0
- ; xmm1 4
- ; xmm2 1
- ; xmm3 3
-
- movdqa xmm5, xmm0
- movdqa xmm2, xmm1
-
- paddw xmm0, xmm3 ; a1 = 0 + 3
- paddw xmm1, xmm4 ; b1 = 1 + 2
-
- psubw xmm4, xmm2 ; c1 = 1 - 2
- psubw xmm5, xmm3 ; d1 = 0 - 3
-
- pxor xmm6, xmm6 ; zero out for compare
-
- pcmpeqw xmm6, xmm5 ; d1 != 0
-
- pandn xmm6, XMMWORD PTR[GLOBAL(_cmp_mask8x4)] ; clear upper,
- ; and keep bit 0 of lower
-
- ; output 0 and 2
- movdqa xmm2, xmm0 ; a1
-
- paddw xmm0, xmm1 ; a1 + b1
- psubw xmm2, xmm1 ; a1 - b1
-
- paddw xmm0, XMMWORD PTR[GLOBAL(_7w)]
- paddw xmm2, XMMWORD PTR[GLOBAL(_7w)]
-
- psraw xmm0, 4 ; op[0] = (a1 + b1 + 7)>>4
- psraw xmm2, 4 ; op[8] = (a1 - b1 + 7)>>4
-
- ; output 1 and 3
- ; interleave c1, d1
- movdqa xmm1, xmm5 ; d1
- punpcklwd xmm1, xmm4 ; c1 d1
- punpckhwd xmm5, xmm4 ; c1 d1
-
- movdqa xmm3, xmm1
- movdqa xmm4, xmm5
-
- pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
- pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
-
- pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
- pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
-
- paddd xmm1, XMMWORD PTR[GLOBAL(_12000)]
- paddd xmm4, XMMWORD PTR[GLOBAL(_12000)]
- paddd xmm3, XMMWORD PTR[GLOBAL(_51000)]
- paddd xmm5, XMMWORD PTR[GLOBAL(_51000)]
-
- psrad xmm1, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16
- psrad xmm4, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16
- psrad xmm3, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16
- psrad xmm5, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16
-
- packssdw xmm1, xmm4 ; op[4]
- packssdw xmm3, xmm5 ; op[12]
-
- paddw xmm1, xmm6 ; op[4] += (d1!=0)
-
- movdqa xmm4, xmm0
- movdqa xmm5, xmm2
-
- punpcklqdq xmm0, xmm1
- punpckhqdq xmm4, xmm1
-
- punpcklqdq xmm2, xmm3
- punpckhqdq xmm5, xmm3
-
- movdqa XMMWORD PTR[output + 0 ], xmm0
- movdqa XMMWORD PTR[output + 16], xmm2
- movdqa XMMWORD PTR[output + 32], xmm4
- movdqa XMMWORD PTR[output + 48], xmm5
-
- STACK_FRAME_DESTROY
-
-SECTION_RODATA
-align 16
-_5352_2217:
- dw 5352
- dw 2217
- dw 5352
- dw 2217
- dw 5352
- dw 2217
- dw 5352
- dw 2217
-align 16
-_2217_neg5352:
- dw 2217
- dw -5352
- dw 2217
- dw -5352
- dw 2217
- dw -5352
- dw 2217
- dw -5352
-align 16
-_mult_add:
- times 8 dw 1
-align 16
-_cmp_mask:
- times 4 dw 1
- times 4 dw 0
-align 16
-_cmp_mask8x4:
- times 8 dw 1
-align 16
-_mult_sub:
- dw 1
- dw -1
- dw 1
- dw -1
- dw 1
- dw -1
- dw 1
- dw -1
-align 16
-_7:
- times 4 dd 7
-align 16
-_7w:
- times 8 dw 7
-align 16
-_14500:
- times 4 dd 14500
-align 16
-_7500:
- times 4 dd 7500
-align 16
-_12000:
- times 4 dd 12000
-align 16
-_51000:
- times 4 dd 51000
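The deleted assembly implemented the original WebM-style integer fDCT, superseded by the cospi-based intrinsics added below. Reconstructed from its inline comments (the 5352/2217 multipliers, the 14500/7500 rounders, and the >> 12), the per-row first stage it performed is equivalent to this scalar sketch:

#include <stdint.h>

// Scalar equivalent of the deleted asm's first stage over one 4-sample row,
// reconstructed from its comments; not part of the new code.
static void old_fdct4_stage_sketch(const int16_t *ip, int16_t *op) {
  const int a1 = (ip[0] + ip[3]) << 3;
  const int b1 = (ip[1] + ip[2]) << 3;
  const int c1 = (ip[1] - ip[2]) << 3;
  const int d1 = (ip[0] - ip[3]) << 3;
  op[0] = (int16_t)(a1 + b1);
  op[2] = (int16_t)(a1 - b1);
  op[1] = (int16_t)((c1 * 2217 + d1 * 5352 + 14500) >> 12);
  op[3] = (int16_t)((d1 * 2217 - c1 * 5352 + 7500) >> 12);
}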
diff --git a/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c b/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c
index 358d979eb..49cb837e0 100644
--- a/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c
+++ b/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c
@@ -11,6 +11,111 @@
#include <emmintrin.h> // SSE2
#include "vp9/common/vp9_idct.h" // for cospi constants
+void vp9_short_fdct4x4_sse2(int16_t *input, int16_t *output, int pitch) {
+ // The 2D transform is done with two passes which are actually pretty
+ // similar. In the first one, we transform the columns and transpose
+ // the results. In the second one, we transform the rows. To achieve that,
+ // since the first-pass results are transposed, we transpose the columns
+ // (that is, the transposed rows) and transpose the results again (so they
+ // end up back in normal/row positions).
+ const int stride = pitch >> 1;
+ int pass;
+ // Constants
+ // In one case, all eight 16-bit lanes hold the same value; in all others,
+ // a pair of values is repeated four times, which is done by constructing
+ // the 32-bit constant corresponding to that pair.
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1);
+ const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
+ const __m128i kOne = _mm_set1_epi16(1);
+ __m128i in0, in1, in2, in3;
+ // Load inputs.
+ {
+ in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
+ in1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
+ in2 = _mm_loadl_epi64((const __m128i *)(input + 2 * stride));
+ in3 = _mm_loadl_epi64((const __m128i *)(input + 3 * stride));
+ // x = x << 4
+ in0 = _mm_slli_epi16(in0, 4);
+ in1 = _mm_slli_epi16(in1, 4);
+ in2 = _mm_slli_epi16(in2, 4);
+ in3 = _mm_slli_epi16(in3, 4);
+ // if (i == 0 && input[0]) input[0] += 1;
+ {
+ // The mask will only record whether the first value is zero; all
+ // other comparisons fail, since a value shifted left by 4 (above << 4)
+ // can never be equal to one. To increment in the non-zero case, we
+ // add the mask and one for the first element:
+ // - if zero, mask = -1, v = v - 1 + 1 = v
+ // - if non-zero, mask = 0, v = v + 0 + 1 = v + 1
+ __m128i mask = _mm_cmpeq_epi16(in0, k__nonzero_bias_a);
+ in0 = _mm_add_epi16(in0, mask);
+ in0 = _mm_add_epi16(in0, k__nonzero_bias_b);
+ }
+ }
+ // Do the two transform/transpose passes
+ for (pass = 0; pass < 2; ++pass) {
+ // Transform 1/2: Add/subtract
+ const __m128i r0 = _mm_add_epi16(in0, in3);
+ const __m128i r1 = _mm_add_epi16(in1, in2);
+ const __m128i r2 = _mm_sub_epi16(in1, in2);
+ const __m128i r3 = _mm_sub_epi16(in0, in3);
+ // Transform 1/2: Interleave to do the multiplies by constants, which
+ // take us into 32 bits.
+ const __m128i t0 = _mm_unpacklo_epi16(r0, r1);
+ const __m128i t2 = _mm_unpacklo_epi16(r2, r3);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16);
+ const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16);
+ const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p24_p08);
+ const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m08_p24);
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
+ const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ // Combine and transpose
+ const __m128i res0 = _mm_packs_epi32(w0, w2);
+ const __m128i res1 = _mm_packs_epi32(w4, w6);
+ // 00 01 02 03 20 21 22 23
+ // 10 11 12 13 30 31 32 33
+ const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1);
+ const __m128i tr0_1 = _mm_unpackhi_epi16(res0, res1);
+ // 00 10 01 11 02 12 03 13
+ // 20 30 21 31 22 32 23 33
+ in0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+ in2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+ // 00 10 20 30 01 11 21 31 in0 contains 0 followed by 1
+ // 02 12 22 32 03 13 23 33 in2 contains 2 followed by 3
+ if (0 == pass) {
+ // Extract values in the high part for second pass as transform code
+ // only uses the first four values.
+ in1 = _mm_unpackhi_epi64(in0, in0);
+ in3 = _mm_unpackhi_epi64(in2, in2);
+ } else {
+ // Post-condition the output as (v + 1) >> 2 and store it, taking
+ // advantage of the fact that rows 1/3 are stored just after rows 0/2.
+ __m128i out01 = _mm_add_epi16(in0, kOne);
+ __m128i out23 = _mm_add_epi16(in2, kOne);
+ out01 = _mm_srai_epi16(out01, 2);
+ out23 = _mm_srai_epi16(out23, 2);
+ _mm_storeu_si128((__m128i *)(output + 0 * 4), out01);
+ _mm_storeu_si128((__m128i *)(output + 2 * 4), out23);
+ }
+ }
+}
+
+void vp9_short_fdct8x4_sse2(int16_t *input, int16_t *output, int pitch) {
+ vp9_short_fdct4x4_sse2(input, output, pitch);
+ vp9_short_fdct4x4_sse2(input + 4, output + 16, pitch);
+}
+
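Per column, each pass of the new 4x4 transform reduces to the butterfly below (a scalar sketch; cospi_16_64, cospi_24_64, cospi_8_64, DCT_CONST_ROUNDING and DCT_CONST_BITS come from vp9_idct.h, included above). The vp9_short_fdct8x4_sse2 wrapper simply runs the 4x4 transform on the two side-by-side 4-wide blocks, the second writing at output + 16 since each 4x4 block emits 16 coefficients.

#include <stdint.h>

// Scalar sketch of one pass (the r*/u*/v*/w* steps) over a single column.
static void fdct4_pass_sketch(const int16_t in[4], int16_t out[4]) {
  const int r0 = in[0] + in[3];
  const int r1 = in[1] + in[2];
  const int r2 = in[1] - in[2];
  const int r3 = in[0] - in[3];
  out[0] = (int16_t)(((r0 + r1) * cospi_16_64 + DCT_CONST_ROUNDING)
                     >> DCT_CONST_BITS);
  out[2] = (int16_t)(((r0 - r1) * cospi_16_64 + DCT_CONST_ROUNDING)
                     >> DCT_CONST_BITS);
  out[1] = (int16_t)((r2 * cospi_24_64 + r3 * cospi_8_64 +
                      DCT_CONST_ROUNDING) >> DCT_CONST_BITS);
  out[3] = (int16_t)((r3 * cospi_24_64 - r2 * cospi_8_64 +
                      DCT_CONST_ROUNDING) >> DCT_CONST_BITS);
}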
void vp9_short_fdct8x8_sse2(int16_t *input, int16_t *output, int pitch) {
const int stride = pitch >> 1;
int pass;
diff --git a/vp9/encoder/x86/vp9_sad4d_sse2.asm b/vp9/encoder/x86/vp9_sad4d_sse2.asm
index 3716d91ec..25dd064e1 100644
--- a/vp9/encoder/x86/vp9_sad4d_sse2.asm
+++ b/vp9/encoder/x86/vp9_sad4d_sse2.asm
@@ -215,7 +215,11 @@ cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
INIT_XMM sse2
SADNXN4D 64, 64
+SADNXN4D 64, 32
+SADNXN4D 32, 64
SADNXN4D 32, 32
+SADNXN4D 32, 16
+SADNXN4D 16, 32
SADNXN4D 16, 16
SADNXN4D 16, 8
SADNXN4D 8, 16
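SADNXN4D emits a sadMxNx4d kernel, which computes the SAD of one source block against four candidate reference blocks in a single call; the new invocations cover the non-square sizes. A hedged C reference of the semantics (the function name is illustrative):

#include <stdint.h>

// What a sadMxNx4d routine computes, in plain C.
static void sad_x4d_reference(const uint8_t *src, int src_stride,
                              const uint8_t *const ref[4], int ref_stride,
                              int w, int h, unsigned int res[4]) {
  int r, i, j;
  for (r = 0; r < 4; ++r) {
    const uint8_t *s = src;
    const uint8_t *p = ref[r];
    unsigned int sad = 0;
    for (i = 0; i < h; ++i) {
      for (j = 0; j < w; ++j) {
        const int d = s[j] - p[j];
        sad += (unsigned int)(d < 0 ? -d : d);
      }
      s += src_stride;
      p += ref_stride;
    }
    res[r] = sad;
  }
}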
diff --git a/vp9/encoder/x86/vp9_sad_sse2.asm b/vp9/encoder/x86/vp9_sad_sse2.asm
index ea482e071..ea92377ee 100644
--- a/vp9/encoder/x86/vp9_sad_sse2.asm
+++ b/vp9/encoder/x86/vp9_sad_sse2.asm
@@ -14,11 +14,11 @@ SECTION .text
; unsigned int vp9_sad64x64_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
-INIT_XMM sse2
-cglobal sad64x64, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
+%macro SAD64XN 1
+cglobal sad64x%1, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
movsxdifnidn src_strideq, src_strided
movsxdifnidn ref_strideq, ref_strided
- mov n_rowsd, 64
+ mov n_rowsd, %1
pxor m0, m0
.loop:
movu m1, [refq]
@@ -42,14 +42,19 @@ cglobal sad64x64, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
paddd m0, m1
movd eax, m0
RET
+%endmacro
+
+INIT_XMM sse2
+SAD64XN 64 ; sad64x64_sse2
+SAD64XN 32 ; sad64x32_sse2
; unsigned int vp9_sad32x32_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
-INIT_XMM sse2
-cglobal sad32x32, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
+%macro SAD32XN 1
+cglobal sad32x%1, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
movsxdifnidn src_strideq, src_strided
movsxdifnidn ref_strideq, ref_strided
- mov n_rowsd, 16
+ mov n_rowsd, %1/2
pxor m0, m0
.loop:
@@ -74,6 +79,12 @@ cglobal sad32x32, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
paddd m0, m1
movd eax, m0
RET
+%endmacro
+
+INIT_XMM sse2
+SAD32XN 64 ; sad32x64_sse2
+SAD32XN 32 ; sad32x32_sse2
+SAD32XN 16 ; sad32x16_sse2
; unsigned int vp9_sad16x{8,16}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
@@ -112,6 +123,7 @@ cglobal sad16x%1, 4, 7, 5, src, src_stride, ref, ref_stride, \
%endmacro
INIT_XMM sse2
+SAD16XN 32 ; sad16x32_sse2
SAD16XN 16 ; sad16x16_sse2
SAD16XN 8 ; sad16x8_sse2
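The row-counter initialization differs between the macros because of loop unrolling: the 64-wide kernel consumes one row per iteration (n_rows = %1), while the 32-wide kernel consumes two (n_rows = %1/2, matching the old hard-coded 16 for sad32x32). A C analogue of the 32-wide loop structure, for illustration:

#include <stdint.h>

// Two rows per iteration, mirroring the unrolled 32-wide asm loop.
static unsigned int sad32xh_sketch(const uint8_t *src, int src_stride,
                                   const uint8_t *ref, int ref_stride,
                                   int h) {
  unsigned int sad = 0;
  int n, j;
  for (n = h / 2; n > 0; --n) {  // counter starts at h / 2, as in the macro
    for (j = 0; j < 32; ++j) {
      const int d0 = src[j] - ref[j];
      const int d1 = src[src_stride + j] - ref[ref_stride + j];
      sad += (unsigned int)((d0 < 0 ? -d0 : d0) + (d1 < 0 ? -d1 : d1));
    }
    src += 2 * src_stride;
    ref += 2 * ref_stride;
  }
  return sad;
}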
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 43dba1373..13785f71b 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -90,7 +90,6 @@ VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.h
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_subtract_mmx.asm
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm