-rwxr-xr-x  configure                                             5
-rw-r--r--  test/convolve_test.cc                                36
-rw-r--r--  test/test-data.sha1                                   2
-rw-r--r--  test/test.mk                                          2
-rw-r--r--  test/test_vector_test.cc                              2
-rw-r--r--  vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm   237
-rw-r--r--  vp9/common/vp9_convolve.c                             5
-rw-r--r--  vp9/common/vp9_entropymv.c                           82
-rw-r--r--  vp9/common/vp9_loopfilter.c                          10
-rw-r--r--  vp9/common/vp9_onyxc_int.h                            4
-rw-r--r--  vp9/common/vp9_postproc.c                             3
-rw-r--r--  vp9/common/vp9_rtcd_defs.sh                           4
-rw-r--r--  vp9/decoder/vp9_decodframe.c                         30
-rw-r--r--  vp9/decoder/vp9_onyxd_if.c                            8
-rw-r--r--  vp9/encoder/vp9_encodeframe.c                        72
-rw-r--r--  vp9/encoder/vp9_onyx_if.c                            10
-rw-r--r--  vp9/encoder/vp9_rdopt.c                               4
-rw-r--r--  vp9/vp9_common.mk                                     7
-rw-r--r--  vp9/vp9_cx_iface.c                                    2
-rw-r--r--  vp9/vp9_dx_iface.c                                    4
-rw-r--r--  vp9/vp9cx.mk                                          2
21 files changed, 453 insertions, 78 deletions
diff --git a/configure b/configure
index a2520819e..e7f9f4f09 100755
--- a/configure
+++ b/configure
@@ -38,6 +38,7 @@ Advanced options:
${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders)
${toggle_mem_tracker} track memory usage
${toggle_postproc} postprocessing
+ ${toggle_vp9_postproc} vp9 specific postprocessing
${toggle_multithread} multithreaded encoding and decoding
${toggle_spatial_resampling} spatial sampling (scaling) support
${toggle_realtime_only} enable this option while building for real-time encoding
@@ -279,6 +280,7 @@ CONFIG_LIST="
dc_recon
runtime_cpu_detect
postproc
+ vp9_postproc
multithread
internal_stats
${CODECS}
@@ -333,6 +335,7 @@ CMDLINE_SELECT="
dequant_tokens
dc_recon
postproc
+ vp9_postproc
multithread
internal_stats
${CODECS}
@@ -438,7 +441,7 @@ process_targets() {
done
enabled debug_libs && DIST_DIR="${DIST_DIR}-debug"
enabled codec_srcs && DIST_DIR="${DIST_DIR}-src"
- ! enabled postproc && DIST_DIR="${DIST_DIR}-nopost"
+ ! enabled postproc && ! enabled vp9_postproc && DIST_DIR="${DIST_DIR}-nopost"
! enabled multithread && DIST_DIR="${DIST_DIR}-nomt"
! enabled install_docs && DIST_DIR="${DIST_DIR}-nodocs"
DIST_DIR="${DIST_DIR}-${tgt_isa}-${tgt_os}"
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 9bdce69ff..3100571da 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <string.h>
#include "test/acm_random.h"
#include "test/register_state_check.h"
#include "test/util.h"
@@ -187,7 +188,7 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
protected:
static const int kDataAlignment = 16;
- static const int kOuterBlockSize = 128;
+ static const int kOuterBlockSize = 256;
static const int kInputStride = kOuterBlockSize;
static const int kOutputStride = kOuterBlockSize;
static const int kMaxDimension = 64;
@@ -224,6 +225,10 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
input_[i] = prng.Rand8Extremes();
}
+ void SetConstantInput(int value) {
+ memset(input_, value, kInputBufferSize);
+ }
+
void CheckGuardBlocks() {
for (int i = 0; i < kOutputBufferSize; ++i) {
if (IsIndexInBorder(i))
@@ -543,6 +548,35 @@ TEST_P(ConvolveTest, ChangeFilterWorks) {
}
}
+/* This test verifies that enough rows and columns are filtered for every
+   possible initial fractional position and scaling step. */
+TEST_P(ConvolveTest, CheckScalingFiltering) {
+ uint8_t* const in = input();
+ uint8_t* const out = output();
+
+ SetConstantInput(127);
+
+ for (int frac = 0; frac < 16; ++frac) {
+ for (int step = 1; step <= 32; ++step) {
+ /* Test the horizontal and vertical filters in combination. */
+ REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
+ vp9_sub_pel_filters_8[frac], step,
+ vp9_sub_pel_filters_8[frac], step,
+ Width(), Height()));
+
+ CheckGuardBlocks();
+
+ for (int y = 0; y < Height(); ++y) {
+ for (int x = 0; x < Width(); ++x) {
+ ASSERT_EQ(in[y * kInputStride + x], out[y * kOutputStride + x])
+ << "x == " << x << ", y == " << y
+ << ", frac == " << frac << ", step == " << step;
+ }
+ }
+ }
+ }
+}
+
using std::tr1::make_tuple;
const ConvolveFunctions convolve8_c(
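Note on the new CheckScalingFiltering test: it relies on each 8-tap filter in
vp9_sub_pel_filters_8 being normalized so that its taps sum to 128
(1 << FILTER_BITS, assuming FILTER_BITS == 7). For a constant input of 127,
every correctly filtered sample is therefore
  ROUND_POWER_OF_TWO(127 * 128, 7) == 127
so any row or column that the scaling convolution fails to cover (or that is
filtered from uninitialized guard data) shows up as a mismatch against the
input, for every fractional phase (frac) and scaling step.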
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index 579d7e254..370ffc13c 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -522,3 +522,5 @@ fe0af2ee47b1e5f6a66db369e2d7e9d870b38dce vp90-2-03-size-226x226.webm
94ad19b8b699cea105e2ff18f0df2afd7242bcf7 vp90-2-03-size-226x226.webm.md5
495256cfd123fe777b2c0406862ed8468a1f4677 vp91-2-04-yv444.webm
65e3a7ffef61ab340d9140f335ecc49125970c2c vp91-2-04-yv444.webm.md5
+b6524e4084d15b5d0caaa3d3d1368db30cbee69c vp90-2-03-deltaq.webm
+65f45ec9a55537aac76104818278e0978f94a678 vp90-2-03-deltaq.webm.md5
diff --git a/test/test.mk b/test/test.mk
index 2042c86d9..4eb599dd8 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -629,5 +629,7 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5
diff --git a/test/test_vector_test.cc b/test/test_vector_test.cc
index 4cd356d81..9bd03b997 100644
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@@ -159,7 +159,7 @@ const char *kVP9TestVectors[] = {
"vp90-2-03-size-226x198.webm", "vp90-2-03-size-226x200.webm",
"vp90-2-03-size-226x202.webm", "vp90-2-03-size-226x208.webm",
"vp90-2-03-size-226x210.webm", "vp90-2-03-size-226x224.webm",
- "vp90-2-03-size-226x226.webm",
+ "vp90-2-03-size-226x226.webm", "vp90-2-03-deltaq.webm",
#if CONFIG_NON420
"vp91-2-04-yv444.webm"
#endif
diff --git a/vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm b/vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm
new file mode 100644
index 000000000..963ef35da
--- /dev/null
+++ b/vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm
@@ -0,0 +1,237 @@
+;
+; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+ EXPORT |vp9_short_iht4x4_add_neon|
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+ ; Parallel 1D IDCT on all the columns of a 4x4 16-bit data matrix that is
+ ; loaded in d16-d19. d0 must contain cospi_8_64, d1 must contain
+ ; cospi_16_64 and d2 must contain cospi_24_64. The output is written back
+ ; to d16-d19. This macro uses q10-q15 as scratch registers during the
+ ; calculation.
+ MACRO
+ IDCT4x4_1D
+ ; stage 1
+ vadd.s16 d23, d16, d18 ; (input[0] + input[2])
+ vsub.s16 d24, d16, d18 ; (input[0] - input[2])
+
+ vmull.s16 q15, d17, d2 ; input[1] * cospi_24_64
+ vmull.s16 q10, d17, d0 ; input[1] * cospi_8_64
+ vmull.s16 q13, d23, d1 ; (input[0] + input[2]) * cospi_16_64
+ vmull.s16 q14, d24, d1 ; (input[0] - input[2]) * cospi_16_64
+ vmlsl.s16 q15, d19, d0 ; input[1] * cospi_24_64 - input[3] * cospi_8_64
+ vmlal.s16 q10, d19, d2 ; input[1] * cospi_8_64 + input[3] * cospi_24_64
+
+ ; dct_const_round_shift
+ vqrshrn.s32 d26, q13, #14
+ vqrshrn.s32 d27, q14, #14
+ vqrshrn.s32 d29, q15, #14
+ vqrshrn.s32 d28, q10, #14
+
+ ; stage 2
+ ; output[0] = step[0] + step[3];
+ ; output[1] = step[1] + step[2];
+ ; output[3] = step[0] - step[3];
+ ; output[2] = step[1] - step[2];
+ vadd.s16 q8, q13, q14
+ vsub.s16 q9, q13, q14
+ vswp d18, d19
+ MEND
+
+ ; Parallel 1D IADST on all the columns of a 4x4 16-bit data matrix that is
+ ; loaded in d16-d19. d3 must contain sinpi_1_9, d4 must contain sinpi_2_9,
+ ; d5 must contain sinpi_4_9 and d6 must contain sinpi_3_9. The output is
+ ; written back to d16-d19. This macro uses q10-q15 as scratch registers
+ ; during the calculation.
+ MACRO
+ IADST4x4_1D
+ vmull.s16 q10, d3, d16 ; s0 = sinpi_1_9 * x0
+ vmull.s16 q11, d4, d16 ; s1 = sinpi_2_9 * x0
+ vmull.s16 q12, d6, d17 ; s2 = sinpi_3_9 * x1
+ vmull.s16 q13, d5, d18 ; s3 = sinpi_4_9 * x2
+ vmull.s16 q14, d3, d18 ; s4 = sinpi_1_9 * x2
+ vmovl.s16 q15, d16 ; expand x0 from 16 bit to 32 bit
+ vaddw.s16 q15, q15, d19 ; x0 + x3
+ vmull.s16 q8, d4, d19 ; s5 = sinpi_2_9 * x3
+ vsubw.s16 q15, q15, d18 ; s7 = x0 + x3 - x2
+ vmull.s16 q9, d5, d19 ; s6 = sinpi_4_9 * x3
+
+ vadd.s32 q10, q10, q13 ; x0 = s0 + s3 + s5
+ vadd.s32 q10, q10, q8
+ vsub.s32 q11, q11, q14 ; x1 = s1 - s4 - s6
+ vdup.32 q8, r0 ; duplicate sinpi_3_9
+ vsub.s32 q11, q11, q9
+ vmul.s32 q15, q15, q8 ; x2 = sinpi_3_9 * s7
+
+ vadd.s32 q13, q10, q12 ; s0 = x0 + x3
+ vadd.s32 q10, q10, q11 ; x0 + x1
+ vadd.s32 q14, q11, q12 ; s1 = x1 + x3
+ vsub.s32 q10, q10, q12 ; s3 = x0 + x1 - x3
+
+ ; dct_const_round_shift
+ vqrshrn.s32 d16, q13, #14
+ vqrshrn.s32 d17, q14, #14
+ vqrshrn.s32 d18, q15, #14
+ vqrshrn.s32 d19, q10, #14
+ MEND
+
+ ; Generate cosine constants in d0 - d2 for the IDCT
+ MACRO
+ GENERATE_COSINE_CONSTANTS
+ ; cospi_8_64 = 15137 = 0x3b21
+ mov r0, #0x3b00
+ add r0, #0x21
+ ; cospi_16_64 = 11585 = 0x2d41
+ mov r3, #0x2d00
+ add r3, #0x41
+ ; cospi_24_64 = 6270 = 0x187e
+ mov r12, #0x1800
+ add r12, #0x7e
+
+ ; generate constant vectors
+ vdup.16 d0, r0 ; duplicate cospi_8_64
+ vdup.16 d1, r3 ; duplicate cospi_16_64
+ vdup.16 d2, r12 ; duplicate cospi_24_64
+ MEND
+
+ ; Generate sine constants in d3 - d5 and q3 for the IADST.
+ MACRO
+ GENERATE_SINE_CONSTANTS
+ ; sinpi_1_9 = 5283 = 0x14A3
+ mov r0, #0x1400
+ add r0, #0xa3
+ ; sinpi_2_9 = 9929 = 0x26C9
+ mov r3, #0x2600
+ add r3, #0xc9
+ ; sinpi_4_9 = 15212 = 0x3B6C
+ mov r12, #0x3b00
+ add r12, #0x6c
+
+ ; generate constant vectors
+ vdup.16 d3, r0 ; duplicate sinpi_1_9
+
+ ; sinpi_3_9 = 13377 = 0x3441
+ mov r0, #0x3400
+ add r0, #0x41
+
+ vdup.16 d4, r3 ; duplicate sinpi_2_9
+ vdup.16 d5, r12 ; duplicate sinpi_4_9
+ vdup.16 q3, r0 ; duplicate sinpi_3_9
+ MEND
+
+ ; Transpose a 4x4 16-bit data matrix. Data is loaded in d16-d19.
+ MACRO
+ TRANSPOSE4X4
+ vtrn.16 d16, d17
+ vtrn.16 d18, d19
+ vtrn.32 q8, q9
+ MEND
+
+ AREA Block, CODE, READONLY ; name this block of code
+;void vp9_short_iht4x4_add_neon(int16_t *input, uint8_t *dest,
+; int dest_stride, int tx_type)
+;
+; r0 int16_t *input
+; r1 uint8_t *dest
+; r2 int dest_stride
+; r3 int tx_type
+; This function only handles tx_type values 1, 2, and 3.
+|vp9_short_iht4x4_add_neon| PROC
+
+ ; load the inputs into d16-d19
+ vld1.s16 {q8,q9}, [r0]!
+
+ ; transpose the input data
+ TRANSPOSE4X4
+
+ ; decide the type of transform
+ cmp r3, #2
+ beq idct_iadst
+ cmp r3, #3
+ beq iadst_iadst
+
+iadst_idct
+ ; generate constants
+ GENERATE_COSINE_CONSTANTS
+ GENERATE_SINE_CONSTANTS
+
+ ; first transform rows
+ IDCT4x4_1D
+
+ ; transpose the matrix
+ TRANSPOSE4X4
+
+ ; then transform columns
+ IADST4x4_1D
+
+ b end_vp9_short_iht4x4_add_neon
+
+idct_iadst
+ ; generate constants
+ GENERATE_COSINE_CONSTANTS
+ GENERATE_SINE_CONSTANTS
+
+ ; first transform rows
+ IADST4x4_1D
+
+ ; transpose the matrix
+ TRANSPOSE4X4
+
+ ; then transform columns
+ IDCT4x4_1D
+
+ b end_vp9_short_iht4x4_add_neon
+
+iadst_iadst
+ ; generate constants
+ GENERATE_SINE_CONSTANTS
+
+ ; first transform rows
+ IADST4x4_1D
+
+ ; transpose the matrix
+ TRANSPOSE4X4
+
+ ; then transform columns
+ IADST4x4_1D
+
+end_vp9_short_iht4x4_add_neon
+ ; ROUND_POWER_OF_TWO(temp_out[j], 4)
+ vrshr.s16 q8, q8, #4
+ vrshr.s16 q9, q9, #4
+
+ vld1.32 {d26[0]}, [r1], r2
+ vld1.32 {d26[1]}, [r1], r2
+ vld1.32 {d27[0]}, [r1], r2
+ vld1.32 {d27[1]}, [r1]
+
+ ; ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]
+ vaddw.u8 q8, q8, d26
+ vaddw.u8 q9, q9, d27
+
+ ; clip_pixel
+ vqmovun.s16 d26, q8
+ vqmovun.s16 d27, q9
+
+ ; do the stores in reverse order with negative post-increment, by changing
+ ; the sign of the stride
+ rsb r2, r2, #0
+ vst1.32 {d27[1]}, [r1], r2
+ vst1.32 {d27[0]}, [r1], r2
+ vst1.32 {d26[1]}, [r1], r2
+ vst1.32 {d26[0]}, [r1] ; no post-increment
+ bx lr
+ ENDP ; |vp9_short_iht4x4_add_neon|
+
+ END
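A note on the constant-generating macros above: each 16-bit trig constant is
built with a mov/add pair (for example cospi_8_64 == 0x3b21 as mov r0, #0x3b00
followed by add r0, #0x21), presumably because classic ARM data-processing
immediates can only encode an 8-bit value rotated by an even amount, so most
16-bit constants cannot be loaded with a single mov.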
diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c
index 1e6cd4404..be092f41c 100644
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -195,7 +195,7 @@ static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,
* h == 64, taps == 8.
*/
uint8_t temp[64 * 135];
- int intermediate_height = MAX(((h * y_step_q4) >> 4), 1) + taps - 1;
+ int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + taps;
assert(w <= 64);
assert(h <= 64);
@@ -203,9 +203,6 @@ static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,
assert(y_step_q4 <= 32);
assert(x_step_q4 <= 32);
- if (intermediate_height < h)
- intermediate_height = h;
-
convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 64,
filter_x, x_step_q4, filter_y, y_step_q4, w,
intermediate_height, taps);
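A quick check of the new intermediate_height expression, using the bounds
asserted above: the vertical pass can read up to
(((h - 1) * y_step_q4 + 15) >> 4) + taps rows because the initial sub-pixel
phase can be as large as 15/16. With h == 64, y_step_q4 == 15 and taps == 8
this gives ((63 * 15 + 15) >> 4) + 8 == 68 rows, one more than the old
MAX-based expression produced (67), while the worst case y_step_q4 == 32 needs
((63 * 32 + 15) >> 4) + 8 == 134 rows and still fits the 64 * 135 temp buffer.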
diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c
index 6cfc34697..c6eefda92 100644
--- a/vp9/common/vp9_entropymv.c
+++ b/vp9/common/vp9_entropymv.c
@@ -79,20 +79,59 @@ static const nmv_context default_nmv_context = {
#define mv_class_base(c) ((c) ? (CLASS0_SIZE << (c + 2)) : 0)
+static const uint8_t log_in_base_2[] = {
+ 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10
+};
+
MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) {
MV_CLASS_TYPE c = MV_CLASS_0;
- if (z < CLASS0_SIZE * 8) c = MV_CLASS_0;
- else if (z < CLASS0_SIZE * 16) c = MV_CLASS_1;
- else if (z < CLASS0_SIZE * 32) c = MV_CLASS_2;
- else if (z < CLASS0_SIZE * 64) c = MV_CLASS_3;
- else if (z < CLASS0_SIZE * 128) c = MV_CLASS_4;
- else if (z < CLASS0_SIZE * 256) c = MV_CLASS_5;
- else if (z < CLASS0_SIZE * 512) c = MV_CLASS_6;
- else if (z < CLASS0_SIZE * 1024) c = MV_CLASS_7;
- else if (z < CLASS0_SIZE * 2048) c = MV_CLASS_8;
- else if (z < CLASS0_SIZE * 4096) c = MV_CLASS_9;
- else if (z < CLASS0_SIZE * 8192) c = MV_CLASS_10;
- else assert(0);
+ if (z >= CLASS0_SIZE * 4096)
+ c = MV_CLASS_10;
+ else
+ c = log_in_base_2[z >> 3];
+
if (offset)
*offset = z - mv_class_base(c);
return c;
@@ -123,29 +162,18 @@ static void inc_mv_component(int v, nmv_component_counts *comp_counts,
d = (o >> 3); /* int mv data */
f = (o >> 1) & 3; /* fractional pel mv data */
e = (o & 1); /* high precision mv data */
+
if (c == MV_CLASS_0) {
comp_counts->class0[d] += incr;
+ comp_counts->class0_fp[d][f] += incr;
+ comp_counts->class0_hp[e] += usehp * incr;
} else {
int i;
int b = c + CLASS0_BITS - 1; // number of bits
for (i = 0; i < b; ++i)
comp_counts->bits[i][((d >> i) & 1)] += incr;
- }
-
- /* Code the fractional pel bits */
- if (c == MV_CLASS_0) {
- comp_counts->class0_fp[d][f] += incr;
- } else {
comp_counts->fp[f] += incr;
- }
-
- /* Code the high precision bit */
- if (usehp) {
- if (c == MV_CLASS_0) {
- comp_counts->class0_hp[e] += incr;
- } else {
- comp_counts->hp[e] += incr;
- }
+ comp_counts->hp[e] += usehp * incr;
}
}
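The log_in_base_2 table above holds floor(log2(i)) for each index i (with
entry 0 defined as 0), so c = log_in_base_2[z >> 3] replaces the old chain of
range comparisons with a single lookup for all z below CLASS0_SIZE * 4096.
A hypothetical generator for the table (not part of this change) could look
like:

#include <stdint.h>

/* Fill table[0..n-1] with floor(log2(i)); table[0] is defined as 0 so that
   z values in MV_CLASS_0 map to class 0. */
static void build_log_in_base_2(uint8_t *table, int n) {
  int i;
  table[0] = 0;
  for (i = 1; i < n; ++i) {
    int v = 0, x = i;
    while (x >>= 1)  /* count how many times i can be halved */
      ++v;
    table[i] = v;    /* floor(log2(i)) */
  }
}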
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index cfa61c20b..df806ac56 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -777,6 +777,7 @@ static void setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
}
}
}
+#if CONFIG_NON420
static void filter_block_plane_non420(VP9_COMMON *cm,
struct macroblockd_plane *plane,
const MODE_INFO *mi,
@@ -896,6 +897,7 @@ static void filter_block_plane_non420(VP9_COMMON *cm,
dst->buf += 8 * dst->stride;
}
}
+#endif
static void filter_block_plane(VP9_COMMON *const cm,
struct macroblockd_plane *const plane,
@@ -981,8 +983,10 @@ void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
int mi_row, mi_col;
LOOP_FILTER_MASK lfm;
+#if CONFIG_NON420
int use_420 = y_only || (xd->plane[1].subsampling_y == 1 &&
xd->plane[1].subsampling_x == 1);
+#endif
for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
MODE_INFO* const mi = cm->mi + mi_row * cm->mode_info_stride;
@@ -993,16 +997,22 @@ void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
setup_dst_planes(xd, frame_buffer, mi_row, mi_col);
// TODO(JBB): Make setup_mask work for non 420.
+#if CONFIG_NON420
if (use_420)
+#endif
setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mode_info_stride, &lfm);
for (plane = 0; plane < num_planes; ++plane) {
+#if CONFIG_NON420
if (use_420)
+#endif
filter_block_plane(cm, &xd->plane[plane], mi + mi_col, mi_row, mi_col,
&lfm);
+#if CONFIG_NON420
else
filter_block_plane_non420(cm, &xd->plane[plane], mi + mi_col,
mi_row, mi_col);
+#endif
}
}
}
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index a669cc5e7..f0bc063f2 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -20,7 +20,7 @@
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_quant_common.h"
-#if CONFIG_POSTPROC
+#if CONFIG_VP9_POSTPROC
#include "vp9/common/vp9_postproc.h"
#endif
@@ -201,7 +201,7 @@ typedef struct VP9Common {
unsigned int current_video_frame;
int version;
-#if CONFIG_POSTPROC
+#if CONFIG_VP9_POSTPROC
struct postproc_state postproc_state;
#endif
diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c
index 48d3d2d98..955e6766a 100644
--- a/vp9/common/vp9_postproc.c
+++ b/vp9/common/vp9_postproc.c
@@ -1011,7 +1011,8 @@ int vp9_post_proc_frame(struct VP9Common *cm,
/* handle problem with extending borders */
dest->y_width = cm->width;
dest->y_height = cm->height;
- dest->uv_height = dest->y_height / 2;
+ dest->uv_width = dest->y_width >> cm->subsampling_x;
+ dest->uv_height = dest->y_height >> cm->subsampling_y;
return 0;
}
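For reference on the subsampling-based sizes: with 4:2:0 content
(subsampling_x == subsampling_y == 1) the shifts reproduce the old
uv_height = y_height / 2 behaviour and additionally set uv_width, while for
the 4:4:4 case (subsampling 0, as in the vp91-2-04-yv444 test vector) the
chroma planes keep their full width and height instead of being halved.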
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 104db6aeb..c2777aa51 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -237,7 +237,7 @@ specialize vp9_loop_filter_horizontal_edge mmx neon
#
# post proc
#
-if [ "$CONFIG_POSTPROC" = "yes" ]; then
+if [ "$CONFIG_VP9_POSTPROC" = "yes" ]; then
prototype void vp9_mbpost_proc_down "uint8_t *dst, int pitch, int rows, int cols, int flimit"
specialize vp9_mbpost_proc_down mmx sse2
vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm
@@ -325,7 +325,7 @@ prototype void vp9_short_idct1_32x32 "int16_t *input, int16_t *output"
specialize vp9_short_idct1_32x32
prototype void vp9_short_iht4x4_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
-specialize vp9_short_iht4x4_add sse2
+specialize vp9_short_iht4x4_add sse2 neon
prototype void vp9_short_iht8x8_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
specialize vp9_short_iht8x8_add sse2
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 41e406d95..6cb7c094b 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -77,14 +77,11 @@ static void read_tx_probs(struct tx_probs *tx_probs, vp9_reader *r) {
vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]);
}
-static void init_dequantizer(VP9_COMMON *cm, MACROBLOCKD *xd) {
+static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) {
int i;
- const int segment_id = xd->mode_info_context->mbmi.segment_id;
- xd->q_index = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
-
- xd->plane[0].dequant = cm->y_dequant[xd->q_index];
+ xd->plane[0].dequant = cm->y_dequant[q_index];
for (i = 1; i < MAX_MB_PLANE; i++)
- xd->plane[i].dequant = cm->uv_dequant[xd->q_index];
+ xd->plane[i].dequant = cm->uv_dequant[q_index];
}
static void decode_block(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -149,14 +146,17 @@ static void decode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
}
static int decode_tokens(VP9D_COMP *pbi, BLOCK_SIZE bsize, vp9_reader *r) {
+ VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
+ MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
- if (xd->mode_info_context->mbmi.skip_coeff) {
- reset_skip_context(xd, bsize);
+ if (mbmi->skip_coeff) {
+ reset_skip_context(xd, bsize);
return -1;
} else {
- if (pbi->common.seg.enabled)
- init_dequantizer(&pbi->common, xd);
+ if (cm->seg.enabled)
+ setup_plane_dequants(cm, xd, vp9_get_qindex(&cm->seg, mbmi->segment_id,
+ cm->base_qindex));
// TODO(dkovalev) if (!vp9_reader_has_error(r))
return vp9_decode_tokens(pbi, r, bsize);
@@ -173,6 +173,7 @@ static void set_offsets(VP9D_COMP *pbi, BLOCK_SIZE bsize,
xd->mode_info_context = cm->mi + offset;
xd->mode_info_context->mbmi.sb_type = bsize;
+ xd->mode_info_stride = cm->mode_info_stride;
// Special case: if prev_mi is NULL, the previous mode info context
// cannot be used.
xd->prev_mode_info_context = cm->prev_mi ? cm->prev_mi + offset : NULL;
@@ -453,8 +454,7 @@ static void setup_loopfilter(struct loopfilter *lf,
static int read_delta_q(struct vp9_read_bit_buffer *rb, int *delta_q) {
const int old = *delta_q;
- if (vp9_rb_read_bit(rb))
- *delta_q = vp9_rb_read_signed_literal(rb, 4);
+ *delta_q = vp9_rb_read_bit(rb) ? vp9_rb_read_signed_literal(rb, 4) : 0;
return old != *delta_q;
}
@@ -958,11 +958,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Truncated packet or corrupt header length");
- xd->mode_info_context = cm->mi;
- xd->prev_mode_info_context = cm->prev_mi;
- xd->mode_info_stride = cm->mode_info_stride;
-
- init_dequantizer(cm, &pbi->mb);
+ setup_plane_dequants(cm, &pbi->mb, cm->base_qindex);
cm->fc = cm->frame_contexts[cm->frame_context_idx];
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index b609f9f31..505e9dc5d 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -13,7 +13,7 @@
#include <stdio.h>
#include "vp9/common/vp9_onyxc_int.h"
-#if CONFIG_POSTPROC
+#if CONFIG_VP9_POSTPROC
#include "vp9/common/vp9_postproc.h"
#endif
#include "vp9/decoder/vp9_onyxd.h"
@@ -421,7 +421,7 @@ int vp9_get_raw_frame(VP9D_PTR ptr, YV12_BUFFER_CONFIG *sd,
*time_stamp = pbi->last_time_stamp;
*time_end_stamp = 0;
-#if CONFIG_POSTPROC
+#if CONFIG_VP9_POSTPROC
ret = vp9_post_proc_frame(&pbi->common, sd, flags);
#else
@@ -429,7 +429,9 @@ int vp9_get_raw_frame(VP9D_PTR ptr, YV12_BUFFER_CONFIG *sd,
*sd = *pbi->common.frame_to_show;
sd->y_width = pbi->common.width;
sd->y_height = pbi->common.height;
- sd->uv_height = pbi->common.height / 2;
+ sd->uv_width = sd->y_width >> pbi->common.subsampling_x;
+ sd->uv_height = sd->y_height >> pbi->common.subsampling_y;
+
ret = 0;
} else {
ret = -1;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index eb83903ca..45758e7cb 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -851,13 +851,75 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col,
}
}
-static void set_partitioning(VP9_COMP *cpi, MODE_INFO *m, BLOCK_SIZE bsize) {
+// Check whether the given partition size is allowed for the specified number
+// of 8x8 block rows and columns remaining in the image.
+// If it is not, return the largest allowed partition size.
+static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize,
+ int rows_left, int cols_left,
+ int *bh, int *bw) {
+ if ((rows_left <= 0) || (cols_left <= 0)) {
+ return MIN(bsize, BLOCK_8X8);
+ } else {
+ for (; bsize > 0; --bsize) {
+ *bh = num_8x8_blocks_high_lookup[bsize];
+ *bw = num_8x8_blocks_wide_lookup[bsize];
+ if ((*bh <= rows_left) && (*bw <= cols_left)) {
+ break;
+ }
+ }
+ }
+ return bsize;
+}
+
+// This function attempts to set all mode info entries in a given SB64
+// to the same block partition size.
+// However, at the bottom and right borders of the image the requested size
+// may not be allowed, in which case this code attempts to choose the largest
+// allowable partition size.
+static void set_partitioning(VP9_COMP *cpi, MODE_INFO *m,
+ int mi_row, int mi_col) {
VP9_COMMON *const cm = &cpi->common;
+ BLOCK_SIZE bsize = cpi->sf.always_this_block_size;
const int mis = cm->mode_info_stride;
+ int row8x8_remaining = cm->cur_tile_mi_row_end - mi_row;
+ int col8x8_remaining = cm->cur_tile_mi_col_end - mi_col;
int block_row, block_col;
- for (block_row = 0; block_row < 8; ++block_row) {
- for (block_col = 0; block_col < 8; ++block_col) {
- m[block_row * mis + block_col].mbmi.sb_type = bsize;
+
+ assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
+
+ // Apply the requested partition size to the SB64 if it is all "in image"
+ if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
+ (row8x8_remaining >= MI_BLOCK_SIZE)) {
+ for (block_row = 0; block_row < MI_BLOCK_SIZE; ++block_row) {
+ for (block_col = 0; block_col < MI_BLOCK_SIZE; ++block_col) {
+ m[block_row * mis + block_col].mbmi.sb_type = bsize;
+ }
+ }
+ } else {
+ // Else this is a partial SB64.
+ int bh = num_8x8_blocks_high_lookup[bsize];
+ int bw = num_8x8_blocks_wide_lookup[bsize];
+ int sub_block_row;
+ int sub_block_col;
+ int row_index;
+ int col_index;
+
+ for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) {
+ for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
+ // Find a partition size that fits
+ bsize = find_partition_size(cpi->sf.always_this_block_size,
+ (row8x8_remaining - block_row),
+ (col8x8_remaining - block_col), &bh, &bw);
+
+ // Set the mi entries for all 8x8 blocks within the selected size
+ for (sub_block_row = 0; sub_block_row < bh; ++sub_block_row) {
+ for (sub_block_col = 0; sub_block_col < bw; ++sub_block_col) {
+ row_index = block_row + sub_block_row;
+ col_index = block_col + sub_block_col;
+ m[row_index * mis + col_index].mbmi.sb_type = bsize;
+ }
+ }
+ }
}
}
}
@@ -1946,7 +2008,7 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
cpi->mb.source_variance = UINT_MAX;
if (cpi->sf.use_one_partition_size_always) {
set_offsets(cpi, mi_row, mi_col, BLOCK_64X64);
- set_partitioning(cpi, m, cpi->sf.always_this_block_size);
+ set_partitioning(cpi, m, mi_row, mi_col);
rd_use_partition(cpi, m, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1);
} else if (cpi->sf.partition_by_variance) {
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index e9c214ff8..d35b739fb 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -28,7 +28,7 @@
#include "vp9/encoder/vp9_segmentation.h"
#include "./vp9_rtcd.h"
#include "./vpx_scale_rtcd.h"
-#if CONFIG_POSTPROC
+#if CONFIG_VP9_POSTPROC
#include "vp9/common/vp9_postproc.h"
#endif
#include "vpx_mem/vpx_mem.h"
@@ -2895,7 +2895,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
set_mvcost(&cpi->mb);
}
-#if CONFIG_POSTPROC
+#if CONFIG_VP9_POSTPROC
if (cpi->oxcf.noise_sensitivity > 0) {
int l = 0;
@@ -3954,7 +3954,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
{
double frame_psnr2, frame_ssim2 = 0;
double weight = 0;
-#if CONFIG_POSTPROC
+#if CONFIG_VP9_POSTPROC
vp9_deblock(cm->frame_to_show, &cm->post_proc_buffer,
cm->lf.filter_level * 10 / 6);
#endif
@@ -4030,7 +4030,7 @@ int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest,
return -1;
else {
int ret;
-#if CONFIG_POSTPROC
+#if CONFIG_VP9_POSTPROC
ret = vp9_post_proc_frame(&cpi->common, dest, flags);
#else
@@ -4044,7 +4044,7 @@ int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest,
ret = -1;
}
-#endif // !CONFIG_POSTPROC
+#endif // !CONFIG_VP9_POSTPROC
vp9_clear_system_state();
return ret;
}
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 39b6544f6..647265bf6 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2916,8 +2916,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
(sse_v - var_v < thresh_dc || sse_v == var_v)) {
x->skip = 1;
- *rate2 = 500;
- *rate_uv = 0;
+ // The cost of the skip bit needs to be added.
+ *rate2 += vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1);
// Scaling factor for SSE from spatial domain to frequency domain
// is 16. Adjust distortion accordingly.
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index fb302abec..c6daecca0 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -73,11 +73,11 @@ VP9_COMMON_SRCS-yes += common/vp9_common_data.h
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_postproc_x86.h
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_intrin_sse2.c
-VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.h
-VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.c
+VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.h
+VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c
VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
-ifeq ($(CONFIG_POSTPROC),yes)
+ifeq ($(CONFIG_VP9_POSTPROC),yes)
VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm
endif
@@ -102,6 +102,7 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct8x8_1_add_neon$(AS
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct8x8_add_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct16x16_1_add_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct16x16_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_iht4x4_add_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_mb_lpf_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_copy_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_avg_neon$(ASM)
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 0874afdbc..e9549228e 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -888,7 +888,7 @@ static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx,
static vpx_codec_err_t vp9e_set_previewpp(vpx_codec_alg_priv_t *ctx,
int ctr_id,
va_list args) {
-#if CONFIG_POSTPROC
+#if CONFIG_VP9_POSTPROC
vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *);
(void)ctr_id;
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index e7362fca0..10b32385c 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -20,7 +20,7 @@
#include "vp9/decoder/vp9_read_bit_buffer.h"
#include "vp9/vp9_iface_common.h"
-#define VP9_CAP_POSTPROC (CONFIG_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0)
+#define VP9_CAP_POSTPROC (CONFIG_VP9_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0)
typedef vpx_codec_stream_info_t vp9_stream_info_t;
/* Structures for handling memory allocations */
@@ -596,7 +596,7 @@ static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx,
static vpx_codec_err_t set_postproc(vpx_codec_alg_priv_t *ctx,
int ctr_id,
va_list args) {
-#if CONFIG_POSTPROC
+#if CONFIG_VP9_POSTPROC
vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *);
if (data) {
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 89de6014e..9fbf100f5 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -64,7 +64,7 @@ VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c
VP9_CX_SRCS-yes += encoder/vp9_tokenize.c
VP9_CX_SRCS-yes += encoder/vp9_treewriter.c
VP9_CX_SRCS-yes += encoder/vp9_variance_c.c
-ifeq ($(CONFIG_POSTPROC),yes)
+ifeq ($(CONFIG_VP9_POSTPROC),yes)
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.h
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.c
endif