summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--examples/vpx_temporal_scalable_patterns.c9
-rw-r--r--test/cq_test.cc11
-rw-r--r--test/error_resilience_test.cc24
-rw-r--r--vp8/common/arm/neon/copymem16x16_neon.asm59
-rw-r--r--vp8/common/arm/neon/copymem8x4_neon.asm34
-rw-r--r--vp8/common/arm/neon/copymem8x8_neon.asm43
-rw-r--r--vp8/common/arm/neon/copymem_neon.c59
-rw-r--r--vp8/common/arm/neon/dc_only_idct_add_neon.asm54
-rw-r--r--vp8/common/arm/neon/dc_only_idct_add_neon.c42
-rw-r--r--vp8/common/arm/neon/dequant_idct_neon.asm131
-rw-r--r--vp8/common/arm/neon/dequant_idct_neon.c142
-rw-r--r--vp8/common/arm/neon/dequantizeb_neon.asm34
-rw-r--r--vp8/common/arm/neon/dequantizeb_neon.c27
-rw-r--r--vp8/vp8_common.mk10
-rw-r--r--vp9/common/vp9_reconintra.c14
-rw-r--r--vp9/encoder/vp9_encodeframe.c101
-rw-r--r--vp9/encoder/vp9_encodemb.c24
-rw-r--r--vp9/encoder/vp9_encodemv.c17
-rw-r--r--vp9/encoder/vp9_encodemv.h8
-rw-r--r--vp9/encoder/vp9_onyx_if.c14
-rw-r--r--vp9/encoder/vp9_onyx_int.h39
-rw-r--r--vp9/encoder/vp9_pickmode.c31
-rw-r--r--vp9/encoder/vp9_ratectrl.c17
-rw-r--r--vp9/encoder/vp9_rdopt.c138
-rw-r--r--vp9/encoder/vp9_tokenize.c4
-rw-r--r--vp9/encoder/vp9_tokenize.h2
26 files changed, 538 insertions, 550 deletions
diff --git a/examples/vpx_temporal_scalable_patterns.c b/examples/vpx_temporal_scalable_patterns.c
index 29a266d29..6ec1b6208 100644
--- a/examples/vpx_temporal_scalable_patterns.c
+++ b/examples/vpx_temporal_scalable_patterns.c
@@ -52,6 +52,12 @@ struct RateControlMetrics {
double layer_encoding_bitrate[VPX_TS_MAX_LAYERS];
};
+// Note: these rate control metrics assume only 1 key frame in the
+// sequence (i.e., first frame only). So for temporal pattern# 7
+// (which has key frame for every frame on base layer), the metrics
+// computation will be off/wrong.
+// TODO(marpan): Update these metrics to account for multiple key frames
+// in the stream.
static void set_rate_control_metrics(struct RateControlMetrics *rc,
vpx_codec_enc_cfg_t *cfg) {
unsigned int i = 0;
@@ -565,6 +571,9 @@ int main(int argc, char **argv) {
}
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
vpx_codec_control(&codec, VP8E_SET_TOKEN_PARTITIONS, 1);
+ // This controls the maximum target size of the key frame.
+ // For generating smaller key frames, use a smaller max_intra_size_pct
+ // value, like 100 or 200.
max_intra_size_pct = (int) (((double)cfg.rc_buf_optimal_sz * 0.5)
* ((double) cfg.g_timebase.den / cfg.g_timebase.num) / 10.0);
vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, max_intra_size_pct);
diff --git a/test/cq_test.cc b/test/cq_test.cc
index a2c829163..7da7b80aa 100644
--- a/test/cq_test.cc
+++ b/test/cq_test.cc
@@ -20,7 +20,7 @@ namespace {
const int kCQLevelMin = 4;
const int kCQLevelMax = 63;
const int kCQLevelStep = 8;
-const int kCQTargetBitrate = 2000;
+const unsigned int kCQTargetBitrate = 2000;
class CQTest : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWithParam<int> {
@@ -66,17 +66,17 @@ class CQTest : public ::libvpx_test::EncoderTest,
return pow(10.0, avg_psnr / 10.0) / file_size_;
}
- int file_size() const { return file_size_; }
+ size_t file_size() const { return file_size_; }
int n_frames() const { return n_frames_; }
private:
int cq_level_;
- int file_size_;
+ size_t file_size_;
double psnr_;
int n_frames_;
};
-int prev_actual_bitrate = kCQTargetBitrate;
+unsigned int prev_actual_bitrate = kCQTargetBitrate;
TEST_P(CQTest, LinearPSNRIsHigherForCQLevel) {
const vpx_rational timebase = { 33333333, 1000000000 };
cfg_.g_timebase = timebase;
@@ -88,7 +88,8 @@ TEST_P(CQTest, LinearPSNRIsHigherForCQLevel) {
timebase.den, timebase.num, 0, 30);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
const double cq_psnr_lin = GetLinearPSNROverBitrate();
- const int cq_actual_bitrate = file_size() * 8 * 30 / (n_frames() * 1000);
+ const unsigned int cq_actual_bitrate =
+ static_cast<unsigned int>(file_size()) * 8 * 30 / (n_frames() * 1000);
EXPECT_LE(cq_actual_bitrate, kCQTargetBitrate);
EXPECT_LE(cq_actual_bitrate, prev_actual_bitrate);
prev_actual_bitrate = cq_actual_bitrate;
diff --git a/test/error_resilience_test.cc b/test/error_resilience_test.cc
index 30c20e91f..4cd9efb86 100644
--- a/test/error_resilience_test.cc
+++ b/test/error_resilience_test.cc
@@ -16,8 +16,8 @@
namespace {
-const int kMaxErrorFrames = 8;
-const int kMaxDroppableFrames = 8;
+const int kMaxErrorFrames = 12;
+const int kMaxDroppableFrames = 12;
class ErrorResilienceTest : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
@@ -175,6 +175,10 @@ TEST_P(ErrorResilienceTest, OnVersusOff) {
}
}
+// Check for successful decoding and no encoder/decoder mismatch
+// if we lose (i.e., drop before decoding) a set of droppable
+// frames (i.e., frames that don't update any reference buffers).
+// Check both isolated and consecutive loss.
TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) {
const vpx_rational timebase = { 33333333, 1000000000 };
cfg_.g_timebase = timebase;
@@ -186,14 +190,18 @@ TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) {
init_flags_ = VPX_CODEC_USE_PSNR;
libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- timebase.den, timebase.num, 0, 30);
+ timebase.den, timebase.num, 0, 40);
// Error resilient mode ON.
cfg_.g_error_resilient = 1;
-
- // Set an arbitrary set of error frames same as droppable frames
- unsigned int num_droppable_frames = 2;
- unsigned int droppable_frame_list[] = {5, 16};
+ cfg_.kf_mode = VPX_KF_DISABLED;
+
+ // Set an arbitrary set of error frames same as droppable frames.
+ // In addition to isolated loss/drop, add a long consecutive series
+ // (of size 9) of dropped frames.
+ unsigned int num_droppable_frames = 11;
+ unsigned int droppable_frame_list[] = {5, 16, 22, 23, 24, 25, 26, 27, 28,
+ 29, 30};
SetDroppableFrames(num_droppable_frames, droppable_frame_list);
SetErrorFrames(num_droppable_frames, droppable_frame_list);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
@@ -202,7 +210,7 @@ TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) {
<< GetMismatchFrames() << "\n";
EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
- // reset previously set error/droppable frames
+ // Reset previously set of error/droppable frames.
Reset();
#if 0
diff --git a/vp8/common/arm/neon/copymem16x16_neon.asm b/vp8/common/arm/neon/copymem16x16_neon.asm
deleted file mode 100644
index bda4b9654..000000000
--- a/vp8/common/arm/neon/copymem16x16_neon.asm
+++ /dev/null
@@ -1,59 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_copy_mem16x16_neon|
- ; ARM
- ; REQUIRE8
- ; PRESERVE8
-
- AREA Block, CODE, READONLY ; name this block of code
-;void copy_mem16x16_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-|vp8_copy_mem16x16_neon| PROC
-
- vld1.u8 {q0}, [r0], r1
- vld1.u8 {q1}, [r0], r1
- vld1.u8 {q2}, [r0], r1
- vst1.u8 {q0}, [r2], r3
- vld1.u8 {q3}, [r0], r1
- vst1.u8 {q1}, [r2], r3
- vld1.u8 {q4}, [r0], r1
- vst1.u8 {q2}, [r2], r3
- vld1.u8 {q5}, [r0], r1
- vst1.u8 {q3}, [r2], r3
- vld1.u8 {q6}, [r0], r1
- vst1.u8 {q4}, [r2], r3
- vld1.u8 {q7}, [r0], r1
- vst1.u8 {q5}, [r2], r3
- vld1.u8 {q8}, [r0], r1
- vst1.u8 {q6}, [r2], r3
- vld1.u8 {q9}, [r0], r1
- vst1.u8 {q7}, [r2], r3
- vld1.u8 {q10}, [r0], r1
- vst1.u8 {q8}, [r2], r3
- vld1.u8 {q11}, [r0], r1
- vst1.u8 {q9}, [r2], r3
- vld1.u8 {q12}, [r0], r1
- vst1.u8 {q10}, [r2], r3
- vld1.u8 {q13}, [r0], r1
- vst1.u8 {q11}, [r2], r3
- vld1.u8 {q14}, [r0], r1
- vst1.u8 {q12}, [r2], r3
- vld1.u8 {q15}, [r0], r1
- vst1.u8 {q13}, [r2], r3
- vst1.u8 {q14}, [r2], r3
- vst1.u8 {q15}, [r2], r3
-
- mov pc, lr
-
- ENDP ; |vp8_copy_mem16x16_neon|
-
- END
diff --git a/vp8/common/arm/neon/copymem8x4_neon.asm b/vp8/common/arm/neon/copymem8x4_neon.asm
deleted file mode 100644
index 35c0f6708..000000000
--- a/vp8/common/arm/neon/copymem8x4_neon.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_copy_mem8x4_neon|
- ; ARM
- ; REQUIRE8
- ; PRESERVE8
-
- AREA Block, CODE, READONLY ; name this block of code
-;void copy_mem8x4_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-|vp8_copy_mem8x4_neon| PROC
- vld1.u8 {d0}, [r0], r1
- vld1.u8 {d1}, [r0], r1
- vst1.u8 {d0}, [r2], r3
- vld1.u8 {d2}, [r0], r1
- vst1.u8 {d1}, [r2], r3
- vld1.u8 {d3}, [r0], r1
- vst1.u8 {d2}, [r2], r3
- vst1.u8 {d3}, [r2], r3
-
- mov pc, lr
-
- ENDP ; |vp8_copy_mem8x4_neon|
-
- END
diff --git a/vp8/common/arm/neon/copymem8x8_neon.asm b/vp8/common/arm/neon/copymem8x8_neon.asm
deleted file mode 100644
index 1f5b9411b..000000000
--- a/vp8/common/arm/neon/copymem8x8_neon.asm
+++ /dev/null
@@ -1,43 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_copy_mem8x8_neon|
- ; ARM
- ; REQUIRE8
- ; PRESERVE8
-
- AREA Block, CODE, READONLY ; name this block of code
-;void copy_mem8x8_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-|vp8_copy_mem8x8_neon| PROC
-
- vld1.u8 {d0}, [r0], r1
- vld1.u8 {d1}, [r0], r1
- vst1.u8 {d0}, [r2], r3
- vld1.u8 {d2}, [r0], r1
- vst1.u8 {d1}, [r2], r3
- vld1.u8 {d3}, [r0], r1
- vst1.u8 {d2}, [r2], r3
- vld1.u8 {d4}, [r0], r1
- vst1.u8 {d3}, [r2], r3
- vld1.u8 {d5}, [r0], r1
- vst1.u8 {d4}, [r2], r3
- vld1.u8 {d6}, [r0], r1
- vst1.u8 {d5}, [r2], r3
- vld1.u8 {d7}, [r0], r1
- vst1.u8 {d6}, [r2], r3
- vst1.u8 {d7}, [r2], r3
-
- mov pc, lr
-
- ENDP ; |vp8_copy_mem8x8_neon|
-
- END
diff --git a/vp8/common/arm/neon/copymem_neon.c b/vp8/common/arm/neon/copymem_neon.c
new file mode 100644
index 000000000..deced115c
--- /dev/null
+++ b/vp8/common/arm/neon/copymem_neon.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void vp8_copy_mem8x4_neon(
+ unsigned char *src,
+ int src_stride,
+ unsigned char *dst,
+ int dst_stride) {
+ uint8x8_t vtmp;
+ int r;
+
+ for (r = 0; r < 4; r++) {
+ vtmp = vld1_u8(src);
+ vst1_u8(dst, vtmp);
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+void vp8_copy_mem8x8_neon(
+ unsigned char *src,
+ int src_stride,
+ unsigned char *dst,
+ int dst_stride) {
+ uint8x8_t vtmp;
+ int r;
+
+ for (r = 0; r < 8; r++) {
+ vtmp = vld1_u8(src);
+ vst1_u8(dst, vtmp);
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+void vp8_copy_mem16x16_neon(
+ unsigned char *src,
+ int src_stride,
+ unsigned char *dst,
+ int dst_stride) {
+ int r;
+ uint8x16_t qtmp;
+
+ for (r = 0; r < 16; r++) {
+ qtmp = vld1q_u8(src);
+ vst1q_u8(dst, qtmp);
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
diff --git a/vp8/common/arm/neon/dc_only_idct_add_neon.asm b/vp8/common/arm/neon/dc_only_idct_add_neon.asm
deleted file mode 100644
index 79ff02c69..000000000
--- a/vp8/common/arm/neon/dc_only_idct_add_neon.asm
+++ /dev/null
@@ -1,54 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dc_only_idct_add_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-;void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
-; int pred_stride, unsigned char *dst_ptr,
-; int dst_stride)
-
-; r0 input_dc
-; r1 pred_ptr
-; r2 pred_stride
-; r3 dst_ptr
-; sp dst_stride
-
-|vp8_dc_only_idct_add_neon| PROC
- add r0, r0, #4
- asr r0, r0, #3
- ldr r12, [sp]
- vdup.16 q0, r0
-
- vld1.32 {d2[0]}, [r1], r2
- vld1.32 {d2[1]}, [r1], r2
- vld1.32 {d4[0]}, [r1], r2
- vld1.32 {d4[1]}, [r1]
-
- vaddw.u8 q1, q0, d2
- vaddw.u8 q2, q0, d4
-
- vqmovun.s16 d2, q1
- vqmovun.s16 d4, q2
-
- vst1.32 {d2[0]}, [r3], r12
- vst1.32 {d2[1]}, [r3], r12
- vst1.32 {d4[0]}, [r3], r12
- vst1.32 {d4[1]}, [r3]
-
- bx lr
-
- ENDP
-
- END
diff --git a/vp8/common/arm/neon/dc_only_idct_add_neon.c b/vp8/common/arm/neon/dc_only_idct_add_neon.c
new file mode 100644
index 000000000..ad5f41d7d
--- /dev/null
+++ b/vp8/common/arm/neon/dc_only_idct_add_neon.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void vp8_dc_only_idct_add_neon(
+ int16_t input_dc,
+ unsigned char *pred_ptr,
+ int pred_stride,
+ unsigned char *dst_ptr,
+ int dst_stride) {
+ int i;
+ uint16_t a1 = ((input_dc + 4) >> 3);
+ uint32x2_t d2u32 = vdup_n_u32(0);
+ uint8x8_t d2u8;
+ uint16x8_t q1u16;
+ uint16x8_t qAdd;
+
+ qAdd = vdupq_n_u16(a1);
+
+ for (i = 0; i < 2; i++) {
+ d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 0);
+ pred_ptr += pred_stride;
+ d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 1);
+ pred_ptr += pred_stride;
+
+ q1u16 = vaddw_u8(qAdd, vreinterpret_u8_u32(d2u32));
+ d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
+
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0);
+ dst_ptr += dst_stride;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1);
+ dst_ptr += dst_stride;
+ }
+}
diff --git a/vp8/common/arm/neon/dequant_idct_neon.asm b/vp8/common/arm/neon/dequant_idct_neon.asm
deleted file mode 100644
index 602cce676..000000000
--- a/vp8/common/arm/neon/dequant_idct_neon.asm
+++ /dev/null
@@ -1,131 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dequant_idct_add_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_dequant_idct_add_neon(short *input, short *dq,
-; unsigned char *dest, int stride)
-; r0 short *input,
-; r1 short *dq,
-; r2 unsigned char *dest
-; r3 int stride
-
-|vp8_dequant_idct_add_neon| PROC
- vld1.16 {q3, q4}, [r0]
- vld1.16 {q5, q6}, [r1]
-
- add r1, r2, r3 ; r1 = dest + stride
- lsl r3, #1 ; 2x stride
-
- vld1.32 {d14[0]}, [r2], r3
- vld1.32 {d14[1]}, [r1], r3
- vld1.32 {d15[0]}, [r2]
- vld1.32 {d15[1]}, [r1]
-
- adr r12, cospi8sqrt2minus1 ; pointer to the first constant
-
- vmul.i16 q1, q3, q5 ;input for short_idct4x4llm_neon
- vmul.i16 q2, q4, q6
-
-;|short_idct4x4llm_neon| PROC
- vld1.16 {d0}, [r12]
- vswp d3, d4 ;q2(vp[4] vp[12])
-
- vqdmulh.s16 q3, q2, d0[2]
- vqdmulh.s16 q4, q2, d0[0]
-
- vqadd.s16 d12, d2, d3 ;a1
- vqsub.s16 d13, d2, d3 ;b1
-
- vshr.s16 q3, q3, #1
- vshr.s16 q4, q4, #1
-
- vqadd.s16 q3, q3, q2
- vqadd.s16 q4, q4, q2
-
- vqsub.s16 d10, d6, d9 ;c1
- vqadd.s16 d11, d7, d8 ;d1
-
- vqadd.s16 d2, d12, d11
- vqadd.s16 d3, d13, d10
- vqsub.s16 d4, d13, d10
- vqsub.s16 d5, d12, d11
-
- vtrn.32 d2, d4
- vtrn.32 d3, d5
- vtrn.16 d2, d3
- vtrn.16 d4, d5
-
-; memset(input, 0, 32) -- 32bytes
- vmov.i16 q14, #0
-
- vswp d3, d4
- vqdmulh.s16 q3, q2, d0[2]
- vqdmulh.s16 q4, q2, d0[0]
-
- vqadd.s16 d12, d2, d3 ;a1
- vqsub.s16 d13, d2, d3 ;b1
-
- vmov q15, q14
-
- vshr.s16 q3, q3, #1
- vshr.s16 q4, q4, #1
-
- vqadd.s16 q3, q3, q2
- vqadd.s16 q4, q4, q2
-
- vqsub.s16 d10, d6, d9 ;c1
- vqadd.s16 d11, d7, d8 ;d1
-
- vqadd.s16 d2, d12, d11
- vqadd.s16 d3, d13, d10
- vqsub.s16 d4, d13, d10
- vqsub.s16 d5, d12, d11
-
- vst1.16 {q14, q15}, [r0]
-
- vrshr.s16 d2, d2, #3
- vrshr.s16 d3, d3, #3
- vrshr.s16 d4, d4, #3
- vrshr.s16 d5, d5, #3
-
- vtrn.32 d2, d4
- vtrn.32 d3, d5
- vtrn.16 d2, d3
- vtrn.16 d4, d5
-
- vaddw.u8 q1, q1, d14
- vaddw.u8 q2, q2, d15
-
- sub r2, r2, r3
- sub r1, r1, r3
-
- vqmovun.s16 d0, q1
- vqmovun.s16 d1, q2
-
- vst1.32 {d0[0]}, [r2], r3
- vst1.32 {d0[1]}, [r1], r3
- vst1.32 {d1[0]}, [r2]
- vst1.32 {d1[1]}, [r1]
-
- bx lr
-
- ENDP ; |vp8_dequant_idct_add_neon|
-
-; Constant Pool
-cospi8sqrt2minus1 DCD 0x4e7b4e7b
-sinpi8sqrt2 DCD 0x8a8c8a8c
-
- END
diff --git a/vp8/common/arm/neon/dequant_idct_neon.c b/vp8/common/arm/neon/dequant_idct_neon.c
new file mode 100644
index 000000000..58e11922c
--- /dev/null
+++ b/vp8/common/arm/neon/dequant_idct_neon.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+static const int16_t cospi8sqrt2minus1 = 20091;
+static const int16_t sinpi8sqrt2 = 35468;
+
+void vp8_dequant_idct_add_neon(
+ int16_t *input,
+ int16_t *dq,
+ unsigned char *dst,
+ int stride) {
+ unsigned char *dst0;
+ int32x2_t d14, d15;
+ int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
+ int16x8_t q1, q2, q3, q4, q5, q6;
+ int16x8_t qEmpty = vdupq_n_s16(0);
+ int32x2x2_t d2tmp0, d2tmp1;
+ int16x4x2_t d2tmp2, d2tmp3;
+
+ d14 = d15 = vdup_n_s32(0);
+
+ // load input
+ q3 = vld1q_s16(input);
+ vst1q_s16(input, qEmpty);
+ input += 8;
+ q4 = vld1q_s16(input);
+ vst1q_s16(input, qEmpty);
+
+ // load dq
+ q5 = vld1q_s16(dq);
+ dq += 8;
+ q6 = vld1q_s16(dq);
+
+ // load src from dst
+ dst0 = dst;
+ d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0);
+ dst0 += stride;
+ d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1);
+ dst0 += stride;
+ d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0);
+ dst0 += stride;
+ d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1);
+
+ q1 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q3),
+ vreinterpretq_u16_s16(q5)));
+ q2 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q4),
+ vreinterpretq_u16_s16(q6)));
+
+ d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2));
+ d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2));
+
+ q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2));
+
+ q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
+ q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
+
+ q3 = vshrq_n_s16(q3, 1);
+ q4 = vshrq_n_s16(q4, 1);
+
+ q3 = vqaddq_s16(q3, q2);
+ q4 = vqaddq_s16(q4, q2);
+
+ d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
+ d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
+
+ d2 = vqadd_s16(d12, d11);
+ d3 = vqadd_s16(d13, d10);
+ d4 = vqsub_s16(d13, d10);
+ d5 = vqsub_s16(d12, d11);
+
+ d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
+ d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
+ d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
+ vreinterpret_s16_s32(d2tmp1.val[0]));
+ d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
+ vreinterpret_s16_s32(d2tmp1.val[1]));
+
+ // loop 2
+ q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]);
+
+ q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
+ q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
+
+ d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]);
+ d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]);
+
+ q3 = vshrq_n_s16(q3, 1);
+ q4 = vshrq_n_s16(q4, 1);
+
+ q3 = vqaddq_s16(q3, q2);
+ q4 = vqaddq_s16(q4, q2);
+
+ d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
+ d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
+
+ d2 = vqadd_s16(d12, d11);
+ d3 = vqadd_s16(d13, d10);
+ d4 = vqsub_s16(d13, d10);
+ d5 = vqsub_s16(d12, d11);
+
+ d2 = vrshr_n_s16(d2, 3);
+ d3 = vrshr_n_s16(d3, 3);
+ d4 = vrshr_n_s16(d4, 3);
+ d5 = vrshr_n_s16(d5, 3);
+
+ d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
+ d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
+ d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
+ vreinterpret_s16_s32(d2tmp1.val[0]));
+ d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
+ vreinterpret_s16_s32(d2tmp1.val[1]));
+
+ q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]);
+ q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]);
+
+ q1 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q1),
+ vreinterpret_u8_s32(d14)));
+ q2 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2),
+ vreinterpret_u8_s32(d15)));
+
+ d14 = vreinterpret_s32_u8(vqmovun_s16(q1));
+ d15 = vreinterpret_s32_u8(vqmovun_s16(q2));
+
+ dst0 = dst;
+ vst1_lane_s32((int32_t *)dst0, d14, 0);
+ dst0 += stride;
+ vst1_lane_s32((int32_t *)dst0, d14, 1);
+ dst0 += stride;
+ vst1_lane_s32((int32_t *)dst0, d15, 0);
+ dst0 += stride;
+ vst1_lane_s32((int32_t *)dst0, d15, 1);
+ return;
+}
diff --git a/vp8/common/arm/neon/dequantizeb_neon.asm b/vp8/common/arm/neon/dequantizeb_neon.asm
deleted file mode 100644
index c8e0c31f2..000000000
--- a/vp8/common/arm/neon/dequantizeb_neon.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dequantize_b_loop_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-; r0 short *Q,
-; r1 short *DQC
-; r2 short *DQ
-|vp8_dequantize_b_loop_neon| PROC
- vld1.16 {q0, q1}, [r0]
- vld1.16 {q2, q3}, [r1]
-
- vmul.i16 q4, q0, q2
- vmul.i16 q5, q1, q3
-
- vst1.16 {q4, q5}, [r2]
-
- bx lr
-
- ENDP
-
- END
diff --git a/vp8/common/arm/neon/dequantizeb_neon.c b/vp8/common/arm/neon/dequantizeb_neon.c
new file mode 100644
index 000000000..60f69c8db
--- /dev/null
+++ b/vp8/common/arm/neon/dequantizeb_neon.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void vp8_dequantize_b_loop_neon(
+ int16_t *Q,
+ int16_t *DQC,
+ int16_t *DQ) {
+ int16x8x2_t qQ, qDQC, qDQ;
+
+ qQ = vld2q_s16(Q);
+ qDQC = vld2q_s16(DQC);
+
+ qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]);
+ qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]);
+
+ vst2q_s16(DQ, qDQ);
+ return;
+}
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index a2127c9b7..ac91d7af5 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -159,10 +159,6 @@ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6$(ASM)
# common (neon)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem8x4_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem8x8_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem16x16_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dc_only_idct_add_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iwalsh_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfilter_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM)
@@ -177,10 +173,8 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict8x8_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict16x16_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/save_reg_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequant_idct_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_full_2x_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_0_2x_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequantizeb_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_blk_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/variance_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance8x8_neon$(ASM)
@@ -189,6 +183,10 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16s_neon
# common (neon intrinsics)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dc_only_idct_add_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequant_idct_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequantizeb_neon.c
$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.sh))
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index 96ba3e464..71a41a9de 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -382,34 +382,34 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
/* slower path if the block needs border extension */
if (x0 + 2 * bs <= frame_width) {
if (right_available && bs == 4) {
- vpx_memcpy(above_row - 1, above_ref - 1, 2 * bs + 1);
+ vpx_memcpy(above_row, above_ref, 2 * bs);
} else {
- vpx_memcpy(above_row - 1, above_ref - 1, bs + 1);
+ vpx_memcpy(above_row, above_ref, bs);
vpx_memset(above_row + bs, above_row[bs - 1], bs);
}
} else if (x0 + bs <= frame_width) {
const int r = frame_width - x0;
if (right_available && bs == 4) {
- vpx_memcpy(above_row - 1, above_ref - 1, r + 1);
+ vpx_memcpy(above_row, above_ref, r);
vpx_memset(above_row + r, above_row[r - 1],
x0 + 2 * bs - frame_width);
} else {
- vpx_memcpy(above_row - 1, above_ref - 1, bs + 1);
+ vpx_memcpy(above_row, above_ref, bs);
vpx_memset(above_row + bs, above_row[bs - 1], bs);
}
} else if (x0 <= frame_width) {
const int r = frame_width - x0;
if (right_available && bs == 4) {
- vpx_memcpy(above_row - 1, above_ref - 1, r + 1);
+ vpx_memcpy(above_row, above_ref, r);
vpx_memset(above_row + r, above_row[r - 1],
x0 + 2 * bs - frame_width);
} else {
- vpx_memcpy(above_row - 1, above_ref - 1, r + 1);
+ vpx_memcpy(above_row, above_ref, r);
vpx_memset(above_row + r, above_row[r - 1],
x0 + 2 * bs - frame_width);
}
- above_row[-1] = left_available ? above_ref[-1] : 129;
}
+ above_row[-1] = left_available ? above_ref[-1] : 129;
} else {
/* faster path if the block does not need extension */
if (bs == 4 && right_available && left_available) {
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index f529c9336..a9b51e0d4 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -102,6 +102,24 @@ static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, MACROBLOCK *x,
return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}
+static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi) {
+ unsigned int var = get_sby_perpixel_variance(cpi, &cpi->mb, BLOCK_64X64);
+ if (var < 256)
+ return BLOCK_64X64;
+ else
+ return BLOCK_32X32;
+}
+
+static BLOCK_SIZE get_nonrd_var_based_fixed_partition(VP9_COMP *cpi) {
+ unsigned int var = get_sby_perpixel_variance(cpi, &cpi->mb, BLOCK_64X64);
+ if (var < 1024)
+ return BLOCK_32X32;
+ else if (var < 4096)
+ return BLOCK_16X16;
+ else
+ return BLOCK_8X8;
+}
+
// Original activity measure from Tim T's code.
static unsigned int tt_activity_measure(MACROBLOCK *x) {
unsigned int sse;
@@ -994,7 +1012,7 @@ static void set_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
int index = block_row * mis + block_col;
// Find a partition size that fits
- bsize = find_partition_size(cpi->sf.always_this_block_size,
+ bsize = find_partition_size(bsize,
(row8x8_remaining - block_row),
(col8x8_remaining - block_col), &bh, &bw);
mi_8x8[index] = mi_upper_left + index;
@@ -1914,8 +1932,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
}
}
-static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
- int mi_row, TOKENEXTRA **tp) {
+static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
+ int mi_row, TOKENEXTRA **tp) {
VP9_COMMON *const cm = &cpi->common;
int mi_col;
@@ -1946,19 +1964,32 @@ static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
vp9_zero(cpi->mb.pred_mv);
- if (cpi->sf.use_lastframe_partitioning ||
- cpi->sf.use_one_partition_size_always ) {
+ if ((cpi->sf.partition_search_type == SEARCH_PARTITION &&
+ cpi->sf.use_lastframe_partitioning) ||
+ cpi->sf.partition_search_type == FIXED_PARTITION ||
+ cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) {
const int idx_str = cm->mode_info_stride * mi_row + mi_col;
MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
cpi->mb.source_variance = UINT_MAX;
- if (cpi->sf.use_one_partition_size_always) {
+ if (cpi->sf.partition_search_type == FIXED_PARTITION) {
set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
cpi->sf.always_this_block_size);
rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1);
+ } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION ||
+ cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
+ // TODO(debargha): Implement VAR_BASED_PARTITION as a separate case.
+ // Currently both VAR_BASED_FIXED_PARTITION/VAR_BASED_PARTITION
+ // map to the same thing.
+ BLOCK_SIZE bsize;
+ set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
+ bsize = get_rd_var_based_fixed_partition(cpi);
+ set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
+ rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
+ &dummy_rate, &dummy_dist, 1);
} else {
if ((cm->current_video_frame
% cpi->sf.last_partitioning_redo_frequency) == 0
@@ -2252,12 +2283,12 @@ static INLINE int get_block_col(int b32i, int b16i, int b8i) {
return ((b32i & 1) << 2) + ((b16i & 1) << 1) + (b8i & 1);
}
-static void rtc_use_partition(VP9_COMP *cpi,
- const TileInfo *const tile,
- MODE_INFO **mi_8x8,
- TOKENEXTRA **tp, int mi_row, int mi_col,
- BLOCK_SIZE bsize, int *rate, int64_t *dist,
- int do_recon) {
+static void nonrd_use_partition(VP9_COMP *cpi,
+ const TileInfo *const tile,
+ MODE_INFO **mi_8x8,
+ TOKENEXTRA **tp, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int *rate, int64_t *dist,
+ int do_recon) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
@@ -2270,8 +2301,8 @@ static void rtc_use_partition(VP9_COMP *cpi,
int rows = MIN(MI_BLOCK_SIZE, tile->mi_row_end - mi_row);
int cols = MIN(MI_BLOCK_SIZE, tile->mi_col_end - mi_col);
- int mi_8x8_width = num_8x8_blocks_wide_lookup[bsize];
- int mi_8x8_hight = num_8x8_blocks_high_lookup[bsize];
+ int bw = num_8x8_blocks_wide_lookup[bsize];
+ int bh = num_8x8_blocks_high_lookup[bsize];
int brate;
int64_t bdist;
@@ -2279,14 +2310,13 @@ static void rtc_use_partition(VP9_COMP *cpi,
*dist = 0;
// find prediction mode for each 8x8 block
- for (br = 0; br < rows; br += mi_8x8_hight) {
- for (bc = 0; bc < cols; bc += mi_8x8_width) {
+ for (br = 0; br < rows; br += bh) {
+ for (bc = 0; bc < cols; bc += bw) {
int row = mi_row + br;
int col = mi_col + bc;
- int bh = 0, bw = 0;
+
BLOCK_SIZE bs = find_partition_size(bsize, rows - br, cols - bc,
&bh, &bw);
-
set_offsets(cpi, tile, row, col, bs);
if (cm->frame_type != KEY_FRAME)
@@ -2298,8 +2328,9 @@ static void rtc_use_partition(VP9_COMP *cpi,
*dist += bdist;
for (j = 0; j < bh; ++j)
- for (i = 0; i < bw; ++i)
+ for (i = 0; i < bw; ++i) {
xd->mi_8x8[j * mis + i] = xd->mi_8x8[0];
+ }
}
}
@@ -2309,8 +2340,8 @@ static void rtc_use_partition(VP9_COMP *cpi,
*dist = chosen_dist;
}
-static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
- int mi_row, TOKENEXTRA **tp) {
+static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
+ int mi_row, TOKENEXTRA **tp) {
VP9_COMMON * const cm = &cpi->common;
int mi_col;
@@ -2328,9 +2359,21 @@ static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
cpi->mb.source_variance = UINT_MAX;
- rtc_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
- cpi->sf.always_this_block_size,
- &dummy_rate, &dummy_dist, 1);
+ if (cpi->sf.partition_search_type == FIXED_PARTITION) {
+ nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
+ cpi->sf.always_this_block_size,
+ &dummy_rate, &dummy_dist, 1);
+ } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION ||
+ cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
+ // TODO(debargha): Implement VAR_BASED_PARTITION as a separate case.
+ // Currently both VAR_BASED_FIXED_PARTITION/VAR_BASED_PARTITION
+ // map to the same thing.
+ BLOCK_SIZE bsize = get_nonrd_var_based_fixed_partition(cpi);
+ nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
+ bsize, &dummy_rate, &dummy_dist, 1);
+ } else {
+ assert(0);
+ }
}
}
// end RTC play code
@@ -2386,7 +2429,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
set_prev_mi(cm);
- if (cpi->sf.use_pick_mode) {
+ if (cpi->sf.use_nonrd_pick_mode) {
// Initialize internal buffer pointers for rtc coding, where non-RD
// mode decision is used and hence no buffer pointer swap needed.
int i;
@@ -2422,10 +2465,10 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vp9_tile_init(&tile, cm, tile_row, tile_col);
for (mi_row = tile.mi_row_start;
mi_row < tile.mi_row_end; mi_row += MI_BLOCK_SIZE) {
- if (cpi->sf.use_pick_mode)
- encode_rtc_sb_row(cpi, &tile, mi_row, &tp);
+ if (cpi->sf.use_nonrd_pick_mode)
+ encode_nonrd_sb_row(cpi, &tile, mi_row, &tp);
else
- encode_sb_row(cpi, &tile, mi_row, &tp);
+ encode_rd_sb_row(cpi, &tile, mi_row, &tp);
}
cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
@@ -2688,7 +2731,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 &&
(cpi->oxcf.aq_mode != COMPLEXITY_AQ) &&
- !cpi->sf.use_pick_mode;
+ !cpi->sf.use_nonrd_pick_mode;
x->skip_optimize = ctx->is_coded;
ctx->is_coded = 1;
x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index e2ed6760e..0c58d1fcb 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -105,10 +105,9 @@ static int trellis_get_coeff_context(const int16_t *scan,
return pt;
}
-static void optimize_b(MACROBLOCK *mb,
- int plane, int block, BLOCK_SIZE plane_bsize,
- ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
- TX_SIZE tx_size) {
+static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, MACROBLOCK *mb,
+ struct optimize_ctx *ctx) {
MACROBLOCKD *const xd = &mb->e_mbd;
struct macroblock_plane *p = &mb->plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
@@ -134,6 +133,11 @@ static void optimize_b(MACROBLOCK *mb,
const scan_order *so = get_scan(xd, tx_size, type, block);
const int16_t *scan = so->scan;
const int16_t *nb = so->neighbors;
+ ENTROPY_CONTEXT *a, *l;
+ int tx_x, tx_y;
+ txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &tx_x, &tx_y);
+ a = &ctx->ta[plane][tx_x];
+ l = &ctx->tl[plane][tx_y];
assert((!type && !plane) || (type && plane));
assert(eob <= default_eob);
@@ -307,14 +311,6 @@ static void optimize_b(MACROBLOCK *mb,
*a = *l = (final_eob > 0);
}
-void vp9_optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, MACROBLOCK *mb, struct optimize_ctx *ctx) {
- int x, y;
- txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
- optimize_b(mb, plane, block, plane_bsize,
- &ctx->ta[plane][x], &ctx->tl[plane][y], tx_size);
-}
-
static void optimize_init_b(int plane, BLOCK_SIZE bsize,
struct encode_b_args *args) {
const MACROBLOCKD *xd = &args->x->e_mbd;
@@ -419,7 +415,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
- vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx);
+ optimize_b(plane, block, plane_bsize, tx_size, x, ctx);
} else {
ctx->ta[plane][i] = p->eobs[block] > 0;
ctx->tl[plane][j] = p->eobs[block] > 0;
@@ -522,7 +518,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
src_diff = &p->src_diff[4 * (j * diff_stride + i)];
// if (x->optimize)
- // vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx);
+ // optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx);
switch (tx_size) {
case TX_32X32:
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index af710a8f4..be6abc2a1 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -224,18 +224,11 @@ void vp9_encode_mv(VP9_COMP* cpi, vp9_writer* w,
}
}
-void vp9_build_nmv_cost_table(int *mvjoint,
- int *mvcost[2],
- const nmv_context* const mvctx,
- int usehp,
- int mvc_flag_v,
- int mvc_flag_h) {
- vp9_clear_system_state();
- vp9_cost_tokens(mvjoint, mvctx->joints, vp9_mv_joint_tree);
- if (mvc_flag_v)
- build_nmv_component_cost_table(mvcost[0], &mvctx->comps[0], usehp);
- if (mvc_flag_h)
- build_nmv_component_cost_table(mvcost[1], &mvctx->comps[1], usehp);
+void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
+ const nmv_context* ctx, int usehp) {
+ vp9_cost_tokens(mvjoint, ctx->joints, vp9_mv_joint_tree);
+ build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], usehp);
+ build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], usehp);
}
static void inc_mvs(int_mv mv[2], int_mv ref[2], int is_compound,
diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h
index f0463bbd3..bb242b6dd 100644
--- a/vp9/encoder/vp9_encodemv.h
+++ b/vp9/encoder/vp9_encodemv.h
@@ -25,12 +25,8 @@ void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w);
void vp9_encode_mv(VP9_COMP *cpi, vp9_writer* w, const MV* mv, const MV* ref,
const nmv_context* mvctx, int usehp);
-void vp9_build_nmv_cost_table(int *mvjoint,
- int *mvcost[2],
- const nmv_context* const mvctx,
- int usehp,
- int mvc_flag_v,
- int mvc_flag_h);
+void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
+ const nmv_context* const mvctx, int usehp);
void vp9_update_mv_count(VP9_COMP *cpi, MACROBLOCK *x, int_mv best_ref_mv[2]);
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 8f3d82570..33f588fa1 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -717,7 +717,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
}
if (speed >= 5) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
- sf->use_one_partition_size_always = 1;
+ sf->partition_search_type = FIXED_PARTITION;
sf->always_this_block_size = BLOCK_16X16;
sf->tx_size_search_method = frame_is_intra_only(cm) ?
USE_FULL_RD : USE_LARGESTALL;
@@ -863,12 +863,12 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
sf->search_method = FAST_HEX;
}
if (speed >= 6) {
- sf->use_one_partition_size_always = 1;
- sf->always_this_block_size = BLOCK_32X32;
+ sf->partition_search_type = VAR_BASED_FIXED_PARTITION;
}
if (speed >= 7) {
+ sf->partition_search_type = FIXED_PARTITION;
sf->always_this_block_size = BLOCK_16X16;
- sf->use_pick_mode = 1;
+ sf->use_nonrd_pick_mode = 1;
}
}
@@ -906,7 +906,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->adaptive_motion_search = 0;
sf->adaptive_pred_interp_filter = 0;
sf->reference_masking = 0;
- sf->use_one_partition_size_always = 0;
+ sf->partition_search_type = SEARCH_PARTITION;
sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0;
sf->auto_min_max_partition_size = NOT_IN_USE;
@@ -928,7 +928,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->use_fast_lpf_pick = 0;
sf->use_fast_coef_updates = 0;
sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set
- sf->use_pick_mode = 0;
+ sf->use_nonrd_pick_mode = 0;
sf->encode_breakout_thresh = 0;
switch (cpi->oxcf.mode) {
@@ -2903,7 +2903,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
vp9_save_coding_context(cpi);
cpi->dummy_packing = 1;
- if (!cpi->sf.use_pick_mode)
+ if (!cpi->sf.use_nonrd_pick_mode)
vp9_pack_bitstream(cpi, dest, size);
rc->projected_frame_size = (int)(*size) << 3;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 7bcceedb8..fd2356591 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -218,6 +218,22 @@ typedef enum {
ENCODE_BREAKOUT_LIMITED = 2
} ENCODE_BREAKOUT_TYPE;
+typedef enum {
+ // Search partitions using RD/NONRD criterion
+ SEARCH_PARTITION = 0,
+
+ // Always use a fixed size partition
+ FIXED_PARTITION = 1,
+
+ // Use a fixed size partition in every 64X64 SB, where the size is
+ // determined based on source variance
+ VAR_BASED_FIXED_PARTITION = 2,
+
+ // Use an arbitrary partitioning scheme based on source variance within
+ // a 64X64 SB
+ VAR_BASED_PARTITION
+} PARTITION_SEARCH_TYPE;
+
typedef struct {
// Frame level coding parameter update
int frame_parameter_update;
@@ -304,16 +320,6 @@ typedef struct {
// TODO(JBB): remove this as its no longer used.
- // If set partition size will always be always_this_block_size.
- int use_one_partition_size_always;
-
- // Skip rectangular partition test when partition type none gives better
- // rd than partition type split.
- int less_rectangular_check;
-
- // Disable testing non square partitions. (eg 16x32)
- int use_square_partition_only;
-
// After looking at the first set of modes (set by index here), skip
// checking modes for reference frames that don't match the reference frame
// of the best so far.
@@ -322,9 +328,18 @@ typedef struct {
// TODO(JBB): Remove this.
int reference_masking;
- // Used in conjunction with use_one_partition_size_always.
+ PARTITION_SEARCH_TYPE partition_search_type;
+
+ // Used if partition_search_type = FIXED_SIZE_PARTITION
BLOCK_SIZE always_this_block_size;
+ // Skip rectangular partition test when partition type none gives better
+ // rd than partition type split.
+ int less_rectangular_check;
+
+ // Disable testing non square partitions. (eg 16x32)
+ int use_square_partition_only;
+
// Sets min and max partition sizes for this 64x64 region based on the
// same 64x64 in last encoded frame, and the left and above neighbor.
AUTO_MIN_MAX_MODE auto_min_max_partition_size;
@@ -396,7 +411,7 @@ typedef struct {
int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced
// This flag controls the use of non-RD mode decision.
- int use_pick_mode;
+ int use_nonrd_pick_mode;
// This variable sets the encode_breakout threshold. Currently, it is only
// enabled in real time mode.
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 383d92751..0d0dc0cc7 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -183,9 +183,11 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
+ struct macroblock_plane *const p = &x->plane[0];
+ struct macroblockd_plane *const pd = &xd->plane[0];
const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
- MB_PREDICTION_MODE this_mode;
- MV_REFERENCE_FRAME ref_frame;
+ MB_PREDICTION_MODE this_mode, best_mode = ZEROMV;
+ MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME;
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
struct buf_2d yv12_mb[4][MAX_MB_PLANE];
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
@@ -240,6 +242,8 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
clamp_mv2(&frame_mv[NEARESTMV][ref_frame].as_mv, xd);
clamp_mv2(&frame_mv[NEARMV][ref_frame].as_mv, xd);
+ mbmi->ref_frame[0] = ref_frame;
+
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
int rate = cost[INTER_OFFSET(this_mode)];
int64_t dist;
@@ -253,25 +257,32 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
continue;
}
- dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)];
+ mbmi->mode = this_mode;
+ mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
+
+ vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+
+ dist = cpi->fn_ptr[bsize].sdf(p->src.buf, p->src.stride,
+ pd->dst.buf, pd->dst.stride, INT_MAX);
+
this_rd = rate + dist;
if (this_rd < best_rd) {
best_rd = this_rd;
- mbmi->mode = this_mode;
- mbmi->ref_frame[0] = ref_frame;
- mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
- xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
- mbmi->uv_mode = this_mode;
+ best_mode = this_mode;
+ best_ref_frame = ref_frame;
}
}
}
+ mbmi->mode = best_mode;
+ mbmi->ref_frame[0] = best_ref_frame;
+ mbmi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int;
+ xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
+
// Perform intra prediction search, if the best SAD is above a certain
// threshold.
if (best_rd > inter_mode_thresh) {
- struct macroblock_plane *const p = &x->plane[0];
- struct macroblockd_plane *const pd = &xd->plane[0];
for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) {
vp9_predict_intra_block(xd, 0, b_width_log2(bsize),
mbmi->tx_size, this_mode,
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index f78ebfe18..dc6c11816 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -958,17 +958,10 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
}
// Clip the active best and worst quality values to limits.
- if (active_worst_quality > rc->worst_quality)
- active_worst_quality = rc->worst_quality;
-
- if (active_best_quality < rc->best_quality)
- active_best_quality = rc->best_quality;
-
- if (active_best_quality > rc->worst_quality)
- active_best_quality = rc->worst_quality;
-
- if (active_worst_quality < active_best_quality)
- active_worst_quality = active_best_quality;
+ active_best_quality = clamp(active_best_quality,
+ rc->best_quality, rc->worst_quality);
+ active_worst_quality = clamp(active_worst_quality,
+ active_best_quality, rc->worst_quality);
*top_index = active_worst_quality;
*bottom_index = active_best_quality;
@@ -1041,7 +1034,7 @@ int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi,
// JBB : This is realtime mode. In real time mode the first frame
// should be larger. Q of 0 is disabled because we force tx size to be
// 16x16...
- if (cpi->sf.use_pick_mode) {
+ if (cpi->sf.use_nonrd_pick_mode) {
if (cpi->common.current_video_frame == 0)
q /= 3;
if (q == 0)
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 57417e0c6..371c0ced2 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -295,7 +295,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
set_block_thresholds(cpi);
- if (!cpi->sf.use_pick_mode) {
+ if (!cpi->sf.use_nonrd_pick_mode) {
fill_token_costs(x->token_costs, cm->fc.coef_probs);
for (i = 0; i < PARTITION_CONTEXTS; i++)
@@ -303,15 +303,14 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
vp9_partition_tree);
}
- if (!cpi->sf.use_pick_mode || (cm->current_video_frame & 0x07) == 1) {
+ if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1) {
fill_mode_costs(cpi);
if (!frame_is_intra_only(cm)) {
vp9_build_nmv_cost_table(x->nmvjointcost,
cm->allow_high_precision_mv ? x->nmvcost_hp
: x->nmvcost,
- &cm->fc.nmvc,
- cm->allow_high_precision_mv, 1, 1);
+ &cm->fc.nmvc, cm->allow_high_precision_mv);
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
vp9_cost_tokens((int *)x->inter_mode_cost[i],
@@ -937,27 +936,23 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
}
}
-static void super_block_yrd(VP9_COMP *cpi,
- MACROBLOCK *x, int *rate, int64_t *distortion,
- int *skip, int64_t *psse, BLOCK_SIZE bs,
- int64_t txfm_cache[TX_MODES],
- int64_t ref_best_rd) {
+static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
+ int64_t *distortion, int *skip,
+ int64_t *psse, BLOCK_SIZE bs,
+ int64_t txfm_cache[TX_MODES],
+ int64_t ref_best_rd) {
int r[TX_SIZES][2], s[TX_SIZES];
int64_t d[TX_SIZES], sse[TX_SIZES];
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
- const int b_inter_mode = is_inter_block(mbmi);
const TX_SIZE max_tx_size = max_txsize_lookup[bs];
TX_SIZE tx_size;
-
assert(bs == mbmi->sb_type);
- if (b_inter_mode)
- vp9_subtract_plane(x, bs, 0);
- if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
- (cpi->sf.tx_size_search_method != USE_FULL_RD &&
- !b_inter_mode)) {
+ vp9_subtract_plane(x, bs, 0);
+
+ if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
ref_best_rd, bs);
@@ -966,8 +961,7 @@ static void super_block_yrd(VP9_COMP *cpi,
return;
}
- if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER &&
- b_inter_mode) {
+ if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER) {
for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd,
&r[tx_size][0], &d[tx_size], &s[tx_size]);
@@ -985,6 +979,36 @@ static void super_block_yrd(VP9_COMP *cpi,
*psse = sse[mbmi->tx_size];
}
+static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
+ int64_t *distortion, int *skip,
+ int64_t *psse, BLOCK_SIZE bs,
+ int64_t txfm_cache[TX_MODES],
+ int64_t ref_best_rd) {
+ int64_t sse[TX_SIZES];
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
+
+ assert(bs == mbmi->sb_type);
+ if (cpi->sf.tx_size_search_method != USE_FULL_RD) {
+ vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
+ choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
+ ref_best_rd, bs);
+ } else {
+ int r[TX_SIZES][2], s[TX_SIZES];
+ int64_t d[TX_SIZES];
+ TX_SIZE tx_size;
+ for (tx_size = TX_4X4; tx_size <= max_txsize_lookup[bs]; ++tx_size)
+ txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
+ &s[tx_size], &sse[tx_size],
+ ref_best_rd, 0, bs, tx_size);
+ choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
+ skip, txfm_cache, bs);
+ }
+ if (psse)
+ *psse = sse[mbmi->tx_size];
+}
+
+
static int conditional_skipintra(MB_PREDICTION_MODE mode,
MB_PREDICTION_MODE best_intra_mode) {
if (mode == D117_PRED &&
@@ -1245,8 +1269,8 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
mic->mbmi.mode = mode;
- super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
- bsize, local_tx_cache, best_rd);
+ intra_super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+ &s, NULL, bsize, local_tx_cache, best_rd);
if (this_rate_tokenonly == INT_MAX)
continue;
@@ -1281,7 +1305,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
return best_rd;
}
-static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x,
+static void super_block_uvrd(MACROBLOCK *x,
int *rate, int64_t *distortion, int *skippable,
int64_t *sse, BLOCK_SIZE bsize,
int64_t ref_best_rd) {
@@ -1331,6 +1355,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
+ MACROBLOCKD *xd = &x->e_mbd;
MB_PREDICTION_MODE mode;
MB_PREDICTION_MODE mode_selected = DC_PRED;
int64_t best_rd = INT64_MAX, this_rd;
@@ -1341,9 +1366,9 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
continue;
- x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode;
+ xd->mi_8x8[0]->mbmi.uv_mode = mode;
- super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ super_block_uvrd(x, &this_rate_tokenonly,
&this_distortion, &s, &this_sse, bsize, best_rd);
if (this_rate_tokenonly == INT_MAX)
continue;
@@ -1361,7 +1386,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (!x->select_txfm_size) {
int i;
struct macroblock_plane *const p = x->plane;
- struct macroblockd_plane *const pd = x->e_mbd.plane;
+ struct macroblockd_plane *const pd = xd->plane;
for (i = 1; i < MAX_MB_PLANE; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][2];
p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
@@ -1382,25 +1407,21 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
}
- x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode_selected;
+ xd->mi_8x8[0]->mbmi.uv_mode = mode_selected;
return best_rd;
}
-static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
+static int64_t rd_sbuv_dcpred(const VP9_COMMON *cm, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize) {
- int64_t this_rd;
- int64_t this_sse;
+ int64_t unused;
x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED;
- super_block_uvrd(cpi, x, rate_tokenonly, distortion,
- skippable, &this_sse, bsize, INT64_MAX);
- *rate = *rate_tokenonly +
- x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED];
- this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
-
- return this_rd;
+ super_block_uvrd(x, rate_tokenonly, distortion,
+ skippable, &unused, bsize, INT64_MAX);
+ *rate = *rate_tokenonly + x->intra_uv_mode_cost[cm->frame_type][DC_PRED];
+ return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
@@ -1413,8 +1434,8 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
// Use an estimated rd for uv_intra based on DC_PRED if the
// appropriate speed flag is set.
if (cpi->sf.use_uv_intra_rd_estimate) {
- rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
- bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
+ rd_sbuv_dcpred(&cpi->common, x, rate_uv, rate_uv_tokenonly, dist_uv,
+ skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
// Else do a proper rd search for each possible transform size that may
// be considered in the main rd loop.
} else {
@@ -1428,8 +1449,7 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
int mode_context) {
MACROBLOCK *const x = &cpi->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- const int segment_id = xd->mi_8x8[0]->mbmi.segment_id;
+ const int segment_id = x->e_mbd.mi_8x8[0]->mbmi.segment_id;
// Don't account for mode here if segment skip is enabled.
if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
@@ -1454,7 +1474,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
int *rate_mv);
static int labels2mode(MACROBLOCK *x, int i,
- MB_PREDICTION_MODE this_mode,
+ MB_PREDICTION_MODE mode,
int_mv *this_mv, int_mv *this_second_mv,
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
int_mv seg_mvs[MAX_REF_FRAMES],
@@ -1464,23 +1484,18 @@ static int labels2mode(MACROBLOCK *x, int i,
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mic = xd->mi_8x8[0];
MB_MODE_INFO *mbmi = &mic->mbmi;
- int cost = 0, thismvcost = 0;
+ int thismvcost = 0;
int idx, idy;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
const int has_second_rf = has_second_ref(mbmi);
- /* We have to be careful retrieving previously-encoded motion vectors.
- Ones from this macroblock have to be pulled from the BLOCKD array
- as they have not yet made it to the bmi array in our MB_MODE_INFO. */
- MB_PREDICTION_MODE m;
-
// the only time we should do costing for new motion vector or mode
// is when we are on a new label (jbb May 08, 2007)
- switch (m = this_mode) {
+ switch (mode) {
case NEWMV:
this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
- thismvcost = vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv,
+ thismvcost += vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv,
mvjcost, mvcost, MV_COST_WEIGHT_SUB);
if (has_second_rf) {
this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
@@ -1492,14 +1507,12 @@ static int labels2mode(MACROBLOCK *x, int i,
case NEARESTMV:
this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
if (has_second_rf)
- this_second_mv->as_int =
- frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
+ this_second_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
break;
case NEARMV:
this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
if (has_second_rf)
- this_second_mv->as_int =
- frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
+ this_second_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
break;
case ZEROMV:
this_mv->as_int = 0;
@@ -1510,22 +1523,19 @@ static int labels2mode(MACROBLOCK *x, int i,
break;
}
- cost = cost_mv_ref(cpi, this_mode,
- mbmi->mode_context[mbmi->ref_frame[0]]);
-
mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
if (has_second_rf)
mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
- mic->bmi[i].as_mode = m;
+ mic->bmi[i].as_mode = mode;
for (idy = 0; idy < num_4x4_blocks_high; ++idy)
for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
&mic->bmi[i], sizeof(mic->bmi[i]));
- cost += thismvcost;
- return cost;
+ return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) +
+ thismvcost;
}
static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
@@ -3008,8 +3018,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int64_t rdcosty = INT64_MAX;
// Y cost and distortion
- super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
- bsize, txfm_cache, ref_best_rd);
+ inter_super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
+ bsize, txfm_cache, ref_best_rd);
if (*rate_y == INT_MAX) {
*rate2 = INT_MAX;
@@ -3024,7 +3034,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
- super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
+ super_block_uvrd(x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
bsize, ref_best_rd - rdcosty);
if (*rate_uv == INT_MAX) {
*rate2 = INT_MAX;
@@ -3394,8 +3404,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (ref_frame == INTRA_FRAME) {
TX_SIZE uv_tx;
- super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
- bsize, tx_cache, best_rd);
+ intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
+ bsize, tx_cache, best_rd);
if (rate_y == INT_MAX)
continue;
@@ -4151,7 +4161,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
// then dont bother looking at UV
vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
BLOCK_8X8);
- super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
+ super_block_uvrd(x, &rate_uv, &distortion_uv, &uv_skippable,
&uv_sse, BLOCK_8X8, tmp_best_rdu);
if (rate_uv == INT_MAX)
continue;
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 2be00ff62..7ae110707 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -23,8 +23,8 @@
static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2];
const TOKENVALUE *vp9_dct_value_tokens_ptr;
-static int dct_value_cost[DCT_MAX_VALUE * 2];
-const int *vp9_dct_value_cost_ptr;
+static int16_t dct_value_cost[DCT_MAX_VALUE * 2];
+const int16_t *vp9_dct_value_cost_ptr;
// Array indices are identical to previously-existing CONTEXT_NODE indices
const vp9_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h
index ea86240be..063c0bafe 100644
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -47,7 +47,7 @@ struct VP9_COMP;
void vp9_tokenize_sb(struct VP9_COMP *cpi, TOKENEXTRA **t, int dry_run,
BLOCK_SIZE bsize);
-extern const int *vp9_dct_value_cost_ptr;
+extern const int16_t *vp9_dct_value_cost_ptr;
/* TODO: The Token field should be broken out into a separate char array to
* improve cache locality, since it's needed for costing when the rest of the
* fields are not.