diff options
26 files changed, 538 insertions, 550 deletions
diff --git a/examples/vpx_temporal_scalable_patterns.c b/examples/vpx_temporal_scalable_patterns.c index 29a266d29..6ec1b6208 100644 --- a/examples/vpx_temporal_scalable_patterns.c +++ b/examples/vpx_temporal_scalable_patterns.c @@ -52,6 +52,12 @@ struct RateControlMetrics { double layer_encoding_bitrate[VPX_TS_MAX_LAYERS]; }; +// Note: these rate control metrics assume only 1 key frame in the +// sequence (i.e., first frame only). So for temporal pattern# 7 +// (which has key frame for every frame on base layer), the metrics +// computation will be off/wrong. +// TODO(marpan): Update these metrics to account for multiple key frames +// in the stream. static void set_rate_control_metrics(struct RateControlMetrics *rc, vpx_codec_enc_cfg_t *cfg) { unsigned int i = 0; @@ -565,6 +571,9 @@ int main(int argc, char **argv) { } vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); vpx_codec_control(&codec, VP8E_SET_TOKEN_PARTITIONS, 1); + // This controls the maximum target size of the key frame. + // For generating smaller key frames, use a smaller max_intra_size_pct + // value, like 100 or 200. 
max_intra_size_pct = (int) (((double)cfg.rc_buf_optimal_sz * 0.5) * ((double) cfg.g_timebase.den / cfg.g_timebase.num) / 10.0); vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, max_intra_size_pct); diff --git a/test/cq_test.cc b/test/cq_test.cc index a2c829163..7da7b80aa 100644 --- a/test/cq_test.cc +++ b/test/cq_test.cc @@ -20,7 +20,7 @@ namespace { const int kCQLevelMin = 4; const int kCQLevelMax = 63; const int kCQLevelStep = 8; -const int kCQTargetBitrate = 2000; +const unsigned int kCQTargetBitrate = 2000; class CQTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<int> { @@ -66,17 +66,17 @@ class CQTest : public ::libvpx_test::EncoderTest, return pow(10.0, avg_psnr / 10.0) / file_size_; } - int file_size() const { return file_size_; } + size_t file_size() const { return file_size_; } int n_frames() const { return n_frames_; } private: int cq_level_; - int file_size_; + size_t file_size_; double psnr_; int n_frames_; }; -int prev_actual_bitrate = kCQTargetBitrate; +unsigned int prev_actual_bitrate = kCQTargetBitrate; TEST_P(CQTest, LinearPSNRIsHigherForCQLevel) { const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; @@ -88,7 +88,8 @@ TEST_P(CQTest, LinearPSNRIsHigherForCQLevel) { timebase.den, timebase.num, 0, 30); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); const double cq_psnr_lin = GetLinearPSNROverBitrate(); - const int cq_actual_bitrate = file_size() * 8 * 30 / (n_frames() * 1000); + const unsigned int cq_actual_bitrate = + static_cast<unsigned int>(file_size()) * 8 * 30 / (n_frames() * 1000); EXPECT_LE(cq_actual_bitrate, kCQTargetBitrate); EXPECT_LE(cq_actual_bitrate, prev_actual_bitrate); prev_actual_bitrate = cq_actual_bitrate; diff --git a/test/error_resilience_test.cc b/test/error_resilience_test.cc index 30c20e91f..4cd9efb86 100644 --- a/test/error_resilience_test.cc +++ b/test/error_resilience_test.cc @@ -16,8 +16,8 @@ namespace { -const int kMaxErrorFrames = 8; -const int 
kMaxDroppableFrames = 8; +const int kMaxErrorFrames = 12; +const int kMaxDroppableFrames = 12; class ErrorResilienceTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { @@ -175,6 +175,10 @@ TEST_P(ErrorResilienceTest, OnVersusOff) { } } +// Check for successful decoding and no encoder/decoder mismatch +// if we lose (i.e., drop before decoding) a set of droppable +// frames (i.e., frames that don't update any reference buffers). +// Check both isolated and consecutive loss. TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) { const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; @@ -186,14 +190,18 @@ TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) { init_flags_ = VPX_CODEC_USE_PSNR; libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - timebase.den, timebase.num, 0, 30); + timebase.den, timebase.num, 0, 40); // Error resilient mode ON. cfg_.g_error_resilient = 1; - - // Set an arbitrary set of error frames same as droppable frames - unsigned int num_droppable_frames = 2; - unsigned int droppable_frame_list[] = {5, 16}; + cfg_.kf_mode = VPX_KF_DISABLED; + + // Set an arbitrary set of error frames same as droppable frames. + // In addition to isolated loss/drop, add a long consecutive series + // (of size 9) of dropped frames. + unsigned int num_droppable_frames = 11; + unsigned int droppable_frame_list[] = {5, 16, 22, 23, 24, 25, 26, 27, 28, + 29, 30}; SetDroppableFrames(num_droppable_frames, droppable_frame_list); SetErrorFrames(num_droppable_frames, droppable_frame_list); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); @@ -202,7 +210,7 @@ TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) { << GetMismatchFrames() << "\n"; EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0); - // reset previously set error/droppable frames + // Reset previously set of error/droppable frames. 
Reset(); #if 0 diff --git a/vp8/common/arm/neon/copymem16x16_neon.asm b/vp8/common/arm/neon/copymem16x16_neon.asm deleted file mode 100644 index bda4b9654..000000000 --- a/vp8/common/arm/neon/copymem16x16_neon.asm +++ /dev/null @@ -1,59 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_copy_mem16x16_neon| - ; ARM - ; REQUIRE8 - ; PRESERVE8 - - AREA Block, CODE, READONLY ; name this block of code -;void copy_mem16x16_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -|vp8_copy_mem16x16_neon| PROC - - vld1.u8 {q0}, [r0], r1 - vld1.u8 {q1}, [r0], r1 - vld1.u8 {q2}, [r0], r1 - vst1.u8 {q0}, [r2], r3 - vld1.u8 {q3}, [r0], r1 - vst1.u8 {q1}, [r2], r3 - vld1.u8 {q4}, [r0], r1 - vst1.u8 {q2}, [r2], r3 - vld1.u8 {q5}, [r0], r1 - vst1.u8 {q3}, [r2], r3 - vld1.u8 {q6}, [r0], r1 - vst1.u8 {q4}, [r2], r3 - vld1.u8 {q7}, [r0], r1 - vst1.u8 {q5}, [r2], r3 - vld1.u8 {q8}, [r0], r1 - vst1.u8 {q6}, [r2], r3 - vld1.u8 {q9}, [r0], r1 - vst1.u8 {q7}, [r2], r3 - vld1.u8 {q10}, [r0], r1 - vst1.u8 {q8}, [r2], r3 - vld1.u8 {q11}, [r0], r1 - vst1.u8 {q9}, [r2], r3 - vld1.u8 {q12}, [r0], r1 - vst1.u8 {q10}, [r2], r3 - vld1.u8 {q13}, [r0], r1 - vst1.u8 {q11}, [r2], r3 - vld1.u8 {q14}, [r0], r1 - vst1.u8 {q12}, [r2], r3 - vld1.u8 {q15}, [r0], r1 - vst1.u8 {q13}, [r2], r3 - vst1.u8 {q14}, [r2], r3 - vst1.u8 {q15}, [r2], r3 - - mov pc, lr - - ENDP ; |vp8_copy_mem16x16_neon| - - END diff --git a/vp8/common/arm/neon/copymem8x4_neon.asm b/vp8/common/arm/neon/copymem8x4_neon.asm deleted file mode 100644 index 
35c0f6708..000000000 --- a/vp8/common/arm/neon/copymem8x4_neon.asm +++ /dev/null @@ -1,34 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_copy_mem8x4_neon| - ; ARM - ; REQUIRE8 - ; PRESERVE8 - - AREA Block, CODE, READONLY ; name this block of code -;void copy_mem8x4_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -|vp8_copy_mem8x4_neon| PROC - vld1.u8 {d0}, [r0], r1 - vld1.u8 {d1}, [r0], r1 - vst1.u8 {d0}, [r2], r3 - vld1.u8 {d2}, [r0], r1 - vst1.u8 {d1}, [r2], r3 - vld1.u8 {d3}, [r0], r1 - vst1.u8 {d2}, [r2], r3 - vst1.u8 {d3}, [r2], r3 - - mov pc, lr - - ENDP ; |vp8_copy_mem8x4_neon| - - END diff --git a/vp8/common/arm/neon/copymem8x8_neon.asm b/vp8/common/arm/neon/copymem8x8_neon.asm deleted file mode 100644 index 1f5b9411b..000000000 --- a/vp8/common/arm/neon/copymem8x8_neon.asm +++ /dev/null @@ -1,43 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp8_copy_mem8x8_neon| - ; ARM - ; REQUIRE8 - ; PRESERVE8 - - AREA Block, CODE, READONLY ; name this block of code -;void copy_mem8x8_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -|vp8_copy_mem8x8_neon| PROC - - vld1.u8 {d0}, [r0], r1 - vld1.u8 {d1}, [r0], r1 - vst1.u8 {d0}, [r2], r3 - vld1.u8 {d2}, [r0], r1 - vst1.u8 {d1}, [r2], r3 - vld1.u8 {d3}, [r0], r1 - vst1.u8 {d2}, [r2], r3 - vld1.u8 {d4}, [r0], r1 - vst1.u8 {d3}, [r2], r3 - vld1.u8 {d5}, [r0], r1 - vst1.u8 {d4}, [r2], r3 - vld1.u8 {d6}, [r0], r1 - vst1.u8 {d5}, [r2], r3 - vld1.u8 {d7}, [r0], r1 - vst1.u8 {d6}, [r2], r3 - vst1.u8 {d7}, [r2], r3 - - mov pc, lr - - ENDP ; |vp8_copy_mem8x8_neon| - - END diff --git a/vp8/common/arm/neon/copymem_neon.c b/vp8/common/arm/neon/copymem_neon.c new file mode 100644 index 000000000..deced115c --- /dev/null +++ b/vp8/common/arm/neon/copymem_neon.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <arm_neon.h> + +void vp8_copy_mem8x4_neon( + unsigned char *src, + int src_stride, + unsigned char *dst, + int dst_stride) { + uint8x8_t vtmp; + int r; + + for (r = 0; r < 4; r++) { + vtmp = vld1_u8(src); + vst1_u8(dst, vtmp); + src += src_stride; + dst += dst_stride; + } +} + +void vp8_copy_mem8x8_neon( + unsigned char *src, + int src_stride, + unsigned char *dst, + int dst_stride) { + uint8x8_t vtmp; + int r; + + for (r = 0; r < 8; r++) { + vtmp = vld1_u8(src); + vst1_u8(dst, vtmp); + src += src_stride; + dst += dst_stride; + } +} + +void vp8_copy_mem16x16_neon( + unsigned char *src, + int src_stride, + unsigned char *dst, + int dst_stride) { + int r; + uint8x16_t qtmp; + + for (r = 0; r < 16; r++) { + qtmp = vld1q_u8(src); + vst1q_u8(dst, qtmp); + src += src_stride; + dst += dst_stride; + } +} diff --git a/vp8/common/arm/neon/dc_only_idct_add_neon.asm b/vp8/common/arm/neon/dc_only_idct_add_neon.asm deleted file mode 100644 index 79ff02c69..000000000 --- a/vp8/common/arm/neon/dc_only_idct_add_neon.asm +++ /dev/null @@ -1,54 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
-; - - - EXPORT |vp8_dc_only_idct_add_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -;void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, -; int pred_stride, unsigned char *dst_ptr, -; int dst_stride) - -; r0 input_dc -; r1 pred_ptr -; r2 pred_stride -; r3 dst_ptr -; sp dst_stride - -|vp8_dc_only_idct_add_neon| PROC - add r0, r0, #4 - asr r0, r0, #3 - ldr r12, [sp] - vdup.16 q0, r0 - - vld1.32 {d2[0]}, [r1], r2 - vld1.32 {d2[1]}, [r1], r2 - vld1.32 {d4[0]}, [r1], r2 - vld1.32 {d4[1]}, [r1] - - vaddw.u8 q1, q0, d2 - vaddw.u8 q2, q0, d4 - - vqmovun.s16 d2, q1 - vqmovun.s16 d4, q2 - - vst1.32 {d2[0]}, [r3], r12 - vst1.32 {d2[1]}, [r3], r12 - vst1.32 {d4[0]}, [r3], r12 - vst1.32 {d4[1]}, [r3] - - bx lr - - ENDP - - END diff --git a/vp8/common/arm/neon/dc_only_idct_add_neon.c b/vp8/common/arm/neon/dc_only_idct_add_neon.c new file mode 100644 index 000000000..ad5f41d7d --- /dev/null +++ b/vp8/common/arm/neon/dc_only_idct_add_neon.c @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <arm_neon.h> + +void vp8_dc_only_idct_add_neon( + int16_t input_dc, + unsigned char *pred_ptr, + int pred_stride, + unsigned char *dst_ptr, + int dst_stride) { + int i; + uint16_t a1 = ((input_dc + 4) >> 3); + uint32x2_t d2u32 = vdup_n_u32(0); + uint8x8_t d2u8; + uint16x8_t q1u16; + uint16x8_t qAdd; + + qAdd = vdupq_n_u16(a1); + + for (i = 0; i < 2; i++) { + d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 0); + pred_ptr += pred_stride; + d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 1); + pred_ptr += pred_stride; + + q1u16 = vaddw_u8(qAdd, vreinterpret_u8_u32(d2u32)); + d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); + + vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0); + dst_ptr += dst_stride; + vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1); + dst_ptr += dst_stride; + } +} diff --git a/vp8/common/arm/neon/dequant_idct_neon.asm b/vp8/common/arm/neon/dequant_idct_neon.asm deleted file mode 100644 index 602cce676..000000000 --- a/vp8/common/arm/neon/dequant_idct_neon.asm +++ /dev/null @@ -1,131 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp8_dequant_idct_add_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vp8_dequant_idct_add_neon(short *input, short *dq, -; unsigned char *dest, int stride) -; r0 short *input, -; r1 short *dq, -; r2 unsigned char *dest -; r3 int stride - -|vp8_dequant_idct_add_neon| PROC - vld1.16 {q3, q4}, [r0] - vld1.16 {q5, q6}, [r1] - - add r1, r2, r3 ; r1 = dest + stride - lsl r3, #1 ; 2x stride - - vld1.32 {d14[0]}, [r2], r3 - vld1.32 {d14[1]}, [r1], r3 - vld1.32 {d15[0]}, [r2] - vld1.32 {d15[1]}, [r1] - - adr r12, cospi8sqrt2minus1 ; pointer to the first constant - - vmul.i16 q1, q3, q5 ;input for short_idct4x4llm_neon - vmul.i16 q2, q4, q6 - -;|short_idct4x4llm_neon| PROC - vld1.16 {d0}, [r12] - vswp d3, d4 ;q2(vp[4] vp[12]) - - vqdmulh.s16 q3, q2, d0[2] - vqdmulh.s16 q4, q2, d0[0] - - vqadd.s16 d12, d2, d3 ;a1 - vqsub.s16 d13, d2, d3 ;b1 - - vshr.s16 q3, q3, #1 - vshr.s16 q4, q4, #1 - - vqadd.s16 q3, q3, q2 - vqadd.s16 q4, q4, q2 - - vqsub.s16 d10, d6, d9 ;c1 - vqadd.s16 d11, d7, d8 ;d1 - - vqadd.s16 d2, d12, d11 - vqadd.s16 d3, d13, d10 - vqsub.s16 d4, d13, d10 - vqsub.s16 d5, d12, d11 - - vtrn.32 d2, d4 - vtrn.32 d3, d5 - vtrn.16 d2, d3 - vtrn.16 d4, d5 - -; memset(input, 0, 32) -- 32bytes - vmov.i16 q14, #0 - - vswp d3, d4 - vqdmulh.s16 q3, q2, d0[2] - vqdmulh.s16 q4, q2, d0[0] - - vqadd.s16 d12, d2, d3 ;a1 - vqsub.s16 d13, d2, d3 ;b1 - - vmov q15, q14 - - vshr.s16 q3, q3, #1 - vshr.s16 q4, q4, #1 - - vqadd.s16 q3, q3, q2 - vqadd.s16 q4, q4, q2 - - vqsub.s16 d10, d6, d9 ;c1 - vqadd.s16 d11, d7, d8 ;d1 - - vqadd.s16 d2, d12, d11 - vqadd.s16 d3, d13, d10 - vqsub.s16 d4, d13, d10 - vqsub.s16 d5, d12, d11 - - vst1.16 {q14, q15}, [r0] - - vrshr.s16 d2, d2, #3 - vrshr.s16 d3, d3, #3 - vrshr.s16 d4, d4, #3 - vrshr.s16 d5, d5, #3 - - vtrn.32 d2, d4 - vtrn.32 d3, d5 - vtrn.16 d2, d3 - vtrn.16 d4, d5 - - vaddw.u8 q1, q1, d14 - vaddw.u8 q2, q2, d15 - - sub r2, r2, r3 - sub r1, r1, r3 - - vqmovun.s16 d0, q1 - vqmovun.s16 d1, q2 
- - vst1.32 {d0[0]}, [r2], r3 - vst1.32 {d0[1]}, [r1], r3 - vst1.32 {d1[0]}, [r2] - vst1.32 {d1[1]}, [r1] - - bx lr - - ENDP ; |vp8_dequant_idct_add_neon| - -; Constant Pool -cospi8sqrt2minus1 DCD 0x4e7b4e7b -sinpi8sqrt2 DCD 0x8a8c8a8c - - END diff --git a/vp8/common/arm/neon/dequant_idct_neon.c b/vp8/common/arm/neon/dequant_idct_neon.c new file mode 100644 index 000000000..58e11922c --- /dev/null +++ b/vp8/common/arm/neon/dequant_idct_neon.c @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <arm_neon.h> + +static const int16_t cospi8sqrt2minus1 = 20091; +static const int16_t sinpi8sqrt2 = 35468; + +void vp8_dequant_idct_add_neon( + int16_t *input, + int16_t *dq, + unsigned char *dst, + int stride) { + unsigned char *dst0; + int32x2_t d14, d15; + int16x4_t d2, d3, d4, d5, d10, d11, d12, d13; + int16x8_t q1, q2, q3, q4, q5, q6; + int16x8_t qEmpty = vdupq_n_s16(0); + int32x2x2_t d2tmp0, d2tmp1; + int16x4x2_t d2tmp2, d2tmp3; + + d14 = d15 = vdup_n_s32(0); + + // load input + q3 = vld1q_s16(input); + vst1q_s16(input, qEmpty); + input += 8; + q4 = vld1q_s16(input); + vst1q_s16(input, qEmpty); + + // load dq + q5 = vld1q_s16(dq); + dq += 8; + q6 = vld1q_s16(dq); + + // load src from dst + dst0 = dst; + d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0); + dst0 += stride; + d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1); + dst0 += stride; + d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0); + dst0 += stride; + d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1); + + q1 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q3), + vreinterpretq_u16_s16(q5))); + q2 = 
vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q4), + vreinterpretq_u16_s16(q6))); + + d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2)); + d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2)); + + q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2)); + + q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2); + q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1); + + q3 = vshrq_n_s16(q3, 1); + q4 = vshrq_n_s16(q4, 1); + + q3 = vqaddq_s16(q3, q2); + q4 = vqaddq_s16(q4, q2); + + d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4)); + d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4)); + + d2 = vqadd_s16(d12, d11); + d3 = vqadd_s16(d13, d10); + d4 = vqsub_s16(d13, d10); + d5 = vqsub_s16(d12, d11); + + d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); + d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); + d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]), + vreinterpret_s16_s32(d2tmp1.val[0])); + d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]), + vreinterpret_s16_s32(d2tmp1.val[1])); + + // loop 2 + q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]); + + q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2); + q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1); + + d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]); + d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]); + + q3 = vshrq_n_s16(q3, 1); + q4 = vshrq_n_s16(q4, 1); + + q3 = vqaddq_s16(q3, q2); + q4 = vqaddq_s16(q4, q2); + + d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4)); + d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4)); + + d2 = vqadd_s16(d12, d11); + d3 = vqadd_s16(d13, d10); + d4 = vqsub_s16(d13, d10); + d5 = vqsub_s16(d12, d11); + + d2 = vrshr_n_s16(d2, 3); + d3 = vrshr_n_s16(d3, 3); + d4 = vrshr_n_s16(d4, 3); + d5 = vrshr_n_s16(d5, 3); + + d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); + d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); + d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]), + vreinterpret_s16_s32(d2tmp1.val[0])); + d2tmp3 = 
vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]), + vreinterpret_s16_s32(d2tmp1.val[1])); + + q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]); + q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]); + + q1 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q1), + vreinterpret_u8_s32(d14))); + q2 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2), + vreinterpret_u8_s32(d15))); + + d14 = vreinterpret_s32_u8(vqmovun_s16(q1)); + d15 = vreinterpret_s32_u8(vqmovun_s16(q2)); + + dst0 = dst; + vst1_lane_s32((int32_t *)dst0, d14, 0); + dst0 += stride; + vst1_lane_s32((int32_t *)dst0, d14, 1); + dst0 += stride; + vst1_lane_s32((int32_t *)dst0, d15, 0); + dst0 += stride; + vst1_lane_s32((int32_t *)dst0, d15, 1); + return; +} diff --git a/vp8/common/arm/neon/dequantizeb_neon.asm b/vp8/common/arm/neon/dequantizeb_neon.asm deleted file mode 100644 index c8e0c31f2..000000000 --- a/vp8/common/arm/neon/dequantizeb_neon.asm +++ /dev/null @@ -1,34 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_dequantize_b_loop_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -; r0 short *Q, -; r1 short *DQC -; r2 short *DQ -|vp8_dequantize_b_loop_neon| PROC - vld1.16 {q0, q1}, [r0] - vld1.16 {q2, q3}, [r1] - - vmul.i16 q4, q0, q2 - vmul.i16 q5, q1, q3 - - vst1.16 {q4, q5}, [r2] - - bx lr - - ENDP - - END diff --git a/vp8/common/arm/neon/dequantizeb_neon.c b/vp8/common/arm/neon/dequantizeb_neon.c new file mode 100644 index 000000000..60f69c8db --- /dev/null +++ b/vp8/common/arm/neon/dequantizeb_neon.c @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <arm_neon.h> + +void vp8_dequantize_b_loop_neon( + int16_t *Q, + int16_t *DQC, + int16_t *DQ) { + int16x8x2_t qQ, qDQC, qDQ; + + qQ = vld2q_s16(Q); + qDQC = vld2q_s16(DQC); + + qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]); + qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]); + + vst2q_s16(DQ, qDQ); + return; +} diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index a2127c9b7..ac91d7af5 100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@ -159,10 +159,6 @@ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6$(ASM) # common (neon) -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem8x4_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem8x8_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem16x16_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dc_only_idct_add_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iwalsh_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfilter_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM) @@ -177,10 +173,8 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict8x8_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict16x16_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/buildintrapredictorsmby_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/save_reg_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequant_idct_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_NEON) += 
common/arm/neon/idct_dequant_full_2x_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_0_2x_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequantizeb_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_blk_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/variance_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance8x8_neon$(ASM) @@ -189,6 +183,10 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16s_neon # common (neon intrinsics) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict_neon.c +VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem_neon.c +VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dc_only_idct_add_neon.c +VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequant_idct_neon.c +VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequantizeb_neon.c $(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.sh)) diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index 96ba3e464..71a41a9de 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -382,34 +382,34 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, /* slower path if the block needs border extension */ if (x0 + 2 * bs <= frame_width) { if (right_available && bs == 4) { - vpx_memcpy(above_row - 1, above_ref - 1, 2 * bs + 1); + vpx_memcpy(above_row, above_ref, 2 * bs); } else { - vpx_memcpy(above_row - 1, above_ref - 1, bs + 1); + vpx_memcpy(above_row, above_ref, bs); vpx_memset(above_row + bs, above_row[bs - 1], bs); } } else if (x0 + bs <= frame_width) { const int r = frame_width - x0; if (right_available && bs == 4) { - vpx_memcpy(above_row - 1, above_ref - 1, r + 1); + vpx_memcpy(above_row, above_ref, r); vpx_memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); } else { - vpx_memcpy(above_row - 1, above_ref - 1, bs + 1); + vpx_memcpy(above_row, above_ref, bs); vpx_memset(above_row + bs, 
above_row[bs - 1], bs); } } else if (x0 <= frame_width) { const int r = frame_width - x0; if (right_available && bs == 4) { - vpx_memcpy(above_row - 1, above_ref - 1, r + 1); + vpx_memcpy(above_row, above_ref, r); vpx_memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); } else { - vpx_memcpy(above_row - 1, above_ref - 1, r + 1); + vpx_memcpy(above_row, above_ref, r); vpx_memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); } - above_row[-1] = left_available ? above_ref[-1] : 129; } + above_row[-1] = left_available ? above_ref[-1] : 129; } else { /* faster path if the block does not need extension */ if (bs == 4 && right_available && left_available) { diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index f529c9336..a9b51e0d4 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -102,6 +102,24 @@ static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, MACROBLOCK *x, return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); } +static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi) { + unsigned int var = get_sby_perpixel_variance(cpi, &cpi->mb, BLOCK_64X64); + if (var < 256) + return BLOCK_64X64; + else + return BLOCK_32X32; +} + +static BLOCK_SIZE get_nonrd_var_based_fixed_partition(VP9_COMP *cpi) { + unsigned int var = get_sby_perpixel_variance(cpi, &cpi->mb, BLOCK_64X64); + if (var < 1024) + return BLOCK_32X32; + else if (var < 4096) + return BLOCK_16X16; + else + return BLOCK_8X8; +} + // Original activity measure from Tim T's code. 
static unsigned int tt_activity_measure(MACROBLOCK *x) { unsigned int sse; @@ -994,7 +1012,7 @@ static void set_partitioning(VP9_COMP *cpi, const TileInfo *const tile, for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) { int index = block_row * mis + block_col; // Find a partition size that fits - bsize = find_partition_size(cpi->sf.always_this_block_size, + bsize = find_partition_size(bsize, (row8x8_remaining - block_row), (col8x8_remaining - block_col), &bh, &bw); mi_8x8[index] = mi_upper_left + index; @@ -1914,8 +1932,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } -static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile, - int mi_row, TOKENEXTRA **tp) { +static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, TOKENEXTRA **tp) { VP9_COMMON *const cm = &cpi->common; int mi_col; @@ -1946,19 +1964,32 @@ static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile, vp9_zero(cpi->mb.pred_mv); - if (cpi->sf.use_lastframe_partitioning || - cpi->sf.use_one_partition_size_always ) { + if ((cpi->sf.partition_search_type == SEARCH_PARTITION && + cpi->sf.use_lastframe_partitioning) || + cpi->sf.partition_search_type == FIXED_PARTITION || + cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) { const int idx_str = cm->mode_info_stride * mi_row + mi_col; MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; cpi->mb.source_variance = UINT_MAX; - if (cpi->sf.use_one_partition_size_always) { + if (cpi->sf.partition_search_type == FIXED_PARTITION) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, cpi->sf.always_this_block_size); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); + } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION || + cpi->sf.partition_search_type == VAR_BASED_PARTITION) { + // 
TODO(debargha): Implement VAR_BASED_PARTITION as a separate case. + // Currently both VAR_BASED_FIXED_PARTITION/VAR_BASED_PARTITION + // map to the same thing. + BLOCK_SIZE bsize; + set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); + bsize = get_rd_var_based_fixed_partition(cpi); + set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); + rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, + &dummy_rate, &dummy_dist, 1); } else { if ((cm->current_video_frame % cpi->sf.last_partitioning_redo_frequency) == 0 @@ -2252,12 +2283,12 @@ static INLINE int get_block_col(int b32i, int b16i, int b8i) { return ((b32i & 1) << 2) + ((b16i & 1) << 1) + (b8i & 1); } -static void rtc_use_partition(VP9_COMP *cpi, - const TileInfo *const tile, - MODE_INFO **mi_8x8, - TOKENEXTRA **tp, int mi_row, int mi_col, - BLOCK_SIZE bsize, int *rate, int64_t *dist, - int do_recon) { +static void nonrd_use_partition(VP9_COMP *cpi, + const TileInfo *const tile, + MODE_INFO **mi_8x8, + TOKENEXTRA **tp, int mi_row, int mi_col, + BLOCK_SIZE bsize, int *rate, int64_t *dist, + int do_recon) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &cpi->mb.e_mbd; @@ -2270,8 +2301,8 @@ static void rtc_use_partition(VP9_COMP *cpi, int rows = MIN(MI_BLOCK_SIZE, tile->mi_row_end - mi_row); int cols = MIN(MI_BLOCK_SIZE, tile->mi_col_end - mi_col); - int mi_8x8_width = num_8x8_blocks_wide_lookup[bsize]; - int mi_8x8_hight = num_8x8_blocks_high_lookup[bsize]; + int bw = num_8x8_blocks_wide_lookup[bsize]; + int bh = num_8x8_blocks_high_lookup[bsize]; int brate; int64_t bdist; @@ -2279,14 +2310,13 @@ static void rtc_use_partition(VP9_COMP *cpi, *dist = 0; // find prediction mode for each 8x8 block - for (br = 0; br < rows; br += mi_8x8_hight) { - for (bc = 0; bc < cols; bc += mi_8x8_width) { + for (br = 0; br < rows; br += bh) { + for (bc = 0; bc < cols; bc += bw) { int row = mi_row + br; int col = mi_col + bc; - int bh = 0, bw = 0; + BLOCK_SIZE bs = 
find_partition_size(bsize, rows - br, cols - bc, &bh, &bw); - set_offsets(cpi, tile, row, col, bs); if (cm->frame_type != KEY_FRAME) @@ -2298,8 +2328,9 @@ static void rtc_use_partition(VP9_COMP *cpi, *dist += bdist; for (j = 0; j < bh; ++j) - for (i = 0; i < bw; ++i) + for (i = 0; i < bw; ++i) { xd->mi_8x8[j * mis + i] = xd->mi_8x8[0]; + } } } @@ -2309,8 +2340,8 @@ static void rtc_use_partition(VP9_COMP *cpi, *dist = chosen_dist; } -static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile, - int mi_row, TOKENEXTRA **tp) { +static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, TOKENEXTRA **tp) { VP9_COMMON * const cm = &cpi->common; int mi_col; @@ -2328,9 +2359,21 @@ static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile, MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; cpi->mb.source_variance = UINT_MAX; - rtc_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, - cpi->sf.always_this_block_size, - &dummy_rate, &dummy_dist, 1); + if (cpi->sf.partition_search_type == FIXED_PARTITION) { + nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, + cpi->sf.always_this_block_size, + &dummy_rate, &dummy_dist, 1); + } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION || + cpi->sf.partition_search_type == VAR_BASED_PARTITION) { + // TODO(debargha): Implement VAR_BASED_PARTITION as a separate case. + // Currently both VAR_BASED_FIXED_PARTITION/VAR_BASED_PARTITION + // map to the same thing. 
+ BLOCK_SIZE bsize = get_nonrd_var_based_fixed_partition(cpi); + nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, + bsize, &dummy_rate, &dummy_dist, 1); + } else { + assert(0); + } } } // end RTC play code @@ -2386,7 +2429,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { set_prev_mi(cm); - if (cpi->sf.use_pick_mode) { + if (cpi->sf.use_nonrd_pick_mode) { // Initialize internal buffer pointers for rtc coding, where non-RD // mode decision is used and hence no buffer pointer swap needed. int i; @@ -2422,10 +2465,10 @@ static void encode_frame_internal(VP9_COMP *cpi) { vp9_tile_init(&tile, cm, tile_row, tile_col); for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end; mi_row += MI_BLOCK_SIZE) { - if (cpi->sf.use_pick_mode) - encode_rtc_sb_row(cpi, &tile, mi_row, &tp); + if (cpi->sf.use_nonrd_pick_mode) + encode_nonrd_sb_row(cpi, &tile, mi_row, &tp); else - encode_sb_row(cpi, &tile, mi_row, &tp); + encode_rd_sb_row(cpi, &tile, mi_row, &tp); } cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old); assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols)); @@ -2688,7 +2731,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 && (cpi->oxcf.aq_mode != COMPLEXITY_AQ) && - !cpi->sf.use_pick_mode; + !cpi->sf.use_nonrd_pick_mode; x->skip_optimize = ctx->is_coded; ctx->is_coded = 1; x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index e2ed6760e..0c58d1fcb 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -105,10 +105,9 @@ static int trellis_get_coeff_context(const int16_t *scan, return pt; } -static void optimize_b(MACROBLOCK *mb, - int plane, int block, BLOCK_SIZE plane_bsize, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - TX_SIZE tx_size) { +static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, MACROBLOCK *mb, + 
struct optimize_ctx *ctx) { MACROBLOCKD *const xd = &mb->e_mbd; struct macroblock_plane *p = &mb->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; @@ -134,6 +133,11 @@ static void optimize_b(MACROBLOCK *mb, const scan_order *so = get_scan(xd, tx_size, type, block); const int16_t *scan = so->scan; const int16_t *nb = so->neighbors; + ENTROPY_CONTEXT *a, *l; + int tx_x, tx_y; + txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &tx_x, &tx_y); + a = &ctx->ta[plane][tx_x]; + l = &ctx->tl[plane][tx_y]; assert((!type && !plane) || (type && plane)); assert(eob <= default_eob); @@ -307,14 +311,6 @@ static void optimize_b(MACROBLOCK *mb, *a = *l = (final_eob > 0); } -void vp9_optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, MACROBLOCK *mb, struct optimize_ctx *ctx) { - int x, y; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); - optimize_b(mb, plane, block, plane_bsize, - &ctx->ta[plane][x], &ctx->tl[plane][y], tx_size); -} - static void optimize_init_b(int plane, BLOCK_SIZE bsize, struct encode_b_args *args) { const MACROBLOCKD *xd = &args->x->e_mbd; @@ -419,7 +415,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, vp9_xform_quant(x, plane, block, plane_bsize, tx_size); if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { - vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx); + optimize_b(plane, block, plane_bsize, tx_size, x, ctx); } else { ctx->ta[plane][i] = p->eobs[block] > 0; ctx->tl[plane][j] = p->eobs[block] > 0; @@ -522,7 +518,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, src_diff = &p->src_diff[4 * (j * diff_stride + i)]; // if (x->optimize) - // vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx); + // optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx); switch (tx_size) { case TX_32X32: diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c index af710a8f4..be6abc2a1 100644 --- 
a/vp9/encoder/vp9_encodemv.c +++ b/vp9/encoder/vp9_encodemv.c @@ -224,18 +224,11 @@ void vp9_encode_mv(VP9_COMP* cpi, vp9_writer* w, } } -void vp9_build_nmv_cost_table(int *mvjoint, - int *mvcost[2], - const nmv_context* const mvctx, - int usehp, - int mvc_flag_v, - int mvc_flag_h) { - vp9_clear_system_state(); - vp9_cost_tokens(mvjoint, mvctx->joints, vp9_mv_joint_tree); - if (mvc_flag_v) - build_nmv_component_cost_table(mvcost[0], &mvctx->comps[0], usehp); - if (mvc_flag_h) - build_nmv_component_cost_table(mvcost[1], &mvctx->comps[1], usehp); +void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2], + const nmv_context* ctx, int usehp) { + vp9_cost_tokens(mvjoint, ctx->joints, vp9_mv_joint_tree); + build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], usehp); + build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], usehp); } static void inc_mvs(int_mv mv[2], int_mv ref[2], int is_compound, diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h index f0463bbd3..bb242b6dd 100644 --- a/vp9/encoder/vp9_encodemv.h +++ b/vp9/encoder/vp9_encodemv.h @@ -25,12 +25,8 @@ void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w); void vp9_encode_mv(VP9_COMP *cpi, vp9_writer* w, const MV* mv, const MV* ref, const nmv_context* mvctx, int usehp); -void vp9_build_nmv_cost_table(int *mvjoint, - int *mvcost[2], - const nmv_context* const mvctx, - int usehp, - int mvc_flag_v, - int mvc_flag_h); +void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2], + const nmv_context* const mvctx, int usehp); void vp9_update_mv_count(VP9_COMP *cpi, MACROBLOCK *x, int_mv best_ref_mv[2]); diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 8f3d82570..33f588fa1 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -717,7 +717,7 @@ static void set_good_speed_feature(VP9_COMMON *cm, } if (speed >= 5) { sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->use_one_partition_size_always = 1; + sf->partition_search_type = 
FIXED_PARTITION; sf->always_this_block_size = BLOCK_16X16; sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; @@ -863,12 +863,12 @@ static void set_rt_speed_feature(VP9_COMMON *cm, sf->search_method = FAST_HEX; } if (speed >= 6) { - sf->use_one_partition_size_always = 1; - sf->always_this_block_size = BLOCK_32X32; + sf->partition_search_type = VAR_BASED_FIXED_PARTITION; } if (speed >= 7) { + sf->partition_search_type = FIXED_PARTITION; sf->always_this_block_size = BLOCK_16X16; - sf->use_pick_mode = 1; + sf->use_nonrd_pick_mode = 1; } } @@ -906,7 +906,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->adaptive_motion_search = 0; sf->adaptive_pred_interp_filter = 0; sf->reference_masking = 0; - sf->use_one_partition_size_always = 0; + sf->partition_search_type = SEARCH_PARTITION; sf->less_rectangular_check = 0; sf->use_square_partition_only = 0; sf->auto_min_max_partition_size = NOT_IN_USE; @@ -928,7 +928,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->use_fast_lpf_pick = 0; sf->use_fast_coef_updates = 0; sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set - sf->use_pick_mode = 0; + sf->use_nonrd_pick_mode = 0; sf->encode_breakout_thresh = 0; switch (cpi->oxcf.mode) { @@ -2903,7 +2903,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) { vp9_save_coding_context(cpi); cpi->dummy_packing = 1; - if (!cpi->sf.use_pick_mode) + if (!cpi->sf.use_nonrd_pick_mode) vp9_pack_bitstream(cpi, dest, size); rc->projected_frame_size = (int)(*size) << 3; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 7bcceedb8..fd2356591 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -218,6 +218,22 @@ typedef enum { ENCODE_BREAKOUT_LIMITED = 2 } ENCODE_BREAKOUT_TYPE; +typedef enum { + // Search partitions using RD/NONRD criterion + SEARCH_PARTITION = 0, + + // Always use a fixed size partition + FIXED_PARTITION = 1, + + // Use a 
fixed size partition in every 64X64 SB, where the size is + // determined based on source variance + VAR_BASED_FIXED_PARTITION = 2, + + // Use an arbitrary partitioning scheme based on source variance within + // a 64X64 SB + VAR_BASED_PARTITION +} PARTITION_SEARCH_TYPE; + typedef struct { // Frame level coding parameter update int frame_parameter_update; @@ -304,16 +320,6 @@ typedef struct { // TODO(JBB): remove this as its no longer used. - // If set partition size will always be always_this_block_size. - int use_one_partition_size_always; - - // Skip rectangular partition test when partition type none gives better - // rd than partition type split. - int less_rectangular_check; - - // Disable testing non square partitions. (eg 16x32) - int use_square_partition_only; - // After looking at the first set of modes (set by index here), skip // checking modes for reference frames that don't match the reference frame // of the best so far. @@ -322,9 +328,18 @@ typedef struct { // TODO(JBB): Remove this. int reference_masking; - // Used in conjunction with use_one_partition_size_always. + PARTITION_SEARCH_TYPE partition_search_type; + + // Used if partition_search_type = FIXED_PARTITION BLOCK_SIZE always_this_block_size; + // Skip rectangular partition test when partition type none gives better + // rd than partition type split. + int less_rectangular_check; + + // Disable testing non square partitions. (eg 16x32) + int use_square_partition_only; + // Sets min and max partition sizes for this 64x64 region based on the // same 64x64 in last encoded frame, and the left and above neighbor. AUTO_MIN_MAX_MODE auto_min_max_partition_size; @@ -396,7 +411,7 @@ typedef struct { int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced // This flag controls the use of non-RD mode decision. - int use_pick_mode; + int use_nonrd_pick_mode; // This variable sets the encode_breakout threshold. Currently, it is only // enabled in real time mode. 
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 383d92751..0d0dc0cc7 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -183,9 +183,11 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + struct macroblock_plane *const p = &x->plane[0]; + struct macroblockd_plane *const pd = &xd->plane[0]; const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); - MB_PREDICTION_MODE this_mode; - MV_REFERENCE_FRAME ref_frame; + MB_PREDICTION_MODE this_mode, best_mode = ZEROMV; + MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; struct buf_2d yv12_mb[4][MAX_MB_PLANE]; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, @@ -240,6 +242,8 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, clamp_mv2(&frame_mv[NEARESTMV][ref_frame].as_mv, xd); clamp_mv2(&frame_mv[NEARMV][ref_frame].as_mv, xd); + mbmi->ref_frame[0] = ref_frame; + for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { int rate = cost[INTER_OFFSET(this_mode)]; int64_t dist; @@ -253,25 +257,32 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, continue; } - dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)]; + mbmi->mode = this_mode; + mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int; + + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + + dist = cpi->fn_ptr[bsize].sdf(p->src.buf, p->src.stride, + pd->dst.buf, pd->dst.stride, INT_MAX); + this_rd = rate + dist; if (this_rd < best_rd) { best_rd = this_rd; - mbmi->mode = this_mode; - mbmi->ref_frame[0] = ref_frame; - mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int; - xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; - mbmi->uv_mode = this_mode; + best_mode = this_mode; + best_ref_frame = ref_frame; } } } + mbmi->mode = best_mode; + mbmi->ref_frame[0] = best_ref_frame; 
+ mbmi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int; + xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; + // Perform intra prediction search, if the best SAD is above a certain // threshold. if (best_rd > inter_mode_thresh) { - struct macroblock_plane *const p = &x->plane[0]; - struct macroblockd_plane *const pd = &xd->plane[0]; for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) { vp9_predict_intra_block(xd, 0, b_width_log2(bsize), mbmi->tx_size, this_mode, diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index f78ebfe18..dc6c11816 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -958,17 +958,10 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, } // Clip the active best and worst quality values to limits. - if (active_worst_quality > rc->worst_quality) - active_worst_quality = rc->worst_quality; - - if (active_best_quality < rc->best_quality) - active_best_quality = rc->best_quality; - - if (active_best_quality > rc->worst_quality) - active_best_quality = rc->worst_quality; - - if (active_worst_quality < active_best_quality) - active_worst_quality = active_best_quality; + active_best_quality = clamp(active_best_quality, + rc->best_quality, rc->worst_quality); + active_worst_quality = clamp(active_worst_quality, + active_best_quality, rc->worst_quality); *top_index = active_worst_quality; *bottom_index = active_best_quality; @@ -1041,7 +1034,7 @@ int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, // JBB : This is realtime mode. In real time mode the first frame // should be larger. Q of 0 is disabled because we force tx size to be // 16x16... 
- if (cpi->sf.use_pick_mode) { + if (cpi->sf.use_nonrd_pick_mode) { if (cpi->common.current_video_frame == 0) q /= 3; if (q == 0) diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 57417e0c6..371c0ced2 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -295,7 +295,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { set_block_thresholds(cpi); - if (!cpi->sf.use_pick_mode) { + if (!cpi->sf.use_nonrd_pick_mode) { fill_token_costs(x->token_costs, cm->fc.coef_probs); for (i = 0; i < PARTITION_CONTEXTS; i++) @@ -303,15 +303,14 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { vp9_partition_tree); } - if (!cpi->sf.use_pick_mode || (cm->current_video_frame & 0x07) == 1) { + if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1) { fill_mode_costs(cpi); if (!frame_is_intra_only(cm)) { vp9_build_nmv_cost_table(x->nmvjointcost, cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost, - &cm->fc.nmvc, - cm->allow_high_precision_mv, 1, 1); + &cm->fc.nmvc, cm->allow_high_precision_mv); for (i = 0; i < INTER_MODE_CONTEXTS; ++i) vp9_cost_tokens((int *)x->inter_mode_cost[i], @@ -937,27 +936,23 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, } } -static void super_block_yrd(VP9_COMP *cpi, - MACROBLOCK *x, int *rate, int64_t *distortion, - int *skip, int64_t *psse, BLOCK_SIZE bs, - int64_t txfm_cache[TX_MODES], - int64_t ref_best_rd) { +static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, + int64_t *distortion, int *skip, + int64_t *psse, BLOCK_SIZE bs, + int64_t txfm_cache[TX_MODES], + int64_t ref_best_rd) { int r[TX_SIZES][2], s[TX_SIZES]; int64_t d[TX_SIZES], sse[TX_SIZES]; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; - const int b_inter_mode = is_inter_block(mbmi); const TX_SIZE max_tx_size = max_txsize_lookup[bs]; TX_SIZE tx_size; - assert(bs == mbmi->sb_type); - if (b_inter_mode) - vp9_subtract_plane(x, bs, 0); - if 
(cpi->sf.tx_size_search_method == USE_LARGESTALL || - (cpi->sf.tx_size_search_method != USE_FULL_RD && - !b_inter_mode)) { + vp9_subtract_plane(x, bs, 0); + + if (cpi->sf.tx_size_search_method == USE_LARGESTALL) { vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t)); choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse, ref_best_rd, bs); @@ -966,8 +961,7 @@ static void super_block_yrd(VP9_COMP *cpi, return; } - if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER && - b_inter_mode) { + if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER) { for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd, &r[tx_size][0], &d[tx_size], &s[tx_size]); @@ -985,6 +979,36 @@ static void super_block_yrd(VP9_COMP *cpi, *psse = sse[mbmi->tx_size]; } +static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, + int64_t *distortion, int *skip, + int64_t *psse, BLOCK_SIZE bs, + int64_t txfm_cache[TX_MODES], + int64_t ref_best_rd) { + int64_t sse[TX_SIZES]; + MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + + assert(bs == mbmi->sb_type); + if (cpi->sf.tx_size_search_method != USE_FULL_RD) { + vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t)); + choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse, + ref_best_rd, bs); + } else { + int r[TX_SIZES][2], s[TX_SIZES]; + int64_t d[TX_SIZES]; + TX_SIZE tx_size; + for (tx_size = TX_4X4; tx_size <= max_txsize_lookup[bs]; ++tx_size) + txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size], + &s[tx_size], &sse[tx_size], + ref_best_rd, 0, bs, tx_size); + choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, + skip, txfm_cache, bs); + } + if (psse) + *psse = sse[mbmi->tx_size]; +} + + static int conditional_skipintra(MB_PREDICTION_MODE mode, MB_PREDICTION_MODE best_intra_mode) { if (mode == D117_PRED && @@ -1245,8 +1269,8 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, } mic->mbmi.mode = 
mode; - super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL, - bsize, local_tx_cache, best_rd); + intra_super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, + &s, NULL, bsize, local_tx_cache, best_rd); if (this_rate_tokenonly == INT_MAX) continue; @@ -1281,7 +1305,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, return best_rd; } -static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x, +static void super_block_uvrd(MACROBLOCK *x, int *rate, int64_t *distortion, int *skippable, int64_t *sse, BLOCK_SIZE bsize, int64_t ref_best_rd) { @@ -1331,6 +1355,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, BLOCK_SIZE bsize, TX_SIZE max_tx_size) { + MACROBLOCKD *xd = &x->e_mbd; MB_PREDICTION_MODE mode; MB_PREDICTION_MODE mode_selected = DC_PRED; int64_t best_rd = INT64_MAX, this_rd; @@ -1341,9 +1366,9 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue; - x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode; + xd->mi_8x8[0]->mbmi.uv_mode = mode; - super_block_uvrd(cpi, x, &this_rate_tokenonly, + super_block_uvrd(x, &this_rate_tokenonly, &this_distortion, &s, &this_sse, bsize, best_rd); if (this_rate_tokenonly == INT_MAX) continue; @@ -1361,7 +1386,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, if (!x->select_txfm_size) { int i; struct macroblock_plane *const p = x->plane; - struct macroblockd_plane *const pd = x->e_mbd.plane; + struct macroblockd_plane *const pd = xd->plane; for (i = 1; i < MAX_MB_PLANE; ++i) { p[i].coeff = ctx->coeff_pbuf[i][2]; p[i].qcoeff = ctx->qcoeff_pbuf[i][2]; @@ -1382,25 +1407,21 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, } } - x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode_selected; + xd->mi_8x8[0]->mbmi.uv_mode = mode_selected; return best_rd; } -static int64_t 
rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x, +static int64_t rd_sbuv_dcpred(const VP9_COMMON *cm, MACROBLOCK *x, int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, BLOCK_SIZE bsize) { - int64_t this_rd; - int64_t this_sse; + int64_t unused; x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED; - super_block_uvrd(cpi, x, rate_tokenonly, distortion, - skippable, &this_sse, bsize, INT64_MAX); - *rate = *rate_tokenonly + - x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED]; - this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion); - - return this_rd; + super_block_uvrd(x, rate_tokenonly, distortion, + skippable, &unused, bsize, INT64_MAX); + *rate = *rate_tokenonly + x->intra_uv_mode_cost[cm->frame_type][DC_PRED]; + return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, @@ -1413,8 +1434,8 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, // Use an estimated rd for uv_intra based on DC_PRED if the // appropriate speed flag is set. if (cpi->sf.use_uv_intra_rd_estimate) { - rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, - bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize); + rd_sbuv_dcpred(&cpi->common, x, rate_uv, rate_uv_tokenonly, dist_uv, + skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize); // Else do a proper rd search for each possible transform size that may // be considered in the main rd loop. } else { @@ -1428,8 +1449,7 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode, int mode_context) { MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; - const int segment_id = xd->mi_8x8[0]->mbmi.segment_id; + const int segment_id = x->e_mbd.mi_8x8[0]->mbmi.segment_id; // Don't account for mode here if segment skip is enabled. 
if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) { @@ -1454,7 +1474,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int *rate_mv); static int labels2mode(MACROBLOCK *x, int i, - MB_PREDICTION_MODE this_mode, + MB_PREDICTION_MODE mode, int_mv *this_mv, int_mv *this_second_mv, int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int_mv seg_mvs[MAX_REF_FRAMES], @@ -1464,23 +1484,18 @@ static int labels2mode(MACROBLOCK *x, int i, MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mic = xd->mi_8x8[0]; MB_MODE_INFO *mbmi = &mic->mbmi; - int cost = 0, thismvcost = 0; + int thismvcost = 0; int idx, idy; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; const int has_second_rf = has_second_ref(mbmi); - /* We have to be careful retrieving previously-encoded motion vectors. - Ones from this macroblock have to be pulled from the BLOCKD array - as they have not yet made it to the bmi array in our MB_MODE_INFO. 
*/ - MB_PREDICTION_MODE m; - // the only time we should do costing for new motion vector or mode // is when we are on a new label (jbb May 08, 2007) - switch (m = this_mode) { + switch (mode) { case NEWMV: this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int; - thismvcost = vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv, + thismvcost += vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv, mvjcost, mvcost, MV_COST_WEIGHT_SUB); if (has_second_rf) { this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int; @@ -1492,14 +1507,12 @@ static int labels2mode(MACROBLOCK *x, int i, case NEARESTMV: this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int; if (has_second_rf) - this_second_mv->as_int = - frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int; + this_second_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int; break; case NEARMV: this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int; if (has_second_rf) - this_second_mv->as_int = - frame_mv[NEARMV][mbmi->ref_frame[1]].as_int; + this_second_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[1]].as_int; break; case ZEROMV: this_mv->as_int = 0; @@ -1510,22 +1523,19 @@ static int labels2mode(MACROBLOCK *x, int i, break; } - cost = cost_mv_ref(cpi, this_mode, - mbmi->mode_context[mbmi->ref_frame[0]]); - mic->bmi[i].as_mv[0].as_int = this_mv->as_int; if (has_second_rf) mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int; - mic->bmi[i].as_mode = m; + mic->bmi[i].as_mode = mode; for (idy = 0; idy < num_4x4_blocks_high; ++idy) for (idx = 0; idx < num_4x4_blocks_wide; ++idx) vpx_memcpy(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i])); - cost += thismvcost; - return cost; + return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) + + thismvcost; } static int64_t encode_inter_mb_segment(VP9_COMP *cpi, @@ -3008,8 +3018,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t rdcosty = INT64_MAX; // Y cost and distortion - super_block_yrd(cpi, x, rate_y, 
distortion_y, &skippable_y, psse, - bsize, txfm_cache, ref_best_rd); + inter_super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse, + bsize, txfm_cache, ref_best_rd); if (*rate_y == INT_MAX) { *rate2 = INT_MAX; @@ -3024,7 +3034,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse)); - super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv, + super_block_uvrd(x, rate_uv, distortion_uv, &skippable_uv, &sseuv, bsize, ref_best_rd - rdcosty); if (*rate_uv == INT_MAX) { *rate2 = INT_MAX; @@ -3394,8 +3404,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (ref_frame == INTRA_FRAME) { TX_SIZE uv_tx; - super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, - bsize, tx_cache, best_rd); + intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, + bsize, tx_cache, best_rd); if (rate_y == INT_MAX) continue; @@ -4151,7 +4161,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // then dont bother looking at UV vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, BLOCK_8X8); - super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable, + super_block_uvrd(x, &rate_uv, &distortion_uv, &uv_skippable, &uv_sse, BLOCK_8X8, tmp_best_rdu); if (rate_uv == INT_MAX) continue; diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 2be00ff62..7ae110707 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -23,8 +23,8 @@ static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2]; const TOKENVALUE *vp9_dct_value_tokens_ptr; -static int dct_value_cost[DCT_MAX_VALUE * 2]; -const int *vp9_dct_value_cost_ptr; +static int16_t dct_value_cost[DCT_MAX_VALUE * 2]; +const int16_t *vp9_dct_value_cost_ptr; // Array indices are identical to previously-existing CONTEXT_NODE indices const vp9_tree_index 
vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = { diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index ea86240be..063c0bafe 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -47,7 +47,7 @@ struct VP9_COMP; void vp9_tokenize_sb(struct VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize); -extern const int *vp9_dct_value_cost_ptr; +extern const int16_t *vp9_dct_value_cost_ptr; /* TODO: The Token field should be broken out into a separate char array to * improve cache locality, since it's needed for costing when the rest of the * fields are not. |