diff options
47 files changed, 1418 insertions, 1194 deletions
diff --git a/test/convolve_test.cc b/test/convolve_test.cc index 2d89429a1..3412ddd23 100644 --- a/test/convolve_test.cc +++ b/test/convolve_test.cc @@ -644,6 +644,28 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values( make_tuple(64, 64, &convolve8_ssse3))); #endif +#if HAVE_AVX2 +const ConvolveFunctions convolve8_avx2( + vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3, + vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3, + vp9_convolve8_avx2, vp9_convolve8_avg_ssse3); + +INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values( + make_tuple(4, 4, &convolve8_avx2), + make_tuple(8, 4, &convolve8_avx2), + make_tuple(4, 8, &convolve8_avx2), + make_tuple(8, 8, &convolve8_avx2), + make_tuple(16, 8, &convolve8_avx2), + make_tuple(8, 16, &convolve8_avx2), + make_tuple(16, 16, &convolve8_avx2), + make_tuple(32, 16, &convolve8_avx2), + make_tuple(16, 32, &convolve8_avx2), + make_tuple(32, 32, &convolve8_avx2), + make_tuple(64, 32, &convolve8_avx2), + make_tuple(32, 64, &convolve8_avx2), + make_tuple(64, 64, &convolve8_avx2))); +#endif + #if HAVE_NEON_ASM const ConvolveFunctions convolve8_neon( vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon, diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc index 20b1c8fbd..99c8d0c7c 100644 --- a/test/dct16x16_test.cc +++ b/test/dct16x16_test.cc @@ -272,10 +272,18 @@ void fdct16x16_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { vp9_fdct16x16_c(in, out, stride); } +void idct16x16_ref(const int16_t *in, uint8_t *dest, int stride, int tx_type) { + vp9_idct16x16_256_add_c(in, dest, stride); +} + void fht16x16_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { vp9_fht16x16_c(in, out, stride, tx_type); } +void iht16x16_ref(const int16_t *in, uint8_t *dest, int stride, int tx_type) { + vp9_iht16x16_256_add_c(in, dest, stride, tx_type); +} + class Trans16x16TestBase { public: virtual ~Trans16x16TestBase() {} @@ -358,12 +366,13 @@ class Trans16x16TestBase { input_block[j] = rnd.Rand8() - rnd.Rand8(); input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255; } - if (i == 0) + if (i == 0) { for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = 255; - if (i == 1) + } else if (i == 1) { for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -255; + } fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_); REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block, @@ -378,6 +387,47 @@ class Trans16x16TestBase { } } + void RunQuantCheck(int dc_thred, int ac_thred) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 1000; + DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs); + DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs); + DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs); + + DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs); + DECLARE_ALIGNED_ARRAY(16, uint8_t, ref, kNumCoeffs); + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-255, 255]. + for (int j = 0; j < kNumCoeffs; ++j) { + input_block[j] = rnd.Rand8() - rnd.Rand8(); + input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255; + } + if (i == 0) + for (int j = 0; j < kNumCoeffs; ++j) + input_extreme_block[j] = 255; + if (i == 1) + for (int j = 0; j < kNumCoeffs; ++j) + input_extreme_block[j] = -255; + + fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_); + + // clear reconstructed pixel buffers + vpx_memset(dst, 0, kNumCoeffs * sizeof(uint8_t)); + vpx_memset(ref, 0, kNumCoeffs * sizeof(uint8_t)); + + // quantization with maximum allowed step sizes + output_ref_block[0] = (output_ref_block[0] / dc_thred) * dc_thred; + for (int j = 1; j < kNumCoeffs; ++j) + output_ref_block[j] = (output_ref_block[j] / ac_thred) * ac_thred; + inv_txfm_ref(output_ref_block, ref, pitch_, tx_type_); + REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_)); + + for (int j = 0; j < kNumCoeffs; ++j) + EXPECT_EQ(ref[j], dst[j]); + } + } + void RunInvAccuracyCheck() { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 1000; @@ -414,6 +464,7 @@ class Trans16x16TestBase { int pitch_; int tx_type_; fht_t fwd_txfm_ref; + iht_t inv_txfm_ref; }; class Trans16x16DCT @@ -428,6 +479,7 @@ class Trans16x16DCT tx_type_ = GET_PARAM(2); pitch_ = 16; fwd_txfm_ref = fdct16x16_ref; + inv_txfm_ref = idct16x16_ref; } virtual void TearDown() { libvpx_test::ClearSystemState(); } @@ -455,6 +507,12 @@ TEST_P(Trans16x16DCT, MemCheck) { RunMemCheck(); } +TEST_P(Trans16x16DCT, QuantCheck) { + // Use maximally allowed quantization step sizes for DC and AC + // coefficients respectively. + RunQuantCheck(1336, 1828); +} + TEST_P(Trans16x16DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); } @@ -471,6 +529,7 @@ class Trans16x16HT tx_type_ = GET_PARAM(2); pitch_ = 16; fwd_txfm_ref = fht16x16_ref; + inv_txfm_ref = iht16x16_ref; } virtual void TearDown() { libvpx_test::ClearSystemState(); } @@ -498,6 +557,12 @@ TEST_P(Trans16x16HT, MemCheck) { RunMemCheck(); } +TEST_P(Trans16x16HT, QuantCheck) { + // The encoder skips any non-DC intra prediction modes, + // when the quantization step size goes beyond 988. + RunQuantCheck(549, 988); +} + using std::tr1::make_tuple; INSTANTIATE_TEST_CASE_P( diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc index 72c0bd69d..501c69621 100644 --- a/test/dct32x32_test.cc +++ b/test/dct32x32_test.cc @@ -179,12 +179,13 @@ TEST_P(Trans32x32Test, MemCheck) { input_block[j] = rnd.Rand8() - rnd.Rand8(); input_extreme_block[j] = rnd.Rand8() & 1 ? 255 : -255; } - if (i == 0) + if (i == 0) { for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = 255; - if (i == 1) + } else if (i == 1) { for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -255; + } const int stride = 32; vp9_fdct32x32_c(input_extreme_block, output_ref_block, stride); diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc index 030665eda..961eb4d6c 100644 --- a/test/fdct4x4_test.cc +++ b/test/fdct4x4_test.cc @@ -136,12 +136,13 @@ class Trans4x4TestBase { input_block[j] = rnd.Rand8() - rnd.Rand8(); input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255; } - if (i == 0) + if (i == 0) { for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = 255; - if (i == 1) + } else if (i == 1) { for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -255; + } fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_); REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block, diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc index c7cf1640c..1d1881a22 100644 --- a/test/fdct8x8_test.cc +++ b/test/fdct8x8_test.cc @@ -177,9 +177,11 @@ class FwdTrans8x8TestBase { ACMRandom rnd(ACMRandom::DeterministicSeed()); int max_error = 0; int total_error = 0; + int total_coeff_error = 0; const int count_test_block = 100000; DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64); DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64); + DECLARE_ALIGNED_ARRAY(16, int16_t, ref_temp_block, 64); DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64); DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64); @@ -187,13 +189,24 @@ class FwdTrans8x8TestBase { // Initialize a test block with input range [-255, 255]. for (int j = 0; j < 64; ++j) { src[j] = rnd.Rand8() % 2 ? 255 : 0; - dst[j] = src[j] > 0 ? 0 : 255; + dst[j] = rnd.Rand8() % 2 ? 255 : 0; + + if (i == 0) { + src[j] = 255; + dst[j] = 0; + } else if (i == 1) { + src[j] = 0; + dst[j] = 255; + } + test_input_block[j] = src[j] - dst[j]; } REGISTER_STATE_CHECK( RunFwdTxfm(test_input_block, test_temp_block, pitch_)); REGISTER_STATE_CHECK( + fwd_txfm_ref(test_input_block, ref_temp_block, pitch_, tx_type_)); + REGISTER_STATE_CHECK( RunInvTxfm(test_temp_block, dst, pitch_)); for (int j = 0; j < 64; ++j) { @@ -202,6 +215,9 @@ class FwdTrans8x8TestBase { if (max_error < error) max_error = error; total_error += error; + + const int coeff_diff = test_temp_block[j] - ref_temp_block[j]; + total_coeff_error += abs(coeff_diff); } EXPECT_GE(1, max_error) @@ -211,6 +227,10 @@ class FwdTrans8x8TestBase { EXPECT_GE(count_test_block/5, total_error) << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average" << " roundtrip error > 1/5 per block"; + + EXPECT_EQ(0, total_coeff_error) + << "Error: Extremal 8x8 FDCT/FHT has" + << "overflow issues in the intermediate steps > 1"; } } @@ -343,7 +363,7 @@ INSTANTIATE_TEST_CASE_P( #if HAVE_SSSE3 && ARCH_X86_64 INSTANTIATE_TEST_CASE_P( - SSSE3, FwdTrans8x8DCT, + DISABLED_SSSE3, FwdTrans8x8DCT, ::testing::Values( make_tuple(&vp9_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0))); #endif diff --git a/test/sad_test.cc b/test/sad_test.cc index a692891ad..f9ffa92de 100644 --- a/test/sad_test.cc +++ b/test/sad_test.cc @@ -29,12 +29,22 @@ #include "third_party/googletest/src/include/gtest/gtest.h" +#if CONFIG_VP8_ENCODER typedef unsigned int (*sad_m_by_n_fn_t)(const unsigned char *source_ptr, int source_stride, const unsigned char *reference_ptr, int reference_stride, unsigned int max_sad); typedef std::tr1::tuple<int, int, sad_m_by_n_fn_t> sad_m_by_n_test_param_t; +#endif +#if CONFIG_VP9_ENCODER +typedef unsigned int (*sad_m_by_n_fn_vp9_t)(const unsigned char *source_ptr, + int source_stride, + const unsigned char *reference_ptr, + int reference_stride); +typedef std::tr1::tuple<int, int, sad_m_by_n_fn_vp9_t> + sad_m_by_n_test_param_vp9_t; +#endif typedef void (*sad_n_by_n_by_4_fn_t)(const uint8_t *src_ptr, int src_stride, @@ -87,7 +97,7 @@ class SADTestBase : public ::testing::Test { // Sum of Absolute Differences. Given two blocks, calculate the absolute // difference between two pixels in the same relative location; accumulate. - unsigned int ReferenceSAD(unsigned int max_sad, int block_idx = 0) { + unsigned int ReferenceSAD(unsigned int max_sad, int block_idx) { unsigned int sad = 0; const uint8_t* const reference = GetReference(block_idx); @@ -128,13 +138,43 @@ class SADTestBase : public ::testing::Test { ACMRandom rnd_; }; -class SADTest : public SADTestBase, - public ::testing::WithParamInterface<sad_m_by_n_test_param_t> { +class SADx4Test + : public SADTestBase, + public ::testing::WithParamInterface<sad_n_by_n_by_4_test_param_t> { + public: + SADx4Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1)) {} + + protected: + void SADs(unsigned int *results) { + const uint8_t* refs[] = {GetReference(0), GetReference(1), + GetReference(2), GetReference(3)}; + + REGISTER_STATE_CHECK(GET_PARAM(2)(source_data_, source_stride_, + refs, reference_stride_, + results)); + } + + void CheckSADs() { + unsigned int reference_sad, exp_sad[4]; + + SADs(exp_sad); + for (int block = 0; block < 4; ++block) { + reference_sad = ReferenceSAD(UINT_MAX, block); + + EXPECT_EQ(reference_sad, exp_sad[block]) << "block " << block; + } + } +}; + +#if CONFIG_VP8_ENCODER +class SADTest + : public SADTestBase, + public ::testing::WithParamInterface<sad_m_by_n_test_param_t> { public: SADTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1)) {} protected: - unsigned int SAD(unsigned int max_sad, int block_idx = 0) { + unsigned int SAD(unsigned int max_sad, int block_idx) { unsigned int ret; const uint8_t* const reference = GetReference(block_idx); @@ -144,11 +184,9 @@ class SADTest : public SADTestBase, return ret; } - void CheckSad(unsigned int max_sad) { - unsigned int reference_sad, exp_sad; - - reference_sad = ReferenceSAD(max_sad); - exp_sad = SAD(max_sad); + void CheckSAD(unsigned int max_sad) { + const unsigned int reference_sad = ReferenceSAD(max_sad, 0); + const unsigned int exp_sad = SAD(max_sad, 0); if (reference_sad <= max_sad) { ASSERT_EQ(exp_sad, reference_sad); @@ -158,43 +196,131 @@ class SADTest : public SADTestBase, } } }; +#endif // CONFIG_VP8_ENCODER -class SADx4Test : public SADTestBase, - public ::testing::WithParamInterface<sad_n_by_n_by_4_test_param_t> { +#if CONFIG_VP9_ENCODER +class SADVP9Test + : public SADTestBase, + public ::testing::WithParamInterface<sad_m_by_n_test_param_vp9_t> { public: - SADx4Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1)) {} + SADVP9Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1)) {} protected: - void SADs(unsigned int *results) { - const uint8_t* refs[] = {GetReference(0), GetReference(1), - GetReference(2), GetReference(3)}; + unsigned int SAD(int block_idx) { + unsigned int ret; + const uint8_t* const reference = GetReference(block_idx); - REGISTER_STATE_CHECK(GET_PARAM(2)(source_data_, source_stride_, - refs, reference_stride_, - results)); + REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_, + reference, reference_stride_)); + return ret; } - void CheckSADs() { - unsigned int reference_sad, exp_sad[4]; - - SADs(exp_sad); - for (int block = 0; block < 4; block++) { - reference_sad = ReferenceSAD(UINT_MAX, block); + void CheckSAD() { + const unsigned int reference_sad = ReferenceSAD(UINT_MAX, 0); + const unsigned int exp_sad = SAD(0); - EXPECT_EQ(exp_sad[block], reference_sad) << "block " << block; - } + ASSERT_EQ(reference_sad, exp_sad); } }; +#endif // CONFIG_VP9_ENCODER uint8_t* SADTestBase::source_data_ = NULL; uint8_t* SADTestBase::reference_data_ = NULL; +#if CONFIG_VP8_ENCODER TEST_P(SADTest, MaxRef) { FillConstant(source_data_, source_stride_, 0); FillConstant(reference_data_, reference_stride_, 255); - CheckSad(UINT_MAX); + CheckSAD(UINT_MAX); +} + +TEST_P(SADTest, MaxSrc) { + FillConstant(source_data_, source_stride_, 255); + FillConstant(reference_data_, reference_stride_, 0); + CheckSAD(UINT_MAX); +} + +TEST_P(SADTest, ShortRef) { + int tmp_stride = reference_stride_; + reference_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + CheckSAD(UINT_MAX); + reference_stride_ = tmp_stride; +} + +TEST_P(SADTest, UnalignedRef) { + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. + const int tmp_stride = reference_stride_; + reference_stride_ -= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + CheckSAD(UINT_MAX); + reference_stride_ = tmp_stride; +} + +TEST_P(SADTest, ShortSrc) { + const int tmp_stride = source_stride_; + source_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + CheckSAD(UINT_MAX); + source_stride_ = tmp_stride; +} + +TEST_P(SADTest, MaxSAD) { + // Verify that, when max_sad is set, the implementation does not return a + // value lower than the reference. + FillConstant(source_data_, source_stride_, 255); + FillConstant(reference_data_, reference_stride_, 0); + CheckSAD(128); +} +#endif // CONFIG_VP8_ENCODER + +#if CONFIG_VP9_ENCODER +TEST_P(SADVP9Test, MaxRef) { + FillConstant(source_data_, source_stride_, 0); + FillConstant(reference_data_, reference_stride_, 255); + CheckSAD(); +} + +TEST_P(SADVP9Test, MaxSrc) { + FillConstant(source_data_, source_stride_, 255); + FillConstant(reference_data_, reference_stride_, 0); + CheckSAD(); } +TEST_P(SADVP9Test, ShortRef) { + const int tmp_stride = reference_stride_; + reference_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + CheckSAD(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADVP9Test, UnalignedRef) { + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. + const int tmp_stride = reference_stride_; + reference_stride_ -= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + CheckSAD(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADVP9Test, ShortSrc) { + const int tmp_stride = source_stride_; + source_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + CheckSAD(); + source_stride_ = tmp_stride; +} +#endif // CONFIG_VP9_ENCODER + TEST_P(SADx4Test, MaxRef) { FillConstant(source_data_, source_stride_, 0); FillConstant(GetReference(0), reference_stride_, 255); @@ -204,12 +330,6 @@ TEST_P(SADx4Test, MaxRef) { CheckSADs(); } -TEST_P(SADTest, MaxSrc) { - FillConstant(source_data_, source_stride_, 255); - FillConstant(reference_data_, reference_stride_, 0); - CheckSad(UINT_MAX); -} - TEST_P(SADx4Test, MaxSrc) { FillConstant(source_data_, source_stride_, 255); FillConstant(GetReference(0), reference_stride_, 0); @@ -219,15 +339,6 @@ TEST_P(SADx4Test, MaxSrc) { CheckSADs(); } -TEST_P(SADTest, ShortRef) { - int tmp_stride = reference_stride_; - reference_stride_ >>= 1; - FillRandom(source_data_, source_stride_); - FillRandom(reference_data_, reference_stride_); - CheckSad(UINT_MAX); - reference_stride_ = tmp_stride; -} - TEST_P(SADx4Test, ShortRef) { int tmp_stride = reference_stride_; reference_stride_ >>= 1; @@ -240,17 +351,6 @@ TEST_P(SADx4Test, ShortRef) { reference_stride_ = tmp_stride; } -TEST_P(SADTest, UnalignedRef) { - // The reference frame, but not the source frame, may be unaligned for - // certain types of searches. - int tmp_stride = reference_stride_; - reference_stride_ -= 1; - FillRandom(source_data_, source_stride_); - FillRandom(reference_data_, reference_stride_); - CheckSad(UINT_MAX); - reference_stride_ = tmp_stride; -} - TEST_P(SADx4Test, UnalignedRef) { // The reference frame, but not the source frame, may be unaligned for // certain types of searches. @@ -265,15 +365,6 @@ TEST_P(SADx4Test, UnalignedRef) { reference_stride_ = tmp_stride; } -TEST_P(SADTest, ShortSrc) { - int tmp_stride = source_stride_; - source_stride_ >>= 1; - FillRandom(source_data_, source_stride_); - FillRandom(reference_data_, reference_stride_); - CheckSad(UINT_MAX); - source_stride_ = tmp_stride; -} - TEST_P(SADx4Test, ShortSrc) { int tmp_stride = source_stride_; source_stride_ >>= 1; @@ -286,14 +377,6 @@ TEST_P(SADx4Test, ShortSrc) { source_stride_ = tmp_stride; } -TEST_P(SADTest, MaxSAD) { - // Verify that, when max_sad is set, the implementation does not return a - // value lower than the reference. - FillConstant(source_data_, source_stride_, 255); - FillConstant(reference_data_, reference_stride_, 0); - CheckSad(128); -} - using std::tr1::make_tuple; //------------------------------------------------------------------------------ @@ -304,27 +387,27 @@ const sad_m_by_n_fn_t sad_8x16_c = vp8_sad8x16_c; const sad_m_by_n_fn_t sad_16x8_c = vp8_sad16x8_c; const sad_m_by_n_fn_t sad_8x8_c = vp8_sad8x8_c; const sad_m_by_n_fn_t sad_4x4_c = vp8_sad4x4_c; -#endif -#if CONFIG_VP9_ENCODER -const sad_m_by_n_fn_t sad_64x64_c_vp9 = vp9_sad64x64_c; -const sad_m_by_n_fn_t sad_32x32_c_vp9 = vp9_sad32x32_c; -const sad_m_by_n_fn_t sad_16x16_c_vp9 = vp9_sad16x16_c; -const sad_m_by_n_fn_t sad_8x16_c_vp9 = vp9_sad8x16_c; -const sad_m_by_n_fn_t sad_16x8_c_vp9 = vp9_sad16x8_c; -const sad_m_by_n_fn_t sad_8x8_c_vp9 = vp9_sad8x8_c; -const sad_m_by_n_fn_t sad_8x4_c_vp9 = vp9_sad8x4_c; -const sad_m_by_n_fn_t sad_4x8_c_vp9 = vp9_sad4x8_c; -const sad_m_by_n_fn_t sad_4x4_c_vp9 = vp9_sad4x4_c; -#endif const sad_m_by_n_test_param_t c_tests[] = { -#if CONFIG_VP8_ENCODER make_tuple(16, 16, sad_16x16_c), make_tuple(8, 16, sad_8x16_c), make_tuple(16, 8, sad_16x8_c), make_tuple(8, 8, sad_8x8_c), make_tuple(4, 4, sad_4x4_c), -#endif +}; +INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests)); +#endif // CONFIG_VP8_ENCODER + #if CONFIG_VP9_ENCODER +const sad_m_by_n_fn_vp9_t sad_64x64_c_vp9 = vp9_sad64x64_c; +const sad_m_by_n_fn_vp9_t sad_32x32_c_vp9 = vp9_sad32x32_c; +const sad_m_by_n_fn_vp9_t sad_16x16_c_vp9 = vp9_sad16x16_c; +const sad_m_by_n_fn_vp9_t sad_8x16_c_vp9 = vp9_sad8x16_c; +const sad_m_by_n_fn_vp9_t sad_16x8_c_vp9 = vp9_sad16x8_c; +const sad_m_by_n_fn_vp9_t sad_8x8_c_vp9 = vp9_sad8x8_c; +const sad_m_by_n_fn_vp9_t sad_8x4_c_vp9 = vp9_sad8x4_c; +const sad_m_by_n_fn_vp9_t sad_4x8_c_vp9 = vp9_sad4x8_c; +const sad_m_by_n_fn_vp9_t sad_4x4_c_vp9 = vp9_sad4x4_c; +const sad_m_by_n_test_param_vp9_t c_vp9_tests[] = { make_tuple(64, 64, sad_64x64_c_vp9), make_tuple(32, 32, sad_32x32_c_vp9), make_tuple(16, 16, sad_16x16_c_vp9), @@ -334,11 +417,9 @@ const sad_m_by_n_test_param_t c_tests[] = { make_tuple(8, 4, sad_8x4_c_vp9), make_tuple(4, 8, sad_4x8_c_vp9), make_tuple(4, 4, sad_4x4_c_vp9), -#endif }; -INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests)); +INSTANTIATE_TEST_CASE_P(C, SADVP9Test, ::testing::ValuesIn(c_vp9_tests)); -#if CONFIG_VP9_ENCODER const sad_n_by_n_by_4_fn_t sad_64x64x4d_c = vp9_sad64x64x4d_c; const sad_n_by_n_by_4_fn_t sad_64x32x4d_c = vp9_sad64x32x4d_c; const sad_n_by_n_by_4_fn_t sad_32x64x4d_c = vp9_sad32x64x4d_c; @@ -375,8 +456,8 @@ INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::Values( const sad_m_by_n_fn_t sad_16x16_armv6 = vp8_sad16x16_armv6; INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::Values( make_tuple(16, 16, sad_16x16_armv6))); -#endif -#endif +#endif // CONFIG_VP8_ENCODER +#endif // HAVE_MEDIA #if HAVE_NEON #if CONFIG_VP8_ENCODER @@ -391,8 +472,8 @@ INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::Values( make_tuple(16, 8, sad_16x8_neon), make_tuple(8, 8, sad_8x8_neon), make_tuple(4, 4, sad_4x4_neon))); -#endif -#endif +#endif // CONFIG_VP8_ENCODER +#endif // HAVE_NEON //------------------------------------------------------------------------------ // x86 functions @@ -403,40 +484,39 @@ const sad_m_by_n_fn_t sad_8x16_mmx = vp8_sad8x16_mmx; const sad_m_by_n_fn_t sad_16x8_mmx = vp8_sad16x8_mmx; const sad_m_by_n_fn_t sad_8x8_mmx = vp8_sad8x8_mmx; const sad_m_by_n_fn_t sad_4x4_mmx = vp8_sad4x4_mmx; -#endif -#if CONFIG_VP9_ENCODER -const sad_m_by_n_fn_t sad_16x16_mmx_vp9 = vp9_sad16x16_mmx; -const sad_m_by_n_fn_t sad_8x16_mmx_vp9 = vp9_sad8x16_mmx; -const sad_m_by_n_fn_t sad_16x8_mmx_vp9 = vp9_sad16x8_mmx; -const sad_m_by_n_fn_t sad_8x8_mmx_vp9 = vp9_sad8x8_mmx; -const sad_m_by_n_fn_t sad_4x4_mmx_vp9 = vp9_sad4x4_mmx; -#endif - const sad_m_by_n_test_param_t mmx_tests[] = { -#if CONFIG_VP8_ENCODER make_tuple(16, 16, sad_16x16_mmx), make_tuple(8, 16, sad_8x16_mmx), make_tuple(16, 8, sad_16x8_mmx), make_tuple(8, 8, sad_8x8_mmx), make_tuple(4, 4, sad_4x4_mmx), -#endif +}; +INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests)); +#endif // CONFIG_VP8_ENCODER + #if CONFIG_VP9_ENCODER +const sad_m_by_n_fn_vp9_t sad_16x16_mmx_vp9 = vp9_sad16x16_mmx; +const sad_m_by_n_fn_vp9_t sad_8x16_mmx_vp9 = vp9_sad8x16_mmx; +const sad_m_by_n_fn_vp9_t sad_16x8_mmx_vp9 = vp9_sad16x8_mmx; +const sad_m_by_n_fn_vp9_t sad_8x8_mmx_vp9 = vp9_sad8x8_mmx; +const sad_m_by_n_fn_vp9_t sad_4x4_mmx_vp9 = vp9_sad4x4_mmx; +const sad_m_by_n_test_param_vp9_t mmx_vp9_tests[] = { make_tuple(16, 16, sad_16x16_mmx_vp9), make_tuple(8, 16, sad_8x16_mmx_vp9), make_tuple(16, 8, sad_16x8_mmx_vp9), make_tuple(8, 8, sad_8x8_mmx_vp9), make_tuple(4, 4, sad_4x4_mmx_vp9), -#endif }; -INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests)); -#endif +INSTANTIATE_TEST_CASE_P(MMX, SADVP9Test, ::testing::ValuesIn(mmx_vp9_tests)); +#endif // CONFIG_VP9_ENCODER +#endif // HAVE_MMX #if HAVE_SSE #if CONFIG_VP9_ENCODER #if CONFIG_USE_X86INC -const sad_m_by_n_fn_t sad_4x4_sse_vp9 = vp9_sad4x4_sse; -const sad_m_by_n_fn_t sad_4x8_sse_vp9 = vp9_sad4x8_sse; -INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::Values( +const sad_m_by_n_fn_vp9_t sad_4x4_sse_vp9 = vp9_sad4x4_sse; +const sad_m_by_n_fn_vp9_t sad_4x8_sse_vp9 = vp9_sad4x8_sse; +INSTANTIATE_TEST_CASE_P(SSE, SADVP9Test, ::testing::Values( make_tuple(4, 4, sad_4x4_sse_vp9), make_tuple(4, 8, sad_4x8_sse_vp9))); @@ -456,32 +536,30 @@ const sad_m_by_n_fn_t sad_8x16_wmt = vp8_sad8x16_wmt; const sad_m_by_n_fn_t sad_16x8_wmt = vp8_sad16x8_wmt; const sad_m_by_n_fn_t sad_8x8_wmt = vp8_sad8x8_wmt; const sad_m_by_n_fn_t sad_4x4_wmt = vp8_sad4x4_wmt; -#endif -#if CONFIG_VP9_ENCODER -#if CONFIG_USE_X86INC -const sad_m_by_n_fn_t sad_64x64_sse2_vp9 = vp9_sad64x64_sse2; -const sad_m_by_n_fn_t sad_64x32_sse2_vp9 = vp9_sad64x32_sse2; -const sad_m_by_n_fn_t sad_32x64_sse2_vp9 = vp9_sad32x64_sse2; -const sad_m_by_n_fn_t sad_32x32_sse2_vp9 = vp9_sad32x32_sse2; -const sad_m_by_n_fn_t sad_32x16_sse2_vp9 = vp9_sad32x16_sse2; -const sad_m_by_n_fn_t sad_16x32_sse2_vp9 = vp9_sad16x32_sse2; -const sad_m_by_n_fn_t sad_16x16_sse2_vp9 = vp9_sad16x16_sse2; -const sad_m_by_n_fn_t sad_16x8_sse2_vp9 = vp9_sad16x8_sse2; -const sad_m_by_n_fn_t sad_8x16_sse2_vp9 = vp9_sad8x16_sse2; -const sad_m_by_n_fn_t sad_8x8_sse2_vp9 = vp9_sad8x8_sse2; -const sad_m_by_n_fn_t sad_8x4_sse2_vp9 = vp9_sad8x4_sse2; -#endif -#endif const sad_m_by_n_test_param_t sse2_tests[] = { -#if CONFIG_VP8_ENCODER make_tuple(16, 16, sad_16x16_wmt), make_tuple(8, 16, sad_8x16_wmt), make_tuple(16, 8, sad_16x8_wmt), make_tuple(8, 8, sad_8x8_wmt), make_tuple(4, 4, sad_4x4_wmt), -#endif +}; +INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests)); +#endif // CONFIG_VP8_ENCODER + #if CONFIG_VP9_ENCODER #if CONFIG_USE_X86INC +const sad_m_by_n_fn_vp9_t sad_64x64_sse2_vp9 = vp9_sad64x64_sse2; +const sad_m_by_n_fn_vp9_t sad_64x32_sse2_vp9 = vp9_sad64x32_sse2; +const sad_m_by_n_fn_vp9_t sad_32x64_sse2_vp9 = vp9_sad32x64_sse2; +const sad_m_by_n_fn_vp9_t sad_32x32_sse2_vp9 = vp9_sad32x32_sse2; +const sad_m_by_n_fn_vp9_t sad_32x16_sse2_vp9 = vp9_sad32x16_sse2; +const sad_m_by_n_fn_vp9_t sad_16x32_sse2_vp9 = vp9_sad16x32_sse2; +const sad_m_by_n_fn_vp9_t sad_16x16_sse2_vp9 = vp9_sad16x16_sse2; +const sad_m_by_n_fn_vp9_t sad_16x8_sse2_vp9 = vp9_sad16x8_sse2; +const sad_m_by_n_fn_vp9_t sad_8x16_sse2_vp9 = vp9_sad8x16_sse2; +const sad_m_by_n_fn_vp9_t sad_8x8_sse2_vp9 = vp9_sad8x8_sse2; +const sad_m_by_n_fn_vp9_t sad_8x4_sse2_vp9 = vp9_sad8x4_sse2; +const sad_m_by_n_test_param_vp9_t sse2_vp9_tests[] = { make_tuple(64, 64, sad_64x64_sse2_vp9), make_tuple(64, 32, sad_64x32_sse2_vp9), make_tuple(32, 64, sad_32x64_sse2_vp9), @@ -493,13 +571,9 @@ const sad_m_by_n_test_param_t sse2_tests[] = { make_tuple(8, 16, sad_8x16_sse2_vp9), make_tuple(8, 8, sad_8x8_sse2_vp9), make_tuple(8, 4, sad_8x4_sse2_vp9), -#endif -#endif }; -INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests)); +INSTANTIATE_TEST_CASE_P(SSE2, SADVP9Test, ::testing::ValuesIn(sse2_vp9_tests)); -#if CONFIG_VP9_ENCODER -#if CONFIG_USE_X86INC const sad_n_by_n_by_4_fn_t sad_64x64x4d_sse2 = vp9_sad64x64x4d_sse2; const sad_n_by_n_by_4_fn_t sad_64x32x4d_sse2 = vp9_sad64x32x4d_sse2; const sad_n_by_n_by_4_fn_t sad_32x64x4d_sse2 = vp9_sad32x64x4d_sse2; @@ -523,9 +597,9 @@ INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values( make_tuple(8, 16, sad_8x16x4d_sse2), make_tuple(8, 8, sad_8x8x4d_sse2), make_tuple(8, 4, sad_8x4x4d_sse2))); -#endif -#endif -#endif +#endif // CONFIG_USE_X86INC +#endif // CONFIG_VP9_ENCODER +#endif // HAVE_SSE2 #if HAVE_SSE3 #if CONFIG_VP8_ENCODER @@ -540,8 +614,8 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADx4Test, ::testing::Values( make_tuple(8, 16, sad_8x16x4d_sse3), make_tuple(8, 8, sad_8x8x4d_sse3), make_tuple(4, 4, sad_4x4x4d_sse3))); -#endif -#endif +#endif // CONFIG_VP8_ENCODER +#endif // HAVE_SSE3 #if HAVE_SSSE3 #if CONFIG_USE_X86INC @@ -549,8 +623,8 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADx4Test, ::testing::Values( const sad_m_by_n_fn_t sad_16x16_sse3 = vp8_sad16x16_sse3; INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values( make_tuple(16, 16, sad_16x16_sse3))); -#endif -#endif -#endif +#endif // CONFIG_VP8_ENCODER +#endif // CONFIG_USE_X86INC +#endif // HAVE_SSSE3 } // namespace diff --git a/third_party/googletest/README.libvpx b/third_party/googletest/README.libvpx index 6fdeb8731..7201a67d3 100644 --- a/third_party/googletest/README.libvpx +++ b/third_party/googletest/README.libvpx @@ -12,4 +12,4 @@ failures, various options for running the tests, and XML test report generation. Local Modifications: -None.
\ No newline at end of file +Removed unused declarations of kPathSeparatorString to have warning free build.
\ No newline at end of file diff --git a/third_party/googletest/src/src/gtest-all.cc b/third_party/googletest/src/src/gtest-all.cc index a9a03b2e3..8d906279a 100644 --- a/third_party/googletest/src/src/gtest-all.cc +++ b/third_party/googletest/src/src/gtest-all.cc @@ -7904,7 +7904,6 @@ namespace internal { // of them. const char kPathSeparator = '\\'; const char kAlternatePathSeparator = '/'; -const char kPathSeparatorString[] = "\\"; const char kAlternatePathSeparatorString[] = "/"; # if GTEST_OS_WINDOWS_MOBILE // Windows CE doesn't have a current directory. You should not use @@ -7918,7 +7917,6 @@ const char kCurrentDirectoryString[] = ".\\"; # endif // GTEST_OS_WINDOWS_MOBILE #else const char kPathSeparator = '/'; -const char kPathSeparatorString[] = "/"; const char kCurrentDirectoryString[] = "./"; #endif // GTEST_OS_WINDOWS diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c index 1e645fbdf..f26741f17 100644 --- a/vp8/encoder/denoising.c +++ b/vp8/encoder/denoising.c @@ -136,8 +136,56 @@ int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, sum_diff_thresh= SUM_DIFF_THRESHOLD; if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH; - if (abs(sum_diff) > sum_diff_thresh) + if (abs(sum_diff) > sum_diff_thresh) { + // Before returning to copy the block (i.e., apply no denoising), check + // if we can still apply some (weaker) temporal filtering to this block, + // that would otherwise not be denoised at all. Simplest is to apply + // an additional adjustment to running_avg_y to bring it closer to sig. + // The adjustment is capped by a maximum delta, and chosen such that + // in most cases the resulting sum_diff will be within the + // accceptable range given by sum_diff_thresh. + + // The delta is set by the excess of absolute pixel diff over threshold. + int delta = ((abs(sum_diff) - sum_diff_thresh) >> 8) + 1; + // Only apply the adjustment for max delta up to 3. + if (delta < 4) { + sig -= sig_stride * 16; + mc_running_avg_y -= mc_avg_y_stride * 16; + running_avg_y -= avg_y_stride * 16; + for (r = 0; r < 16; ++r) { + for (c = 0; c < 16; ++c) { + int diff = mc_running_avg_y[c] - sig[c]; + int adjustment = abs(diff); + if (adjustment > delta) + adjustment = delta; + if (diff > 0) { + // Bring denoised signal down. + if (running_avg_y[c] - adjustment < 0) + running_avg_y[c] = 0; + else + running_avg_y[c] = running_avg_y[c] - adjustment; + sum_diff -= adjustment; + } else if (diff < 0) { + // Bring denoised signal up. + if (running_avg_y[c] + adjustment > 255) + running_avg_y[c] = 255; + else + running_avg_y[c] = running_avg_y[c] + adjustment; + sum_diff += adjustment; + } + } + // TODO(marpan): Check here if abs(sum_diff) has gone below the + // threshold sum_diff_thresh, and if so, we can exit the row loop. + sig += sig_stride; + mc_running_avg_y += mc_avg_y_stride; + running_avg_y += avg_y_stride; + } + if (abs(sum_diff) > sum_diff_thresh) + return COPY_BLOCK; + } else { return COPY_BLOCK; + } + } vp8_copy_mem16x16(running_avg_y_start, avg_y_stride, sig_start, sig_stride); return FILTER_BLOCK; @@ -201,6 +249,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, int mv_col; unsigned int motion_magnitude2; unsigned int sse_thresh; + int sse_diff_thresh = 0; MV_REFERENCE_FRAME frame = x->best_reference_frame; MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame; @@ -225,11 +274,16 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, mbmi->need_to_clamp_mvs = x->need_to_clamp_best_mvs; mv_col = x->best_sse_mv.as_mv.col; mv_row = x->best_sse_mv.as_mv.row; + // Bias to zero_mv if small amount of motion. + // Note sse_diff_thresh is intialized to zero, so this ensures + // we will always choose zero_mv for denoising if + // zero_mv_see <= best_sse (i.e., sse_diff <= 0). + if ((unsigned int)(mv_row * mv_row + mv_col * mv_col) + <= NOISE_MOTION_THRESHOLD) + sse_diff_thresh = (int)SSE_DIFF_THRESHOLD; if (frame == INTRA_FRAME || - ((unsigned int)(mv_row *mv_row + mv_col *mv_col) - <= NOISE_MOTION_THRESHOLD && - sse_diff < (int)SSE_DIFF_THRESHOLD)) + sse_diff <= sse_diff_thresh) { /* * Handle intra blocks as referring to last frame with zero motion diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index e95e44fd5..762b7e345 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -2611,9 +2611,8 @@ int vp8_update_entropy(VP8_COMP *cpi, int update) #if OUTPUT_YUV_SRC -void vp8_write_yuv_frame(const char *name, YV12_BUFFER_CONFIG *s) +void vp8_write_yuv_frame(FILE *yuv_file, YV12_BUFFER_CONFIG *s) { - FILE *yuv_file = fopen(name, "ab"); unsigned char *src = s->y_buffer; int h = s->y_height; @@ -2643,12 +2642,9 @@ void vp8_write_yuv_frame(const char *name, YV12_BUFFER_CONFIG *s) src += s->uv_stride; } while (--h); - - fclose(yuv_file); } #endif - static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { VP8_COMMON *cm = &cpi->common; @@ -3895,7 +3891,7 @@ static void encode_frame_to_data_rate #endif #ifdef OUTPUT_YUV_SRC - vp8_write_yuv_frame(cpi->Source); + vp8_write_yuv_frame(yuv_file, cpi->Source); #endif do diff --git a/vp8/encoder/x86/denoising_sse2.c b/vp8/encoder/x86/denoising_sse2.c index 5112f891e..ff439dd64 100644 --- a/vp8/encoder/x86/denoising_sse2.c +++ b/vp8/encoder/x86/denoising_sse2.c @@ -112,9 +112,70 @@ int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, sum_diff_thresh = SUM_DIFF_THRESHOLD; if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH; - if (abs(sum_diff) > sum_diff_thresh) - { + if (abs(sum_diff) > sum_diff_thresh) { + // Before returning to copy the block (i.e., apply no denoising), + // checK if we can still apply some (weaker) temporal filtering to + // this block, that would otherwise not be denoised at all. Simplest + // is to apply an additional adjustment to running_avg_y to bring it + // closer to sig. The adjustment is capped by a maximum delta, and + // chosen such that in most cases the resulting sum_diff will be + // within the accceptable range given by sum_diff_thresh. + + // The delta is set by the excess of absolute pixel diff over the + // threshold. + int delta = ((abs(sum_diff) - sum_diff_thresh) >> 8) + 1; + // Only apply the adjustment for max delta up to 3. + if (delta < 4) { + const __m128i k_delta = _mm_set1_epi8(delta); + sig -= sig_stride * 16; + mc_running_avg_y -= mc_avg_y_stride * 16; + running_avg_y -= avg_y_stride * 16; + for (r = 0; r < 16; ++r) { + __m128i v_running_avg_y = + _mm_loadu_si128((__m128i *)(&running_avg_y[0])); + // Calculate differences. + const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0])); + const __m128i v_mc_running_avg_y = + _mm_loadu_si128((__m128i *)(&mc_running_avg_y[0])); + const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig); + const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y); + // Obtain the sign. FF if diff is negative. + const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0); + // Clamp absolute difference to delta to get the adjustment. + const __m128i adj = + _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta); + // Restore the sign and get positive and negative adjustments. + __m128i padj, nadj; + padj = _mm_andnot_si128(diff_sign, adj); + nadj = _mm_and_si128(diff_sign, adj); + // Calculate filtered value. + v_running_avg_y = _mm_subs_epu8(v_running_avg_y, padj); + v_running_avg_y = _mm_adds_epu8(v_running_avg_y, nadj); + _mm_storeu_si128((__m128i *)running_avg_y, v_running_avg_y); + + // Accumulate the adjustments. + acc_diff = _mm_subs_epi8(acc_diff, padj); + acc_diff = _mm_adds_epi8(acc_diff, nadj); + + // Update pointers for next iteration. + sig += sig_stride; + mc_running_avg_y += mc_avg_y_stride; + running_avg_y += avg_y_stride; + } + { + // Update the sum of all pixel differences of this MB. + union sum_union s; + s.v = acc_diff; + sum_diff = s.e[0] + s.e[1] + s.e[2] + s.e[3] + s.e[4] + s.e[5] + + s.e[6] + s.e[7] + s.e[8] + s.e[9] + s.e[10] + s.e[11] + + s.e[12] + s.e[13] + s.e[14] + s.e[15]; + if (abs(sum_diff) > sum_diff_thresh) { + return COPY_BLOCK; + } + } + } else { return COPY_BLOCK; + } } } diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index 501dd3eb4..4f6f9fa5d 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c @@ -472,70 +472,128 @@ static vpx_codec_err_t vp8e_set_config(vpx_codec_alg_priv_t *ctx, return res; } - int vp8_reverse_trans(int); +static vpx_codec_err_t get_quantizer(vpx_codec_alg_priv_t *ctx, va_list args) +{ + int *const arg = va_arg(args, int *); + if (arg == NULL) + return VPX_CODEC_INVALID_PARAM; + *arg = vp8_get_quantizer(ctx->cpi); + return VPX_CODEC_OK; +} -static vpx_codec_err_t get_param(vpx_codec_alg_priv_t *ctx, - int ctrl_id, - va_list args) +static vpx_codec_err_t get_quantizer64(vpx_codec_alg_priv_t *ctx, va_list args) { - void *arg = va_arg(args, void *); + int *const arg = va_arg(args, int *); + if (arg == NULL) + return VPX_CODEC_INVALID_PARAM; + *arg = vp8_reverse_trans(vp8_get_quantizer(ctx->cpi)); + return VPX_CODEC_OK; +} -#define MAP(id, var) case id: *(RECAST(id, arg)) = var; break +static vpx_codec_err_t update_extracfg(vpx_codec_alg_priv_t *ctx, + const struct vp8_extracfg *extra_cfg) +{ + const vpx_codec_err_t res = validate_config(ctx, &ctx->cfg, extra_cfg, 0); + if (res == VPX_CODEC_OK) { + ctx->vp8_cfg = *extra_cfg; + set_vp8e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg, NULL); + vp8_change_config(ctx->cpi, &ctx->oxcf); + } + return res; +} - if (!arg) - return VPX_CODEC_INVALID_PARAM; +static vpx_codec_err_t set_cpu_used(vpx_codec_alg_priv_t *ctx, va_list args) +{ + struct vp8_extracfg extra_cfg = ctx->vp8_cfg; + extra_cfg.cpu_used = CAST(VP8E_SET_CPUUSED, args); + return update_extracfg(ctx, &extra_cfg); +} - switch (ctrl_id) - { - MAP(VP8E_GET_LAST_QUANTIZER, vp8_get_quantizer(ctx->cpi)); - MAP(VP8E_GET_LAST_QUANTIZER_64, vp8_reverse_trans(vp8_get_quantizer(ctx->cpi))); - } +static vpx_codec_err_t set_enable_auto_alt_ref(vpx_codec_alg_priv_t *ctx, + va_list args) +{ + struct vp8_extracfg extra_cfg = ctx->vp8_cfg; + extra_cfg.enable_auto_alt_ref = CAST(VP8E_SET_ENABLEAUTOALTREF, args); + return update_extracfg(ctx, &extra_cfg); +} - return VPX_CODEC_OK; -#undef MAP +static vpx_codec_err_t set_noise_sensitivity(vpx_codec_alg_priv_t *ctx, + va_list args) +{ + struct vp8_extracfg extra_cfg = ctx->vp8_cfg; + extra_cfg.noise_sensitivity = CAST(VP8E_SET_NOISE_SENSITIVITY, args); + return update_extracfg(ctx, &extra_cfg); } +static vpx_codec_err_t set_sharpness(vpx_codec_alg_priv_t *ctx, va_list args) +{ + struct vp8_extracfg extra_cfg = ctx->vp8_cfg; + extra_cfg.Sharpness = CAST(VP8E_SET_SHARPNESS, args); + return update_extracfg(ctx, &extra_cfg); +} + +static vpx_codec_err_t set_static_thresh(vpx_codec_alg_priv_t *ctx, + va_list args) +{ + struct vp8_extracfg extra_cfg = ctx->vp8_cfg; + extra_cfg.static_thresh = CAST(VP8E_SET_STATIC_THRESHOLD, args); + return update_extracfg(ctx, &extra_cfg); +} -static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx, - int ctrl_id, - va_list args) +static vpx_codec_err_t set_token_partitions(vpx_codec_alg_priv_t *ctx, + va_list args) { - vpx_codec_err_t res = VPX_CODEC_OK; - struct vp8_extracfg xcfg = ctx->vp8_cfg; + struct vp8_extracfg extra_cfg = ctx->vp8_cfg; + extra_cfg.token_partitions = CAST(VP8E_SET_TOKEN_PARTITIONS, args); + return update_extracfg(ctx, &extra_cfg); +} -#define MAP(id, var) case id: var = CAST(id, args); break; +static vpx_codec_err_t set_arnr_max_frames(vpx_codec_alg_priv_t *ctx, + va_list args) +{ + struct vp8_extracfg extra_cfg = ctx->vp8_cfg; + extra_cfg.arnr_max_frames = CAST(VP8E_SET_ARNR_MAXFRAMES, args); + return update_extracfg(ctx, &extra_cfg); +} - switch (ctrl_id) - { - MAP(VP8E_SET_CPUUSED, xcfg.cpu_used); - MAP(VP8E_SET_ENABLEAUTOALTREF, xcfg.enable_auto_alt_ref); - MAP(VP8E_SET_NOISE_SENSITIVITY, xcfg.noise_sensitivity); - MAP(VP8E_SET_SHARPNESS, xcfg.Sharpness); - MAP(VP8E_SET_STATIC_THRESHOLD, xcfg.static_thresh); - MAP(VP8E_SET_TOKEN_PARTITIONS, xcfg.token_partitions); - - MAP(VP8E_SET_ARNR_MAXFRAMES, xcfg.arnr_max_frames); - MAP(VP8E_SET_ARNR_STRENGTH , xcfg.arnr_strength); - MAP(VP8E_SET_ARNR_TYPE , xcfg.arnr_type); - MAP(VP8E_SET_TUNING, xcfg.tuning); - MAP(VP8E_SET_CQ_LEVEL, xcfg.cq_level); - MAP(VP8E_SET_MAX_INTRA_BITRATE_PCT, xcfg.rc_max_intra_bitrate_pct); +static vpx_codec_err_t set_arnr_strength(vpx_codec_alg_priv_t *ctx, + va_list args) +{ + struct vp8_extracfg extra_cfg = ctx->vp8_cfg; + extra_cfg.arnr_strength = CAST(VP8E_SET_ARNR_STRENGTH, args); + return update_extracfg(ctx, &extra_cfg); +} - } +static vpx_codec_err_t set_arnr_type(vpx_codec_alg_priv_t *ctx, va_list args) +{ + struct vp8_extracfg extra_cfg = ctx->vp8_cfg; + extra_cfg.arnr_type = CAST(VP8E_SET_ARNR_TYPE, args); + return update_extracfg(ctx, &extra_cfg); +} - res = validate_config(ctx, &ctx->cfg, &xcfg, 0); +static vpx_codec_err_t set_tuning(vpx_codec_alg_priv_t *ctx, va_list args) +{ + struct vp8_extracfg extra_cfg = ctx->vp8_cfg; + extra_cfg.tuning = CAST(VP8E_SET_TUNING, args); + return update_extracfg(ctx, &extra_cfg); +} - if (!res) - { - ctx->vp8_cfg = xcfg; - set_vp8e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg, NULL); - vp8_change_config(ctx->cpi, &ctx->oxcf); - } +static vpx_codec_err_t set_cq_level(vpx_codec_alg_priv_t *ctx, va_list args) +{ + struct vp8_extracfg extra_cfg = ctx->vp8_cfg; + extra_cfg.cq_level = CAST(VP8E_SET_CQ_LEVEL, args); + return update_extracfg(ctx, &extra_cfg); +} - return res; -#undef MAP +static vpx_codec_err_t set_rc_max_intra_bitrate_pct(vpx_codec_alg_priv_t *ctx, + va_list args) +{ + struct vp8_extracfg extra_cfg = ctx->vp8_cfg; + extra_cfg.rc_max_intra_bitrate_pct = + CAST(VP8E_SET_MAX_INTRA_BITRATE_PCT, args); + return update_extracfg(ctx, &extra_cfg); } static vpx_codec_err_t vp8e_mr_alloc_mem(const vpx_codec_enc_cfg_t *cfg, @@ -976,8 +1034,7 @@ static const vpx_codec_cx_pkt_t *vp8e_get_cxdata(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t vp8e_set_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) + va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); @@ -996,8 +1053,7 @@ static vpx_codec_err_t vp8e_set_reference(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t vp8e_get_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) + va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); @@ -1016,12 +1072,10 @@ static vpx_codec_err_t vp8e_get_reference(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t vp8e_set_previewpp(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) + va_list args) { #if CONFIG_POSTPROC vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); - (void)ctr_id; if (data) { @@ -1032,7 +1086,6 @@ static vpx_codec_err_t vp8e_set_previewpp(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_INVALID_PARAM; #else (void)ctx; - (void)ctr_id; (void)args; return VPX_CODEC_INCAPABLE; #endif @@ -1090,8 +1143,7 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx) } static vpx_codec_err_t vp8e_update_entropy(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) + va_list args) { int update = va_arg(args, int); vp8_update_entropy(ctx->cpi, update); @@ -1100,8 +1152,7 @@ static vpx_codec_err_t vp8e_update_entropy(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t vp8e_update_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) + va_list args) { int update = va_arg(args, int); vp8_update_reference(ctx->cpi, update); @@ -1109,8 +1160,7 @@ static vpx_codec_err_t vp8e_update_reference(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t vp8e_use_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) + va_list args) { int reference_flag = va_arg(args, int); vp8_use_as_reference(ctx->cpi, reference_flag); @@ -1118,7 +1168,6 @@ static vpx_codec_err_t vp8e_use_reference(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t vp8e_set_roi_map(vpx_codec_alg_priv_t *ctx, - int ctr_id, va_list args) { vpx_roi_map_t *data = va_arg(args, vpx_roi_map_t *); @@ -1138,8 +1187,7 @@ static vpx_codec_err_t vp8e_set_roi_map(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t vp8e_set_activemap(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) + va_list args) { vpx_active_map_t *data = va_arg(args, vpx_active_map_t *); @@ -1158,8 +1206,7 @@ static vpx_codec_err_t vp8e_set_activemap(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t vp8e_set_scalemode(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) + va_list args) { vpx_scaling_mode_t *data = va_arg(args, vpx_scaling_mode_t *); @@ -1197,20 +1244,20 @@ static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] = {VP8E_SET_ROI_MAP, vp8e_set_roi_map}, {VP8E_SET_ACTIVEMAP, vp8e_set_activemap}, {VP8E_SET_SCALEMODE, vp8e_set_scalemode}, - {VP8E_SET_CPUUSED, set_param}, - {VP8E_SET_NOISE_SENSITIVITY, set_param}, - {VP8E_SET_ENABLEAUTOALTREF, set_param}, - {VP8E_SET_SHARPNESS, set_param}, - {VP8E_SET_STATIC_THRESHOLD, set_param}, - {VP8E_SET_TOKEN_PARTITIONS, set_param}, - {VP8E_GET_LAST_QUANTIZER, get_param}, - {VP8E_GET_LAST_QUANTIZER_64, get_param}, - {VP8E_SET_ARNR_MAXFRAMES, set_param}, - {VP8E_SET_ARNR_STRENGTH , set_param}, - {VP8E_SET_ARNR_TYPE , set_param}, - {VP8E_SET_TUNING, set_param}, - {VP8E_SET_CQ_LEVEL, set_param}, - {VP8E_SET_MAX_INTRA_BITRATE_PCT, set_param}, + {VP8E_SET_CPUUSED, set_cpu_used}, + {VP8E_SET_NOISE_SENSITIVITY, set_noise_sensitivity}, + {VP8E_SET_ENABLEAUTOALTREF, set_enable_auto_alt_ref}, + {VP8E_SET_SHARPNESS, set_sharpness}, + {VP8E_SET_STATIC_THRESHOLD, set_static_thresh}, + {VP8E_SET_TOKEN_PARTITIONS, set_token_partitions}, + {VP8E_GET_LAST_QUANTIZER, get_quantizer}, + {VP8E_GET_LAST_QUANTIZER_64, get_quantizer64}, + {VP8E_SET_ARNR_MAXFRAMES, set_arnr_max_frames}, + {VP8E_SET_ARNR_STRENGTH , set_arnr_strength}, + {VP8E_SET_ARNR_TYPE , set_arnr_type}, + {VP8E_SET_TUNING, set_tuning}, + {VP8E_SET_CQ_LEVEL, set_cq_level}, + {VP8E_SET_MAX_INTRA_BITRATE_PCT, set_rc_max_intra_bitrate_pct}, { -1, NULL}, }; diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c index d6dfb1643..56394fb1c 100644 --- a/vp8/vp8_dx_iface.c +++ b/vp8/vp8_dx_iface.c @@ -576,8 +576,7 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, static vpx_codec_err_t vp8_set_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) + va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); @@ -598,8 +597,7 @@ static vpx_codec_err_t vp8_set_reference(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t vp8_get_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) + va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); @@ -620,7 +618,6 @@ static vpx_codec_err_t vp8_get_reference(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t vp8_set_postproc(vpx_codec_alg_priv_t *ctx, - int ctr_id, va_list args) { #if CONFIG_POSTPROC @@ -640,31 +637,56 @@ static vpx_codec_err_t vp8_set_postproc(vpx_codec_alg_priv_t *ctx, #endif } -static vpx_codec_err_t vp8_set_dbg_options(vpx_codec_alg_priv_t *ctx, - int ctrl_id, - va_list args) -{ + +static vpx_codec_err_t vp8_set_dbg_color_ref_frame(vpx_codec_alg_priv_t *ctx, + va_list args) { #if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC - int data = va_arg(args, int); + ctx->dbg_color_ref_frame_flag = va_arg(args, int); + return VPX_CODEC_OK; +#else + (void)ctx; + (void)args; + return VPX_CODEC_INCAPABLE; +#endif +} -#define MAP(id, var) case id: var = data; break; +static vpx_codec_err_t vp8_set_dbg_color_mb_modes(vpx_codec_alg_priv_t *ctx, + va_list args) { +#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC + ctx->dbg_color_mb_modes_flag = va_arg(args, int); + return VPX_CODEC_OK; +#else + (void)ctx; + (void)args; + return VPX_CODEC_INCAPABLE; +#endif +} - switch (ctrl_id) - { - MAP (VP8_SET_DBG_COLOR_REF_FRAME, ctx->dbg_color_ref_frame_flag); - MAP (VP8_SET_DBG_COLOR_MB_MODES, ctx->dbg_color_mb_modes_flag); - MAP (VP8_SET_DBG_COLOR_B_MODES, ctx->dbg_color_b_modes_flag); - MAP (VP8_SET_DBG_DISPLAY_MV, ctx->dbg_display_mv_flag); - } +static vpx_codec_err_t vp8_set_dbg_color_b_modes(vpx_codec_alg_priv_t *ctx, + va_list args) { +#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC + ctx->dbg_color_b_modes_flag = va_arg(args, int); + return VPX_CODEC_OK; +#else + (void)ctx; + (void)args; + return VPX_CODEC_INCAPABLE; +#endif +} - return VPX_CODEC_OK; +static vpx_codec_err_t vp8_set_dbg_display_mv(vpx_codec_alg_priv_t *ctx, + va_list args) { +#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC + ctx->dbg_display_mv_flag = va_arg(args, int); + return VPX_CODEC_OK; #else - return VPX_CODEC_INCAPABLE; + (void)ctx; + (void)args; + return VPX_CODEC_INCAPABLE; #endif } static vpx_codec_err_t vp8_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { int *update_info = va_arg(args, int *); @@ -685,7 +707,6 @@ static vpx_codec_err_t vp8_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, extern int vp8dx_references_buffer( VP8_COMMON *oci, int ref_frame ); static vpx_codec_err_t vp8_get_last_ref_frame(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { int *ref_info = va_arg(args, int *); @@ -706,7 +727,6 @@ static vpx_codec_err_t vp8_get_last_ref_frame(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t vp8_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { @@ -725,7 +745,6 @@ static vpx_codec_err_t vp8_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t vp8_set_decryptor(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { vpx_decrypt_init *init = va_arg(args, vpx_decrypt_init *); @@ -748,10 +767,10 @@ vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] = {VP8_SET_REFERENCE, vp8_set_reference}, {VP8_COPY_REFERENCE, vp8_get_reference}, {VP8_SET_POSTPROC, vp8_set_postproc}, - {VP8_SET_DBG_COLOR_REF_FRAME, vp8_set_dbg_options}, - {VP8_SET_DBG_COLOR_MB_MODES, vp8_set_dbg_options}, - {VP8_SET_DBG_COLOR_B_MODES, vp8_set_dbg_options}, - {VP8_SET_DBG_DISPLAY_MV, vp8_set_dbg_options}, + {VP8_SET_DBG_COLOR_REF_FRAME, vp8_set_dbg_color_ref_frame}, + {VP8_SET_DBG_COLOR_MB_MODES, vp8_set_dbg_color_mb_modes}, + {VP8_SET_DBG_COLOR_B_MODES, vp8_set_dbg_color_b_modes}, + {VP8_SET_DBG_DISPLAY_MV, vp8_set_dbg_display_mv}, {VP8D_GET_LAST_REF_UPDATES, vp8_get_last_ref_updates}, {VP8D_GET_FRAME_CORRUPTED, vp8_get_frame_corrupted}, {VP8D_GET_LAST_REF_USED, vp8_get_last_ref_frame}, diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 8ca356dd6..9088b0bde 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -228,8 +228,6 @@ typedef struct macroblockd { DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]); int lossless; - /* Inverse transform function pointers. */ - void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob); int corrupted; diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 0ea06c3e5..09ce72ef2 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -431,6 +431,12 @@ specialize qw/vp9_variance8x16 mmx/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_variance8x8 mmx/, "$sse2_x86inc"; +add_proto qw/void vp9_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; +specialize qw/vp9_get8x8var mmx/, "$sse2_x86inc"; + +add_proto qw/void vp9_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; +specialize qw/vp9_get16x16var avx2/, "$sse2_x86inc"; + add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_variance8x4/, "$sse2_x86inc"; @@ -520,82 +526,82 @@ specialize qw/vp9_sub_pixel_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; -add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad64x64/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad32x64/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad64x32/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad32x16/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad16x32/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad32x32/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad16x16 mmx/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad16x8 mmx/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad8x16 mmx/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad8x8 mmx/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad8x4/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad4x8/, "$sse_x86inc"; -add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad4x4 mmx/, "$sse_x86inc"; -add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad64x64_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad32x64_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad64x32_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad32x16_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad32x32_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad16x8_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad8x16_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad8x8_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad8x4_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad4x8_avg/, "$sse_x86inc"; -add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad4x4_avg/, "$sse_x86inc"; add_proto qw/void vp9_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; diff --git a/vp9/common/x86/vp9_idct_intrin_ssse3.c b/vp9/common/x86/vp9_idct_intrin_ssse3.c index 03ee2fdbe..73bf5d1d7 100644 --- a/vp9/common/x86/vp9_idct_intrin_ssse3.c +++ b/vp9/common/x86/vp9_idct_intrin_ssse3.c @@ -16,7 +16,7 @@ #include <tmmintrin.h> // SSSE3 #include "vp9/common/x86/vp9_idct_intrin_sse2.h" -static void idct16_8col(__m128i *in) { +static void idct16_8col(__m128i *in, int round) { const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); @@ -36,6 +36,8 @@ static void idct16_8col(__m128i *in) { const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i k__cospi_p16_p16_x2 = pair_set_epi16(23170, 23170); + const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); + const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); __m128i v[16], u[16], s[16], t[16]; @@ -266,28 +268,80 @@ static void idct16_8col(__m128i *in) { t[15] = _mm_add_epi16(s[12], s[15]); // stage 6 - s[0] = _mm_add_epi16(t[0], t[7]); - s[1] = _mm_add_epi16(t[1], t[6]); - s[2] = _mm_add_epi16(t[2], t[5]); - s[3] = _mm_add_epi16(t[3], t[4]); - s[4] = _mm_sub_epi16(t[3], t[4]); - s[5] = _mm_sub_epi16(t[2], t[5]); - s[6] = _mm_sub_epi16(t[1], t[6]); - s[7] = _mm_sub_epi16(t[0], t[7]); - s[8] = t[8]; - s[9] = t[9]; - - u[0] = _mm_sub_epi16(t[13], t[10]); - u[1] = _mm_add_epi16(t[13], t[10]); - u[2] = _mm_sub_epi16(t[12], t[11]); - u[3] = _mm_add_epi16(t[12], t[11]); - - s[10] = _mm_mulhrs_epi16(u[0], k__cospi_p16_p16_x2); - s[13] = _mm_mulhrs_epi16(u[1], k__cospi_p16_p16_x2); - s[11] = _mm_mulhrs_epi16(u[2], k__cospi_p16_p16_x2); - s[12] = _mm_mulhrs_epi16(u[3], k__cospi_p16_p16_x2); - s[14] = t[14]; - s[15] = t[15]; + if (round == 1) { + s[0] = _mm_add_epi16(t[0], t[7]); + s[1] = _mm_add_epi16(t[1], t[6]); + s[2] = _mm_add_epi16(t[2], t[5]); + s[3] = _mm_add_epi16(t[3], t[4]); + s[4] = _mm_sub_epi16(t[3], t[4]); + s[5] = _mm_sub_epi16(t[2], t[5]); + s[6] = _mm_sub_epi16(t[1], t[6]); + s[7] = _mm_sub_epi16(t[0], t[7]); + s[8] = t[8]; + s[9] = t[9]; + + u[0] = _mm_unpacklo_epi16(t[10], t[13]); + u[1] = _mm_unpackhi_epi16(t[10], t[13]); + u[2] = _mm_unpacklo_epi16(t[11], t[12]); + u[3] = _mm_unpackhi_epi16(t[11], t[12]); + + v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16); + v[1] = _mm_madd_epi16(u[1], k__cospi_m16_p16); + v[2] = _mm_madd_epi16(u[0], k__cospi_p16_p16); + v[3] = _mm_madd_epi16(u[1], k__cospi_p16_p16); + v[4] = _mm_madd_epi16(u[2], k__cospi_m16_p16); + v[5] = _mm_madd_epi16(u[3], k__cospi_m16_p16); + v[6] = _mm_madd_epi16(u[2], k__cospi_p16_p16); + v[7] = _mm_madd_epi16(u[3], k__cospi_p16_p16); + + u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); + u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); + u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); + u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); + u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); + u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); + u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); + u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); + + u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); + u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); + u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); + u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); + u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); + u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); + u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); + u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); + + s[10] = _mm_packs_epi32(u[0], u[1]); + s[13] = _mm_packs_epi32(u[2], u[3]); + s[11] = _mm_packs_epi32(u[4], u[5]); + s[12] = _mm_packs_epi32(u[6], u[7]); + s[14] = t[14]; + s[15] = t[15]; + } else { + s[0] = _mm_add_epi16(t[0], t[7]); + s[1] = _mm_add_epi16(t[1], t[6]); + s[2] = _mm_add_epi16(t[2], t[5]); + s[3] = _mm_add_epi16(t[3], t[4]); + s[4] = _mm_sub_epi16(t[3], t[4]); + s[5] = _mm_sub_epi16(t[2], t[5]); + s[6] = _mm_sub_epi16(t[1], t[6]); + s[7] = _mm_sub_epi16(t[0], t[7]); + s[8] = t[8]; + s[9] = t[9]; + + u[0] = _mm_sub_epi16(t[13], t[10]); + u[1] = _mm_add_epi16(t[13], t[10]); + u[2] = _mm_sub_epi16(t[12], t[11]); + u[3] = _mm_add_epi16(t[12], t[11]); + + s[10] = _mm_mulhrs_epi16(u[0], k__cospi_p16_p16_x2); + s[13] = _mm_mulhrs_epi16(u[1], k__cospi_p16_p16_x2); + s[11] = _mm_mulhrs_epi16(u[2], k__cospi_p16_p16_x2); + s[12] = _mm_mulhrs_epi16(u[3], k__cospi_p16_p16_x2); + s[14] = t[14]; + s[15] = t[15]; + } // stage 7 in[0] = _mm_add_epi16(s[0], s[15]); @@ -308,10 +362,10 @@ static void idct16_8col(__m128i *in) { in[15] = _mm_sub_epi16(s[0], s[15]); } -static void idct16_sse2(__m128i *in0, __m128i *in1) { +static void idct16_sse2(__m128i *in0, __m128i *in1, int round) { array_transpose_16x16(in0, in1); - idct16_8col(in0); - idct16_8col(in1); + idct16_8col(in0, round); + idct16_8col(in1, round); } void vp9_idct16x16_256_add_ssse3(const int16_t *input, uint8_t *dest, @@ -322,8 +376,8 @@ void vp9_idct16x16_256_add_ssse3(const int16_t *input, uint8_t *dest, input += 8; load_buffer_8x16(input, in1); - idct16_sse2(in0, in1); - idct16_sse2(in0, in1); + idct16_sse2(in0, in1, 0); + idct16_sse2(in0, in1, 1); write_buffer_8x16(dest, in0, stride); dest += 8; diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 121b1f2cd..fc70035f2 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -195,30 +195,32 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, struct macroblockd_plane *const pd = &xd->plane[plane]; if (eob > 0) { TX_TYPE tx_type; - const PLANE_TYPE plane_type = pd->plane_type; int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - switch (tx_size) { - case TX_4X4: - tx_type = get_tx_type_4x4(plane_type, xd, block); - if (tx_type == DCT_DCT) - xd->itxm_add(dqcoeff, dst, stride, eob); - else - vp9_iht4x4_16_add(dqcoeff, dst, stride, tx_type); - break; - case TX_8X8: - tx_type = get_tx_type(plane_type, xd); - vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_16X16: - tx_type = get_tx_type(plane_type, xd); - vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_32X32: - tx_type = DCT_DCT; - vp9_idct32x32_add(dqcoeff, dst, stride, eob); - break; - default: - assert(0 && "Invalid transform size"); + if (xd->lossless) { + tx_type = DCT_DCT; + vp9_iwht4x4_add(dqcoeff, dst, stride, eob); + } else { + const PLANE_TYPE plane_type = pd->plane_type; + switch (tx_size) { + case TX_4X4: + tx_type = get_tx_type_4x4(plane_type, xd, block); + vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_8X8: + tx_type = get_tx_type(plane_type, xd); + vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_16X16: + tx_type = get_tx_type(plane_type, xd); + vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_32X32: + tx_type = DCT_DCT; + vp9_idct32x32_add(dqcoeff, dst, stride, eob); + break; + default: + assert(0 && "Invalid transform size"); + } } if (eob == 1) { @@ -588,8 +590,6 @@ static void setup_quantization(VP9_COMMON *const cm, MACROBLOCKD *const xd, cm->y_dc_delta_q == 0 && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; - - xd->itxm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add; } static INTERP_FILTER read_interp_filter(struct vp9_read_bit_buffer *rb) { diff --git a/vp9/encoder/vp9_aq_complexity.c b/vp9/encoder/vp9_aq_complexity.c index 47ad8d8cc..0d6b41d15 100644 --- a/vp9/encoder/vp9_aq_complexity.c +++ b/vp9/encoder/vp9_aq_complexity.c @@ -47,11 +47,21 @@ void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) { // Use some of the segments for in frame Q adjustment. for (segment = 1; segment < 2; segment++) { - const int qindex_delta = + int qindex_delta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex, in_frame_q_adj_ratio[segment]); - vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); - vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); + + // For AQ mode 2, we dont allow Q0 in a segment if the base Q is not 0. + // Q0 (lossless) implies 4x4 only and in AQ mode 2 a segment Q delta + // is sometimes applied without going back around the rd loop. + // This could lead to an illegal combination of partition size and q. + if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) { + qindex_delta = -cm->base_qindex + 1; + } + if ((cm->base_qindex + qindex_delta) > 0) { + vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); + vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); + } } } } diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index c406860a0..c3cd93b78 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -109,6 +109,7 @@ struct macroblock { MV pred_mv[MAX_REF_FRAMES]; void (*fwd_txm4x4)(const int16_t *input, int16_t *output, int stride); + void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob); }; #ifdef __cplusplus diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index ef0871873..c1db8263e 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -76,18 +76,6 @@ typedef struct { unsigned int var; } diff; -static void get_sse_sum_8x8(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum) { - variance(src, src_stride, ref, ref_stride, 8, 8, sse, sum); -} - -static void get_sse_sum_16x16(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum) { - variance(src, src_stride, ref, ref_stride, 16, 16, sse, sum); -} - static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs) { @@ -490,8 +478,8 @@ static void choose_partitioning(VP9_COMP *cpi, unsigned int sse = 0; int sum = 0; if (x_idx < pixels_wide && y_idx < pixels_high) - get_sse_sum_8x8(s + y_idx * sp + x_idx, sp, - d + y_idx * dp + x_idx, dp, &sse, &sum); + vp9_get8x8var(s + y_idx * sp + x_idx, sp, + d + y_idx * dp + x_idx, dp, &sse, &sum); fill_variance(sse, sum, 64, &vst->split[k].part_variances.none); } } @@ -1226,9 +1214,9 @@ static void set_source_var_based_partition(VP9_COMP *cpi, int b_offset = b_mi_row * MI_SIZE * src_stride + b_mi_col * MI_SIZE; - get_sse_sum_16x16(src + b_offset, src_stride, - pre_src + b_offset, pre_stride, - &d16[j].sse, &d16[j].sum); + vp9_get16x16var(src + b_offset, src_stride, + pre_src + b_offset, pre_stride, + &d16[j].sse, &d16[j].sum); d16[j].var = d16[j].sse - (((uint32_t)d16[j].sum * d16[j].sum) >> 8); @@ -1303,14 +1291,14 @@ static int is_background(VP9_COMP *cpi, const TileInfo *const tile, if (row8x8_remaining >= MI_BLOCK_SIZE && col8x8_remaining >= MI_BLOCK_SIZE) { this_sad = cpi->fn_ptr[BLOCK_64X64].sdf(src, src_stride, - pre, pre_stride, 0x7fffffff); + pre, pre_stride); threshold = (1 << 12); } else { int r, c; for (r = 0; r < row8x8_remaining; r += 2) for (c = 0; c < col8x8_remaining; c += 2) - this_sad += cpi->fn_ptr[BLOCK_16X16].sdf(src, src_stride, pre, - pre_stride, 0x7fffffff); + this_sad += cpi->fn_ptr[BLOCK_16X16].sdf(src, src_stride, + pre, pre_stride); threshold = (row8x8_remaining * col8x8_remaining) << 6; } @@ -2381,22 +2369,6 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { sizeof(*xd->above_seg_context) * aligned_mi_cols); } -static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { - if (lossless) { - // printf("Switching to lossless\n"); - cpi->mb.fwd_txm4x4 = vp9_fwht4x4; - cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add; - cpi->mb.optimize = 0; - cpi->common.lf.filter_level = 0; - cpi->zbin_mode_boost_enabled = 0; - cpi->common.tx_mode = ONLY_4X4; - } else { - // printf("Not lossless\n"); - cpi->mb.fwd_txm4x4 = vp9_fdct4x4; - cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add; - } -} - static int check_dual_ref_flags(VP9_COMP *cpi) { const int ref_flags = cpi->ref_frame_flags; @@ -2433,7 +2405,7 @@ static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) { } static TX_MODE select_tx_mode(const VP9_COMP *cpi) { - if (cpi->oxcf.lossless) { + if (cpi->mb.e_mbd.lossless) { return ONLY_4X4; } else if (cpi->common.current_video_frame == 0) { return TX_MODE_SELECT; @@ -3023,13 +2995,21 @@ static void encode_frame_internal(VP9_COMP *cpi) { vp9_zero(rd_opt->tx_select_diff); vp9_zero(rd_opt->tx_select_threshes); - cm->tx_mode = select_tx_mode(cpi); - cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; - switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless); + + cm->tx_mode = select_tx_mode(cpi); + + cpi->mb.fwd_txm4x4 = cpi->mb.e_mbd.lossless ? vp9_fwht4x4 : vp9_fdct4x4; + cpi->mb.itxm_add = cpi->mb.e_mbd.lossless ? vp9_iwht4x4_add : vp9_idct4x4_add; + + if (cpi->mb.e_mbd.lossless) { + cpi->mb.optimize = 0; + cpi->common.lf.filter_level = 0; + cpi->zbin_mode_boost_enabled = 0; + } vp9_frame_init_quantizer(cpi); @@ -3369,7 +3349,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8)); } else { mbmi->skip = 1; - if (output_enabled) + if (output_enabled && + !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) cm->counts.skip[vp9_get_skip_context(xd)][1]++; reset_skip_context(xd, MAX(bsize, BLOCK_8X8)); } diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 3b231b7f2..8581e6117 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -406,7 +406,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, // this is like vp9_short_idct4x4 but has a special case around eob<=1 // which is significant (not just an optimization) for the lossless // case. - xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); + x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); break; default: assert(0 && "Invalid transform size"); @@ -428,7 +428,7 @@ static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, vp9_xform_quant(x, plane, block, plane_bsize, tx_size); if (p->eobs[block] > 0) - xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); + x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); } void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) { @@ -574,7 +574,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, // this is like vp9_short_idct4x4 but has a special case around eob<=1 // which is significant (not just an optimization) for the lossless // case. - xd->itxm_add(dqcoeff, dst, dst_stride, *eob); + x->itxm_add(dqcoeff, dst, dst_stride, *eob); else vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type); } diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 1f68f03c4..0ebc93638 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -393,11 +393,6 @@ static void set_speed_features(VP9_COMP *cpi) { // Set rd thresholds based on mode and speed setting vp9_set_rd_speed_thresholds(cpi); vp9_set_rd_speed_thresholds_sub8x8(cpi); - - cpi->mb.fwd_txm4x4 = vp9_fdct4x4; - if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) { - cpi->mb.fwd_txm4x4 = vp9_fwht4x4; - } } static void alloc_raw_frame_buffers(VP9_COMP *cpi) { @@ -596,16 +591,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { if (cpi->oxcf.mode == REALTIME) cpi->oxcf.play_alternate = 0; - cpi->oxcf.lossless = oxcf->lossless; - if (cpi->oxcf.lossless) { - // In lossless mode, make sure right quantizer range and correct transform - // is set. - cpi->oxcf.worst_allowed_q = 0; - cpi->oxcf.best_allowed_q = 0; - cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add; - } else { - cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add; - } rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; @@ -627,36 +612,30 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { // local file playback mode == really big buffer if (cpi->oxcf.rc_mode == RC_MODE_VBR) { - cpi->oxcf.starting_buffer_level = 60000; - cpi->oxcf.optimal_buffer_level = 60000; - cpi->oxcf.maximum_buffer_size = 240000; + cpi->oxcf.starting_buffer_level_ms = 60000; + cpi->oxcf.optimal_buffer_level_ms = 60000; + cpi->oxcf.maximum_buffer_size_ms = 240000; } - // Convert target bandwidth from Kbit/s to Bit/s - cpi->oxcf.target_bandwidth *= 1000; - - cpi->oxcf.starting_buffer_level = - vp9_rescale(cpi->oxcf.starting_buffer_level, - cpi->oxcf.target_bandwidth, 1000); + rc->starting_buffer_level = vp9_rescale(cpi->oxcf.starting_buffer_level_ms, + cpi->oxcf.target_bandwidth, 1000); // Set or reset optimal and maximum buffer levels. - if (cpi->oxcf.optimal_buffer_level == 0) - cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8; + if (cpi->oxcf.optimal_buffer_level_ms == 0) + rc->optimal_buffer_level = cpi->oxcf.target_bandwidth / 8; else - cpi->oxcf.optimal_buffer_level = - vp9_rescale(cpi->oxcf.optimal_buffer_level, - cpi->oxcf.target_bandwidth, 1000); + rc->optimal_buffer_level = vp9_rescale(cpi->oxcf.optimal_buffer_level_ms, + cpi->oxcf.target_bandwidth, 1000); - if (cpi->oxcf.maximum_buffer_size == 0) - cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8; + if (cpi->oxcf.maximum_buffer_size_ms == 0) + rc->maximum_buffer_size = cpi->oxcf.target_bandwidth / 8; else - cpi->oxcf.maximum_buffer_size = - vp9_rescale(cpi->oxcf.maximum_buffer_size, - cpi->oxcf.target_bandwidth, 1000); + rc->maximum_buffer_size = vp9_rescale(cpi->oxcf.maximum_buffer_size_ms, + cpi->oxcf.target_bandwidth, 1000); // Under a configuration change, where maximum_buffer_size may change, // keep buffer level clipped to the maximum allowed buffer size. - rc->bits_off_target = MIN(rc->bits_off_target, cpi->oxcf.maximum_buffer_size); - rc->buffer_level = MIN(rc->buffer_level, cpi->oxcf.maximum_buffer_size); + rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size); + rc->buffer_level = MIN(rc->buffer_level, rc->maximum_buffer_size); // Set up frame rate and related parameters rate control values. vp9_new_framerate(cpi, cpi->oxcf.framerate); @@ -1442,21 +1421,6 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, vp8_yv12_extend_frame_borders_c(dst); } -static int find_fp_qindex() { - int i; - - for (i = 0; i < QINDEX_RANGE; i++) { - if (vp9_convert_qindex_to_q(i) >= 30.0) { - break; - } - } - - if (i == QINDEX_RANGE) - i--; - - return i; -} - #define WRITE_RECON_BUFFER 0 #if WRITE_RECON_BUFFER void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) { @@ -2118,8 +2082,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_clear_system_state(); - vp9_zero(cpi->rd.tx_select_threshes); - #if CONFIG_VP9_POSTPROC if (cpi->oxcf.noise_sensitivity > 0) { int l = 0; @@ -2313,17 +2275,6 @@ static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, encode_frame_to_data_rate(cpi, size, dest, frame_flags); } -static void Pass1Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, - unsigned int *frame_flags) { - (void) size; - (void) dest; - (void) frame_flags; - - vp9_rc_get_first_pass_params(cpi); - vp9_set_quantizer(&cpi->common, find_fp_qindex()); - vp9_first_pass(cpi); -} - static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, unsigned int *frame_flags) { cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; @@ -2663,7 +2614,10 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (cpi->pass == 1 && (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) { - Pass1Encode(cpi, size, dest, frame_flags); + const int lossless = is_lossless_requested(&cpi->oxcf); + cpi->mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4; + cpi->mb.itxm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add; + vp9_first_pass(cpi); } else if (cpi->pass == 2 && (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) { Pass2Encode(cpi, size, dest, frame_flags); diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 47c901975..1ba763414 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -216,9 +216,9 @@ typedef struct VP9EncoderConfig { int over_shoot_pct; // buffering parameters - int64_t starting_buffer_level; // in seconds - int64_t optimal_buffer_level; - int64_t maximum_buffer_size; + int64_t starting_buffer_level_ms; + int64_t optimal_buffer_level_ms; + int64_t maximum_buffer_size_ms; // Frame drop threshold. int drop_frames_water_mark; @@ -228,7 +228,6 @@ typedef struct VP9EncoderConfig { int worst_allowed_q; int best_allowed_q; int cq_level; - int lossless; AQ_MODE aq_mode; // Adaptive Quantization mode // Internal frame size scaling. @@ -286,6 +285,10 @@ typedef struct VP9EncoderConfig { vp8e_tuning tuning; } VP9EncoderConfig; +static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) { + return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0; +} + static INLINE int is_best_mode(MODE mode) { return mode == ONE_PASS_BEST || mode == TWO_PASS_SECOND_BEST; } diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 0b9a5ac7a..dc3832b16 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -169,7 +169,6 @@ static void zero_stats(FIRSTPASS_STATS *section) { section->intra_error = 0.0; section->coded_error = 0.0; section->sr_coded_error = 0.0; - section->ssim_weighted_pred_err = 0.0; section->pcnt_inter = 0.0; section->pcnt_motion = 0.0; section->pcnt_second_ref = 0.0; @@ -194,7 +193,6 @@ static void accumulate_stats(FIRSTPASS_STATS *section, section->intra_error += frame->intra_error; section->coded_error += frame->coded_error; section->sr_coded_error += frame->sr_coded_error; - section->ssim_weighted_pred_err += frame->ssim_weighted_pred_err; section->pcnt_inter += frame->pcnt_inter; section->pcnt_motion += frame->pcnt_motion; section->pcnt_second_ref += frame->pcnt_second_ref; @@ -217,7 +215,6 @@ static void subtract_stats(FIRSTPASS_STATS *section, section->intra_error -= frame->intra_error; section->coded_error -= frame->coded_error; section->sr_coded_error -= frame->sr_coded_error; - section->ssim_weighted_pred_err -= frame->ssim_weighted_pred_err; section->pcnt_inter -= frame->pcnt_inter; section->pcnt_motion -= frame->pcnt_motion; section->pcnt_second_ref -= frame->pcnt_second_ref; @@ -241,7 +238,6 @@ static void avg_stats(FIRSTPASS_STATS *section) { section->intra_error /= section->count; section->coded_error /= section->count; section->sr_coded_error /= section->count; - section->ssim_weighted_pred_err /= section->count; section->pcnt_inter /= section->count; section->pcnt_second_ref /= section->count; section->pcnt_neutral /= section->count; @@ -262,71 +258,14 @@ static double calculate_modified_err(const TWO_PASS *twopass, const VP9EncoderConfig *oxcf, const FIRSTPASS_STATS *this_frame) { const FIRSTPASS_STATS *const stats = &twopass->total_stats; - const double av_err = stats->ssim_weighted_pred_err / stats->count; + const double av_err = stats->coded_error / stats->count; const double modified_error = av_err * - pow(this_frame->ssim_weighted_pred_err / DOUBLE_DIVIDE_CHECK(av_err), + pow(this_frame->coded_error / DOUBLE_DIVIDE_CHECK(av_err), oxcf->two_pass_vbrbias / 100.0); return fclamp(modified_error, twopass->modified_error_min, twopass->modified_error_max); } -static const double weight_table[256] = { - 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, - 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, - 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, - 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, - 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.031250, 0.062500, - 0.093750, 0.125000, 0.156250, 0.187500, 0.218750, 0.250000, 0.281250, - 0.312500, 0.343750, 0.375000, 0.406250, 0.437500, 0.468750, 0.500000, - 0.531250, 0.562500, 0.593750, 0.625000, 0.656250, 0.687500, 0.718750, - 0.750000, 0.781250, 0.812500, 0.843750, 0.875000, 0.906250, 0.937500, - 0.968750, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000 -}; - -static double simple_weight(const YV12_BUFFER_CONFIG *buf) { - int i, j; - double sum = 0.0; - const int w = buf->y_crop_width; - const int h = buf->y_crop_height; - const uint8_t *row = buf->y_buffer; - - for (i = 0; i < h; ++i) { - const uint8_t *pixel = row; - for (j = 0; j < w; ++j) - sum += weight_table[*pixel++]; - row += buf->y_stride; - } - - return MAX(0.1, sum / (w * h)); -} - // This function returns the maximum target rate per frame. static int frame_max_bits(const RATE_CONTROL *rc, const VP9EncoderConfig *oxcf) { @@ -459,6 +398,32 @@ static BLOCK_SIZE get_bsize(const VP9_COMMON *cm, int mb_row, int mb_col) { } } +static int find_fp_qindex() { + int i; + + for (i = 0; i < QINDEX_RANGE; ++i) + if (vp9_convert_qindex_to_q(i) >= 30.0) + break; + + if (i == QINDEX_RANGE) + i--; + + return i; +} + +static void set_first_pass_params(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + if (!cpi->refresh_alt_ref_frame && + (cm->current_video_frame == 0 || + (cpi->frame_flags & FRAMEFLAGS_KEY))) { + cm->frame_type = KEY_FRAME; + } else { + cm->frame_type = INTER_FRAME; + } + // Do not use periodic key frames. + cpi->rc.frames_to_key = INT_MAX; +} + void vp9_first_pass(VP9_COMP *cpi) { int mb_row, mb_col; MACROBLOCK *const x = &cpi->mb; @@ -495,9 +460,13 @@ void vp9_first_pass(VP9_COMP *cpi) { TWO_PASS *twopass = &cpi->twopass; const MV zero_mv = {0, 0}; const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12; + FIRSTPASS_STATS fps; vp9_clear_system_state(); + set_first_pass_params(cpi); + vp9_set_quantizer(cm, find_fp_qindex()); + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { MV_REFERENCE_FRAME ref_frame = LAST_FRAME; const YV12_BUFFER_CONFIG *scaled_ref_buf = NULL; @@ -789,14 +758,11 @@ void vp9_first_pass(VP9_COMP *cpi) { vp9_clear_system_state(); { - FIRSTPASS_STATS fps; - fps.frame = cm->current_video_frame; fps.spatial_layer_id = cpi->svc.spatial_layer_id; fps.intra_error = (double)(intra_error >> 8); fps.coded_error = (double)(coded_error >> 8); fps.sr_coded_error = (double)(sr_coded_error >> 8); - fps.ssim_weighted_pred_err = fps.coded_error * simple_weight(cpi->Source); fps.count = 1.0; fps.pcnt_inter = (double)intercount / cm->MBs; fps.pcnt_second_ref = (double)second_ref_count / cm->MBs; @@ -830,8 +796,7 @@ void vp9_first_pass(VP9_COMP *cpi) { fps.duration = (double)(cpi->source->ts_end - cpi->source->ts_start); // Don't want to do output stats with a stack variable! - twopass->this_frame_stats = fps; - output_stats(&twopass->this_frame_stats, cpi->output_pkt_list); + output_stats(&fps, cpi->output_pkt_list); accumulate_stats(&twopass->total_stats, &fps); } @@ -839,9 +804,9 @@ void vp9_first_pass(VP9_COMP *cpi) { // the prediction is good enough... but also don't allow it to lag too far. if ((twopass->sr_update_lag > 3) || ((cm->current_video_frame > 0) && - (twopass->this_frame_stats.pcnt_inter > 0.20) && - ((twopass->this_frame_stats.intra_error / - DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) { + (fps.pcnt_inter > 0.20) && + ((fps.intra_error / + DOUBLE_DIVIDE_CHECK(fps.coded_error)) > 2.0))) { if (gld_yv12 != NULL) { vp8_yv12_copy_frame(lst_yv12, gld_yv12); } @@ -1002,8 +967,8 @@ void vp9_init_second_pass(VP9_COMP *cpi) { // Scan the first pass file and calculate a modified total error based upon // the bias/power function used to allocate bits. { - const double avg_error = stats->ssim_weighted_pred_err / - DOUBLE_DIVIDE_CHECK(stats->count); + const double avg_error = stats->coded_error / + DOUBLE_DIVIDE_CHECK(stats->count); const FIRSTPASS_STATS *s = twopass->stats_in; double modified_error_total = 0.0; twopass->modified_error_min = (avg_error * @@ -1523,6 +1488,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { FIRSTPASS_STATS next_frame; const FIRSTPASS_STATS *const start_pos = twopass->stats_in; int i; + double boost_score = 0.0; double old_boost_score = 0.0; double gf_group_err = 0.0; @@ -1639,7 +1605,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Break out conditions. if ( - // Break at cpi->max_gf_interval unless almost totally static. + // Break at active_max_gf_interval unless almost totally static. (i >= active_max_gf_interval && (zero_motion_accumulator < 0.995)) || ( // Don't break out with a very short interval. @@ -2114,19 +2080,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { twopass->modified_error_left -= kf_group_err; } -void vp9_rc_get_first_pass_params(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - if (!cpi->refresh_alt_ref_frame && - (cm->current_video_frame == 0 || - (cpi->frame_flags & FRAMEFLAGS_KEY))) { - cm->frame_type = KEY_FRAME; - } else { - cm->frame_type = INTER_FRAME; - } - // Do not use periodic key frames. - cpi->rc.frames_to_key = INT_MAX; -} - // For VBR...adjustment to the frame target based on error from previous frames void vbr_rate_correction(int * this_frame_target, const int64_t vbr_bits_off_target) { diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h index 309638c1e..c89cfaf8d 100644 --- a/vp9/encoder/vp9_firstpass.h +++ b/vp9/encoder/vp9_firstpass.h @@ -44,7 +44,6 @@ typedef struct { unsigned int section_intra_rating; unsigned int next_iiratio; FIRSTPASS_STATS total_stats; - FIRSTPASS_STATS this_frame_stats; const FIRSTPASS_STATS *stats_in; const FIRSTPASS_STATS *stats_in_start; const FIRSTPASS_STATS *stats_in_end; diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index 041e583fd..842bc5b9d 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -72,8 +72,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, x->mv_row_max = tmp_row_max; return vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].dst.buf, xd->plane[0].dst.stride, - INT_MAX); + xd->plane[0].dst.buf, xd->plane[0].dst.stride); } static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv, @@ -86,8 +85,7 @@ static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv, // Try zero MV first // FIXME should really use something like near/nearest MV and/or MV prediction err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride, - INT_MAX); + xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); dst_mv->as_int = 0; // Test last reference frame using the previous best mv as the @@ -123,8 +121,7 @@ static int do_16x16_zerozero_search(VP9_COMP *cpi, int_mv *dst_mv) { // Try zero MV first // FIXME should really use something like near/nearest MV and/or MV prediction err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride, - INT_MAX); + xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); dst_mv->as_int = 0; @@ -147,7 +144,7 @@ static int find_best_16x16_intra(VP9_COMP *cpi, PREDICTION_MODE *pbest_mode) { xd->plane[0].dst.buf, xd->plane[0].dst.stride, 0, 0, 0); err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].dst.buf, xd->plane[0].dst.stride, best_err); + xd->plane[0].dst.buf, xd->plane[0].dst.stride); // find best if (err < best_err) { diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 4f7d6f17c..dbd19a2d6 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -524,9 +524,8 @@ static int vp9_pattern_search(const MACROBLOCK *x, // Work out the start point for the search bestsad = vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride, - 0x7fffffff) + mvsad_err_cost(x, ref_mv, &fcenter_mv, - sad_per_bit); + get_buf_from_mv(in_what, ref_mv), in_what->stride) + + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); // Search all possible scales upto the search param around the center point // pick the scale of the point that is best as the starting scale of @@ -542,7 +541,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, bc + candidates[t][i].col}; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } else { @@ -553,7 +552,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, continue; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } @@ -585,7 +584,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, bc + candidates[s][i].col}; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } else { @@ -596,7 +595,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, continue; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } @@ -623,7 +622,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, bc + candidates[s][next_chkpts_indices[i]].col}; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } else { @@ -634,7 +633,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, continue; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } @@ -661,7 +660,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, bc + neighbors[i].col}; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } else { @@ -672,7 +671,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, continue; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } @@ -894,8 +893,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, *best_mv = *ref_mv; *num00 = 11; best_sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride, - 0x7fffffff) + + get_buf_from_mv(in_what, ref_mv), in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); start_row = MAX(-range, x->mv_row_min - ref_mv->row); start_col = MAX(-range, x->mv_col_min - ref_mv->col); @@ -929,7 +927,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, for (i = 0; i < end_col - c; ++i) { const MV mv = {ref_mv->row + r, ref_mv->col + c + i}; unsigned int sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); + get_buf_from_mv(in_what, &mv), in_what->stride); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { @@ -975,7 +973,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, // Check the starting position best_sad = fn_ptr->sdf(what->buf, what->stride, - best_address, in_what->stride, 0x7fffffff) + + best_address, in_what->stride) + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); i = 1; @@ -986,8 +984,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, best_mv->col + ss[i].mv.col}; if (is_mv_in(x, &mv)) { int sad = fn_ptr->sdf(what->buf, what->stride, - best_address + ss[i].offset, in_what->stride, - best_sad); + best_address + ss[i].offset, in_what->stride); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { @@ -1012,7 +1009,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, if (is_mv_in(x, &this_mv)) { int sad = fn_ptr->sdf(what->buf, what->stride, best_address + ss[best_site].offset, - in_what->stride, best_sad); + in_what->stride); if (sad < best_sad) { sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { @@ -1077,7 +1074,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, best_address = in_what; // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); i = 1; @@ -1129,7 +1126,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, if (is_mv_in(x, &this_mv)) { const uint8_t *const check_here = ss[i].offset + best_address; unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, - in_what_stride, bestsad); + in_what_stride); if (thissad < bestsad) { thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); @@ -1154,7 +1151,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, if (is_mv_in(x, &this_mv)) { const uint8_t *const check_here = ss[best_site].offset + best_address; unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, - in_what_stride, bestsad); + in_what_stride); if (thissad < bestsad) { thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); @@ -1253,7 +1250,7 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; int best_sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + + get_buf_from_mv(in_what, ref_mv), in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); *best_mv = *ref_mv; @@ -1261,7 +1258,7 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, for (c = col_min; c < col_max; ++c) { const MV mv = {r, c}; const int sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) + + get_buf_from_mv(in_what, &mv), in_what->stride) + mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { best_sad = sad; @@ -1286,7 +1283,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + + get_buf_from_mv(in_what, ref_mv), in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); *best_mv = *ref_mv; @@ -1320,7 +1317,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, while (c < col_max) { unsigned int sad = fn_ptr->sdf(what->buf, what->stride, - check_here, in_what->stride, best_sad); + check_here, in_what->stride); if (sad < best_sad) { const MV mv = {r, c}; sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); @@ -1351,7 +1348,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + + get_buf_from_mv(in_what, ref_mv), in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); *best_mv = *ref_mv; @@ -1409,7 +1406,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, while (c < col_max) { unsigned int sad = fn_ptr->sdf(what->buf, what->stride, - check_here, in_what->stride, best_sad); + check_here, in_what->stride); if (sad < best_sad) { const MV mv = {r, c}; sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); @@ -1438,7 +1435,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), - in_what->stride, 0x7fffffff) + + in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); int i, j; @@ -1450,7 +1447,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, ref_mv->col + neighbors[j].col}; if (is_mv_in(x, &mv)) { unsigned int sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); + get_buf_from_mv(in_what, &mv), in_what->stride); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { @@ -1483,7 +1480,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address, - in_what->stride, 0x7fffffff) + + in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); int i, j; @@ -1524,7 +1521,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, if (is_mv_in(x, &mv)) { unsigned int sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), - in_what->stride, best_sad); + in_what->stride); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { @@ -1563,8 +1560,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride, - second_pred, 0x7fffffff) + + get_buf_from_mv(in_what, ref_mv), in_what->stride, second_pred) + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); int i, j; @@ -1577,8 +1573,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, if (is_mv_in(x, &mv)) { unsigned int sad = fn_ptr->sdaf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride, - second_pred, best_sad); + get_buf_from_mv(in_what, &mv), in_what->stride, second_pred); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 11633a73d..913b8ead4 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -280,8 +280,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { int rate_mv = 0; - if (cpi->sf.disable_inter_mode_mask[bsize] & - (1 << INTER_OFFSET(this_mode))) + if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode))) continue; if (rd_less_than_thresh(best_rd, rd_threshes[mode_idx[this_mode]], diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 9ad851754..fb8246201 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -159,7 +159,7 @@ static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) { lrc->bits_off_target += bits_off_for_this_layer; // Clip buffer level to maximum buffer size for the layer. - lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size); + lrc->bits_off_target = MIN(lrc->bits_off_target, lrc->maximum_buffer_size); lrc->buffer_level = lrc->bits_off_target; } } @@ -167,7 +167,6 @@ static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) { // Update the buffer level: leaky bucket model. static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { const VP9_COMMON *const cm = &cpi->common; - const VP9EncoderConfig *oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; // Non-viewable frames are a special case and are treated as pure overhead. @@ -178,7 +177,7 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { } // Clip the buffer level to the maximum specified buffer size. - rc->bits_off_target = MIN(rc->bits_off_target, oxcf->maximum_buffer_size); + rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size); rc->buffer_level = rc->bits_off_target; if (cpi->use_svc && cpi->oxcf.rc_mode == RC_MODE_CBR) { @@ -200,12 +199,11 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { oxcf->best_allowed_q) / 2; } - rc->last_q[0] = oxcf->best_allowed_q; - rc->last_q[1] = oxcf->best_allowed_q; - rc->last_q[2] = oxcf->best_allowed_q; + rc->last_q[KEY_FRAME] = oxcf->best_allowed_q; + rc->last_q[INTER_FRAME] = oxcf->best_allowed_q; - rc->buffer_level = oxcf->starting_buffer_level; - rc->bits_off_target = oxcf->starting_buffer_level; + rc->buffer_level = rc->starting_buffer_level; + rc->bits_off_target = rc->starting_buffer_level; rc->rolling_target_bits = rc->avg_frame_bandwidth; rc->rolling_actual_bits = rc->avg_frame_bandwidth; @@ -251,7 +249,7 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) { // If buffer is below drop_mark, for now just drop every other frame // (starting with the next frame) until it increases back over drop_mark. int drop_mark = (int)(oxcf->drop_frames_water_mark * - oxcf->optimal_buffer_level / 100); + rc->optimal_buffer_level / 100); if ((rc->buffer_level > drop_mark) && (rc->decimation_factor > 0)) { --rc->decimation_factor; @@ -445,10 +443,9 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { // ambient Q (at buffer = optimal level) to worst_quality level // (at buffer = critical level). const VP9_COMMON *const cm = &cpi->common; - const VP9EncoderConfig *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; // Buffer level below which we push active_worst to worst_quality. - int64_t critical_level = oxcf->optimal_buffer_level >> 2; + int64_t critical_level = rc->optimal_buffer_level >> 2; int64_t buff_lvl_step = 0; int adjustment = 0; int active_worst_quality; @@ -460,26 +457,26 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { else active_worst_quality = MIN(rc->worst_quality, rc->avg_frame_qindex[KEY_FRAME] * 3 / 2); - if (rc->buffer_level > oxcf->optimal_buffer_level) { + if (rc->buffer_level > rc->optimal_buffer_level) { // Adjust down. // Maximum limit for down adjustment, ~30%. int max_adjustment_down = active_worst_quality / 3; if (max_adjustment_down) { - buff_lvl_step = ((oxcf->maximum_buffer_size - - oxcf->optimal_buffer_level) / max_adjustment_down); + buff_lvl_step = ((rc->maximum_buffer_size - + rc->optimal_buffer_level) / max_adjustment_down); if (buff_lvl_step) - adjustment = (int)((rc->buffer_level - oxcf->optimal_buffer_level) / + adjustment = (int)((rc->buffer_level - rc->optimal_buffer_level) / buff_lvl_step); active_worst_quality -= adjustment; } } else if (rc->buffer_level > critical_level) { // Adjust up from ambient Q. if (critical_level) { - buff_lvl_step = (oxcf->optimal_buffer_level - critical_level); + buff_lvl_step = (rc->optimal_buffer_level - critical_level); if (buff_lvl_step) { adjustment = (int)((rc->worst_quality - rc->avg_frame_qindex[INTER_FRAME]) * - (oxcf->optimal_buffer_level - rc->buffer_level) / + (rc->optimal_buffer_level - rc->buffer_level) / buff_lvl_step); } active_worst_quality = rc->avg_frame_qindex[INTER_FRAME] + adjustment; @@ -1090,7 +1087,6 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) && !(cpi->use_svc && oxcf->rc_mode == RC_MODE_CBR)) { - rc->last_q[2] = qindex; rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[2] + qindex, 2); } else { @@ -1229,8 +1225,8 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { const VP9EncoderConfig *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; const SVC *const svc = &cpi->svc; - const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level; - const int64_t one_pct_bits = 1 + oxcf->optimal_buffer_level / 100; + const int64_t diff = rc->optimal_buffer_level - rc->buffer_level; + const int64_t one_pct_bits = 1 + rc->optimal_buffer_level / 100; int min_frame_target = MAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS); int target = rc->avg_frame_bandwidth; if (svc->number_temporal_layers > 1 && @@ -1261,8 +1257,8 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { const SVC *const svc = &cpi->svc; int target; if (cpi->common.current_video_frame == 0) { - target = ((cpi->oxcf.starting_buffer_level / 2) > INT_MAX) - ? INT_MAX : (int)(cpi->oxcf.starting_buffer_level / 2); + target = ((rc->starting_buffer_level / 2) > INT_MAX) + ? INT_MAX : (int)(rc->starting_buffer_level / 2); } else { int kf_boost = 32; double framerate = oxcf->framerate; @@ -1390,6 +1386,24 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, return target_index - qindex; } +void vp9_rc_set_gf_max_interval(const VP9EncoderConfig *const oxcf, + RATE_CONTROL *const rc) { + // Set Maximum gf/arf interval + rc->max_gf_interval = 16; + + // Extended interval for genuinely static scenes + rc->static_scene_max_gf_interval = oxcf->key_freq >> 1; + + // Special conditions when alt ref frame enabled + if (oxcf->play_alternate && oxcf->lag_in_frames) { + if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) + rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; + } + + if (rc->max_gf_interval > rc->static_scene_max_gf_interval) + rc->max_gf_interval = rc->static_scene_max_gf_interval; +} + void vp9_rc_update_framerate(VP9_COMP *cpi) { const VP9_COMMON *const cm = &cpi->common; const VP9EncoderConfig *const oxcf = &cpi->oxcf; @@ -1414,21 +1428,5 @@ void vp9_rc_update_framerate(VP9_COMP *cpi) { rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits); - // Set Maximum gf/arf interval - rc->max_gf_interval = 16; - - // Extended interval for genuinely static scenes - rc->static_scene_max_gf_interval = cpi->oxcf.key_freq >> 1; - - // Special conditions when alt ref frame enabled in lagged compress mode - if (oxcf->play_alternate && oxcf->lag_in_frames) { - if (rc->max_gf_interval > oxcf->lag_in_frames - 1) - rc->max_gf_interval = oxcf->lag_in_frames - 1; - - if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) - rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; - } - - if (rc->max_gf_interval > rc->static_scene_max_gf_interval) - rc->max_gf_interval = rc->static_scene_max_gf_interval; + vp9_rc_set_gf_max_interval(oxcf, rc); } diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index b1cc67609..1d7cfdc2a 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -30,7 +30,7 @@ typedef struct { int this_frame_target; // Actual frame target after rc adjustment. int projected_frame_size; int sb64_target_rate; - int last_q[3]; // Separate values for Intra/Inter/ARF-GF + int last_q[FRAME_TYPES]; // Separate values for Intra/Inter int last_boosted_qindex; // Last boosted GF/KF/ARF q int gfu_boost; @@ -84,6 +84,10 @@ typedef struct { int worst_quality; int best_quality; + + int64_t starting_buffer_level; + int64_t optimal_buffer_level; + int64_t maximum_buffer_size; // int active_best_quality; } RATE_CONTROL; @@ -178,6 +182,9 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, void vp9_rc_update_framerate(struct VP9_COMP *cpi); +void vp9_rc_set_gf_max_interval(const struct VP9EncoderConfig *const oxcf, + RATE_CONTROL *const rc); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 1cdc912d8..f68aa2738 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1675,9 +1675,9 @@ static INLINE int mv_has_subpel(const MV *mv) { static int check_best_zero_mv( const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES], int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], - int disable_inter_mode_mask, int this_mode, + int inter_mode_mask, int this_mode, const MV_REFERENCE_FRAME ref_frames[2]) { - if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) && + if ((inter_mode_mask & (1 << ZEROMV)) && (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) && frame_mv[this_mode][ref_frames[0]].as_int == 0 && (ref_frames[1] == NONE || @@ -1743,7 +1743,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, ENTROPY_CONTEXT t_above[2], t_left[2]; int subpelmv = 1, have_ref = 0; const int has_second_rf = has_second_ref(mbmi); - const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize]; + const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize]; vp9_zero(*bsi); @@ -1792,11 +1792,11 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, mode_idx = INTER_OFFSET(this_mode); bsi->rdstat[i][mode_idx].brdcost = INT64_MAX; - if (disable_inter_mode_mask & (1 << mode_idx)) + if (!(inter_mode_mask & (1 << this_mode))) continue; if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, - disable_inter_mode_mask, + inter_mode_mask, this_mode, mbmi->ref_frame)) continue; @@ -2129,8 +2129,7 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, // Find sad for current vector. this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, - ref_y_ptr, ref_y_stride, - 0x7fffffff); + ref_y_ptr, ref_y_stride); // Note if it is the best so far. if (this_sad < best_sad) { @@ -2208,8 +2207,6 @@ static void estimate_ref_frame_costs(const VP9_COMMON *cm, static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index, - int_mv *ref_mv, - int_mv *second_ref_mv, int64_t comp_pred_diff[REFERENCE_MODES], const int64_t tx_size_diff[TX_MODES], int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) { @@ -3066,7 +3063,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags; const int intra_y_mode_mask = cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]]; - int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize]; + int inter_mode_mask = cpi->sf.inter_mode_mask[bsize]; vp9_zero(best_mbmode); x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; @@ -3132,6 +3129,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) { const int inter_non_zero_mode_mask = 0x1F7F7; mode_skip_mask |= inter_non_zero_mode_mask; + mode_skip_mask &= ~(1 << THR_ZEROMV); + inter_mode_mask = (1 << ZEROMV); } // Disable this drop out case if the ref frame @@ -3165,7 +3164,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } if (bsize > cpi->sf.max_intra_bsize) { - mode_skip_mask |= 0xFF30808; + const int all_intra_modes = (1 << THR_DC) | (1 << THR_TM) | + (1 << THR_H_PRED) | (1 << THR_V_PRED) | (1 << THR_D135_PRED) | + (1 << THR_D207_PRED) | (1 << THR_D153_PRED) | (1 << THR_D63_PRED) | + (1 << THR_D117_PRED) | (1 << THR_D45_PRED); + mode_skip_mask |= all_intra_modes; } if (!x->in_active_map) { @@ -3179,7 +3182,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mode_index = THR_ZEROMV; mode_skip_mask = ~(1 << mode_index); mode_skip_start = MAX_MODES; - disable_inter_mode_mask = 0; + inter_mode_mask = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | + (1 << NEWMV); } for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { @@ -3226,8 +3230,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, this_mode = vp9_mode_order[mode_index].mode; ref_frame = vp9_mode_order[mode_index].ref_frame[0]; - if (ref_frame != INTRA_FRAME && - disable_inter_mode_mask & (1 << INTER_OFFSET(this_mode))) + if (ref_frame != INTRA_FRAME && !(inter_mode_mask & (1 << this_mode))) continue; second_ref_frame = vp9_mode_order[mode_index].ref_frame[1]; @@ -3276,7 +3279,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame}; if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, - disable_inter_mode_mask, this_mode, ref_frames)) + inter_mode_mask, this_mode, ref_frames)) continue; } } @@ -3614,9 +3617,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); store_coding_context(x, ctx, best_mode_index, - &mbmi->ref_mvs[mbmi->ref_frame[0]][0], - &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 : - mbmi->ref_frame[1]][0], best_pred_diff, best_tx_diff, best_filter_diff); return best_rd; @@ -3665,7 +3665,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int_mv seg_mvs[4][MAX_REF_FRAMES]; b_mode_info best_bmodes[4]; int best_skip2 = 0; - int ref_frame_mask = 0; int mode_skip_mask = 0; x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; @@ -3700,17 +3699,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, frame_mv[ZEROMV][ref_frame].as_int = 0; } - for (ref_frame = LAST_FRAME; - ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) { - int i; - for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { - if ((x->pred_mv_sad[ref_frame] >> 1) > x->pred_mv_sad[i]) { - ref_frame_mask |= (1 << ref_frame); - break; - } - } - } - for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) { int mode_excluded = 0; int64_t this_rd = INT64_MAX; @@ -3805,11 +3793,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { continue; - // If the segment skip feature is enabled.... - // then do nothing if the current mode is not allowed.. - } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) && - ref_frame != INTRA_FRAME) { - continue; // Disable this drop out case if the ref frame // segment level feature is enabled for this segment. This is to // prevent the possibility that we end up unable to pick any mode. @@ -4034,15 +4017,10 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } if (!disable_skip) { - // Test for the condition where skip block will be activated - // because there are no non zero coefficients and make any - // necessary adjustment for rate. Ignore if skip is coded at - // segment level as the cost wont have been added in. - // Is Mb level skip allowed (i.e. not coded at segment level). - const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id, - SEG_LVL_SKIP); + // Skip is never coded at the segment level for sub8x8 blocks and instead + // always coded in the bitstream at the mode info level. - if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) { + if (ref_frame != INTRA_FRAME && !xd->lossless) { if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < RDCOST(x->rdmult, x->rddiv, 0, total_sse)) { // Add in the cost of the no skip flag. @@ -4057,7 +4035,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, rate_uv = 0; this_skip2 = 1; } - } else if (mb_skip_allowed) { + } else { // Add in the cost of the no skip flag. rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); } @@ -4247,9 +4225,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); store_coding_context(x, ctx, best_ref_index, - &mbmi->ref_mvs[mbmi->ref_frame[0]][0], - &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 : - mbmi->ref_frame[1]][0], best_pred_diff, best_tx_diff, best_filter_diff); return best_rd; diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index 5ea09a8a7..e85d08a6d 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -99,41 +99,44 @@ static INLINE int full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, int step_param, int error_per_bit, const MV *ref_mv, MV *tmp_mv, int var_max, int rd) { + const SPEED_FEATURES *const sf = &cpi->sf; + const SEARCH_METHODS method = sf->search_method; + vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; int var = 0; - if (cpi->sf.search_method == FAST_DIAMOND) { - var = vp9_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, - &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv); - if (rd && var < var_max) - var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1); - } else if (cpi->sf.search_method == FAST_HEX) { - var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0, - &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv); - if (rd && var < var_max) - var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1); - } else if (cpi->sf.search_method == HEX) { - var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1, - &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv); - if (rd && var < var_max) - var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1); - } else if (cpi->sf.search_method == SQUARE) { - var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1, - &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv); - if (rd && var < var_max) - var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1); - } else if (cpi->sf.search_method == BIGDIA) { - var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1, - &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv); - if (rd && var < var_max) - var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1); - } else { - int further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - - var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, - further_steps, 1, &cpi->fn_ptr[bsize], - ref_mv, tmp_mv); + switch (method) { + case FAST_DIAMOND: + var = vp9_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, + fn_ptr, 1, ref_mv, tmp_mv); + break; + case FAST_HEX: + var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0, + fn_ptr, 1, ref_mv, tmp_mv); + break; + case HEX: + var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1, + fn_ptr, 1, ref_mv, tmp_mv); + break; + case SQUARE: + var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1, + fn_ptr, 1, ref_mv, tmp_mv); + break; + case BIGDIA: + var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1, + fn_ptr, 1, ref_mv, tmp_mv); + break; + case NSTEP: + var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, + (sf->max_step_search_steps - 1) - step_param, + 1, fn_ptr, ref_mv, tmp_mv); + break; + default: + assert(!"Invalid search method."); } + if (method != NSTEP && rd && var < var_max) + var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1); + return var; } diff --git a/vp9/encoder/vp9_sad.c b/vp9/encoder/vp9_sad.c index 892e90551..d06263676 100644 --- a/vp9/encoder/vp9_sad.c +++ b/vp9/encoder/vp9_sad.c @@ -35,14 +35,12 @@ static INLINE unsigned int sad(const uint8_t *a, int a_stride, #define sadMxN(m, n) \ unsigned int vp9_sad##m##x##n##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int max_sad) { \ + const uint8_t *ref, int ref_stride) { \ return sad(src, src_stride, ref, ref_stride, m, n); \ } \ unsigned int vp9_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \ const uint8_t *ref, int ref_stride, \ - const uint8_t *second_pred, \ - unsigned int max_sad) { \ + const uint8_t *second_pred) { \ uint8_t comp_pred[m * n]; \ vp9_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \ return sad(src, src_stride, comp_pred, m, m, n); \ @@ -54,8 +52,7 @@ void vp9_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \ unsigned int *sads) { \ int i; \ for (i = 0; i < k; ++i) \ - sads[i] = vp9_sad##m##x##n##_c(src, src_stride, &ref[i], ref_stride, \ - 0x7fffffff); \ + sads[i] = vp9_sad##m##x##n##_c(src, src_stride, &ref[i], ref_stride); \ } #define sadMxNx4D(m, n) \ @@ -64,8 +61,7 @@ void vp9_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \ unsigned int *sads) { \ int i; \ for (i = 0; i < 4; ++i) \ - sads[i] = vp9_sad##m##x##n##_c(src, src_stride, refs[i], ref_stride, \ - 0x7fffffff); \ + sads[i] = vp9_sad##m##x##n##_c(src, src_stride, refs[i], ref_stride); \ } // 64x64 diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 7c3abd5d7..b7f839747 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -14,20 +14,23 @@ #include "vp9/encoder/vp9_speed_features.h" enum { - ALL_INTRA_MODES = (1 << DC_PRED) | + INTRA_ALL = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << D45_PRED) | (1 << D135_PRED) | (1 << D117_PRED) | (1 << D153_PRED) | (1 << D207_PRED) | (1 << D63_PRED) | (1 << TM_PRED), - - INTRA_DC_ONLY = (1 << DC_PRED), - - INTRA_DC_TM = (1 << TM_PRED) | (1 << DC_PRED), - + INTRA_DC = (1 << DC_PRED), + INTRA_DC_TM = (1 << DC_PRED) | (1 << TM_PRED), INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED), + INTRA_DC_TM_H_V = (1 << DC_PRED) | (1 << TM_PRED) | (1 << V_PRED) | + (1 << H_PRED) +}; - INTRA_DC_TM_H_V = INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED) +enum { + INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV), + INTER_NEAREST = (1 << NEARESTMV), + INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV) }; enum { @@ -140,8 +143,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->search_method = HEX; sf->disable_filter_search_var_thresh = 500; for (i = 0; i < TX_SIZES; ++i) { - sf->intra_y_mode_mask[i] = INTRA_DC_ONLY; - sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; + sf->intra_y_mode_mask[i] = INTRA_DC; + sf->intra_uv_mode_mask[i] = INTRA_DC; } cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; } @@ -156,7 +159,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->adaptive_rd_thresh = 1; sf->use_fast_coef_costing = 1; - if (speed == 1) { + if (speed >= 1) { sf->use_square_partition_only = !frame_is_intra_only(cm); sf->less_rectangular_check = 1; sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD @@ -179,13 +182,9 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, } if (speed >= 2) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD - : USE_LARGESTALL; if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; + sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT + : DISABLE_ALL_INTER_SPLIT; else sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; @@ -193,28 +192,18 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR; - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; sf->adaptive_pred_interp_filter = 2; - sf->auto_mv_step_size = 1; sf->reference_masking = 1; - sf->disable_filter_search_var_thresh = 50; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; sf->lf_motion_threshold = LOW_MOITION_THRESHOLD; sf->adjust_partitioning_from_last_frame = 1; sf->last_partitioning_redo_frequency = 3; - - sf->adaptive_rd_thresh = 2; sf->use_lp32x32fdct = 1; sf->mode_skip_start = 11; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; } if (speed >= 3) { @@ -246,15 +235,15 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->subpel_force_stop = 1; for (i = 0; i < TX_SIZES; i++) { sf->intra_y_mode_mask[i] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; + sf->intra_uv_mode_mask[i] = INTRA_DC; } - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; sf->frame_parameter_update = 0; sf->search_method = FAST_HEX; - sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV); - sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV)); - sf->disable_inter_mode_mask[BLOCK_64X32] = ~(1 << INTER_OFFSET(NEARESTMV)); - sf->disable_inter_mode_mask[BLOCK_64X64] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW; + sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST; + sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST; + sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST; sf->max_intra_bsize = BLOCK_32X32; sf->allow_skip_recode = 1; } @@ -285,7 +274,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, if (speed >= 7) { int i; for (i = 0; i < BLOCK_SIZES; ++i) - sf->disable_inter_mode_mask[i] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->inter_mode_mask[i] = INTER_NEAREST; } } @@ -302,7 +291,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->subpel_search_method = SUBPEL_TREE; sf->subpel_iters_per_step = 2; sf->subpel_force_stop = 0; - sf->optimize_coefficients = !oxcf->lossless; + sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf); sf->reduce_first_step_size = 0; sf->auto_mv_step_size = 0; sf->max_step_search_steps = MAX_MVSEARCH_STEPS; @@ -330,8 +319,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->disable_split_var_thresh = 0; sf->disable_filter_search_var_thresh = 0; for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = ALL_INTRA_MODES; - sf->intra_uv_mode_mask[i] = ALL_INTRA_MODES; + sf->intra_y_mode_mask[i] = INTRA_ALL; + sf->intra_uv_mode_mask[i] = INTRA_ALL; } sf->use_rd_breakout = 0; sf->skip_encode_sb = 0; @@ -343,7 +332,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set sf->use_nonrd_pick_mode = 0; for (i = 0; i < BLOCK_SIZES; ++i) - sf->disable_inter_mode_mask[i] = 0; + sf->inter_mode_mask[i] = INTER_ALL; sf->max_intra_bsize = BLOCK_64X64; // This setting only takes effect when partition_search_type is set // to FIXED_PARTITION. diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index a54599e6a..3e7cd27d8 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -331,8 +331,8 @@ typedef struct SPEED_FEATURES { int use_nonrd_pick_mode; // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV - // modes are disabled in order from LSB to MSB for each BLOCK_SIZE. - int disable_inter_mode_mask[BLOCK_SIZES]; + // modes are used in order from LSB to MSB for each BLOCK_SIZE. + int inter_mode_mask[BLOCK_SIZES]; // This feature controls whether we do the expensive context update and // calculation in the rd coefficient costing loop. diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index dd28496be..1b995757a 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -46,16 +46,15 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { lrc->key_frame_rate_correction_factor = 1.0; if (svc->number_temporal_layers > 1) { - lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000; + lc->target_bandwidth = oxcf->ts_target_bitrate[layer]; lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; } else { - lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000; - lrc->last_q[0] = oxcf->best_allowed_q; - lrc->last_q[1] = oxcf->best_allowed_q; - lrc->last_q[2] = oxcf->best_allowed_q; + lc->target_bandwidth = oxcf->ss_target_bitrate[layer]; + lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q; + lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q; } - lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level), + lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level_ms), lc->target_bandwidth, 1000); lrc->bits_off_target = lrc->buffer_level; } @@ -82,20 +81,20 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, RATE_CONTROL *const lrc = &lc->rc; if (svc->number_temporal_layers > 1) { - lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000; + lc->target_bandwidth = oxcf->ts_target_bitrate[layer]; } else { - lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000; + lc->target_bandwidth = oxcf->ss_target_bitrate[layer]; } bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; // Update buffer-related quantities. - lc->starting_buffer_level = - (int64_t)(oxcf->starting_buffer_level * bitrate_alloc); - lc->optimal_buffer_level = - (int64_t)(oxcf->optimal_buffer_level * bitrate_alloc); - lc->maximum_buffer_size = - (int64_t)(oxcf->maximum_buffer_size * bitrate_alloc); - lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size); - lrc->buffer_level = MIN(lrc->buffer_level, lc->maximum_buffer_size); + lrc->starting_buffer_level = + (int64_t)(rc->starting_buffer_level * bitrate_alloc); + lrc->optimal_buffer_level = + (int64_t)(rc->optimal_buffer_level * bitrate_alloc); + lrc->maximum_buffer_size = + (int64_t)(rc->maximum_buffer_size * bitrate_alloc); + lrc->bits_off_target = MIN(lrc->bits_off_target, lrc->maximum_buffer_size); + lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size); // Update framerate-related quantities. if (svc->number_temporal_layers > 1) { lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer]; @@ -132,8 +131,7 @@ void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { } else { const double prev_layer_framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer - 1]; - const int prev_layer_target_bandwidth = - oxcf->ts_target_bitrate[layer - 1] * 1000; + const int prev_layer_target_bandwidth = oxcf->ts_target_bitrate[layer - 1]; lc->avg_frame_size = (int)((lc->target_bandwidth - prev_layer_target_bandwidth) / (lc->framerate - prev_layer_framerate)); @@ -151,20 +149,7 @@ void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) { oxcf->two_pass_vbrmin_section / 100); lrc->max_frame_bandwidth = (int)(((int64_t)lrc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) / 100); - lrc->max_gf_interval = 16; - - lrc->static_scene_max_gf_interval = cpi->oxcf.key_freq >> 1; - - if (oxcf->play_alternate && oxcf->lag_in_frames) { - if (lrc->max_gf_interval > oxcf->lag_in_frames - 1) - lrc->max_gf_interval = oxcf->lag_in_frames - 1; - - if (lrc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) - lrc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; - } - - if (lrc->max_gf_interval > lrc->static_scene_max_gf_interval) - lrc->max_gf_interval = lrc->static_scene_max_gf_interval; + vp9_rc_set_gf_max_interval(oxcf, lrc); } void vp9_restore_layer_context(VP9_COMP *const cpi) { @@ -175,9 +160,6 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { cpi->rc = lc->rc; cpi->twopass = lc->twopass; cpi->oxcf.target_bandwidth = lc->target_bandwidth; - cpi->oxcf.starting_buffer_level = lc->starting_buffer_level; - cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level; - cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size; // Reset the frames_since_key and frames_to_key counters to their values // before the layer restore. Keep these defined for the stream (not layer). if (cpi->svc.number_temporal_layers > 1) { @@ -193,9 +175,6 @@ void vp9_save_layer_context(VP9_COMP *const cpi) { lc->rc = cpi->rc; lc->twopass = cpi->twopass; lc->target_bandwidth = (int)oxcf->target_bandwidth; - lc->starting_buffer_level = oxcf->starting_buffer_level; - lc->optimal_buffer_level = oxcf->optimal_buffer_level; - lc->maximum_buffer_size = oxcf->maximum_buffer_size; } void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) { diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index 6881ce1e7..36e2027fd 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -22,9 +22,6 @@ extern "C" { typedef struct { RATE_CONTROL rc; int target_bandwidth; - int64_t starting_buffer_level; - int64_t optimal_buffer_level; - int64_t maximum_buffer_size; double framerate; int avg_frame_size; TWO_PASS twopass; diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c index 91d8ea4dc..eb5ae2e41 100644 --- a/vp9/encoder/vp9_variance.c +++ b/vp9/encoder/vp9_variance.c @@ -156,6 +156,18 @@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \ return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \ } +void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { + variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum); +} + +void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { + variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum); +} + unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, unsigned int *sse) { diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h index c47fe1335..4a194b72c 100644 --- a/vp9/encoder/vp9_variance.h +++ b/vp9/encoder/vp9_variance.h @@ -25,15 +25,13 @@ void variance(const uint8_t *a, int a_stride, typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, - int ref_stride, - unsigned int max_sad); + int ref_stride); typedef unsigned int(*vp9_sad_avg_fn_t)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, - const uint8_t *second_pred, - unsigned int max_sad); + const uint8_t *second_pred); typedef void (*vp9_sad_multi_fn_t)(const uint8_t *src_ptr, int source_stride, diff --git a/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm index 8723a7114..28458dcdd 100644 --- a/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm +++ b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm @@ -23,6 +23,7 @@ pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2 pw_%2_m%1: dw %2, -%1, %2, -%1, %2, -%1, %2, -%1 %endmacro +TRANSFORM_COEFFS 11585, 11585 TRANSFORM_COEFFS 15137, 6270 TRANSFORM_COEFFS 16069, 3196 TRANSFORM_COEFFS 9102, 13623 @@ -83,7 +84,7 @@ SECTION .text %endmacro ; 1D forward 8x8 DCT transform -%macro FDCT8_1D 0 +%macro FDCT8_1D 1 SUM_SUB 0, 7, 9 SUM_SUB 1, 6, 9 SUM_SUB 2, 5, 9 @@ -92,14 +93,21 @@ SECTION .text SUM_SUB 0, 3, 9 SUM_SUB 1, 2, 9 SUM_SUB 6, 5, 9 +%if %1 == 0 SUM_SUB 0, 1, 9 +%endif BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 pmulhrsw m6, m12 pmulhrsw m5, m12 +%if %1 == 0 pmulhrsw m0, m12 pmulhrsw m1, m12 +%else + BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10 + SWAP 0, 1 +%endif SUM_SUB 4, 5, 9 SUM_SUB 7, 6, 9 @@ -150,10 +158,10 @@ cglobal fdct8x8, 3, 5, 13, input, output, stride psllw m7, 2 ; column transform - FDCT8_1D + FDCT8_1D 0 TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - FDCT8_1D + FDCT8_1D 1 TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 DIVIDE_ROUND_2X 0, 1, 9, 10 diff --git a/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm b/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm index 673e0b3a6..21aaa9383 100644 --- a/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm +++ b/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm @@ -43,9 +43,9 @@ sym(vp9_temporal_filter_apply_sse2): mov [rsp + rbp_backup], rbp ; end prolog - mov rdx, arg(3) + mov edx, arg(3) mov [rsp + block_width], rdx - mov rdx, arg(4) + mov edx, arg(4) mov [rsp + block_height], rdx movd xmm6, arg(5) movdqa [rsp + strength], xmm6 ; where strength is used, all 16 bytes are read diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c index 6a82038ab..e935a233a 100644 --- a/vp9/encoder/x86/vp9_variance_sse2.c +++ b/vp9/encoder/x86/vp9_variance_sse2.c @@ -13,50 +13,27 @@ #include "vp9/encoder/vp9_variance.h" #include "vpx_ports/mem.h" -extern unsigned int vp9_get4x4var_mmx -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); - -unsigned int vp9_get16x16var_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); -unsigned int vp9_get8x8var_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); - -typedef unsigned int (*get_var_sse2) ( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); - -static void variance_sse2(const unsigned char *src_ptr, int source_stride, - const unsigned char *ref_ptr, int recon_stride, - int w, int h, unsigned int *sse, int *sum, - get_var_sse2 var_fn, int block_size) { - unsigned int sse0; - int sum0; +typedef unsigned int (*variance_fn_t) (const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse, int *sum); + +unsigned int vp9_get4x4var_mmx(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse, int *sum); + + +unsigned int vp9_get8x8var_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse, int *sum); + +unsigned int vp9_get16x16var_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse, int *sum); + +static void variance_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + int w, int h, unsigned int *sse, int *sum, + variance_fn_t var_fn, int block_size) { int i, j; *sse = 0; @@ -64,217 +41,139 @@ static void variance_sse2(const unsigned char *src_ptr, int source_stride, for (i = 0; i < h; i += block_size) { for (j = 0; j < w; j += block_size) { - var_fn(src_ptr + source_stride * i + j, source_stride, - ref_ptr + recon_stride * i + j, recon_stride, &sse0, &sum0); + unsigned int sse0; + int sum0; + var_fn(src + src_stride * i + j, src_stride, + ref + ref_stride * i + j, ref_stride, &sse0, &sum0); *sse += sse0; *sum += sum0; } } } -unsigned int vp9_variance4x4_sse2( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, - &var, &avg, vp9_get4x4var_mmx, 4); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 4)); +unsigned int vp9_variance4x4_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 4, 4, + sse, &sum, vp9_get4x4var_mmx, 4); + return *sse - (((unsigned int)sum * sum) >> 4); } -unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_variance8x4_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, - &var, &avg, vp9_get4x4var_mmx, 4); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 5)); + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 8, 4, + sse, &sum, vp9_get4x4var_mmx, 4); + return *sse - (((unsigned int)sum * sum) >> 5); } -unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_variance4x8_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, - &var, &avg, vp9_get4x4var_mmx, 4); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 5)); + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 4, 8, + sse, &sum, vp9_get4x4var_mmx, 4); + return *sse - (((unsigned int)sum * sum) >> 5); } -unsigned int vp9_variance8x8_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, - &var, &avg, vp9_get8x8var_sse2, 8); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 6)); +unsigned int vp9_variance8x8_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 8, 8, + sse, &sum, vp9_get8x8var_sse2, 8); + return *sse - (((unsigned int)sum * sum) >> 6); } -unsigned int vp9_variance16x8_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, - &var, &avg, vp9_get8x8var_sse2, 8); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); +unsigned int vp9_variance16x8_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 16, 8, + sse, &sum, vp9_get8x8var_sse2, 8); + return *sse - (((unsigned int)sum * sum) >> 7); } -unsigned int vp9_variance8x16_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, - &var, &avg, vp9_get8x8var_sse2, 8); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); +unsigned int vp9_variance8x16_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 8, 16, + sse, &sum, vp9_get8x8var_sse2, 8); + return *sse - (((unsigned int)sum * sum) >> 7); } -unsigned int vp9_variance16x16_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, - &var, &avg, vp9_get16x16var_sse2, 16); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 8)); +unsigned int vp9_variance16x16_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 16, 16, + sse, &sum, vp9_get16x16var_sse2, 16); + return *sse - (((unsigned int)sum * sum) >> 8); } -unsigned int vp9_mse16x16_sse2( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int sse0; - int sum0; - vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, - &sum0); - *sse = sse0; - return sse0; +unsigned int vp9_mse16x16_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + vp9_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum); + return *sse; } -unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_variance32x32_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, - &var, &avg, vp9_get16x16var_sse2, 16); - *sse = var; - return (var - (((int64_t)avg * avg) >> 10)); + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 32, 32, + sse, &sum, vp9_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 10); } -unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_variance32x16_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, - &var, &avg, vp9_get16x16var_sse2, 16); - *sse = var; - return (var - (((int64_t)avg * avg) >> 9)); + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 32, 16, + sse, &sum, vp9_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 9); } -unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_variance16x32_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, - &var, &avg, vp9_get16x16var_sse2, 16); - *sse = var; - return (var - (((int64_t)avg * avg) >> 9)); + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 16, 32, + sse, &sum, vp9_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 9); } -unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_variance64x64_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, - &var, &avg, vp9_get16x16var_sse2, 16); - *sse = var; - return (var - (((int64_t)avg * avg) >> 12)); + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 64, 64, + sse, &sum, vp9_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 12); } -unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_variance64x32_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, - &var, &avg, vp9_get16x16var_sse2, 16); - *sse = var; - return (var - (((int64_t)avg * avg) >> 11)); + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 64, 32, + sse, &sum, vp9_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 11); } -unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_variance32x64_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, - &var, &avg, vp9_get16x16var_sse2, 16); - *sse = var; - return (var - (((int64_t)avg * avg) >> 11)); + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 32, 64, + sse, &sum, vp9_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 11); } #define DECL(w, opt) \ diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 5a8a4f4fe..72768e11e 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -327,11 +327,14 @@ static vpx_codec_err_t set_encoder_config( else if (cfg->rc_end_usage == VPX_CBR) oxcf->rc_mode = RC_MODE_CBR; - oxcf->target_bandwidth = cfg->rc_target_bitrate; + // Convert target bandwidth from Kbit/s to Bit/s + oxcf->target_bandwidth = 1000 * cfg->rc_target_bitrate; oxcf->rc_max_intra_bitrate_pct = extra_cfg->rc_max_intra_bitrate_pct; - oxcf->best_allowed_q = vp9_quantizer_to_qindex(cfg->rc_min_quantizer); - oxcf->worst_allowed_q = vp9_quantizer_to_qindex(cfg->rc_max_quantizer); + oxcf->best_allowed_q = + extra_cfg->lossless ? 0 : vp9_quantizer_to_qindex(cfg->rc_min_quantizer); + oxcf->worst_allowed_q = + extra_cfg->lossless ? 0 : vp9_quantizer_to_qindex(cfg->rc_max_quantizer); oxcf->cq_level = vp9_quantizer_to_qindex(extra_cfg->cq_level); oxcf->fixed_q = -1; @@ -342,9 +345,9 @@ static vpx_codec_err_t set_encoder_config( oxcf->scaled_frame_width = cfg->rc_scaled_width; oxcf->scaled_frame_height = cfg->rc_scaled_height; - oxcf->maximum_buffer_size = cfg->rc_buf_sz; - oxcf->starting_buffer_level = cfg->rc_buf_initial_sz; - oxcf->optimal_buffer_level = cfg->rc_buf_optimal_sz; + oxcf->maximum_buffer_size_ms = cfg->rc_buf_sz; + oxcf->starting_buffer_level_ms = cfg->rc_buf_initial_sz; + oxcf->optimal_buffer_level_ms = cfg->rc_buf_optimal_sz; oxcf->drop_frames_water_mark = cfg->rc_dropframe_thresh; @@ -375,8 +378,6 @@ static vpx_codec_err_t set_encoder_config( oxcf->tile_columns = extra_cfg->tile_columns; oxcf->tile_rows = extra_cfg->tile_rows; - oxcf->lossless = extra_cfg->lossless; - oxcf->error_resilient_mode = cfg->g_error_resilient; oxcf->frame_parallel_decoding_mode = extra_cfg->frame_parallel_decoding_mode; @@ -387,7 +388,9 @@ static vpx_codec_err_t set_encoder_config( oxcf->ss_number_layers = cfg->ss_number_layers; if (oxcf->ss_number_layers > 1) { - vp9_copy(oxcf->ss_target_bitrate, cfg->ss_target_bitrate); + int i; + for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) + oxcf->ss_target_bitrate[i] = 1000 * cfg->ss_target_bitrate[i]; } else if (oxcf->ss_number_layers == 1) { oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth; } @@ -395,8 +398,11 @@ static vpx_codec_err_t set_encoder_config( oxcf->ts_number_layers = cfg->ts_number_layers; if (oxcf->ts_number_layers > 1) { - vp9_copy(oxcf->ts_target_bitrate, cfg->ts_target_bitrate); - vp9_copy(oxcf->ts_rate_decimator, cfg->ts_rate_decimator); + int i; + for (i = 0; i < VPX_TS_MAX_LAYERS; ++i) { + oxcf->ts_target_bitrate[i] = 1000 * cfg->ts_target_bitrate[i]; + oxcf->ts_rate_decimator[i] = cfg->ts_rate_decimator[i]; + } } else if (oxcf->ts_number_layers == 1) { oxcf->ts_target_bitrate[0] = (int)oxcf->target_bandwidth; oxcf->ts_rate_decimator[0] = 1; @@ -462,64 +468,154 @@ static vpx_codec_err_t encoder_set_config(vpx_codec_alg_priv_t *ctx, return res; } -static vpx_codec_err_t ctrl_get_param(vpx_codec_alg_priv_t *ctx, int ctrl_id, - va_list args) { - void *const arg = va_arg(args, void *); - -#define MAP(id, var) case id: *(RECAST(id, arg)) = var; break +static vpx_codec_err_t ctrl_get_quantizer(vpx_codec_alg_priv_t *ctx, + va_list args) { + int *const arg = va_arg(args, int *); + if (arg == NULL) + return VPX_CODEC_INVALID_PARAM; + *arg = vp9_get_quantizer(ctx->cpi); + return VPX_CODEC_OK; +} +static vpx_codec_err_t ctrl_get_quantizer64(vpx_codec_alg_priv_t *ctx, + va_list args) { + int *const arg = va_arg(args, int *); if (arg == NULL) return VPX_CODEC_INVALID_PARAM; + *arg = vp9_qindex_to_quantizer(vp9_get_quantizer(ctx->cpi)); + return VPX_CODEC_OK; +} - switch (ctrl_id) { - MAP(VP8E_GET_LAST_QUANTIZER, vp9_get_quantizer(ctx->cpi)); - MAP(VP8E_GET_LAST_QUANTIZER_64, - vp9_qindex_to_quantizer(vp9_get_quantizer(ctx->cpi))); +static vpx_codec_err_t update_extra_cfg(vpx_codec_alg_priv_t *ctx, + const struct vp9_extracfg *extra_cfg) { + const vpx_codec_err_t res = validate_config(ctx, &ctx->cfg, extra_cfg); + if (res == VPX_CODEC_OK) { + ctx->extra_cfg = *extra_cfg; + set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); + vp9_change_config(ctx->cpi, &ctx->oxcf); } + return res; +} - return VPX_CODEC_OK; -#undef MAP +static vpx_codec_err_t ctrl_set_cpuused(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.cpu_used = CAST(VP8E_SET_CPUUSED, args); + return update_extra_cfg(ctx, &extra_cfg); } +static vpx_codec_err_t ctrl_set_enable_auto_alt_ref(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.enable_auto_alt_ref = CAST(VP8E_SET_ENABLEAUTOALTREF, args); + return update_extra_cfg(ctx, &extra_cfg); +} -static vpx_codec_err_t ctrl_set_param(vpx_codec_alg_priv_t *ctx, int ctrl_id, - va_list args) { - vpx_codec_err_t res = VPX_CODEC_OK; +static vpx_codec_err_t ctrl_set_noise_sensitivity(vpx_codec_alg_priv_t *ctx, + va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.noise_sensitivity = CAST(VP8E_SET_NOISE_SENSITIVITY, args); + return update_extra_cfg(ctx, &extra_cfg); +} -#define MAP(id, var) case id: var = CAST(id, args); break; - - switch (ctrl_id) { - MAP(VP8E_SET_CPUUSED, extra_cfg.cpu_used); - MAP(VP8E_SET_ENABLEAUTOALTREF, extra_cfg.enable_auto_alt_ref); - MAP(VP8E_SET_NOISE_SENSITIVITY, extra_cfg.noise_sensitivity); - MAP(VP8E_SET_SHARPNESS, extra_cfg.sharpness); - MAP(VP8E_SET_STATIC_THRESHOLD, extra_cfg.static_thresh); - MAP(VP9E_SET_TILE_COLUMNS, extra_cfg.tile_columns); - MAP(VP9E_SET_TILE_ROWS, extra_cfg.tile_rows); - MAP(VP8E_SET_ARNR_MAXFRAMES, extra_cfg.arnr_max_frames); - MAP(VP8E_SET_ARNR_STRENGTH, extra_cfg.arnr_strength); - MAP(VP8E_SET_ARNR_TYPE, extra_cfg.arnr_type); - MAP(VP8E_SET_TUNING, extra_cfg.tuning); - MAP(VP8E_SET_CQ_LEVEL, extra_cfg.cq_level); - MAP(VP8E_SET_MAX_INTRA_BITRATE_PCT, extra_cfg.rc_max_intra_bitrate_pct); - MAP(VP9E_SET_LOSSLESS, extra_cfg.lossless); - MAP(VP9E_SET_FRAME_PARALLEL_DECODING, - extra_cfg.frame_parallel_decoding_mode); - MAP(VP9E_SET_AQ_MODE, extra_cfg.aq_mode); - MAP(VP9E_SET_FRAME_PERIODIC_BOOST, extra_cfg.frame_periodic_boost); - } +static vpx_codec_err_t ctrl_set_sharpness(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.sharpness = CAST(VP8E_SET_SHARPNESS, args); + return update_extra_cfg(ctx, &extra_cfg); +} - res = validate_config(ctx, &ctx->cfg, &extra_cfg); +static vpx_codec_err_t ctrl_set_static_thresh(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.static_thresh = CAST(VP8E_SET_STATIC_THRESHOLD, args); + return update_extra_cfg(ctx, &extra_cfg); +} - if (res == VPX_CODEC_OK) { - ctx->extra_cfg = extra_cfg; - set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); - vp9_change_config(ctx->cpi, &ctx->oxcf); - } +static vpx_codec_err_t ctrl_set_tile_columns(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.tile_columns = CAST(VP9E_SET_TILE_COLUMNS, args); + return update_extra_cfg(ctx, &extra_cfg); +} - return res; -#undef MAP +static vpx_codec_err_t ctrl_set_tile_rows(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.tile_rows = CAST(VP9E_SET_TILE_ROWS, args); + return update_extra_cfg(ctx, &extra_cfg); +} + +static vpx_codec_err_t ctrl_set_arnr_max_frames(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.arnr_max_frames = CAST(VP8E_SET_ARNR_MAXFRAMES, args); + return update_extra_cfg(ctx, &extra_cfg); +} + +static vpx_codec_err_t ctrl_set_arnr_strength(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.arnr_strength = CAST(VP8E_SET_ARNR_STRENGTH, args); + return update_extra_cfg(ctx, &extra_cfg); +} + +static vpx_codec_err_t ctrl_set_arnr_type(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.arnr_type = CAST(VP8E_SET_ARNR_TYPE, args); + return update_extra_cfg(ctx, &extra_cfg); +} + +static vpx_codec_err_t ctrl_set_tuning(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.tuning = CAST(VP8E_SET_TUNING, args); + return update_extra_cfg(ctx, &extra_cfg); +} + +static vpx_codec_err_t ctrl_set_cq_level(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.cq_level = CAST(VP8E_SET_CQ_LEVEL, args); + return update_extra_cfg(ctx, &extra_cfg); +} + +static vpx_codec_err_t ctrl_set_rc_max_intra_bitrate_pct( + vpx_codec_alg_priv_t *ctx, va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.rc_max_intra_bitrate_pct = + CAST(VP8E_SET_MAX_INTRA_BITRATE_PCT, args); + return update_extra_cfg(ctx, &extra_cfg); +} + +static vpx_codec_err_t ctrl_set_lossless(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.lossless = CAST(VP9E_SET_LOSSLESS, args); + return update_extra_cfg(ctx, &extra_cfg); +} + +static vpx_codec_err_t ctrl_set_frame_parallel_decoding_mode( + vpx_codec_alg_priv_t *ctx, va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.frame_parallel_decoding_mode = + CAST(VP9E_SET_FRAME_PARALLEL_DECODING, args); + return update_extra_cfg(ctx, &extra_cfg); +} + +static vpx_codec_err_t ctrl_set_aq_mode(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.aq_mode = CAST(VP9E_SET_AQ_MODE, args); + return update_extra_cfg(ctx, &extra_cfg); +} + +static vpx_codec_err_t ctrl_set_frame_periodic_boost(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.frame_periodic_boost = CAST(VP9E_SET_FRAME_PERIODIC_BOOST, args); + return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx, @@ -887,9 +983,8 @@ static const vpx_codec_cx_pkt_t *encoder_get_cxdata(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { + va_list args) { vpx_ref_frame_t *const frame = va_arg(args, vpx_ref_frame_t *); - (void)ctrl_id; if (frame != NULL) { YV12_BUFFER_CONFIG sd; @@ -904,9 +999,8 @@ static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { + va_list args) { vpx_ref_frame_t *const frame = va_arg(args, vpx_ref_frame_t *); - (void)ctrl_id; if (frame != NULL) { YV12_BUFFER_CONFIG sd; @@ -921,9 +1015,8 @@ static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { + va_list args) { vp9_ref_frame_t *const frame = va_arg(args, vp9_ref_frame_t *); - (void)ctrl_id; if (frame != NULL) { YV12_BUFFER_CONFIG *fb; @@ -937,11 +1030,9 @@ static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t *ctx, - int ctr_id, va_list args) { + va_list args) { #if CONFIG_VP9_POSTPROC vp8_postproc_cfg_t *config = va_arg(args, vp8_postproc_cfg_t *); - (void)ctr_id; - if (config != NULL) { ctx->preview_ppcfg = *config; return VPX_CODEC_OK; @@ -950,7 +1041,6 @@ static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t *ctx, } #else (void)ctx; - (void)ctr_id; (void)args; return VPX_CODEC_INCAPABLE; #endif @@ -977,36 +1067,32 @@ static vpx_image_t *encoder_get_preview(vpx_codec_alg_priv_t *ctx) { } static vpx_codec_err_t ctrl_update_entropy(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { + va_list args) { const int update = va_arg(args, int); - (void)ctrl_id; vp9_update_entropy(ctx->cpi, update); return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_update_reference(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { + va_list args) { const int ref_frame_flags = va_arg(args, int); - (void)ctrl_id; vp9_update_reference(ctx->cpi, ref_frame_flags); return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_use_reference(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { + va_list args) { const int reference_flag = va_arg(args, int); - (void)ctrl_id; vp9_use_as_reference(ctx->cpi, reference_flag); return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { + va_list args) { (void)ctx; - (void)ctrl_id; (void)args; // TODO(yaowu): Need to re-implement and test for VP9. @@ -1015,9 +1101,8 @@ static vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { + va_list args) { vpx_active_map_t *const map = va_arg(args, vpx_active_map_t *); - (void)ctrl_id; if (map) { if (!vp9_set_active_map(ctx->cpi, map->active_map, @@ -1031,9 +1116,8 @@ static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { + va_list args) { vpx_scaling_mode_t *const mode = va_arg(args, vpx_scaling_mode_t *); - (void)ctrl_id; if (mode) { const int res = vp9_set_internal_size(ctx->cpi, @@ -1045,11 +1129,9 @@ static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, int ctrl_id, - va_list args) { +static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, va_list args) { int data = va_arg(args, int); const vpx_codec_enc_cfg_t *cfg = &ctx->cfg; - (void)ctrl_id; vp9_set_svc(ctx->cpi, data); // CBR or two pass mode for SVC with both temporal and spatial layers @@ -1066,11 +1148,10 @@ static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, int ctrl_id, } static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { + va_list args) { vpx_svc_layer_id_t *const data = va_arg(args, vpx_svc_layer_id_t *); VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; SVC *const svc = &cpi->svc; - (void)ctrl_id; svc->spatial_layer_id = data->spatial_layer_id; svc->temporal_layer_id = data->temporal_layer_id; @@ -1087,10 +1168,9 @@ static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { + va_list args) { VP9_COMP *const cpi = ctx->cpi; vpx_svc_parameters_t *const params = va_arg(args, vpx_svc_parameters_t *); - (void)ctrl_id; if (params == NULL) return VPX_CODEC_INVALID_PARAM; @@ -1126,30 +1206,30 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { {VP8E_SET_ROI_MAP, ctrl_set_roi_map}, {VP8E_SET_ACTIVEMAP, ctrl_set_active_map}, {VP8E_SET_SCALEMODE, ctrl_set_scale_mode}, - {VP8E_SET_CPUUSED, ctrl_set_param}, - {VP8E_SET_NOISE_SENSITIVITY, ctrl_set_param}, - {VP8E_SET_ENABLEAUTOALTREF, ctrl_set_param}, - {VP8E_SET_SHARPNESS, ctrl_set_param}, - {VP8E_SET_STATIC_THRESHOLD, ctrl_set_param}, - {VP9E_SET_TILE_COLUMNS, ctrl_set_param}, - {VP9E_SET_TILE_ROWS, ctrl_set_param}, - {VP8E_SET_ARNR_MAXFRAMES, ctrl_set_param}, - {VP8E_SET_ARNR_STRENGTH, ctrl_set_param}, - {VP8E_SET_ARNR_TYPE, ctrl_set_param}, - {VP8E_SET_TUNING, ctrl_set_param}, - {VP8E_SET_CQ_LEVEL, ctrl_set_param}, - {VP8E_SET_MAX_INTRA_BITRATE_PCT, ctrl_set_param}, - {VP9E_SET_LOSSLESS, ctrl_set_param}, - {VP9E_SET_FRAME_PARALLEL_DECODING, ctrl_set_param}, - {VP9E_SET_AQ_MODE, ctrl_set_param}, - {VP9E_SET_FRAME_PERIODIC_BOOST, ctrl_set_param}, + {VP8E_SET_CPUUSED, ctrl_set_cpuused}, + {VP8E_SET_NOISE_SENSITIVITY, ctrl_set_noise_sensitivity}, + {VP8E_SET_ENABLEAUTOALTREF, ctrl_set_enable_auto_alt_ref}, + {VP8E_SET_SHARPNESS, ctrl_set_sharpness}, + {VP8E_SET_STATIC_THRESHOLD, ctrl_set_static_thresh}, + {VP9E_SET_TILE_COLUMNS, ctrl_set_tile_columns}, + {VP9E_SET_TILE_ROWS, ctrl_set_tile_rows}, + {VP8E_SET_ARNR_MAXFRAMES, ctrl_set_arnr_max_frames}, + {VP8E_SET_ARNR_STRENGTH, ctrl_set_arnr_strength}, + {VP8E_SET_ARNR_TYPE, ctrl_set_arnr_type}, + {VP8E_SET_TUNING, ctrl_set_tuning}, + {VP8E_SET_CQ_LEVEL, ctrl_set_cq_level}, + {VP8E_SET_MAX_INTRA_BITRATE_PCT, ctrl_set_rc_max_intra_bitrate_pct}, + {VP9E_SET_LOSSLESS, ctrl_set_lossless}, + {VP9E_SET_FRAME_PARALLEL_DECODING, ctrl_set_frame_parallel_decoding_mode}, + {VP9E_SET_AQ_MODE, ctrl_set_aq_mode}, + {VP9E_SET_FRAME_PERIODIC_BOOST, ctrl_set_frame_periodic_boost}, {VP9E_SET_SVC, ctrl_set_svc}, {VP9E_SET_SVC_PARAMETERS, ctrl_set_svc_parameters}, {VP9E_SET_SVC_LAYER_ID, ctrl_set_svc_layer_id}, // Getters - {VP8E_GET_LAST_QUANTIZER, ctrl_get_param}, - {VP8E_GET_LAST_QUANTIZER_64, ctrl_get_param}, + {VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer}, + {VP8E_GET_LAST_QUANTIZER_64, ctrl_get_quantizer64}, {VP9_GET_REFERENCE, ctrl_get_reference}, { -1, NULL}, @@ -1182,7 +1262,7 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = { VPX_VBR, // rc_end_usage #if VPX_ENCODER_ABI_VERSION > (1 + VPX_CODEC_ABI_VERSION) - {0}, // rc_twopass_stats_in + {NULL, 0}, // rc_twopass_stats_in #endif 256, // rc_target_bandwidth 0, // rc_min_quantizer diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index 9cf1735cb..48110b414 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -38,7 +38,6 @@ struct vpx_codec_alg_priv { vpx_decrypt_cb decrypt_cb; void *decrypt_state; vpx_image_t img; - int img_avail; int invert_tile_order; // External frame buffer info to save for VP9 common. @@ -48,10 +47,12 @@ struct vpx_codec_alg_priv { }; static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx, - vpx_codec_priv_enc_mr_cfg_t *data) { + vpx_codec_priv_enc_mr_cfg_t *data) { // This function only allocates space for the vpx_codec_alg_priv_t // structure. More memory may be required at the time the stream // information becomes known. + (void)data; + if (!ctx->priv) { vpx_codec_alg_priv_t *alg_priv = vpx_memalign(32, sizeof(*alg_priv)); if (alg_priv == NULL) @@ -243,14 +244,11 @@ static void init_decoder(vpx_codec_alg_priv_t *ctx) { static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, const uint8_t **data, unsigned int data_sz, void *user_priv, int64_t deadline) { - YV12_BUFFER_CONFIG sd = { 0 }; vp9_ppflags_t flags = {0}; VP9_COMMON *cm = NULL; (void)deadline; - ctx->img_avail = 0; - // Determine the stream parameters. Note that we rely on peek_si to // validate that we have a buffer that does not wrap around the top // of the heap. @@ -285,13 +283,6 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) set_ppflags(ctx, &flags); - if (vp9_get_raw_frame(ctx->pbi, &sd, &flags)) - return update_error_state(ctx, &cm->error); - - yuvconfig2image(&ctx->img, &sd, user_priv); - ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; - ctx->img_avail = 1; - return VPX_CODEC_OK; } @@ -420,15 +411,20 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, vpx_codec_iter_t *iter) { vpx_image_t *img = NULL; - if (ctx->img_avail) { - // iter acts as a flip flop, so an image is only returned on the first - // call to get_frame. - if (!(*iter)) { + // iter acts as a flip flop, so an image is only returned on the first + // call to get_frame. + if (*iter == NULL && ctx->pbi != NULL) { + YV12_BUFFER_CONFIG sd; + vp9_ppflags_t flags = {0, 0, 0}; + + if (vp9_get_raw_frame(ctx->pbi, &sd, &flags) == 0) { + VP9_COMMON *cm = &ctx->pbi->common; + yuvconfig2image(&ctx->img, &sd, NULL); + ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; img = &ctx->img; *iter = img; } } - ctx->img_avail = 0; return img; } @@ -452,7 +448,7 @@ static vpx_codec_err_t decoder_set_fb_fn( } static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, va_list args) { + va_list args) { vpx_ref_frame_t *const data = va_arg(args, vpx_ref_frame_t *); if (data) { @@ -468,7 +464,7 @@ static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, va_list args) { + va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); if (data) { @@ -485,7 +481,7 @@ static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, va_list args) { + va_list args) { vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *); if (data) { @@ -500,7 +496,7 @@ static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx, - int ctr_id, va_list args) { + va_list args) { #if CONFIG_VP9_POSTPROC vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); @@ -512,17 +508,21 @@ static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_INVALID_PARAM; } #else + (void)ctx; + (void)args; return VPX_CODEC_INCAPABLE; #endif } static vpx_codec_err_t ctrl_set_dbg_options(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { + va_list args) { + (void)ctx; + (void)args; return VPX_CODEC_INCAPABLE; } static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { + va_list args) { int *const update_info = va_arg(args, int *); if (update_info) { @@ -538,7 +538,7 @@ static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { + va_list args) { int *corrupted = va_arg(args, int *); if (corrupted) { @@ -553,7 +553,7 @@ static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { + va_list args) { int *const display_size = va_arg(args, int *); if (display_size) { @@ -571,13 +571,12 @@ static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t ctrl_set_invert_tile_order(vpx_codec_alg_priv_t *ctx, - int ctr_id, va_list args) { + va_list args) { ctx->invert_tile_order = va_arg(args, int); return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_set_decryptor(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { vpx_decrypt_init *init = va_arg(args, vpx_decrypt_init *); ctx->decrypt_cb = init ? init->decrypt_cb : NULL; @@ -628,11 +627,12 @@ CODEC_INTERFACE(vpx_codec_vp9_dx) = { decoder_set_fb_fn, // vpx_codec_set_fb_fn_t }, { // NOLINT - NOT_IMPLEMENTED, - NOT_IMPLEMENTED, - NOT_IMPLEMENTED, - NOT_IMPLEMENTED, - NOT_IMPLEMENTED, - NOT_IMPLEMENTED + NOT_IMPLEMENTED, // vpx_codec_enc_cfg_map_t + NOT_IMPLEMENTED, // vpx_codec_encode_fn_t + NOT_IMPLEMENTED, // vpx_codec_get_cx_data_fn_t + NOT_IMPLEMENTED, // vpx_codec_enc_config_set_fn_t + NOT_IMPLEMENTED, // vpx_codec_get_global_headers_fn_t + NOT_IMPLEMENTED, // vpx_codec_get_preview_frame_fn_t + NOT_IMPLEMENTED // vpx_codec_enc_mr_get_mem_loc_fn_t } }; diff --git a/vpx/exports_enc b/vpx/exports_enc index 155faf6f6..07f0280ec 100644 --- a/vpx/exports_enc +++ b/vpx/exports_enc @@ -8,7 +8,6 @@ text vpx_codec_get_preview_frame text vpx_codec_set_cx_data_buf text vpx_svc_dump_statistics text vpx_svc_encode -text vpx_svc_free text vpx_svc_get_buffer text vpx_svc_get_encode_frame_count text vpx_svc_get_frame_size @@ -22,4 +21,4 @@ text vpx_svc_set_quantizers text vpx_svc_set_scale_factors text vpx_svc_get_layer_resolution text vpx_svc_get_rc_stats_buffer_size -text vpx_svc_get_rc_stats_buffer
\ No newline at end of file +text vpx_svc_get_rc_stats_buffer diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h index 83c3308ee..82d2bc3c0 100644 --- a/vpx/internal/vpx_codec_internal.h +++ b/vpx/internal/vpx_codec_internal.h @@ -154,9 +154,8 @@ typedef vpx_codec_err_t (*vpx_codec_get_si_fn_t)(vpx_codec_alg_priv_t *ctx, * \retval #VPX_CODEC_OK * The internal state data was deserialized. */ -typedef vpx_codec_err_t (*vpx_codec_control_fn_t)(vpx_codec_alg_priv_t *ctx, - int ctrl_id, - va_list ap); +typedef vpx_codec_err_t (*vpx_codec_control_fn_t)(vpx_codec_alg_priv_t *ctx, + va_list ap); /*!\brief control function pointer mapping * @@ -403,35 +402,12 @@ struct vpx_codec_priv_enc_mr_cfg #undef VPX_CTRL_USE_TYPE #define VPX_CTRL_USE_TYPE(id, typ) \ static typ id##__value(va_list args) {return va_arg(args, typ);} \ - static typ id##__convert(void *x)\ - {\ - union\ - {\ - void *x;\ - typ d;\ - } u;\ - u.x = x;\ - return u.d;\ - } - #undef VPX_CTRL_USE_TYPE_DEPRECATED #define VPX_CTRL_USE_TYPE_DEPRECATED(id, typ) \ static typ id##__value(va_list args) {return va_arg(args, typ);} \ - static typ id##__convert(void *x)\ - {\ - union\ - {\ - void *x;\ - typ d;\ - } u;\ - u.x = x;\ - return u.d;\ - } #define CAST(id, arg) id##__value(arg) -#define RECAST(id, x) id##__convert(x) - /* CODEC_INTERFACE convenience macro * diff --git a/vpx/src/vpx_codec.c b/vpx/src/vpx_codec.c index 1f664ae49..6fb8f522d 100644 --- a/vpx/src/vpx_codec.c +++ b/vpx/src/vpx_codec.c @@ -125,7 +125,7 @@ vpx_codec_err_t vpx_codec_control_(vpx_codec_ctx_t *ctx, va_list ap; va_start(ap, ctrl_id); - res = entry->fn(ctx->priv->alg_priv, ctrl_id, ap); + res = entry->fn(ctx->priv->alg_priv, ap); va_end(ap); break; } |