diff options
46 files changed, 1065 insertions, 543 deletions
diff --git a/test/android/scrape_gtest_log.py b/test/android/scrape_gtest_log.py index c159c8938..487845c27 100644 --- a/test/android/scrape_gtest_log.py +++ b/test/android/scrape_gtest_log.py @@ -13,16 +13,45 @@ waterfall to gather json results mixed in with gtest logs. This is dubious software engineering. """ +import getopt import json +import os import re import sys def main(): + if len(sys.argv) != 3: + print "Expects a file to write json to!" + exit(1) + + try: + opts, _ = \ + getopt.getopt(sys.argv[1:], \ + 'o:', ['output-json=']) + except getopt.GetOptError: + print 'scrape_gtest_log.py -o <output_json>' + sys.exit(2) + + output_json = '' + for opt, arg in opts: + if opt in ('-o', '--output-json'): + output_json = os.path.join(arg) + blob = sys.stdin.read() json_string = '[' + ','.join('{' + x + '}' for x in re.findall(r'{([^}]*.?)}', blob)) + ']' - print json.dumps(json.loads(json_string), indent=4, sort_keys=True) + print blob + + output = json.dumps(json.loads(json_string), indent=4, sort_keys=True) + print output + + path = os.path.dirname(output_json) + if path and not os.path.exists(path): + os.makedirs(path) + + outfile = open(output_json, 'w') + outfile.write(output) if __name__ == '__main__': sys.exit(main()) diff --git a/test/test-data.sha1 b/test/test-data.sha1 index b8f668a78..cf2ad1eba 100644 --- a/test/test-data.sha1 +++ b/test/test-data.sha1 @@ -591,3 +591,47 @@ c21e97e4ba486520118d78b01a5cb6e6dc33e190 vp90-2-12-droppable_3.ivf 601abc9e4176c70f82ac0381365e9b151fdd24cd vp90-2-12-droppable_3.ivf.md5 61c640dad23cd4f7ad811b867e7b7e3521f4e3ba vp90-2-13-largescaling.webm bca1b02eebdb088fa3f389fe0e7571e75a71f523 vp90-2-13-largescaling.webm.md5 +c740708fa390806eebaf669909c1285ab464f886 vp90-2-14-resize-fp-tiles-1-2.webm +c7b85ffd8e11500f73f52e7dc5a47f57c393d47f vp90-2-14-resize-fp-tiles-1-2.webm.md5 +ec8faa352a08f7033c60f29f80d505e2d7daa103 vp90-2-14-resize-fp-tiles-1-4.webm +6852c783fb421bda5ded3d4c5a3ffc46de03fbc1 vp90-2-14-resize-fp-tiles-1-4.webm.md5 +8af61853ac0d07c4cb5bf7c2016661ba350b3497 vp90-2-14-resize-fp-tiles-1-8.webm +571353bac89fea60b5706073409aa3c0d42aefe9 vp90-2-14-resize-fp-tiles-1-8.webm.md5 +b1c187ed69931496b82ec194017a79831bafceef vp90-2-14-resize-fp-tiles-1-16.webm +1c199a41afe42ce303944d70089eaaa2263b4a09 vp90-2-14-resize-fp-tiles-1-16.webm.md5 +8eaae5a6f2dff934610b0c7a917d7f583ba74aa5 vp90-2-14-resize-fp-tiles-2-1.webm +db18fcf915f7ffaea6c39feab8bda6c1688af011 vp90-2-14-resize-fp-tiles-2-1.webm.md5 +bc3046d138941e2a20e9ceec0ff6d25c25d12af3 vp90-2-14-resize-fp-tiles-4-1.webm +393211b808030d09a79927b17a4374b2f68a60ae vp90-2-14-resize-fp-tiles-4-1.webm.md5 +6e8f8e31721a0f7f68a2964e36e0e698c2e276b1 vp90-2-14-resize-fp-tiles-8-1.webm +491fd3cd78fb0577bfe905bb64bbf64bd7d29140 vp90-2-14-resize-fp-tiles-8-1.webm.md5 +cc5958da2a7edf739cd2cfeb18bd05e77903087e vp90-2-14-resize-fp-tiles-16-1.webm +0b58daf55aaf9063bf5b4fb33393d18b417dc428 vp90-2-14-resize-fp-tiles-16-1.webm.md5 +821eeecc9d8c6a316134dd42d1ff057787d8047b vp90-2-14-resize-fp-tiles-2-4.webm +374c549f2839a3d0b732c4e3650700144037e76c vp90-2-14-resize-fp-tiles-2-4.webm.md5 +dff8c8e49aacea9f4c7f22cb882da984e2a1b405 vp90-2-14-resize-fp-tiles-2-8.webm +e5b8820a7c823b21297d6e889e57ec401882c210 vp90-2-14-resize-fp-tiles-2-8.webm.md5 +77629e4b23e32896aadf6e994c78bd4ffa1c7797 vp90-2-14-resize-fp-tiles-2-16.webm +1937f5df032664ac345d4613ad4417b4967b1230 vp90-2-14-resize-fp-tiles-2-16.webm.md5 +380ba5702bb1ec7947697314ab0300b5c56a1665 vp90-2-14-resize-fp-tiles-4-2.webm +fde7b30d2aa64c1e851a4852f655d79fc542cf66 vp90-2-14-resize-fp-tiles-4-2.webm.md5 +dc784b258ffa2abc2ae693d11792acf0bb9cb74f vp90-2-14-resize-fp-tiles-8-2.webm +edf26f0130aeee8342d49c2c8f0793ad008782d9 vp90-2-14-resize-fp-tiles-8-2.webm.md5 +8e575789fd63ebf69e8eff1b9a4351a249a73bee vp90-2-14-resize-fp-tiles-16-2.webm +b6415318c1c589a1f64b9d569ce3cabbec2e0d52 vp90-2-14-resize-fp-tiles-16-2.webm.md5 +e3adc944a11c4c5517e63664c84ebb0847b64d81 vp90-2-14-resize-fp-tiles-4-8.webm +03cba0532bc90a05b1990db830bf5701e24e7982 vp90-2-14-resize-fp-tiles-4-8.webm.md5 +3b27a991eb6d78dce38efab35b7db682e8cbbee3 vp90-2-14-resize-fp-tiles-4-16.webm +5d16b7f82bf59f802724ddfd97abb487150b1c9d vp90-2-14-resize-fp-tiles-4-16.webm.md5 +d5fed8c28c1d4c7e232ebbd25cf758757313ed96 vp90-2-14-resize-fp-tiles-8-4.webm +5a8ff8a52cbbde7bfab569beb6d971c5f8b904f7 vp90-2-14-resize-fp-tiles-8-4.webm.md5 +17a5faa023d77ee9dad423a4e0d3145796bbc500 vp90-2-14-resize-fp-tiles-16-4.webm +2ef8daa3c3e750fd745130d0a76a39fe86f0448f vp90-2-14-resize-fp-tiles-16-4.webm.md5 +9361e031f5cc990d8740863e310abb5167ae351e vp90-2-14-resize-fp-tiles-8-16.webm +57f13a2197486584f4e1a4f82ad969f3abc5a1a2 vp90-2-14-resize-fp-tiles-8-16.webm.md5 +5803fc6fcbfb47b7661f3fcc6499158a32b56675 vp90-2-14-resize-fp-tiles-16-8.webm +be0fe64a1a4933696ff92d93f9bdecdbd886dc13 vp90-2-14-resize-fp-tiles-16-8.webm.md5 +0ac0f6d20a0afed77f742a3b9acb59fd7b9cb093 vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm +1765315acccfe6cd12230e731369fcb15325ebfa vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm.md5 +4a2b7a683576fe8e330c7d1c4f098ff4e70a43a8 vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm +1ef480392112b3509cb190afbb96f9a38dd9fbac vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm.md5 diff --git a/test/test.mk b/test/test.mk index 4d96bc69d..92664e225 100644 --- a/test/test.mk +++ b/test/test.mk @@ -698,6 +698,50 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-13-largescaling.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-13-largescaling.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm.md5 ifeq ($(CONFIG_DECODE_PERF_TESTS),yes) # BBB VP9 streams diff --git a/test/test_vectors.cc b/test/test_vectors.cc index 8c789ffe7..ff3c389e5 100644 --- a/test/test_vectors.cc +++ b/test/test_vectors.cc @@ -164,7 +164,20 @@ const char *const kVP9TestVectors[] = { "vp90-2-11-size-351x287.webm", "vp90-2-11-size-351x288.webm", "vp90-2-11-size-352x287.webm", "vp90-2-12-droppable_1.ivf", "vp90-2-12-droppable_2.ivf", "vp90-2-12-droppable_3.ivf", - "vp90-2-13-largescaling.webm", "vp91-2-04-yv444.webm" + "vp90-2-13-largescaling.webm", "vp91-2-04-yv444.webm", + "vp90-2-14-resize-fp-tiles-1-16.webm", + "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm", + "vp90-2-14-resize-fp-tiles-1-2.webm", "vp90-2-14-resize-fp-tiles-1-4.webm", + "vp90-2-14-resize-fp-tiles-16-1.webm", "vp90-2-14-resize-fp-tiles-16-2.webm", + "vp90-2-14-resize-fp-tiles-16-4.webm", + "vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm", + "vp90-2-14-resize-fp-tiles-16-8.webm", "vp90-2-14-resize-fp-tiles-1-8.webm", + "vp90-2-14-resize-fp-tiles-2-16.webm", "vp90-2-14-resize-fp-tiles-2-1.webm", + "vp90-2-14-resize-fp-tiles-2-4.webm", "vp90-2-14-resize-fp-tiles-2-8.webm", + "vp90-2-14-resize-fp-tiles-4-16.webm", "vp90-2-14-resize-fp-tiles-4-1.webm", + "vp90-2-14-resize-fp-tiles-4-2.webm", "vp90-2-14-resize-fp-tiles-4-8.webm", + "vp90-2-14-resize-fp-tiles-8-16.webm", "vp90-2-14-resize-fp-tiles-8-1.webm", + "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm" }; const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors); #endif // CONFIG_VP9_DECODER diff --git a/test/vp9_thread_test.cc b/test/vp9_thread_test.cc index a78cdea6b..5523f2024 100644 --- a/test/vp9_thread_test.cc +++ b/test/vp9_thread_test.cc @@ -153,6 +153,66 @@ TEST(VP9DecodeMTTest, MTDecode2) { } } +// Test tile quantity changes within one file. +TEST(VP9DecodeMTTest, MTDecode3) { + static const struct { + const char *name; + const char *expected_md5; + } files[] = { + { "vp90-2-14-resize-fp-tiles-1-16.webm", + "0cd5e632c326297e975f38949c31ea94" }, + { "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm", + "5c78a96a42e7f4a4f6b2edcdb791e44c" }, + { "vp90-2-14-resize-fp-tiles-1-2.webm", + "e030450ae85c3277be2a418769df98e2" }, + { "vp90-2-14-resize-fp-tiles-1-4.webm", + "312eed4e2b64eb7a4e7f18916606a430" }, + { "vp90-2-14-resize-fp-tiles-16-1.webm", + "1755c16d8af16a9cb3fe7338d90abe52" }, + { "vp90-2-14-resize-fp-tiles-16-2.webm", + "500300592d3fcb6f12fab25e48aaf4df" }, + { "vp90-2-14-resize-fp-tiles-16-4.webm", + "47c48379fa6331215d91c67648e1af6e" }, + { "vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm", + "eecf17290739bc708506fa4827665989" }, + { "vp90-2-14-resize-fp-tiles-16-8.webm", + "29b6bb54e4c26b5ca85d5de5fed94e76" }, + { "vp90-2-14-resize-fp-tiles-1-8.webm", + "1b6f175e08cd82cf84bb800ac6d1caa3" }, + { "vp90-2-14-resize-fp-tiles-2-16.webm", + "ca3b03e4197995d8d5444ede7a6c0804" }, + { "vp90-2-14-resize-fp-tiles-2-1.webm", + "99aec065369d70bbb78ccdff65afed3f" }, + { "vp90-2-14-resize-fp-tiles-2-4.webm", + "22d0ebdb49b87d2920a85aea32e1afd5" }, + { "vp90-2-14-resize-fp-tiles-2-8.webm", + "c2115cf051c62e0f7db1d4a783831541" }, + { "vp90-2-14-resize-fp-tiles-4-16.webm", + "c690d7e1719b31367564cac0af0939cb" }, + { "vp90-2-14-resize-fp-tiles-4-1.webm", + "a926020b2cc3e15ad4cc271853a0ff26" }, + { "vp90-2-14-resize-fp-tiles-4-2.webm", + "42699063d9e581f1993d0cf890c2be78" }, + { "vp90-2-14-resize-fp-tiles-4-8.webm", + "7f76d96036382f45121e3d5aa6f8ec52" }, + { "vp90-2-14-resize-fp-tiles-8-16.webm", + "76a43fcdd7e658542913ea43216ec55d" }, + { "vp90-2-14-resize-fp-tiles-8-1.webm", + "8e3fbe89486ca60a59299dea9da91378" }, + { "vp90-2-14-resize-fp-tiles-8-2.webm", + "ae96f21f21b6370cc0125621b441fc52" }, + { "vp90-2-14-resize-fp-tiles-8-4.webm", + "3eb4f24f10640d42218f7fd7b9fd30d4" }, + }; + + for (int i = 0; i < static_cast<int>(sizeof(files) / sizeof(files[0])); ++i) { + for (int t = 2; t <= 8; ++t) { + EXPECT_STREQ(files[i].expected_md5, DecodeFile(files[i].name, t).c_str()) + << "threads = " << t; + } + } +} + INSTANTIATE_TEST_CASE_P(Synchronous, VP9WorkerThreadTest, ::testing::Bool()); } // namespace diff --git a/third_party/libwebm/README.webm b/third_party/libwebm/README.webm index b13c8cbc6..2c7570d6d 100644 --- a/third_party/libwebm/README.webm +++ b/third_party/libwebm/README.webm @@ -1,5 +1,5 @@ URL: https://chromium.googlesource.com/webm/libwebm -Version: 630a0e3c338e1b32bddf513a2dad807908d2976a +Version: a7118d8ec564e9db841da1eb01f547f3229f240a License: BSD License File: LICENSE.txt diff --git a/third_party/libwebm/mkvmuxerutil.cpp b/third_party/libwebm/mkvmuxerutil.cpp index 96350e9c5..18060e902 100644 --- a/third_party/libwebm/mkvmuxerutil.cpp +++ b/third_party/libwebm/mkvmuxerutil.cpp @@ -292,11 +292,11 @@ bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value) { if (WriteID(writer, type)) return false; - const int32 length = strlen(value); + const uint64 length = strlen(value); if (WriteUInt(writer, length)) return false; - if (writer->Write(value, length)) + if (writer->Write(value, static_cast<const uint32>(length))) return false; return true; diff --git a/third_party/libwebm/mkvreader.cpp b/third_party/libwebm/mkvreader.cpp index cb3567f1a..b4b24594c 100644 --- a/third_party/libwebm/mkvreader.cpp +++ b/third_party/libwebm/mkvreader.cpp @@ -14,13 +14,20 @@ namespace mkvparser { MkvReader::MkvReader() : - m_file(NULL) -{ + m_file(NULL), + reader_owns_file_(true) { } -MkvReader::~MkvReader() -{ +MkvReader::MkvReader(FILE* fp) : + m_file(fp), + reader_owns_file_(false) { + GetFileSize(); +} + +MkvReader::~MkvReader() { + if (reader_owns_file_) Close(); + m_file = NULL; } int MkvReader::Open(const char* fileName) @@ -42,12 +49,17 @@ int MkvReader::Open(const char* fileName) if (m_file == NULL) return -1; #endif + return !GetFileSize(); +} +bool MkvReader::GetFileSize() { + if (m_file == NULL) + return false; #ifdef _MSC_VER int status = _fseeki64(m_file, 0L, SEEK_END); if (status) - return -1; //error + return false; //error m_length = _ftelli64(m_file); #else @@ -56,16 +68,19 @@ int MkvReader::Open(const char* fileName) #endif assert(m_length >= 0); + if (m_length < 0) + return false; + #ifdef _MSC_VER status = _fseeki64(m_file, 0L, SEEK_SET); if (status) - return -1; //error + return false; //error #else fseek(m_file, 0L, SEEK_SET); #endif - return 0; + return true; } void MkvReader::Close() diff --git a/third_party/libwebm/mkvreader.hpp b/third_party/libwebm/mkvreader.hpp index adcc29f47..8ebdd99a7 100644 --- a/third_party/libwebm/mkvreader.hpp +++ b/third_party/libwebm/mkvreader.hpp @@ -21,6 +21,7 @@ class MkvReader : public IMkvReader MkvReader& operator=(const MkvReader&); public: MkvReader(); + MkvReader(FILE* fp); virtual ~MkvReader(); int Open(const char*); @@ -29,8 +30,15 @@ public: virtual int Read(long long position, long length, unsigned char* buffer); virtual int Length(long long* total, long long* available); private: + + // Determines the size of the file. This is called either by the constructor + // or by the Open function depending on file ownership. Returns true on + // success. + bool GetFileSize(); + long long m_length; FILE* m_file; + bool reader_owns_file_; }; } //end namespace mkvparser diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h index 779dce017..068284faa 100644 --- a/vp9/common/vp9_enums.h +++ b/vp9/common/vp9_enums.h @@ -25,6 +25,23 @@ extern "C" { #define MI_MASK (MI_BLOCK_SIZE - 1) +// Bitstream profiles indicated by 2 bits in the uncompressed header. +// 00: Profile 0. 4:2:0 only. +// 10: Profile 1. adds 4:4:4, 4:2:2, alpha. +// 01: Profile 2. Supports 10-bit and 12-bit color only. +// 11: Undefined profile. +typedef enum BITSTREAM_PROFILE { + PROFILE_0, + PROFILE_1, + PROFILE_2, + MAX_PROFILES +} BITSTREAM_PROFILE; + +typedef enum BIT_DEPTH { + BITS_8, + BITS_10, + BITS_12 +} BIT_DEPTH; typedef enum BLOCK_SIZE { BLOCK_4X4, diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c index 61682c42d..1aab36205 100644 --- a/vp9/common/vp9_mvref_common.c +++ b/vp9/common/vp9_mvref_common.c @@ -195,7 +195,7 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, int block, int mi_row, int mi_col) { const int *ref_sign_bias = cm->ref_frame_sign_bias; int i, refmv_count = 0; - const MODE_INFO *prev_mi = cm->coding_use_prev_mi && cm->prev_mi + const MODE_INFO *prev_mi = cm->prev_mi ? cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col] : NULL; const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL; diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index ea1b8856e..fe9cc9e6a 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -179,7 +179,10 @@ typedef struct VP9Common { FRAME_COUNTS counts; unsigned int current_video_frame; - int version; + BITSTREAM_PROFILE profile; + + // BITS_8 in versions 0 and 1, BITS_10 or BITS_12 in version 2 + BIT_DEPTH bit_depth; #if CONFIG_VP9_POSTPROC struct postproc_state postproc_state; @@ -281,15 +284,15 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, xd->left_available = (mi_col > tile->mi_col_start); } -static INLINE void set_prev_mi(VP9_COMMON *cm) { - const int use_prev_in_find_mv_refs = cm->width == cm->last_width && - cm->height == cm->last_height && - !cm->intra_only && - cm->last_show_frame; +static INLINE MODE_INFO *get_prev_mi(VP9_COMMON *cm) { + const int use_prev_mi = cm->coding_use_prev_mi && + cm->width == cm->last_width && + cm->height == cm->last_height && + !cm->intra_only && + cm->last_show_frame; // Special case: set prev_mi to NULL when the previous mode info // context cannot be used. - cm->prev_mi = use_prev_in_find_mv_refs ? - cm->prev_mip + cm->mi_stride + 1 : NULL; + return use_prev_mi ? &cm->prev_mip[cm->mi_stride + 1] : NULL; } static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) { diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index b874ef3ba..b45559245 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -380,6 +380,10 @@ specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc"; add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc"; +add_proto qw/void vp9_get_sse_sum_16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; +specialize qw/vp9_get_sse_sum_16x16 sse2/; +$vp9_get_sse_sum_16x16_sse2=vp9_get16x16var_sse2; + add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc"; diff --git a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c index 7e9cc840a..b84db970e 100644 --- a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c +++ b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c @@ -33,10 +33,11 @@ DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = { }; #if defined(__clang__) -# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 3) +# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 3) || \ + (defined(__APPLE__) && __clang_major__ == 5 && __clang_minor__ == 0) # define MM256_BROADCASTSI128_SI256(x) \ _mm_broadcastsi128_si256((__m128i const *)&(x)) -# else // clang > 3.3 +# else // clang > 3.3, and not 5.0 on macosx. # define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) # endif // clang <= 3.3 #elif defined(__GNUC__) diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 5a2e6f881..9b63961f0 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -671,7 +671,7 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm, setup_display_size(cm, rb); } -static void decode_tile(VP9D_COMP *pbi, const TileInfo *const tile, +static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile, vp9_reader *r) { const int num_threads = pbi->oxcf.max_threads; VP9_COMMON *const cm = &pbi->common; @@ -776,7 +776,7 @@ typedef struct TileBuffer { int col; // only used with multi-threaded decoding } TileBuffer; -static const uint8_t *decode_tiles(VP9D_COMP *pbi, +static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end) { VP9_COMMON *const cm = &pbi->common; @@ -865,7 +865,7 @@ static int compare_tile_buffers(const void *a, const void *b) { } } -static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, +static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end) { VP9_COMMON *const cm = &pbi->common; @@ -882,12 +882,16 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, assert(tile_rows == 1); (void)tile_rows; - if (num_workers > pbi->num_tile_workers) { + // TODO(jzern): See if we can remove the restriction of passing in max + // threads to the decoder. + if (pbi->num_tile_workers == 0) { + const int num_threads = pbi->oxcf.max_threads & ~1; int i; + // TODO(jzern): Allocate one less worker, as in the current code we only + // use num_threads - 1 workers. CHECK_MEM_ERROR(cm, pbi->tile_workers, - vpx_realloc(pbi->tile_workers, - num_workers * sizeof(*pbi->tile_workers))); - for (i = pbi->num_tile_workers; i < num_workers; ++i) { + vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); + for (i = 0; i < num_threads; ++i) { VP9Worker *const worker = &pbi->tile_workers[i]; ++pbi->num_tile_workers; @@ -895,7 +899,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, CHECK_MEM_ERROR(cm, worker->data1, vpx_memalign(32, sizeof(TileWorkerData))); CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo))); - if (i < num_workers - 1 && !vp9_worker_reset(worker)) { + if (i < num_threads - 1 && !vp9_worker_reset(worker)) { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Tile decoder thread creation failed"); } @@ -903,7 +907,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, } // Reset tile decoding hook - for (n = 0; n < pbi->num_tile_workers; ++n) { + for (n = 0; n < num_workers; ++n) { pbi->tile_workers[n].hook = (VP9WorkerHook)tile_worker_hook; } @@ -1005,12 +1009,13 @@ static void error_handler(void *data) { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); } -#define RESERVED \ - if (vp9_rb_read_bit(rb)) \ - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, \ - "Reserved bit must be unset") +static BITSTREAM_PROFILE read_profile(struct vp9_read_bit_buffer *rb) { + int profile = vp9_rb_read_bit(rb); + profile |= vp9_rb_read_bit(rb) << 1; + return (BITSTREAM_PROFILE) profile; +} -static size_t read_uncompressed_header(VP9D_COMP *pbi, +static size_t read_uncompressed_header(VP9Decoder *pbi, struct vp9_read_bit_buffer *rb) { VP9_COMMON *const cm = &pbi->common; size_t sz; @@ -1022,8 +1027,10 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid frame marker"); - cm->version = vp9_rb_read_bit(rb); - RESERVED; + cm->profile = read_profile(rb); + if (cm->profile >= MAX_PROFILES) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Unsupported bitstream profile"); cm->show_existing_frame = vp9_rb_read_bit(rb); if (cm->show_existing_frame) { @@ -1048,11 +1055,12 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, if (cm->frame_type == KEY_FRAME) { check_sync_code(cm, rb); - + if (cm->profile > PROFILE_1) + cm->bit_depth = vp9_rb_read_bit(rb) ? BITS_12 : BITS_10; cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3); if (cm->color_space != SRGB) { vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range - if (cm->version == 1) { + if (cm->profile >= PROFILE_1) { cm->subsampling_x = vp9_rb_read_bit(rb); cm->subsampling_y = vp9_rb_read_bit(rb); vp9_rb_read_bit(rb); // has extra plane @@ -1060,7 +1068,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, cm->subsampling_y = cm->subsampling_x = 1; } } else { - if (cm->version == 1) { + if (cm->profile >= PROFILE_1) { cm->subsampling_y = cm->subsampling_x = 0; vp9_rb_read_bit(rb); // has extra plane } else { @@ -1147,7 +1155,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, return sz; } -static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data, +static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data, size_t partition_size) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; @@ -1247,7 +1255,7 @@ static void debug_check_frame_counts(const VP9_COMMON *const cm) { } #endif // NDEBUG -int vp9_decode_frame(VP9D_COMP *pbi, +int vp9_decode_frame(VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end, const uint8_t **p_data_end) { VP9_COMMON *const cm = &pbi->common; @@ -1288,11 +1296,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, } init_macroblockd(cm, &pbi->mb); - - if (cm->coding_use_prev_mi) - set_prev_mi(cm); - else - cm->prev_mi = NULL; + cm->prev_mi = get_prev_mi(cm); setup_plane_dequants(cm, xd, cm->base_qindex); vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y); diff --git a/vp9/decoder/vp9_decodeframe.h b/vp9/decoder/vp9_decodeframe.h index e474db495..8a19dafc5 100644 --- a/vp9/decoder/vp9_decodeframe.h +++ b/vp9/decoder/vp9_decodeframe.h @@ -17,11 +17,11 @@ extern "C" { #endif struct VP9Common; -struct VP9Decompressor; +struct VP9Decoder; void vp9_init_dequantizer(struct VP9Common *cm); -int vp9_decode_frame(struct VP9Decompressor *pbi, +int vp9_decode_frame(struct VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end, const uint8_t **p_data_end); diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index fb3666cbe..fd74478e9 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -110,8 +110,8 @@ void vp9_initialize_dec() { } } -VP9D_COMP *vp9_decoder_create(const VP9D_CONFIG *oxcf) { - VP9D_COMP *const pbi = vpx_memalign(32, sizeof(*pbi)); +VP9Decoder *vp9_decoder_create(const VP9D_CONFIG *oxcf) { + VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi)); VP9_COMMON *const cm = pbi ? &pbi->common : NULL; if (!cm) @@ -152,7 +152,7 @@ VP9D_COMP *vp9_decoder_create(const VP9D_CONFIG *oxcf) { return pbi; } -void vp9_decoder_remove(VP9D_COMP *pbi) { +void vp9_decoder_remove(VP9Decoder *pbi) { VP9_COMMON *const cm = &pbi->common; int i; @@ -182,7 +182,7 @@ static int equal_dimensions(const YV12_BUFFER_CONFIG *a, a->uv_height == b->uv_height && a->uv_width == b->uv_width; } -vpx_codec_err_t vp9_copy_reference_dec(VP9D_COMP *pbi, +vpx_codec_err_t vp9_copy_reference_dec(VP9Decoder *pbi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { VP9_COMMON *cm = &pbi->common; @@ -252,7 +252,7 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, } -int vp9_get_reference_dec(VP9D_COMP *pbi, int index, YV12_BUFFER_CONFIG **fb) { +int vp9_get_reference_dec(VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb) { VP9_COMMON *cm = &pbi->common; if (index < 0 || index >= REF_FRAMES) @@ -263,7 +263,7 @@ int vp9_get_reference_dec(VP9D_COMP *pbi, int index, YV12_BUFFER_CONFIG **fb) { } /* If any buffer updating is signaled it should be done here. */ -static void swap_frame_buffers(VP9D_COMP *pbi) { +static void swap_frame_buffers(VP9Decoder *pbi) { int ref_index = 0, mask; VP9_COMMON *const cm = &pbi->common; @@ -287,7 +287,7 @@ static void swap_frame_buffers(VP9D_COMP *pbi) { cm->frame_refs[ref_index].idx = INT_MAX; } -int vp9_receive_compressed_data(VP9D_COMP *pbi, +int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size, const uint8_t **psource, int64_t time_stamp) { VP9_COMMON *const cm = &pbi->common; @@ -403,7 +403,7 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi, return retcode; } -int vp9_get_raw_frame(VP9D_COMP *pbi, YV12_BUFFER_CONFIG *sd, +int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp9_ppflags_t *flags) { int ret = -1; diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h index 2e8bebdae..c9dc25191 100644 --- a/vp9/decoder/vp9_decoder.h +++ b/vp9/decoder/vp9_decoder.h @@ -35,7 +35,7 @@ typedef struct { int inv_tile_order; } VP9D_CONFIG; -typedef struct VP9Decompressor { +typedef struct VP9Decoder { DECLARE_ALIGNED(16, MACROBLOCKD, mb); DECLARE_ALIGNED(16, VP9_COMMON, common); @@ -59,20 +59,20 @@ typedef struct VP9Decompressor { int num_tile_workers; VP9LfSync lf_row_sync; -} VP9D_COMP; +} VP9Decoder; void vp9_initialize_dec(); -int vp9_receive_compressed_data(struct VP9Decompressor *pbi, +int vp9_receive_compressed_data(struct VP9Decoder *pbi, size_t size, const uint8_t **dest, int64_t time_stamp); -int vp9_get_raw_frame(struct VP9Decompressor *pbi, +int vp9_get_raw_frame(struct VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp9_ppflags_t *flags); -vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decompressor *pbi, +vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decoder *pbi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); @@ -80,13 +80,13 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); -int vp9_get_reference_dec(struct VP9Decompressor *pbi, +int vp9_get_reference_dec(struct VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb); -struct VP9Decompressor *vp9_decoder_create(const VP9D_CONFIG *oxcf); +struct VP9Decoder *vp9_decoder_create(const VP9D_CONFIG *oxcf); -void vp9_decoder_remove(struct VP9Decompressor *pbi); +void vp9_decoder_remove(struct VP9Decoder *pbi); #ifdef __cplusplus } // extern "C" diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c index 163936021..9b124c9d9 100644 --- a/vp9/decoder/vp9_dthread.c +++ b/vp9/decoder/vp9_dthread.c @@ -132,13 +132,15 @@ static int loop_filter_row_worker(void *arg1, void *arg2) { // VP9 decoder: Implement multi-threaded loopfilter that uses the tile // threads. -void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, +void vp9_loop_filter_frame_mt(VP9Decoder *pbi, VP9_COMMON *cm, MACROBLOCKD *xd, int frame_filter_level, int y_only, int partial_frame) { // Number of superblock rows and cols const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; + const int tile_cols = 1 << cm->log2_tile_cols; + const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols); int i; // Allocate memory used in thread synchronization. @@ -168,7 +170,16 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows); // Set up loopfilter thread data. - for (i = 0; i < pbi->num_tile_workers; ++i) { + // The decoder is using num_workers instead of pbi->num_tile_workers + // because it has been observed that using more threads on the + // loopfilter, than there are tile columns in the frame will hurt + // performance on Android. This is because the system will only + // schedule the tile decode workers on cores equal to the number + // of tile columns. Then if the decoder tries to use more threads for the + // loopfilter, it will hurt performance because of contention. If the + // multithreading code changes in the future then the number of workers + // used by the loopfilter should be revisited. + for (i = 0; i < num_workers; ++i) { VP9Worker *const worker = &pbi->tile_workers[i]; TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; LFWorkerData *const lf_data = &tile_data->lfdata; @@ -184,10 +195,10 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, lf_data->y_only = y_only; // always do all planes in decoder lf_data->lf_sync = &pbi->lf_row_sync; - lf_data->num_lf_workers = pbi->num_tile_workers; + lf_data->num_lf_workers = num_workers; // Start loopfiltering - if (i == pbi->num_tile_workers - 1) { + if (i == num_workers - 1) { vp9_worker_execute(worker); } else { vp9_worker_launch(worker); @@ -195,7 +206,7 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, } // Wait till all rows are finished - for (i = 0; i < pbi->num_tile_workers; ++i) { + for (i = 0; i < num_workers; ++i) { vp9_worker_sync(&pbi->tile_workers[i]); } } diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h index 2f65e1e30..005bd7bbd 100644 --- a/vp9/decoder/vp9_dthread.h +++ b/vp9/decoder/vp9_dthread.h @@ -18,7 +18,7 @@ struct macroblockd; struct VP9Common; -struct VP9Decompressor; +struct VP9Decoder; typedef struct TileWorkerData { struct VP9Common *cm; @@ -50,7 +50,7 @@ void vp9_loop_filter_alloc(struct VP9Common *cm, struct VP9LfSyncData *lf_sync, void vp9_loop_filter_dealloc(struct VP9LfSyncData *lf_sync, int rows); // Multi-threaded loopfilter that uses the tile threads. -void vp9_loop_filter_frame_mt(struct VP9Decompressor *pbi, +void vp9_loop_filter_frame_mt(struct VP9Decoder *pbi, struct VP9Common *cm, struct macroblockd *xd, int frame_filter_level, diff --git a/vp9/encoder/vp9_aq_complexity.c b/vp9/encoder/vp9_aq_complexity.c index 83892e872..47ad8d8cc 100644 --- a/vp9/encoder/vp9_aq_complexity.c +++ b/vp9/encoder/vp9_aq_complexity.c @@ -48,8 +48,7 @@ void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) { // Use some of the segments for in frame Q adjustment. for (segment = 1; segment < 2; segment++) { const int qindex_delta = - vp9_compute_qdelta_by_rate(cpi, - cm->base_qindex, + vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex, in_frame_q_adj_ratio[segment]); vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c index 2e1b4ef5f..787909142 100644 --- a/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -241,7 +241,7 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); // Set the q delta for segment 1. - qindex_delta = vp9_compute_qdelta_by_rate(cpi, + qindex_delta = vp9_compute_qdelta_by_rate(rc, cm->frame_type, cm->base_qindex, rate_ratio_qdelta); // TODO(marpan): Incorporate the actual-vs-target rate over/undershoot from diff --git a/vp9/encoder/vp9_aq_variance.c b/vp9/encoder/vp9_aq_variance.c index c25eb95c7..ae2a163b1 100644 --- a/vp9/encoder/vp9_aq_variance.c +++ b/vp9/encoder/vp9_aq_variance.c @@ -99,7 +99,7 @@ void vp9_vaq_frame_setup(VP9_COMP *cpi) { continue; } - qindex_delta = vp9_compute_qdelta(cpi, base_q, base_q * Q_RATIO(i)); + qindex_delta = vp9_compute_qdelta(&cpi->rc, base_q, base_q * Q_RATIO(i)); vp9_set_segdata(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q, qindex_delta); vp9_enable_segfeature(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q); diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 4313418d4..8d2afb991 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -1031,19 +1031,22 @@ static void write_sync_code(struct vp9_write_bit_buffer *wb) { vp9_wb_write_literal(wb, VP9_SYNC_CODE_2, 8); } +static void write_profile(BITSTREAM_PROFILE profile, + struct vp9_write_bit_buffer *wb) { + assert(profile < MAX_PROFILES); + vp9_wb_write_bit(wb, profile & 1); + vp9_wb_write_bit(wb, profile >> 1); +} + static void write_uncompressed_header(VP9_COMP *cpi, struct vp9_write_bit_buffer *wb) { VP9_COMMON *const cm = &cpi->common; vp9_wb_write_literal(wb, VP9_FRAME_MARKER, 2); - // bitstream version. - // 00 - profile 0. 4:2:0 only - // 10 - profile 1. adds 4:4:4, 4:2:2, alpha - vp9_wb_write_bit(wb, cm->version); - vp9_wb_write_bit(wb, 0); + write_profile(cm->profile, wb); - vp9_wb_write_bit(wb, 0); + vp9_wb_write_bit(wb, 0); // show_existing_frame vp9_wb_write_bit(wb, cm->frame_type); vp9_wb_write_bit(wb, cm->show_frame); vp9_wb_write_bit(wb, cm->error_resilient_mode); @@ -1051,16 +1054,20 @@ static void write_uncompressed_header(VP9_COMP *cpi, if (cm->frame_type == KEY_FRAME) { const COLOR_SPACE cs = UNKNOWN; write_sync_code(wb); + if (cm->profile > PROFILE_1) { + assert(cm->bit_depth > BITS_8); + vp9_wb_write_bit(wb, cm->bit_depth - BITS_10); + } vp9_wb_write_literal(wb, cs, 3); if (cs != SRGB) { vp9_wb_write_bit(wb, 0); // 0: [16, 235] (i.e. xvYCC), 1: [0, 255] - if (cm->version == 1) { + if (cm->profile >= PROFILE_1) { vp9_wb_write_bit(wb, cm->subsampling_x); vp9_wb_write_bit(wb, cm->subsampling_y); vp9_wb_write_bit(wb, 0); // has extra plane } } else { - assert(cm->version == 1); + assert(cm->profile == PROFILE_1); vp9_wb_write_bit(wb, 0); // has extra plane } diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 1ca3c2881..61a5022ec 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1048,9 +1048,9 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, (mi_col <= 1) || (mi_col >= (cm->mi_cols - 2)); if (!is_edge && (complexity > 128)) x->rdmult += ((x->rdmult * (complexity - 128)) / 256); - } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + } else if (aq_mode == CYCLIC_REFRESH_AQ) { const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map - : cm->last_frame_seg_map; + : cm->last_frame_seg_map; // If segment 1, use rdmult for that segment. if (vp9_get_segment_id(cm, map, bsize, mi_row, mi_col)) x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); @@ -1076,8 +1076,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, vp9_clear_system_state(); *totalrate = (int)round(*totalrate * rdmult_ratio); } - } else if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) || - (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)) { + } else if (aq_mode == COMPLEXITY_AQ || aq_mode == CYCLIC_REFRESH_AQ) { x->rdmult = orig_rdmult; } } @@ -1365,6 +1364,69 @@ static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile, } } +static void constrain_copy_partitioning(VP9_COMP *const cpi, + const TileInfo *const tile, + MODE_INFO **mi_8x8, + MODE_INFO **prev_mi_8x8, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + VP9_COMMON *const cm = &cpi->common; + const int mis = cm->mi_stride; + const int row8x8_remaining = tile->mi_row_end - mi_row; + const int col8x8_remaining = tile->mi_col_end - mi_col; + MODE_INFO *const mi_upper_left = cm->mi + mi_row * mis + mi_col; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int bw = num_8x8_blocks_wide_lookup[bsize]; + int block_row, block_col; + + assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); + + // If the SB64 if it is all "in image". + if ((col8x8_remaining >= MI_BLOCK_SIZE) && + (row8x8_remaining >= MI_BLOCK_SIZE)) { + for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) { + for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) { + const int index = block_row * mis + block_col; + MODE_INFO *prev_mi = prev_mi_8x8[index]; + const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0; + // Use previous partition if block size is not larger than bsize. + if (prev_mi && sb_type <= bsize) { + int block_row2, block_col2; + for (block_row2 = 0; block_row2 < bh; ++block_row2) { + for (block_col2 = 0; block_col2 < bw; ++block_col2) { + const int index2 = (block_row + block_row2) * mis + + block_col + block_col2; + prev_mi = prev_mi_8x8[index2]; + if (prev_mi) { + const ptrdiff_t offset = prev_mi - cm->prev_mi; + mi_8x8[index2] = cm->mi + offset; + mi_8x8[index2]->mbmi.sb_type = prev_mi->mbmi.sb_type; + } + } + } + } else { + // Otherwise, use fixed partition of size bsize. + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = bsize; + } + } + } + } else { + // Else this is a partial SB64, copy previous partition. + for (block_row = 0; block_row < 8; ++block_row) { + for (block_col = 0; block_col < 8; ++block_col) { + MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col]; + const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0; + if (prev_mi) { + const ptrdiff_t offset = prev_mi - cm->prev_mi; + mi_8x8[block_row * mis + block_col] = cm->mi + offset; + mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type; + } + } + } + } +} + static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8, MODE_INFO **prev_mi_8x8) { const int mis = cm->mi_stride; @@ -1384,6 +1446,125 @@ static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8, } } +const struct { + int row; + int col; +} coord_lookup[16] = { + // 32x32 index = 0 + {0, 0}, {0, 2}, {2, 0}, {2, 2}, + // 32x32 index = 1 + {0, 4}, {0, 6}, {2, 4}, {2, 6}, + // 32x32 index = 2 + {4, 0}, {4, 2}, {6, 0}, {6, 2}, + // 32x32 index = 3 + {4, 4}, {4, 6}, {6, 4}, {6, 6}, +}; + +static void set_source_var_based_partition(VP9_COMP *cpi, + const TileInfo *const tile, + MODE_INFO **mi_8x8, + int mi_row, int mi_col) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *x = &cpi->mb; + const int mis = cm->mi_stride; + int row8x8_remaining = tile->mi_row_end - mi_row; + int col8x8_remaining = tile->mi_col_end - mi_col; + int r, c; + MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col; + + assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); + + // In-image SB64 + if ((col8x8_remaining >= MI_BLOCK_SIZE) && + (row8x8_remaining >= MI_BLOCK_SIZE)) { + const int src_stride = x->plane[0].src.stride; + const int pre_stride = cpi->Last_Source->y_stride; + const uint8_t *src = x->plane[0].src.buf; + const int pre_offset = (mi_row * MI_SIZE) * pre_stride + + (mi_col * MI_SIZE); + const uint8_t *pre_src = cpi->Last_Source->y_buffer + pre_offset; + const int thr_32x32 = cpi->sf.source_var_thresh; + const int thr_64x64 = thr_32x32 << 1; + int i, j; + int index; + diff d32[4]; + int use16x16 = 0; + + for (i = 0; i < 4; i++) { + diff d16[4]; + + for (j = 0; j < 4; j++) { + int b_mi_row = coord_lookup[i * 4 + j].row; + int b_mi_col = coord_lookup[i * 4 + j].col; + int b_offset = b_mi_row * MI_SIZE * src_stride + + b_mi_col * MI_SIZE; + + vp9_get_sse_sum_16x16(src + b_offset, + src_stride, + pre_src + b_offset, + pre_stride, &d16[j].sse, &d16[j].sum); + + d16[j].var = d16[j].sse - + (((uint32_t)d16[j].sum * d16[j].sum) >> 8); + + index = b_mi_row * mis + b_mi_col; + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = BLOCK_16X16; + + // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition + // size to further improve quality. + } + + if (d16[0].var < thr_32x32 && d16[1].var < thr_32x32 && + d16[2].var < thr_32x32 && d16[3].var < thr_32x32) { + d32[i].sse = d16[0].sse; + d32[i].sum = d16[0].sum; + + for (j = 1; j < 4; j++) { + d32[i].sse += d16[j].sse; + d32[i].sum += d16[j].sum; + } + + d32[i].var = d32[i].sse - (((int64_t)d32[i].sum * d32[i].sum) >> 10); + + index = coord_lookup[i*4].row * mis + coord_lookup[i*4].col; + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = BLOCK_32X32; + + if (!((cm->current_video_frame - 1) % + cpi->sf.search_type_check_frequency)) + cpi->use_large_partition_rate += 1; + } else { + use16x16 = 1; + } + } + + if (!use16x16) { + if (d32[0].var < thr_64x64 && d32[1].var < thr_64x64 && + d32[2].var < thr_64x64 && d32[3].var < thr_64x64) { + mi_8x8[0] = mi_upper_left; + mi_8x8[0]->mbmi.sb_type = BLOCK_64X64; + } + } + } else { // partial in-image SB64 + BLOCK_SIZE bsize = BLOCK_16X16; + int bh = num_8x8_blocks_high_lookup[bsize]; + int bw = num_8x8_blocks_wide_lookup[bsize]; + + for (r = 0; r < MI_BLOCK_SIZE; r += bh) { + for (c = 0; c < MI_BLOCK_SIZE; c += bw) { + int index = r * mis + c; + // Find a partition size that fits + bsize = find_partition_size(bsize, + (row8x8_remaining - r), + (col8x8_remaining - c), &bh, &bw); + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = bsize; + } + } + } +} + static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) { const int mis = cm->mi_stride; int block_row, block_col; @@ -2297,6 +2478,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, TOKENEXTRA **tp) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; + SPEED_FEATURES *const sf = &cpi->sf; int mi_col; // Initialize the left context for the new SB row @@ -2312,7 +2494,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, BLOCK_SIZE i; MACROBLOCK *x = &cpi->mb; - if (cpi->sf.adaptive_pred_interp_filter) { + if (sf->adaptive_pred_interp_filter) { for (i = BLOCK_4X4; i < BLOCK_8X8; ++i) { const int num_4x4_w = num_4x4_blocks_wide_lookup[i]; const int num_4x4_h = num_4x4_blocks_high_lookup[i]; @@ -2326,64 +2508,69 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, vp9_zero(cpi->mb.pred_mv); - if ((cpi->sf.partition_search_type == SEARCH_PARTITION && - cpi->sf.use_lastframe_partitioning) || - cpi->sf.partition_search_type == FIXED_PARTITION || - cpi->sf.partition_search_type == VAR_BASED_PARTITION || - cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) { + if ((sf->partition_search_type == SEARCH_PARTITION && + sf->use_lastframe_partitioning) || + sf->partition_search_type == FIXED_PARTITION || + sf->partition_search_type == VAR_BASED_PARTITION || + sf->partition_search_type == VAR_BASED_FIXED_PARTITION) { const int idx_str = cm->mi_stride * mi_row + mi_col; MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; cpi->mb.source_variance = UINT_MAX; - if (cpi->sf.partition_search_type == FIXED_PARTITION) { + if (sf->partition_search_type == FIXED_PARTITION) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, - cpi->sf.always_this_block_size); + sf->always_this_block_size); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); - } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) { + } else if (sf->partition_search_type == VAR_BASED_FIXED_PARTITION) { BLOCK_SIZE bsize; set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col); set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); - } else if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) { + } else if (sf->partition_search_type == VAR_BASED_PARTITION) { choose_partitioning(cpi, tile, mi_row, mi_col); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); } else { if ((cm->current_video_frame - % cpi->sf.last_partitioning_redo_frequency) == 0 + % sf->last_partitioning_redo_frequency) == 0 || cm->prev_mi == 0 || cm->show_frame == 0 || cm->frame_type == KEY_FRAME || cpi->rc.is_src_frame_alt_ref - || ((cpi->sf.use_lastframe_partitioning == + || ((sf->use_lastframe_partitioning == LAST_FRAME_PARTITION_LOW_MOTION) && sb_has_motion(cm, prev_mi_8x8))) { // If required set upper and lower partition size limits - if (cpi->sf.auto_min_max_partition_size) { + if (sf->auto_min_max_partition_size) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); rd_auto_partition_range(cpi, tile, mi_row, mi_col, - &cpi->sf.min_partition_size, - &cpi->sf.max_partition_size); + &sf->min_partition_size, + &sf->max_partition_size); } rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, INT64_MAX); } else { - copy_partitioning(cm, mi_8x8, prev_mi_8x8); + if (sf->constrain_copy_partition && + sb_has_motion(cm, prev_mi_8x8)) + constrain_copy_partitioning(cpi, tile, mi_8x8, prev_mi_8x8, + mi_row, mi_col, BLOCK_16X16); + else + copy_partitioning(cm, mi_8x8, prev_mi_8x8); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); } } } else { // If required set upper and lower partition size limits - if (cpi->sf.auto_min_max_partition_size) { + if (sf->auto_min_max_partition_size) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); rd_auto_partition_range(cpi, tile, mi_row, mi_col, - &cpi->sf.min_partition_size, - &cpi->sf.max_partition_size); + &sf->min_partition_size, + &sf->max_partition_size); } rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, INT64_MAX); @@ -2448,19 +2635,6 @@ static int check_dual_ref_flags(VP9_COMP *cpi) { } } -static int get_skip_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs) { - int x, y; - - for (y = 0; y < ymbs; y++) { - for (x = 0; x < xmbs; x++) { - if (!mi_8x8[y * mis + x]->mbmi.skip) - return 0; - } - } - - return 1; -} - static void reset_skip_txfm_size(VP9_COMMON *cm, TX_SIZE txfm_max) { int mi_row, mi_col; const int mis = cm->mi_stride; @@ -2911,12 +3085,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - if (bsize >= BLOCK_8X8) { - subsize = mi_8x8[0]->mbmi.sb_type; - } else { - subsize = BLOCK_4X4; - } - + subsize = (bsize >= BLOCK_8X8) ? mi_8x8[0]->mbmi.sb_type : BLOCK_4X4; partition = partition_lookup[bsl][subsize]; switch (partition) { @@ -3019,10 +3188,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, const int idx_str = cm->mi_stride * mi_row + mi_col; MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; - - BLOCK_SIZE bsize = cpi->sf.partition_search_type == FIXED_PARTITION ? - cpi->sf.always_this_block_size : - get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col); + BLOCK_SIZE bsize; cpi->mb.source_variance = UINT_MAX; vp9_zero(cpi->mb.pred_mv); @@ -3034,8 +3200,17 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rate, &dummy_dist); break; + case SOURCE_VAR_BASED_PARTITION: + set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); + set_source_var_based_partition(cpi, tile, mi_8x8, mi_row, mi_col); + nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, + 1, &dummy_rate, &dummy_dist); + break; case VAR_BASED_FIXED_PARTITION: case FIXED_PARTITION: + bsize = cpi->sf.partition_search_type == FIXED_PARTITION ? + cpi->sf.always_this_block_size : + get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col); set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rate, &dummy_dist); @@ -3058,53 +3233,42 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, // end RTC play code static void encode_frame_internal(VP9_COMP *cpi) { - int mi_row; + SPEED_FEATURES *const sf = &cpi->sf; MACROBLOCK *const x = &cpi->mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; -// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n", -// cpi->common.current_video_frame, cpi->common.show_frame, -// cm->frame_type); - xd->mi = cm->mi_grid_visible; xd->mi[0] = cm->mi; vp9_zero(cm->counts); vp9_zero(cpi->coef_counts); vp9_zero(cpi->tx_stepdown_count); + vp9_zero(cpi->rd_comp_pred_diff); + vp9_zero(cpi->rd_filter_diff); + vp9_zero(cpi->rd_tx_select_diff); + vp9_zero(cpi->rd_tx_select_threshes); - // Set frame level transform size use case cm->tx_mode = select_tx_mode(cpi); - cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 - && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; + cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && + cm->y_dc_delta_q == 0 && + cm->uv_dc_delta_q == 0 && + cm->uv_ac_delta_q == 0; switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless); vp9_frame_init_quantizer(cpi); vp9_initialize_rd_consts(cpi); vp9_initialize_me_consts(cpi, cm->base_qindex); - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { - // Initialize encode frame context. - init_encode_frame_mb_context(cpi); - - // Build a frame level activity map - build_activity_map(cpi); - } - - // Re-initialize encode frame context. init_encode_frame_mb_context(cpi); - vp9_zero(cpi->rd_comp_pred_diff); - vp9_zero(cpi->rd_filter_diff); - vp9_zero(cpi->rd_tx_select_diff); - vp9_zero(cpi->rd_tx_select_threshes); + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) + build_activity_map(cpi); - set_prev_mi(cm); + cm->prev_mi = get_prev_mi(cm); - if (cpi->sf.use_nonrd_pick_mode) { + if (sf->use_nonrd_pick_mode) { // Initialize internal buffer pointers for rtc coding, where non-RD // mode decision is used and hence no buffer pointer swap needed. int i; @@ -3119,6 +3283,29 @@ static void encode_frame_internal(VP9_COMP *cpi) { p[i].eobs = ctx->eobs_pbuf[i][0]; } vp9_zero(x->zcoeff_blk); + + if (cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION && + cm->current_video_frame > 0) { + int check_freq = cpi->sf.search_type_check_frequency; + + if ((cm->current_video_frame - 1) % check_freq == 0) { + cpi->use_large_partition_rate = 0; + } + + if ((cm->current_video_frame - 1) % check_freq == 1) { + const int mbs_in_b32x32 = 1 << ((b_width_log2_lookup[BLOCK_32X32] - + b_width_log2_lookup[BLOCK_16X16]) + + (b_height_log2_lookup[BLOCK_32X32] - + b_height_log2_lookup[BLOCK_16X16])); + cpi->use_large_partition_rate = cpi->use_large_partition_rate * 100 * + mbs_in_b32x32 / cm->MBs; + } + + if ((cm->current_video_frame - 1) % check_freq >= 1) { + if (cpi->use_large_partition_rate < 15) + cpi->sf.partition_search_type = FIXED_PARTITION; + } + } } { @@ -3136,12 +3323,13 @@ static void encode_frame_internal(VP9_COMP *cpi) { for (tile_col = 0; tile_col < tile_cols; tile_col++) { TileInfo tile; TOKENEXTRA *tp_old = tp; + int mi_row; // For each row of SBs in the frame vp9_tile_init(&tile, cm, tile_row, tile_col); for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end; mi_row += MI_BLOCK_SIZE) { - if (cpi->sf.use_nonrd_pick_mode && cm->frame_type != KEY_FRAME) + if (sf->use_nonrd_pick_mode && cm->frame_type != KEY_FRAME) encode_nonrd_sb_row(cpi, &tile, mi_row, &tp); else encode_rd_sb_row(cpi, &tile, mi_row, &tp); @@ -3156,18 +3344,18 @@ static void encode_frame_internal(VP9_COMP *cpi) { cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); } - if (cpi->sf.skip_encode_sb) { + if (sf->skip_encode_sb) { int j; unsigned int intra_count = 0, inter_count = 0; for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) { intra_count += cm->counts.intra_inter[j][0]; inter_count += cm->counts.intra_inter[j][1]; } - cpi->sf.skip_encode_frame = (intra_count << 2) < inter_count && - cm->frame_type != KEY_FRAME && - cm->show_frame; + sf->skip_encode_frame = (intra_count << 2) < inter_count && + cm->frame_type != KEY_FRAME && + cm->show_frame; } else { - cpi->sf.skip_encode_frame = 0; + sf->skip_encode_frame = 0; } #if 0 @@ -3201,33 +3389,31 @@ void vp9_encode_frame(VP9_COMP *cpi) { if (cpi->sf.frame_parameter_update) { int i; - REFERENCE_MODE reference_mode; - /* - * This code does a single RD pass over the whole frame assuming - * either compound, single or hybrid prediction as per whatever has - * worked best for that type of frame in the past. - * It also predicts whether another coding mode would have worked - * better that this coding mode. If that is the case, it remembers - * that for subsequent frames. - * It does the same analysis for transform size selection also. - */ + + // This code does a single RD pass over the whole frame assuming + // either compound, single or hybrid prediction as per whatever has + // worked best for that type of frame in the past. + // It also predicts whether another coding mode would have worked + // better that this coding mode. If that is the case, it remembers + // that for subsequent frames. + // It does the same analysis for transform size selection also. const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi); const int64_t *mode_thresh = cpi->rd_prediction_type_threshes[frame_type]; const int64_t *filter_thresh = cpi->rd_filter_threshes[frame_type]; /* prediction (compound, single or hybrid) mode selection */ - if (frame_type == 3 || !cm->allow_comp_inter_inter) - reference_mode = SINGLE_REFERENCE; + if (frame_type == ALTREF_FRAME || !cm->allow_comp_inter_inter) + cm->reference_mode = SINGLE_REFERENCE; else if (mode_thresh[COMPOUND_REFERENCE] > mode_thresh[SINGLE_REFERENCE] && mode_thresh[COMPOUND_REFERENCE] > mode_thresh[REFERENCE_MODE_SELECT] && check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100) - reference_mode = COMPOUND_REFERENCE; + cm->reference_mode = COMPOUND_REFERENCE; else if (mode_thresh[SINGLE_REFERENCE] > mode_thresh[REFERENCE_MODE_SELECT]) - reference_mode = SINGLE_REFERENCE; + cm->reference_mode = SINGLE_REFERENCE; else - reference_mode = REFERENCE_MODE_SELECT; + cm->reference_mode = REFERENCE_MODE_SELECT; if (cm->interp_filter == SWITCHABLE) { if (frame_type != ALTREF_FRAME && @@ -3243,9 +3429,6 @@ void vp9_encode_frame(VP9_COMP *cpi) { } } - cpi->mb.e_mbd.lossless = cpi->oxcf.lossless; - cm->reference_mode = reference_mode; - encode_frame_internal(cpi); for (i = 0; i < REFERENCE_MODES; ++i) { @@ -3324,10 +3507,8 @@ void vp9_encode_frame(VP9_COMP *cpi) { } } } else { - cpi->mb.e_mbd.lossless = cpi->oxcf.lossless; cm->reference_mode = SINGLE_REFERENCE; - // Force the usage of the BILINEAR interp_filter. - cm->interp_filter = BILINEAR; + cm->interp_filter = SWITCHABLE; encode_frame_internal(cpi); } } diff --git a/vp9/encoder/vp9_encodeframe.h b/vp9/encoder/vp9_encodeframe.h index 72343cdf2..131e93201 100644 --- a/vp9/encoder/vp9_encodeframe.h +++ b/vp9/encoder/vp9_encodeframe.h @@ -20,6 +20,12 @@ struct macroblock; struct yv12_buffer_config; struct VP9_COMP; +typedef struct { + unsigned int sse; + int sum; + unsigned int var; +} diff; + void vp9_setup_src_planes(struct macroblock *x, const struct yv12_buffer_config *src, int mi_row, int mi_col); diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index dc329fd1e..db32ef8c9 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -258,13 +258,14 @@ static void avg_stats(FIRSTPASS_STATS *section) { static double calculate_modified_err(const VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame) { const struct twopass_rc *twopass = &cpi->twopass; + const SVC *const svc = &cpi->svc; const FIRSTPASS_STATS *stats; double av_err; double modified_error; - if (cpi->svc.number_spatial_layers > 1 && - cpi->svc.number_temporal_layers == 1) { - twopass = &cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass; + if (svc->number_spatial_layers > 1 && + svc->number_temporal_layers == 1) { + twopass = &svc->layer_context[svc->spatial_layer_id].twopass; } stats = &twopass->total_stats; @@ -335,15 +336,13 @@ static double simple_weight(const YV12_BUFFER_CONFIG *buf) { } // This function returns the maximum target rate per frame. -static int frame_max_bits(const VP9_COMP *cpi) { - int64_t max_bits = - ((int64_t)cpi->rc.av_per_frame_bandwidth * - (int64_t)cpi->oxcf.two_pass_vbrmax_section) / 100; - +static int frame_max_bits(const RATE_CONTROL *rc, const VP9_CONFIG *oxcf) { + int64_t max_bits = ((int64_t)rc->av_per_frame_bandwidth * + (int64_t)oxcf->two_pass_vbrmax_section) / 100; if (max_bits < 0) max_bits = 0; - else if (max_bits > cpi->rc.max_frame_bandwidth) - max_bits = cpi->rc.max_frame_bandwidth; + else if (max_bits > rc->max_frame_bandwidth) + max_bits = rc->max_frame_bandwidth; return (int)max_bits; } @@ -916,9 +915,8 @@ int vp9_twopass_worst_quality(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, if (section_target_bandwitdh <= 0) return rc->worst_quality; // Highest value allowed - target_norm_bits_per_mb = section_target_bandwitdh < (1 << 20) - ? (512 * section_target_bandwitdh) / num_mbs - : 512 * (section_target_bandwitdh / num_mbs); + target_norm_bits_per_mb = + ((uint64_t)section_target_bandwitdh << BPER_MB_NORMBITS) / num_mbs; // Try and pick a max Q that will be high enough to encode the // content at the given rate. @@ -1059,8 +1057,8 @@ static double get_prediction_decay_rate(const VP9_COMMON *cm, // Function to test for a condition where a complex transition is followed // by a static section. For example in slide shows where there is a fade // between slides. This is to help with more optimal kf and gf positioning. -static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval, - int still_interval, +static int detect_transition_to_still(struct twopass_rc *twopass, + int frame_interval, int still_interval, double loop_decay_rate, double last_decay_rate) { int trans_to_still = 0; @@ -1072,19 +1070,19 @@ static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval, loop_decay_rate >= 0.999 && last_decay_rate < 0.9) { int j; - const FIRSTPASS_STATS *position = cpi->twopass.stats_in; + const FIRSTPASS_STATS *position = twopass->stats_in; FIRSTPASS_STATS tmp_next_frame; // Look ahead a few frames to see if static condition persists... for (j = 0; j < still_interval; ++j) { - if (EOF == input_stats(&cpi->twopass, &tmp_next_frame)) + if (EOF == input_stats(twopass, &tmp_next_frame)) break; if (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion < 0.999) break; } - reset_fpf_position(&cpi->twopass, position); + reset_fpf_position(twopass, position); // Only if it does do we signal a transition to still. if (j == still_interval) @@ -1406,9 +1404,11 @@ void define_fixed_arf_period(VP9_COMP *cpi) { // Analyse and define a gf/arf group. static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { + RATE_CONTROL *const rc = &cpi->rc; + VP9_CONFIG *const oxcf = &cpi->oxcf; + struct twopass_rc *const twopass = &cpi->twopass; FIRSTPASS_STATS next_frame = { 0 }; const FIRSTPASS_STATS *start_pos; - struct twopass_rc *const twopass = &cpi->twopass; int i; double boost_score = 0.0; double old_boost_score = 0.0; @@ -1427,16 +1427,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { double mv_in_out_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0; double mv_ratio_accumulator_thresh; - const int max_bits = frame_max_bits(cpi); // Max bits for a single frame. - - unsigned int allow_alt_ref = cpi->oxcf.play_alternate && - cpi->oxcf.lag_in_frames; + // Max bits for a single frame. + const int max_bits = frame_max_bits(rc, oxcf); + unsigned int allow_alt_ref = oxcf->play_alternate && oxcf->lag_in_frames; int f_boost = 0; int b_boost = 0; int flash_detected; int active_max_gf_interval; - RATE_CONTROL *const rc = &cpi->rc; twopass->gf_group_bits = 0; @@ -1508,7 +1506,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Break clause to detect very still sections after motion. For example, // a static image after a fade or other transition. - if (detect_transition_to_still(cpi, i, 5, loop_decay_rate, + if (detect_transition_to_still(twopass, i, 5, loop_decay_rate, last_loop_decay_rate)) { allow_alt_ref = 0; break; @@ -1792,36 +1790,36 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Allocate bits to a normal frame that is neither a gf an arf or a key frame. static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { + struct twopass_rc *twopass = &cpi->twopass; + // For a single frame. + const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf); + // Calculate modified prediction error used in bit allocation. + const double modified_err = calculate_modified_err(cpi, this_frame); int target_frame_size; - double modified_err; double err_fraction; - const int max_bits = frame_max_bits(cpi); // Max for a single frame. - - // Calculate modified prediction error used in bit allocation. - modified_err = calculate_modified_err(cpi, this_frame); - if (cpi->twopass.gf_group_error_left > 0) + if (twopass->gf_group_error_left > 0) // What portion of the remaining GF group error is used by this frame. - err_fraction = modified_err / cpi->twopass.gf_group_error_left; + err_fraction = modified_err / twopass->gf_group_error_left; else err_fraction = 0.0; // How many of those bits available for allocation should we give it? - target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); + target_frame_size = (int)((double)twopass->gf_group_bits * err_fraction); // Clip target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at // the top end. target_frame_size = clamp(target_frame_size, 0, - MIN(max_bits, (int)cpi->twopass.gf_group_bits)); + MIN(max_bits, (int)twopass->gf_group_bits)); // Adjust error and bits remaining. - cpi->twopass.gf_group_error_left -= (int64_t)modified_err; + twopass->gf_group_error_left -= (int64_t)modified_err; // Per frame bit target for this frame. vp9_rc_set_frame_target(cpi, target_frame_size); } -static int test_candidate_kf(VP9_COMP *cpi, +static int test_candidate_kf(struct twopass_rc *twopass, const FIRSTPASS_STATS *last_frame, const FIRSTPASS_STATS *this_frame, const FIRSTPASS_STATS *next_frame) { @@ -1842,7 +1840,7 @@ static int test_candidate_kf(VP9_COMP *cpi, ((next_frame->intra_error / DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) > 3.5))))) { int i; - const FIRSTPASS_STATS *start_pos = cpi->twopass.stats_in; + const FIRSTPASS_STATS *start_pos = twopass->stats_in; FIRSTPASS_STATS local_next_frame = *next_frame; double boost_score = 0.0; double old_boost_score = 0.0; @@ -1879,7 +1877,7 @@ static int test_candidate_kf(VP9_COMP *cpi, old_boost_score = boost_score; // Get the next frame details - if (EOF == input_stats(&cpi->twopass, &local_next_frame)) + if (EOF == input_stats(twopass, &local_next_frame)) break; } @@ -1889,7 +1887,7 @@ static int test_candidate_kf(VP9_COMP *cpi, is_viable_kf = 1; } else { // Reset the file position - reset_fpf_position(&cpi->twopass, start_pos); + reset_fpf_position(twopass, start_pos); is_viable_kf = 0; } @@ -1902,16 +1900,13 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int i, j; RATE_CONTROL *const rc = &cpi->rc; struct twopass_rc *const twopass = &cpi->twopass; - FIRSTPASS_STATS last_frame; const FIRSTPASS_STATS first_frame = *this_frame; - FIRSTPASS_STATS next_frame; const FIRSTPASS_STATS *start_position = twopass->stats_in; - + FIRSTPASS_STATS next_frame; + FIRSTPASS_STATS last_frame; double decay_accumulator = 1.0; double zero_motion_accumulator = 1.0; - double boost_score = 0; - double loop_decay_rate; - + double boost_score = 0.0; double kf_mod_err = 0.0; double kf_group_err = 0.0; double recent_loop_decay[8] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; @@ -1949,8 +1944,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Provided that we are not at the end of the file... if (cpi->oxcf.auto_key && lookup_next_frame_stats(twopass, &next_frame) != EOF) { + double loop_decay_rate; + // Check for a scene cut. - if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame)) + if (test_candidate_kf(twopass, &last_frame, this_frame, &next_frame)) break; // How fast is the prediction quality decaying? @@ -1966,7 +1963,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Special check for transition or high motion followed by a // static scene. - if (detect_transition_to_still(cpi, i, cpi->key_frame_frequency - i, + if (detect_transition_to_still(twopass, i, cpi->key_frame_frequency - i, loop_decay_rate, decay_accumulator)) break; @@ -2019,7 +2016,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Calculate the number of bits that should be assigned to the kf group. if (twopass->bits_left > 0 && twopass->modified_error_left > 0.0) { // Maximum number of bits for a single normal frame (not key frame). - const int max_bits = frame_max_bits(cpi); + const int max_bits = frame_max_bits(rc, &cpi->oxcf); // Maximum number of bits allocated to the key frame group. int64_t max_grp_bits; @@ -2071,10 +2068,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // How fast is prediction quality decaying. if (!detect_flash(twopass, 0)) { - loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame); + const double loop_decay_rate = get_prediction_decay_rate(&cpi->common, + &next_frame); decay_accumulator *= loop_decay_rate; - decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR - ? MIN_DECAY_FACTOR : decay_accumulator; + decay_accumulator = MAX(decay_accumulator, MIN_DECAY_FACTOR); } boost_score += (decay_accumulator * r); @@ -2105,7 +2102,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (1) { int kf_boost = (int)boost_score; int allocation_chunks; - int alt_kf_bits; if (kf_boost < (rc->frames_to_key * 3)) kf_boost = (rc->frames_to_key * 3); @@ -2139,14 +2135,12 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Prevent overflow. if (kf_boost > 1028) { - int divisor = kf_boost >> 10; + const int divisor = kf_boost >> 10; kf_boost /= divisor; allocation_chunks /= divisor; } - twopass->kf_group_bits = (twopass->kf_group_bits < 0) ? 0 - : twopass->kf_group_bits; - + twopass->kf_group_bits = MAX(0, twopass->kf_group_bits); // Calculate the number of bits to be spent on the key frame. twopass->kf_bits = (int)((double)kf_boost * ((double)twopass->kf_group_bits / allocation_chunks)); @@ -2156,11 +2150,11 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // then use an alternate calculation based on the kf error score // which should give a smaller key frame. if (kf_mod_err < kf_group_err / rc->frames_to_key) { - double alt_kf_grp_bits = ((double)twopass->bits_left * + double alt_kf_grp_bits = ((double)twopass->bits_left * (kf_mod_err * (double)rc->frames_to_key) / DOUBLE_DIVIDE_CHECK(twopass->modified_error_left)); - alt_kf_bits = (int)((double)kf_boost * + const int alt_kf_bits = (int)((double)kf_boost * (alt_kf_grp_bits / (double)allocation_chunks)); if (twopass->kf_bits > alt_kf_bits) @@ -2169,12 +2163,11 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Else if it is much harder than other frames in the group make sure // it at least receives an allocation in keeping with its relative // error score. - alt_kf_bits = (int)((double)twopass->bits_left * (kf_mod_err / + const int alt_kf_bits = (int)((double)twopass->bits_left * (kf_mod_err / DOUBLE_DIVIDE_CHECK(twopass->modified_error_left))); - if (alt_kf_bits > twopass->kf_bits) { + if (alt_kf_bits > twopass->kf_bits) twopass->kf_bits = alt_kf_bits; - } } twopass->kf_group_bits -= twopass->kf_bits; // Per frame bit target for this frame. diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index ede802a3b..f7a02a4a7 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -981,66 +981,49 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, const vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], const MV *center_mv) { - int i, j, step; - const MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *in_what; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *best_address; - - int bestsad = INT_MAX; - int best_site = 0; - int last_site = 0; - - int ref_row, ref_col; - + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; // search_param determines the length of the initial step and hence the number // of iterations // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = // (MAX_FIRST_STEP/4) pel... etc. const search_site *const ss = &x->ss[search_param * x->searches_per_step]; const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; - const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; + const uint8_t *best_address; + int best_sad = INT_MAX; + int best_site = 0; + int last_site = 0; + int i, j, step; clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); - ref_row = ref_mv->row; - ref_col = ref_mv->col; + best_address = get_buf_from_mv(in_what, ref_mv); *num00 = 0; - best_mv->row = ref_row; - best_mv->col = ref_col; - - // Work out the start point for the search - in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; - best_address = in_what; + *best_mv = *ref_mv; // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + best_sad = fn_ptr->sdf(what->buf, what->stride, + in_what->buf, in_what->stride, 0x7fffffff) + + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); i = 1; for (step = 0; step < tot_steps; step++) { for (j = 0; j < x->searches_per_step; j++) { - const MV this_mv = {best_mv->row + ss[i].mv.row, - best_mv->col + ss[i].mv.col}; - if (is_mv_in(x, &this_mv)) { - const uint8_t *const check_here = ss[i].offset + best_address; - int thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); - - if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; + const MV mv = {best_mv->row + ss[i].mv.row, + best_mv->col + ss[i].mv.col}; + if (is_mv_in(x, &mv)) { + int sad = fn_ptr->sdf(what->buf, what->stride, + best_address + ss[i].offset, in_what->stride, + best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, + sad_per_bit); + if (sad < best_sad) { + best_sad = sad; best_site = i; } } @@ -1059,14 +1042,14 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, const MV this_mv = {best_mv->row + ss[best_site].mv.row, best_mv->col + ss[best_site].mv.col}; if (is_mv_in(x, &this_mv)) { - const uint8_t *const check_here = ss[best_site].offset + best_address; - int thissad = fn_ptr->sdf(what, what_stride, check_here, - in_what_stride, bestsad); - if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - if (thissad < bestsad) { - bestsad = thissad; + int sad = fn_ptr->sdf(what->buf, what->stride, + best_address + ss[best_site].offset, + in_what->stride, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&this_mv, &fcenter_mv, + mvjsadcost, mvsadcost, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; best_mv->row += ss[best_site].mv.row; best_mv->col += ss[best_site].mv.col; best_address += ss[best_site].offset; @@ -1077,11 +1060,11 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, break; }; #endif - } else if (best_address == in_what) { + } else if (best_address == in_what->buf) { (*num00)++; } } - return bestsad; + return best_sad; } int vp9_diamond_search_sadx4(const MACROBLOCK *x, diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 836f3e1e5..3619ec89e 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -41,6 +41,9 @@ #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_segmentation.h" #include "vp9/encoder/vp9_speed_features.h" +#if CONFIG_INTERNAL_STATS +#include "vp9/encoder/vp9_ssim.h" +#endif #include "vp9/encoder/vp9_temporal_filter.h" #include "vp9/encoder/vp9_resize.h" #include "vp9/encoder/vp9_svc_layercontext.h" @@ -63,19 +66,6 @@ void vp9_coef_tree_initialize(); #define MAX_MB_RATE 250 #define MAXRATE_1080P 2025000 -#if CONFIG_INTERNAL_STATS -extern double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, int lumamask, - double *weight); - - -extern double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, double *ssim_y, - double *ssim_u, double *ssim_v); - - -#endif - // #define OUTPUT_YUV_REC #ifdef OUTPUT_YUV_SRC @@ -255,56 +245,6 @@ static void restore_coding_context(VP9_COMP *cpi) { cm->fc = cc->fc; } -// Computes a q delta (in "q index" terms) to get from a starting q value -// to a target q value -int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget) { - const RATE_CONTROL *const rc = &cpi->rc; - int start_index = rc->worst_quality; - int target_index = rc->worst_quality; - int i; - - // Convert the average q value to an index. - for (i = rc->best_quality; i < rc->worst_quality; ++i) { - start_index = i; - if (vp9_convert_qindex_to_q(i) >= qstart) - break; - } - - // Convert the q target to an index - for (i = rc->best_quality; i < rc->worst_quality; ++i) { - target_index = i; - if (vp9_convert_qindex_to_q(i) >= qtarget) - break; - } - - return target_index - start_index; -} - -// Computes a q delta (in "q index" terms) to get from a starting q value -// to a value that should equate to the given rate ratio. -int vp9_compute_qdelta_by_rate(VP9_COMP *cpi, int qindex, - double rate_target_ratio) { - const FRAME_TYPE frame_type = cpi->common.frame_type; - const RATE_CONTROL *const rc = &cpi->rc; - int target_index = rc->worst_quality; - int i; - - // Look up the current projected bits per block for the base index - const int base_bits_per_mb = vp9_rc_bits_per_mb(frame_type, qindex, 1.0); - - // Find the target bits per mb based on the base value and given ratio. - const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb); - - // Convert the q target to an index - for (i = rc->best_quality; i < rc->worst_quality; ++i) { - target_index = i; - if (vp9_rc_bits_per_mb(frame_type, i, 1.0) <= target_bits_per_mb ) - break; - } - - return target_index - qindex; -} - static void configure_static_seg_features(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; @@ -348,7 +288,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) { seg->update_map = 1; seg->update_data = 1; - qi_delta = vp9_compute_qdelta(cpi, rc->avg_q, rc->avg_q * 0.875); + qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875); vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2); vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2); @@ -369,7 +309,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) { seg->update_data = 1; seg->abs_delta = SEGMENT_DELTADATA; - qi_delta = vp9_compute_qdelta(cpi, rc->avg_q, rc->avg_q * 1.125); + qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125); vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2); vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); @@ -804,7 +744,8 @@ static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { cpi->oxcf = *oxcf; - cm->version = oxcf->version; + cm->profile = oxcf->profile; + cm->bit_depth = oxcf->bit_depth; cm->width = oxcf->width; cm->height = oxcf->height; @@ -844,8 +785,14 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; - if (cm->version != oxcf->version) - cm->version = oxcf->version; + if (cm->profile != oxcf->profile) + cm->profile = oxcf->profile; + cm->bit_depth = oxcf->bit_depth; + + if (cm->profile <= PROFILE_1) + assert(cm->bit_depth == BITS_8); + else + assert(cm->bit_depth > BITS_8); cpi->oxcf = *oxcf; @@ -881,10 +828,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) { break; } - cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q]; - cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q]; - cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level]; - cpi->oxcf.lossless = oxcf->lossless; if (cpi->oxcf.lossless) { // In lossless mode, make sure right quantizer range and correct transform @@ -1626,53 +1569,42 @@ void vp9_remove_compressor(VP9_COMP *cpi) { #endif } +static int64_t get_sse(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int width, int height) { + const int dw = width % 16; + const int dh = height % 16; + int64_t total_sse = 0; + unsigned int sse = 0; + int sum = 0; + int x, y; + + if (dw > 0) { + variance(&a[width - dw], a_stride, &b[width - dw], b_stride, + dw, height, &sse, &sum); + total_sse += sse; + } + if (dh > 0) { + variance(&a[(height - dh) * a_stride], a_stride, + &b[(height - dh) * b_stride], b_stride, + width - dw, dh, &sse, &sum); + total_sse += sse; + } -static uint64_t calc_plane_error(const uint8_t *orig, int orig_stride, - const uint8_t *recon, int recon_stride, - unsigned int cols, unsigned int rows) { - unsigned int row, col; - uint64_t total_sse = 0; - int diff; - - for (row = 0; row + 16 <= rows; row += 16) { - for (col = 0; col + 16 <= cols; col += 16) { - unsigned int sse; - - vp9_mse16x16(orig + col, orig_stride, recon + col, recon_stride, &sse); + for (y = 0; y < height / 16; ++y) { + const uint8_t *pa = a; + const uint8_t *pb = b; + for (x = 0; x < width / 16; ++x) { + vp9_mse16x16(pa, a_stride, pb, b_stride, &sse); total_sse += sse; - } - - /* Handle odd-sized width */ - if (col < cols) { - unsigned int border_row, border_col; - const uint8_t *border_orig = orig; - const uint8_t *border_recon = recon; - for (border_row = 0; border_row < 16; border_row++) { - for (border_col = col; border_col < cols; border_col++) { - diff = border_orig[border_col] - border_recon[border_col]; - total_sse += diff * diff; - } - - border_orig += orig_stride; - border_recon += recon_stride; - } - } - - orig += orig_stride * 16; - recon += recon_stride * 16; - } - - /* Handle odd-sized height */ - for (; row < rows; row++) { - for (col = 0; col < cols; col++) { - diff = orig[col] - recon[col]; - total_sse += diff * diff; + pa += 16; + pb += 16; } - orig += orig_stride; - recon += recon_stride; + a += 16 * a_stride; + b += 16 * b_stride; } return total_sse; @@ -1700,9 +1632,9 @@ static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, const int w = widths[i]; const int h = heights[i]; const uint32_t samples = w * h; - const uint64_t sse = calc_plane_error(a_planes[i], a_strides[i], - b_planes[i], b_strides[i], - w, h); + const uint64_t sse = get_sse(a_planes[i], a_strides[i], + b_planes[i], b_strides[i], + w, h); psnr->sse[1 + i] = sse; psnr->samples[1 + i] = samples; psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, 255.0, (double)sse); @@ -2176,7 +2108,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { vp9_clear_system_state(); - recon_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); + recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); if (cpi->twopass.total_left_stats.coded_error != 0.0) fprintf(f, "%10u %10d %10d %10d %10d %10d " @@ -2347,7 +2279,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, rc->this_key_frame_forced && (rc->projected_frame_size < rc->max_frame_bandwidth)) { int last_q = q; - int kf_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); + int kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); int high_err_target = cpi->ambient_err; int low_err_target = cpi->ambient_err >> 1; @@ -2704,7 +2636,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // fixed interval. Note the reconstruction error if it is the frame before // the force key frame if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) { - cpi->ambient_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); + cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); } // If the encoder forced a KEY_FRAME decision @@ -2824,15 +2756,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Don't increment frame counters if this was an altref buffer // update not a real frame ++cm->current_video_frame; - if (cpi->use_svc) { - LAYER_CONTEXT *lc; - if (cpi->svc.number_temporal_layers > 1) { - lc = &cpi->svc.layer_context[cpi->svc.temporal_layer_id]; - } else { - lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; - } - ++lc->current_video_frame_in_layer; - } + if (cpi->use_svc) + vp9_inc_frame_in_layer(&cpi->svc); } // restore prev_mi @@ -2908,7 +2833,7 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags, vpx_usec_timer_mark(&timer); cpi->time_receive_data += vpx_usec_timer_elapsed(&timer); - if (cm->version == 0 && (subsampling_x != 1 || subsampling_y != 1)) { + if (cm->profile == PROFILE_0 && (subsampling_x != 1 || subsampling_y != 1)) { vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM, "Non-4:2:0 color space requires profile >= 1"); res = -1; @@ -3466,28 +3391,12 @@ void vp9_set_svc(VP9_COMP *cpi, int use_svc) { return; } -int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *reference) { - int i, j; - int total = 0; - - const uint8_t *src = source->y_buffer; - const uint8_t *ref = reference->y_buffer; - - // Loop through the Y plane raw and reconstruction data summing - // (square differences) - for (i = 0; i < source->y_height; i += 16) { - for (j = 0; j < source->y_width; j += 16) { - unsigned int sse; - total += vp9_mse16x16(src + j, source->y_stride, - ref + j, reference->y_stride, &sse); - } - - src += 16 * source->y_stride; - ref += 16 * reference->y_stride; - } +int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b) { + assert(a->y_crop_width == b->y_crop_width); + assert(a->y_crop_height == b->y_crop_height); - return total; + return (int)get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, + a->y_crop_width, a->y_crop_height); } diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 655661835..e30fb02b2 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -186,9 +186,8 @@ typedef enum { } AQ_MODE; typedef struct VP9_CONFIG { - int version; // 4 versions of bitstream defined: - // 0 - best quality/slowest decode, - // 3 - lowest quality/fastest decode + BITSTREAM_PROFILE profile; + BIT_DEPTH bit_depth; int width; // width of data passed to the compressor int height; // height of data passed to the compressor double framerate; // set to passed in framerate @@ -497,6 +496,8 @@ typedef struct VP9_COMP { SVC svc; + int use_large_partition_rate; + #if CONFIG_MULTIPLE_ARF // ARF tracking variables. int multi_arf_enabled; @@ -605,16 +606,10 @@ static INLINE int get_token_alloc(int mb_rows, int mb_cols) { return mb_rows * mb_cols * (16 * 16 * 3 + 4); } -int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *reference); +int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b); void vp9_alloc_compressor_data(VP9_COMP *cpi); -int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget); - -int vp9_compute_qdelta_by_rate(VP9_COMP *cpi, int base_q_index, - double rate_target_ratio); - void vp9_scale_references(VP9_COMP *cpi); void vp9_update_reference_frames(VP9_COMP *cpi); diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c index 92ad1e745..3ac85228b 100644 --- a/vp9/encoder/vp9_picklpf.c +++ b/vp9/encoder/vp9_picklpf.c @@ -35,7 +35,7 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, int filt_err; vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_level, 1, partial_frame); - filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); + filt_err = vp9_get_y_sse(sd, cm->frame_to_show); // Re-instate the unfiltered frame vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 9c7e8c109..f3fe99cdb 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -214,6 +214,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, struct macroblockd_plane *const pd = &xd->plane[0]; MB_PREDICTION_MODE this_mode, best_mode = ZEROMV; MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME; + INTERP_FILTER best_pred_filter = EIGHTTAP; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; struct buf_2d yv12_mb[4][MAX_MB_PLANE]; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, @@ -236,6 +237,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize]; // Mode index conversion form THR_MODES to MB_PREDICTION_MODE for a ref frame. int mode_idx[MB_MODE_COUNT] = {0}; + INTERP_FILTER filter_ref = SWITCHABLE; x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; @@ -267,6 +269,11 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, frame_mv[ZEROMV][ref_frame].as_int = 0; } + if (xd->up_available) + filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter; + else if (xd->left_available) + filter_ref = xd->mi[-1]->mbmi.interp_filter; + for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) { if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue; @@ -326,9 +333,63 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->mode = this_mode; mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int; - vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); - model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist); + // Search for the best prediction filter type, when the resulting + // motion vector is at sub-pixel accuracy level for luma component, i.e., + // the last three bits are all zeros. + if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && + ((mbmi->mv[0].as_mv.row & 0x07) != 0 || + (mbmi->mv[0].as_mv.col & 0x07) != 0)) { + int64_t tmp_rdcost1 = INT64_MAX; + int64_t tmp_rdcost2 = INT64_MAX; + int64_t tmp_rdcost3 = INT64_MAX; + int pf_rate[3]; + int64_t pf_dist[3]; + + mbmi->interp_filter = EIGHTTAP; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP], + &pf_dist[EIGHTTAP]); + tmp_rdcost1 = RDCOST(x->rdmult, x->rddiv, + vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP], + pf_dist[EIGHTTAP]); + + mbmi->interp_filter = EIGHTTAP_SHARP; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SHARP], + &pf_dist[EIGHTTAP_SHARP]); + tmp_rdcost2 = RDCOST(x->rdmult, x->rddiv, + vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP_SHARP], + pf_dist[EIGHTTAP_SHARP]); + + mbmi->interp_filter = EIGHTTAP_SMOOTH; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SMOOTH], + &pf_dist[EIGHTTAP_SMOOTH]); + tmp_rdcost3 = RDCOST(x->rdmult, x->rddiv, + vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP_SMOOTH], + pf_dist[EIGHTTAP_SMOOTH]); + + if (tmp_rdcost2 < tmp_rdcost1) { + if (tmp_rdcost2 < tmp_rdcost3) + mbmi->interp_filter = EIGHTTAP_SHARP; + else + mbmi->interp_filter = EIGHTTAP_SMOOTH; + } else { + if (tmp_rdcost1 < tmp_rdcost3) + mbmi->interp_filter = EIGHTTAP; + else + mbmi->interp_filter = EIGHTTAP_SMOOTH; + } + + rate = pf_rate[mbmi->interp_filter]; + dist = pf_dist[mbmi->interp_filter]; + } else { + mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP: filter_ref; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist); + } + rate += rate_mv; rate += x->inter_mode_cost[mbmi->mode_context[ref_frame]] [INTER_OFFSET(this_mode)]; @@ -339,12 +400,14 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = rate; *returndistortion = dist; best_mode = this_mode; + best_pred_filter = mbmi->interp_filter; best_ref_frame = ref_frame; } } } mbmi->mode = best_mode; + mbmi->interp_filter = best_pred_filter; mbmi->ref_frame[0] = best_ref_frame; mbmi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int; xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index c36b30b0c..342081644 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -35,9 +35,6 @@ #define MIN_BPB_FACTOR 0.005 #define MAX_BPB_FACTOR 50 -// Bits Per MB at different Q (Multiplied by 512) -#define BPER_MB_NORMBITS 9 - // Tables relating active max Q to active min Q static int kf_low_motion_minq[QINDEX_RANGE]; static int kf_high_motion_minq[QINDEX_RANGE]; @@ -106,15 +103,10 @@ int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex, return (int)(0.5 + (enumerator * correction_factor / q)); } -static int estimate_bits_at_q(int frame_kind, int q, int mbs, +static int estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs, double correction_factor) { - const int bpm = (int)(vp9_rc_bits_per_mb(frame_kind, q, correction_factor)); - - // Attempt to retain reasonable accuracy without overflow. The cutoff is - // chosen such that the maximum product of Bpm and MBs fits 31 bits. The - // largest Bpm takes 20 bits. - return (mbs > (1 << 11)) ? (bpm >> BPER_MB_NORMBITS) * mbs - : (bpm * mbs) >> BPER_MB_NORMBITS; + const int bpm = (int)(vp9_rc_bits_per_mb(frame_type, q, correction_factor)); + return ((uint64_t)bpm * mbs) >> BPER_MB_NORMBITS; } int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) { @@ -151,13 +143,12 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) { // Update the buffer level for higher layers, given the encoded current layer. -static void update_layer_buffer_level(VP9_COMP *const cpi, - int encoded_frame_size) { +static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) { int temporal_layer = 0; - int current_temporal_layer = cpi->svc.temporal_layer_id; + int current_temporal_layer = svc->temporal_layer_id; for (temporal_layer = current_temporal_layer + 1; - temporal_layer < cpi->svc.number_temporal_layers; ++temporal_layer) { - LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; + temporal_layer < svc->number_temporal_layers; ++temporal_layer) { + LAYER_CONTEXT *lc = &svc->layer_context[temporal_layer]; RATE_CONTROL *lrc = &lc->rc; int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate - encoded_frame_size); @@ -187,7 +178,7 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { rc->buffer_level = rc->bits_off_target; if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - update_layer_buffer_level(cpi, encoded_frame_size); + update_layer_buffer_level(&cpi->svc, encoded_frame_size); } } @@ -284,6 +275,7 @@ static double get_rate_correction_factor(const VP9_COMP *cpi) { return cpi->rc.key_frame_rate_correction_factor; } else { if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && + !cpi->rc.is_src_frame_alt_ref && !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) return cpi->rc.gf_rate_correction_factor; else @@ -296,6 +288,7 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) { cpi->rc.key_frame_rate_correction_factor = factor; } else { if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && + !cpi->rc.is_src_frame_alt_ref && !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) cpi->rc.gf_rate_correction_factor = factor; else @@ -304,7 +297,7 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) { } void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { - const int q = cpi->common.base_qindex; + const VP9_COMMON *const cm = &cpi->common; int correction_factor = 100; double rate_correction_factor = get_rate_correction_factor(cpi); double adjustment_limit; @@ -317,8 +310,8 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { // Work out how big we would have expected the frame to be at this Q given // the current correction factor. // Stay in double to avoid int overflow when values are large - projected_size_based_on_q = estimate_bits_at_q(cpi->common.frame_type, q, - cpi->common.MBs, + projected_size_based_on_q = estimate_bits_at_q(cm->frame_type, + cm->base_qindex, cm->MBs, rate_correction_factor); // Work out a size correction factor. if (projected_size_based_on_q > 0) @@ -342,20 +335,18 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { if (correction_factor > 102) { // We are not already at the worst allowable quality - correction_factor = - (int)(100 + ((correction_factor - 100) * adjustment_limit)); - rate_correction_factor = - ((rate_correction_factor * correction_factor) / 100); + correction_factor = (int)(100 + ((correction_factor - 100) * + adjustment_limit)); + rate_correction_factor = (rate_correction_factor * correction_factor) / 100; // Keep rate_correction_factor within limits if (rate_correction_factor > MAX_BPB_FACTOR) rate_correction_factor = MAX_BPB_FACTOR; } else if (correction_factor < 99) { // We are not already at the best allowable quality - correction_factor = - (int)(100 - ((100 - correction_factor) * adjustment_limit)); - rate_correction_factor = - ((rate_correction_factor * correction_factor) / 100); + correction_factor = (int)(100 - ((100 - correction_factor) * + adjustment_limit)); + rate_correction_factor = (rate_correction_factor * correction_factor) / 100; // Keep rate_correction_factor within limits if (rate_correction_factor < MIN_BPB_FACTOR) @@ -376,11 +367,8 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, // Calculate required scaling factor based on target frame size and size of // frame produced using previous Q. - if (target_bits_per_frame >= (INT_MAX >> BPER_MB_NORMBITS)) - // Case where we would overflow int - target_bits_per_mb = (target_bits_per_frame / cm->MBs) << BPER_MB_NORMBITS; - else - target_bits_per_mb = (target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs; + target_bits_per_mb = + ((uint64_t)target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs; i = active_best_quality; @@ -447,6 +435,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { // If buffer is below the optimal level, let the active_worst_quality go from // ambient Q (at buffer = optimal level) to worst_quality level // (at buffer = critical level). + const VP9_COMMON *const cm = &cpi->common; const VP9_CONFIG *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; // Buffer level below which we push active_worst to worst_quality. @@ -454,9 +443,9 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { int64_t buff_lvl_step = 0; int adjustment = 0; int active_worst_quality; - if (cpi->common.frame_type == KEY_FRAME) + if (cm->frame_type == KEY_FRAME) return rc->worst_quality; - if (cpi->common.current_video_frame > 1) + if (cm->current_video_frame > 1) active_worst_quality = MIN(rc->worst_quality, rc->avg_frame_qindex[INTER_FRAME] * 5 / 4); else @@ -510,7 +499,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, if (rc->this_key_frame_forced) { int qindex = rc->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex); - int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q, + int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, (last_boosted_q * 0.75)); active_best_quality = MAX(qindex + delta_qindex, rc->best_quality); } else if (cm->current_video_frame > 0) { @@ -532,8 +521,8 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, // Convert the adjustment factor to a qindex delta // on active_best_quality. q_val = vp9_convert_qindex_to_q(active_best_quality); - active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val * - q_adj_factor); + active_best_quality += vp9_compute_qdelta(rc, q_val, + q_val * q_adj_factor); } } else if (!rc->is_src_frame_alt_ref && !cpi->use_svc && @@ -589,7 +578,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, active_best_quality, active_worst_quality); if (q > *top_index) { // Special case when we are targeting the max allowed rate - if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth) + if (rc->this_frame_target >= rc->max_frame_bandwidth) *top_index = q; else q = *top_index; @@ -622,8 +611,8 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, if (rc->this_key_frame_forced) { int qindex = rc->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex); - int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q, - (last_boosted_q * 0.75)); + int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, + last_boosted_q * 0.75); active_best_quality = MAX(qindex + delta_qindex, rc->best_quality); } else if (cm->current_video_frame > 0) { // not first frame of one pass and kf_boost is set @@ -644,15 +633,15 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, // Convert the adjustment factor to a qindex delta // on active_best_quality. q_val = vp9_convert_qindex_to_q(active_best_quality); - active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val * - q_adj_factor); + active_best_quality += vp9_compute_qdelta(rc, q_val, + q_val * q_adj_factor); } #else double current_q; // Force the KF quantizer to be 30% of the active_worst_quality. current_q = vp9_convert_qindex_to_q(active_worst_quality); active_best_quality = active_worst_quality - + vp9_compute_qdelta(cpi, current_q, current_q * 0.3); + + vp9_compute_qdelta(rc, current_q, current_q * 0.3); #endif } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { @@ -755,7 +744,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, active_best_quality, active_worst_quality); if (q > *top_index) { // Special case when we are targeting the max allowed rate - if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth) + if (rc->this_frame_target >= rc->max_frame_bandwidth) *top_index = q; else q = *top_index; @@ -771,7 +760,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, assert(level >= 0); new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level))); q = active_worst_quality + - vp9_compute_qdelta(cpi, current_q, new_q); + vp9_compute_qdelta(rc, current_q, new_q); *bottom_index = q; *top_index = q; @@ -804,8 +793,8 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, if (rc->this_key_frame_forced) { int qindex = rc->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex); - int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q, - (last_boosted_q * 0.75)); + int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, + last_boosted_q * 0.75); active_best_quality = MAX(qindex + delta_qindex, rc->best_quality); } else { // Not forced keyframe. @@ -829,15 +818,15 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, // Convert the adjustment factor to a qindex delta // on active_best_quality. q_val = vp9_convert_qindex_to_q(active_best_quality); - active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val * - q_adj_factor); + active_best_quality += vp9_compute_qdelta(rc, q_val, + q_val * q_adj_factor); } #else double current_q; // Force the KF quantizer to be 30% of the active_worst_quality. current_q = vp9_convert_qindex_to_q(active_worst_quality); active_best_quality = active_worst_quality - + vp9_compute_qdelta(cpi, current_q, current_q * 0.3); + + vp9_compute_qdelta(rc, current_q, current_q * 0.3); #endif } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { @@ -954,7 +943,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, assert(level >= 0); new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level))); q = active_worst_quality + - vp9_compute_qdelta(cpi, current_q, new_q); + vp9_compute_qdelta(rc, current_q, new_q); *bottom_index = q; *top_index = q; @@ -970,8 +959,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, } int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, - int *bottom_index, - int *top_index) { + int *bottom_index, int *top_index) { int q; if (cpi->pass == 0) { if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) @@ -1039,16 +1027,17 @@ void vp9_rc_set_frame_target(VP9_COMP *cpi, int target) { static void update_alt_ref_frame_stats(VP9_COMP *cpi) { // this frame refreshes means next frames don't unless specified by user - cpi->rc.frames_since_golden = 0; + RATE_CONTROL *const rc = &cpi->rc; + rc->frames_since_golden = 0; #if CONFIG_MULTIPLE_ARF if (!cpi->multi_arf_enabled) #endif // Clear the alternate reference update pending flag. - cpi->rc.source_alt_ref_pending = 0; + rc->source_alt_ref_pending = 0; // Set the alternate reference frame active flag - cpi->rc.source_alt_ref_active = 1; + rc->source_alt_ref_active = 1; } static void update_golden_frame_stats(VP9_COMP *cpi) { @@ -1077,6 +1066,7 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { VP9_COMMON *const cm = &cpi->common; + const VP9_CONFIG *const oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; cm->last_frame_type = cm->frame_type; @@ -1086,7 +1076,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { // Post encode loop adjustment of Q prediction. vp9_rc_update_rate_correction_factors( cpi, (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF || - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0); + oxcf->end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0); // Keep a record of last Q and ambient average Q. if (cm->frame_type == KEY_FRAME) { @@ -1095,7 +1085,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { 3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2); } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) && - !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) { + !(cpi->use_svc && oxcf->end_usage == USAGE_STREAM_FROM_SERVER)) { rc->last_q[2] = cm->base_qindex; rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO( 3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2); @@ -1145,7 +1135,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits; - if (cpi->oxcf.play_alternate && cpi->refresh_alt_ref_frame && + if (oxcf->play_alternate && cpi->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME)) // Update the alternate reference frame stats as appropriate. update_alt_ref_frame_stats(cpi); @@ -1238,18 +1228,19 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { const VP9_CONFIG *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; + const SVC *const svc = &cpi->svc; const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level; const int64_t one_pct_bits = 1 + oxcf->optimal_buffer_level / 100; int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS); int target = rc->av_per_frame_bandwidth; - if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + if (svc->number_temporal_layers > 1 && + oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { // Note that for layers, av_per_frame_bandwidth is the cumulative // per-frame-bandwidth. For the target size of this frame, use the // layer average frame size (i.e., non-cumulative per-frame-bw). - int current_temporal_layer = cpi->svc.temporal_layer_id; - const LAYER_CONTEXT *lc = &cpi->svc.layer_context[current_temporal_layer]; + int current_temporal_layer = svc->temporal_layer_id; + const LAYER_CONTEXT *lc = &svc->layer_context[current_temporal_layer]; target = lc->avg_frame_size; min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS); } @@ -1332,3 +1323,46 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { rc->frames_till_gf_update_due = INT_MAX; rc->baseline_gf_interval = INT_MAX; } + +int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget) { + int start_index = rc->worst_quality; + int target_index = rc->worst_quality; + int i; + + // Convert the average q value to an index. + for (i = rc->best_quality; i < rc->worst_quality; ++i) { + start_index = i; + if (vp9_convert_qindex_to_q(i) >= qstart) + break; + } + + // Convert the q target to an index + for (i = rc->best_quality; i < rc->worst_quality; ++i) { + target_index = i; + if (vp9_convert_qindex_to_q(i) >= qtarget) + break; + } + + return target_index - start_index; +} + +int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, + int qindex, double rate_target_ratio) { + int target_index = rc->worst_quality; + int i; + + // Look up the current projected bits per block for the base index + const int base_bits_per_mb = vp9_rc_bits_per_mb(frame_type, qindex, 1.0); + + // Find the target bits per mb based on the base value and given ratio. + const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb); + + // Convert the q target to an index + for (i = rc->best_quality; i < rc->worst_quality; ++i) { + target_index = i; + if (vp9_rc_bits_per_mb(frame_type, i, 1.0) <= target_bits_per_mb ) + break; + } + + return target_index - qindex; +} diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index 99e4b1639..7693c2b13 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -22,6 +22,9 @@ extern "C" { #define FRAME_OVERHEAD_BITS 200 +// Bits Per MB at different Q (Multiplied by 512) +#define BPER_MB_NORMBITS 9 + typedef struct { // Rate targetting variables int this_frame_target; @@ -163,6 +166,15 @@ int vp9_rc_clamp_pframe_target_size(const struct VP9_COMP *const cpi, // This function is called only from the vp9_rc_get_..._params() functions. void vp9_rc_set_frame_target(struct VP9_COMP *cpi, int target); +// Computes a q delta (in "q index" terms) to get from a starting q value +// to a target q value +int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget); + +// Computes a q delta (in "q index" terms) to get from a starting q value +// to a value that should equate to the given rate ratio. +int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, + int qindex, double rate_target_ratio); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 1066f6a17..f4ea7cd55 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -2358,7 +2358,7 @@ const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL; } -static INLINE int get_switchable_rate(const MACROBLOCK *x) { +int vp9_get_switchable_rate(const MACROBLOCK *x) { const MACROBLOCKD *const xd = &x->e_mbd; const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int ctx = vp9_get_pred_context_switchable_interp(xd); @@ -2815,7 +2815,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int j; int64_t rs_rd; mbmi->interp_filter = i; - rs = get_switchable_rate(x); + rs = vp9_get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); if (i > 0 && intpel_mv) { @@ -2885,7 +2885,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Set the appropriate filter mbmi->interp_filter = cm->interp_filter != SWITCHABLE ? cm->interp_filter : *best_filter; - rs = cm->interp_filter == SWITCHABLE ? get_switchable_rate(x) : 0; + rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(x) : 0; if (pred_exists) { if (best_needs_copy) { @@ -2915,7 +2915,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (cm->interp_filter == SWITCHABLE) - *rate2 += get_switchable_rate(x); + *rate2 += vp9_get_switchable_rate(x); if (!is_comp_pred) { if (!x->in_active_map) { @@ -4072,7 +4072,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (tmp_rd == INT64_MAX) continue; - rs = get_switchable_rate(x); + rs = vp9_get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); cpi->rd_filter_cache[switchable_filter_index] = tmp_rd; cpi->rd_filter_cache[SWITCHABLE_FILTERS] = @@ -4153,7 +4153,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, distortion2 += distortion; if (cm->interp_filter == SWITCHABLE) - rate2 += get_switchable_rate(x); + rate2 += vp9_get_switchable_rate(x); if (!mode_excluded) mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index 6968fa604..a01dbd4d3 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -40,6 +40,8 @@ void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, unsigned int qstep, int *rate, int64_t *dist); +int vp9_get_switchable_rate(const MACROBLOCK *x); + void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, MV_REFERENCE_FRAME ref_frame, diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index b79e15979..d6b6174fa 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -209,6 +209,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, sf->use_square_partition_only = 1; sf->disable_filter_search_var_thresh = 100; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; + sf->constrain_copy_partition = 1; sf->use_uv_intra_rd_estimate = 1; sf->skip_encode_sb = 1; sf->subpel_iters_per_step = 1; @@ -264,7 +265,11 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, } if (speed >= 6) { - sf->partition_search_type = VAR_BASED_FIXED_PARTITION; + // Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION. + sf->partition_search_type = SOURCE_VAR_BASED_PARTITION; + sf->search_type_check_frequency = 50; + sf->source_var_thresh = 360; + sf->use_nonrd_pick_mode = 1; sf->search_method = FAST_DIAMOND; } @@ -310,6 +315,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->min_partition_size = BLOCK_4X4; sf->adjust_partitioning_from_last_frame = 0; sf->last_partitioning_redo_frequency = 4; + sf->constrain_copy_partition = 0; sf->disable_split_mask = 0; sf->mode_search_skip_flags = 0; sf->force_frame_boost = 0; @@ -336,6 +342,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { // This setting only takes effect when partition_search_type is set // to FIXED_PARTITION. sf->always_this_block_size = BLOCK_16X16; + sf->search_type_check_frequency = 50; + sf->source_var_thresh = 100; // Recode loop tolerence %. sf->recode_tolerance = 25; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index 922f8803e..72f548a04 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -110,7 +110,10 @@ typedef enum { // Use an arbitrary partitioning scheme based on source variance within // a 64X64 SB - VAR_BASED_PARTITION + VAR_BASED_PARTITION, + + // Use non-fixed partitions based on source variance + SOURCE_VAR_BASED_PARTITION } PARTITION_SEARCH_TYPE; typedef enum { @@ -245,6 +248,12 @@ typedef struct { // use_lastframe_partitioning is set. int last_partitioning_redo_frequency; + // This enables constrained copy partitioning, which, given an input block + // size bsize, will copy previous partition for partitions less than bsize, + // otherwise bsize partition is used. bsize is currently set to 16x16. + // Used for the case where motion is detected in superblock. + int constrain_copy_partition; + // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable // it always, to allow it for only Last frame and Intra, disable it for all // inter modes or to enable it always. @@ -329,6 +338,13 @@ typedef struct { // used in inter frames. // TODO(aconverse): Fold this into one of the other many mode skips BLOCK_SIZE max_intra_bsize; + + // The frequency that we check if SOURCE_VAR_BASED_PARTITION or + // FIXED_PARTITION search type should be used. + int search_type_check_frequency; + + // The threshold used in SOURCE_VAR_BASED_PARTITION search type. + int source_var_thresh; } SPEED_FEATURES; struct VP9_COMP; diff --git a/vp9/encoder/vp9_ssim.c b/vp9/encoder/vp9_ssim.c index 1435191d0..026e6a8fd 100644 --- a/vp9/encoder/vp9_ssim.c +++ b/vp9/encoder/vp9_ssim.c @@ -8,8 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "./vp9_rtcd.h" -#include "vp9/encoder/vp9_onyx_int.h" +#include "vp9/encoder/vp9_ssim.h" void vp9_ssim_parms_16x16_c(uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, diff --git a/vp9/encoder/vp9_ssim.h b/vp9/encoder/vp9_ssim.h new file mode 100644 index 000000000..a581c2c23 --- /dev/null +++ b/vp9/encoder/vp9_ssim.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_ENCODER_VP9_SSIM_H_ +#define VP9_ENCODER_VP9_SSIM_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "vpx_scale/yv12config.h" + +double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, + int lumamask, double *weight); + +double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, + double *ssim_y, double *ssim_u, double *ssim_v); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_SSIM_H_ diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index caa0ec0e3..c2b6263f0 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -32,8 +32,8 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { LAYER_CONTEXT *const lc = &svc->layer_context[layer]; RATE_CONTROL *const lrc = &lc->rc; lc->current_video_frame_in_layer = 0; - lrc->avg_frame_qindex[INTER_FRAME] = q_trans[oxcf->worst_allowed_q]; - lrc->ni_av_qi = q_trans[oxcf->worst_allowed_q]; + lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; + lrc->ni_av_qi = oxcf->worst_allowed_q; lrc->total_actual_bits = 0; lrc->total_target_vs_actual = 0; lrc->ni_tot_qi = 0; @@ -47,12 +47,12 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { if (svc->number_temporal_layers > 1) { lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000; - lrc->last_q[INTER_FRAME] = q_trans[oxcf->worst_allowed_q]; + lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; } else { lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000; - lrc->last_q[0] = q_trans[oxcf->best_allowed_q]; - lrc->last_q[1] = q_trans[oxcf->best_allowed_q]; - lrc->last_q[2] = q_trans[oxcf->best_allowed_q]; + lrc->last_q[0] = oxcf->best_allowed_q; + lrc->last_q[1] = oxcf->best_allowed_q; + lrc->last_q[2] = oxcf->best_allowed_q; } lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level), @@ -215,3 +215,10 @@ void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) { } svc->spatial_layer_id = 0; } + +void vp9_inc_frame_in_layer(SVC *svc) { + LAYER_CONTEXT *const lc = (svc->number_temporal_layers > 1) + ? &svc->layer_context[svc->temporal_layer_id] + : &svc->layer_context[svc->spatial_layer_id]; + ++lc->current_video_frame_in_layer; +} diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index e859a2fd5..2abed3055 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -70,6 +70,9 @@ void vp9_save_layer_context(struct VP9_COMP *const cpi); // Initialize second pass rc for spatial svc. void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi); +// Increment number of video frames in layer +void vp9_inc_frame_in_layer(SVC *svc); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c index 996f730ef..71867a938 100644 --- a/vp9/encoder/vp9_variance.c +++ b/vp9/encoder/vp9_variance.c @@ -417,6 +417,12 @@ unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, return (var - (((int64_t)avg * avg) >> 10)); } +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { + variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum); +} + unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index dfcec783c..152e1f46e 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -38,6 +38,7 @@ struct vp9_extracfg { unsigned int frame_parallel_decoding_mode; AQ_MODE aq_mode; unsigned int frame_periodic_boost; + BIT_DEPTH bit_depth; }; struct extraconfig_map { @@ -67,6 +68,7 @@ static const struct extraconfig_map extracfg_map[] = { 0, // frame_parallel_decoding_mode NO_AQ, // aq_mode 0, // frame_periodic_delta_q + BITS_8, // Bit depth } } }; @@ -252,6 +254,12 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, ERROR("rc_twopass_stats_in missing EOS stats packet"); } } + if (cfg->g_profile <= (unsigned int)PROFILE_1 && + extra_cfg->bit_depth > BITS_8) + ERROR("High bit-depth not supported in profile < 2"); + if (cfg->g_profile > (unsigned int)PROFILE_1 && + extra_cfg->bit_depth == BITS_8) + ERROR("Bit-depth 8 not supported in profile > 1"); return VPX_CODEC_OK; } @@ -277,11 +285,14 @@ static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx, } -static vpx_codec_err_t set_encoder_config(VP9_CONFIG *oxcf, - const vpx_codec_enc_cfg_t *cfg, const struct vp9_extracfg *extra_cfg) { - oxcf->version = cfg->g_profile; +static vpx_codec_err_t set_encoder_config( + VP9_CONFIG *oxcf, + const vpx_codec_enc_cfg_t *cfg, + const struct vp9_extracfg *extra_cfg) { + oxcf->profile = cfg->g_profile; oxcf->width = cfg->g_w; oxcf->height = cfg->g_h; + oxcf->bit_depth = extra_cfg->bit_depth; // guess a frame rate if out of whack, use 30 oxcf->framerate = (double)cfg->g_timebase.den / cfg->g_timebase.num; if (oxcf->framerate > 180) @@ -313,9 +324,9 @@ static vpx_codec_err_t set_encoder_config(VP9_CONFIG *oxcf, oxcf->target_bandwidth = cfg->rc_target_bitrate; oxcf->rc_max_intra_bitrate_pct = extra_cfg->rc_max_intra_bitrate_pct; - oxcf->best_allowed_q = cfg->rc_min_quantizer; - oxcf->worst_allowed_q = cfg->rc_max_quantizer; - oxcf->cq_level = extra_cfg->cq_level; + oxcf->best_allowed_q = q_trans[cfg->rc_min_quantizer]; + oxcf->worst_allowed_q = q_trans[cfg->rc_max_quantizer]; + oxcf->cq_level = q_trans[extra_cfg->cq_level]; oxcf->fixed_q = -1; oxcf->under_shoot_pct = cfg->rc_undershoot_pct; diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index 2163b7bae..5ed7484ab 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -33,7 +33,7 @@ struct vpx_codec_alg_priv { vpx_codec_dec_cfg_t cfg; vp9_stream_info_t si; int decoder_init; - struct VP9Decompressor *pbi; + struct VP9Decoder *pbi; int postproc_cfg_set; vp8_postproc_cfg_t postproc_cfg; #if CONFIG_POSTPROC_VISUALIZER @@ -89,6 +89,8 @@ static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) { ctx->pbi = NULL; } + vpx_free(ctx); + return VPX_CODEC_OK; } diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index 21a388e51..da6c0f8b6 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -68,6 +68,7 @@ VP9_CX_SRCS-yes += encoder/vp9_svc_layercontext.c VP9_CX_SRCS-yes += encoder/vp9_resize.c VP9_CX_SRCS-yes += encoder/vp9_resize.h VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c +VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.h VP9_CX_SRCS-yes += encoder/vp9_tokenize.c VP9_CX_SRCS-yes += encoder/vp9_treewriter.c VP9_CX_SRCS-yes += encoder/vp9_variance.c |