diff options
28 files changed, 490 insertions, 428 deletions
diff --git a/build/make/rtcd.pl b/build/make/rtcd.pl index f5f59b146..295443c0f 100755 --- a/build/make/rtcd.pl +++ b/build/make/rtcd.pl @@ -3,7 +3,7 @@ no strict 'refs'; use warnings; use Getopt::Long; -Getopt::Long::Configure("auto_help"); +Getopt::Long::Configure("auto_help") if $Getopt::Long::VERSION > 2.32; my %ALL_FUNCS = (); my @ALL_ARCHS; @@ -25,6 +25,7 @@ Advanced options: ${toggle_docs} documentation ${toggle_unit_tests} unit tests ${toggle_decode_perf_tests} build decoder perf tests with unit tests + ${toggle_encode_perf_tests} build encoder perf tests with unit tests --libc=PATH path to alternate libc --size-limit=WxH max size to allow in the decoder --as={yasm|nasm|auto} use specified assembler [auto, yasm preferred] @@ -273,7 +274,7 @@ HAVE_LIST=" EXPERIMENT_LIST=" multiple_arf spatial_svc - denoising + vp9_temporal_denoising fp_mb_stats " CONFIG_LIST=" @@ -324,6 +325,7 @@ CONFIG_LIST=" webm_io libyuv decode_perf_tests + encode_perf_tests multi_res_encoding temporal_denoising experimental @@ -380,6 +382,7 @@ CMDLINE_SELECT=" webm_io libyuv decode_perf_tests + encode_perf_tests multi_res_encoding temporal_denoising experimental diff --git a/test/decode_perf_test.cc b/test/decode_perf_test.cc index b612f23c4..11529b349 100644 --- a/test/decode_perf_test.cc +++ b/test/decode_perf_test.cc @@ -92,6 +92,7 @@ TEST_P(DecodePerfTest, PerfTest) { const double fps = double(frames) / elapsed_secs; printf("{\n"); + printf("\t\"type\" : \"decode_perf_test\",\n"); printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP); printf("\t\"videoName\" : \"%s\",\n", video_name); printf("\t\"threadCount\" : %u,\n", threads); diff --git a/test/encode_perf_test.cc b/test/encode_perf_test.cc new file mode 100644 index 000000000..feef37e7b --- /dev/null +++ b/test/encode_perf_test.cc @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "./vpx_config.h" +#include "./vpx_version.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "vpx_ports/vpx_timer.h" + +namespace { + +const int kMaxPsnr = 100; +const double kUsecsInSec = 1000000.0; + +struct EncodePerfTestVideo { + EncodePerfTestVideo(const char *name_, uint32_t width_, uint32_t height_, + uint32_t bitrate_, int frames_) + : name(name_), + width(width_), + height(height_), + bitrate(bitrate_), + frames(frames_) {} + const char *name; + uint32_t width; + uint32_t height; + uint32_t bitrate; + int frames; +}; + +const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = { + EncodePerfTestVideo("desktop_640_360_30.yuv", 640, 360, 200, 2484), + EncodePerfTestVideo("kirland_640_480_30.yuv", 640, 480, 200, 300), + EncodePerfTestVideo("macmarcomoving_640_480_30.yuv", 640, 480, 200, 987), + EncodePerfTestVideo("macmarcostationary_640_480_30.yuv", 640, 480, 200, 718), + EncodePerfTestVideo("niklas_640_480_30.yuv", 640, 480, 200, 471), + EncodePerfTestVideo("tacomanarrows_640_480_30.yuv", 640, 480, 200, 300), + EncodePerfTestVideo("tacomasmallcameramovement_640_480_30.yuv", + 640, 480, 200, 300), + EncodePerfTestVideo("thaloundeskmtg_640_480_30.yuv", 640, 480, 200, 300), + EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470), +}; + +const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 12 }; + +#define NELEMENTS(x) (sizeof((x)) / sizeof((x)[0])) + +class VP9EncodePerfTest + : public ::libvpx_test::EncoderTest, + public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { + protected: + VP9EncodePerfTest() + : EncoderTest(GET_PARAM(0)), + min_psnr_(kMaxPsnr), + nframes_(0), + encoding_mode_(GET_PARAM(1)), + speed_(0) {} + + virtual ~VP9EncodePerfTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(encoding_mode_); + + cfg_.g_lag_in_frames = 0; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 56; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_undershoot_pct = 50; + cfg_.rc_overshoot_pct = 50; + cfg_.rc_buf_sz = 1000; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + cfg_.rc_resize_allowed = 0; + cfg_.rc_end_usage = VPX_CBR; + } + + virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, + ::libvpx_test::Encoder *encoder) { + if (video->frame() == 1) { + encoder->Control(VP8E_SET_CPUUSED, speed_); + } + } + + virtual void BeginPassHook(unsigned int /*pass*/) { + min_psnr_ = kMaxPsnr; + nframes_ = 0; + } + + virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) { + if (pkt->data.psnr.psnr[0] < min_psnr_) { + min_psnr_= pkt->data.psnr.psnr[0]; + } + } + + // for performance reasons don't decode + virtual bool DoDecode() { return 0; } + + double min_psnr() const { + return min_psnr_; + } + + void set_speed(unsigned int speed) { + speed_ = speed; + } + + private: + double min_psnr_; + unsigned int nframes_; + libvpx_test::TestMode encoding_mode_; + unsigned speed_; +}; + +TEST_P(VP9EncodePerfTest, PerfTest) { + for (size_t i = 0; i < NELEMENTS(kVP9EncodePerfTestVectors); ++i) { + for (size_t j = 0; j < NELEMENTS(kEncodePerfTestSpeeds); ++j) { + SetUp(); + + const vpx_rational timebase = { 33333333, 1000000000 }; + cfg_.g_timebase = timebase; + cfg_.rc_target_bitrate = kVP9EncodePerfTestVectors[i].bitrate; + + init_flags_ = VPX_CODEC_USE_PSNR; + + const unsigned frames = kVP9EncodePerfTestVectors[i].frames; + const char *video_name = kVP9EncodePerfTestVectors[i].name; + libvpx_test::I420VideoSource video( + video_name, + kVP9EncodePerfTestVectors[i].width, + kVP9EncodePerfTestVectors[i].height, + timebase.den, timebase.num, 0, + kVP9EncodePerfTestVectors[i].frames); + set_speed(kEncodePerfTestSpeeds[j]); + + vpx_usec_timer t; + vpx_usec_timer_start(&t); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + vpx_usec_timer_mark(&t); + const double elapsed_secs = vpx_usec_timer_elapsed(&t) / kUsecsInSec; + const double fps = frames / elapsed_secs; + const double minimum_psnr = min_psnr(); + + printf("{\n"); + printf("\t\"type\" : \"encode_perf_test\",\n"); + printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP); + printf("\t\"videoName\" : \"%s\",\n", video_name); + printf("\t\"encodeTimeSecs\" : %f,\n", elapsed_secs); + printf("\t\"totalFrames\" : %u,\n", frames); + printf("\t\"framesPerSecond\" : %f,\n", fps); + printf("\t\"minPsnr\" : %f,\n", minimum_psnr); + printf("\t\"speed\" : %d\n", kEncodePerfTestSpeeds[j]); + printf("}\n"); + } + } +} + +VP9_INSTANTIATE_TEST_CASE( + VP9EncodePerfTest, ::testing::Values(::libvpx_test::kRealTime)); +} // namespace diff --git a/test/frame_size_tests.cc b/test/frame_size_tests.cc index b05d12e9e..34ee8b605 100644 --- a/test/frame_size_tests.cc +++ b/test/frame_size_tests.cc @@ -62,16 +62,6 @@ TEST_F(VP9FrameSizeTestsLarge, TestInvalidSizes) { video.set_limit(2); expected_res_ = VPX_CODEC_CORRUPT_FRAME; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -#else - // If we are on a 32 bit platform we can't possibly allocate enough memory - // for the largest video frame size (64kx64k). This test checks that we - // properly return a memory error. - if (sizeof(size_t) == 4) { - video.SetSize(65535, 65535); - video.set_limit(2); - expected_res_ = VPX_CODEC_MEM_ERROR; - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - } #endif } @@ -89,8 +79,7 @@ TEST_F(VP9FrameSizeTestsLarge, ValidSizes) { // one for each lag in frames (for 2 pass), and then one for each possible // reference buffer (8) - we can end up with up to 30 buffers of roughly this // size or almost 1 gig of memory. - // TODO(jzern): restore this to at least 4096x4096 after issue #828 is fixed. - video.SetSize(4096, 2160); + video.SetSize(4096, 4096); video.set_limit(2); expected_res_ = VPX_CODEC_OK; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); diff --git a/test/scale_border_test.cc b/test/scale_border_test.cc new file mode 100644 index 000000000..cc9a69a7d --- /dev/null +++ b/test/scale_border_test.cc @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "third_party/googletest/src/include/gtest/gtest.h" + +#include "test/clear_system_state.h" +#include "test/register_state_check.h" + +#include "./vpx_config.h" +#include "./vpx_scale_rtcd.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx_scale/yv12config.h" + +namespace { + +typedef void (*ExtendFrameBorderFunc)(YV12_BUFFER_CONFIG *ybf); + +class ExtendBorderTest + : public ::testing::TestWithParam<ExtendFrameBorderFunc> { + public: + virtual ~ExtendBorderTest() { + libvpx_test::ClearSystemState(); + } + + void ResetImage(int width, int height) { + width_ = width; + height_ = height; + vpx_memset(&img_, 0, sizeof(img_)); + ASSERT_EQ(0, vp8_yv12_alloc_frame_buffer(&img_, width_, height_, + VP8BORDERINPIXELS)); + + vpx_memset(img_.buffer_alloc, kBufFiller, img_.frame_size); + FillPlane(img_.y_buffer, img_.y_crop_width, img_.y_crop_height, + img_.y_stride); + FillPlane(img_.u_buffer, img_.uv_crop_width, img_.uv_crop_height, + img_.uv_stride); + FillPlane(img_.v_buffer, img_.uv_crop_width, img_.uv_crop_height, + img_.uv_stride); + + vpx_memset(&ref_img_, 0, sizeof(ref_img_)); + ASSERT_EQ(0, vp8_yv12_alloc_frame_buffer(&ref_img_, width_, height_, + VP8BORDERINPIXELS)); + + vpx_memset(ref_img_.buffer_alloc, kBufFiller, ref_img_.frame_size); + FillPlane(ref_img_.y_buffer, ref_img_.y_crop_width, ref_img_.y_crop_height, + ref_img_.y_stride); + FillPlane(ref_img_.u_buffer, + ref_img_.uv_crop_width, ref_img_.uv_crop_height, + ref_img_.uv_stride); + FillPlane(ref_img_.v_buffer, + ref_img_.uv_crop_width, ref_img_.uv_crop_height, + ref_img_.uv_stride); + } + + void DeallocImage() { + vp8_yv12_de_alloc_frame_buffer(&img_); + vp8_yv12_de_alloc_frame_buffer(&ref_img_); + } + + private: + static const int kBufFiller = 123; + static const int kBufMax = kBufFiller - 1; + + virtual void SetUp() { + extend_fn_ = GetParam(); + } + + static void FillPlane(uint8_t *buf, int width, int height, int stride) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + buf[x + (y * stride)] = (x + (width * y)) % kBufMax; + } + } + } + + void ReferenceExtendBorder() { + ExtendPlane(ref_img_.y_buffer, + ref_img_.y_crop_width, ref_img_.y_crop_height, + ref_img_.y_width, ref_img_.y_height, + ref_img_.y_stride, + ref_img_.border); + ExtendPlane(ref_img_.u_buffer, + ref_img_.uv_crop_width, ref_img_.uv_crop_height, + ref_img_.uv_width, ref_img_.uv_height, + ref_img_.uv_stride, + ref_img_.border / 2); + ExtendPlane(ref_img_.v_buffer, + ref_img_.uv_crop_width, ref_img_.uv_crop_height, + ref_img_.uv_width, ref_img_.uv_height, + ref_img_.uv_stride, + ref_img_.border / 2); + } + + static void ExtendPlane(uint8_t *buf, int crop_width, int crop_height, + int width, int height, int stride, int padding) { + // Copy the outermost visible pixel to a distance of at least 'padding.' + // The buffers are allocated such that there may be excess space outside the + // padding. As long as the minimum amount of padding is achieved it is not + // necessary to fill this space as well. + uint8_t *left = buf - padding; + uint8_t *right = buf + crop_width; + const int right_extend = padding + (width - crop_width); + const int bottom_extend = padding + (height - crop_height); + + // Fill the border pixels from the nearest image pixel. + for (int y = 0; y < crop_height; ++y) { + vpx_memset(left, left[padding], padding); + vpx_memset(right, right[-1], right_extend); + left += stride; + right += stride; + } + + left = buf - padding; + uint8_t *top = left - (stride * padding); + // The buffer does not always extend as far as the stride. + // Equivalent to padding + width + padding. + const int extend_width = padding + crop_width + right_extend; + + // The first row was already extended to the left and right. Copy it up. + for (int y = 0; y < padding; ++y) { + vpx_memcpy(top, left, extend_width); + top += stride; + } + + uint8_t *bottom = left + (crop_height * stride); + for (int y = 0; y < bottom_extend; ++y) { + vpx_memcpy(bottom, left + (crop_height - 1) * stride, extend_width); + bottom += stride; + } + } + + void ExtendBorder() { + ASM_REGISTER_STATE_CHECK(extend_fn_(&img_)); + } + + void CompareImages() { + EXPECT_EQ(ref_img_.frame_size, img_.frame_size); + EXPECT_EQ(0, memcmp(ref_img_.buffer_alloc, img_.buffer_alloc, + ref_img_.frame_size)); + } + + protected: + void RunTest() { +#if ARCH_ARM + // Some arm devices OOM when trying to allocate the largest buffers. + static const int kNumSizesToTest = 6; +#else + static const int kNumSizesToTest = 7; +#endif + static const int kSizesToTest[] = {1, 15, 33, 145, 512, 1025, 16383}; + for (int h = 0; h < kNumSizesToTest; ++h) { + for (int w = 0; w < kNumSizesToTest; ++w) { + ResetImage(kSizesToTest[w], kSizesToTest[h]); + ExtendBorder(); + ReferenceExtendBorder(); + CompareImages(); + DeallocImage(); + } + } + } + + YV12_BUFFER_CONFIG img_; + YV12_BUFFER_CONFIG ref_img_; + ExtendFrameBorderFunc extend_fn_; + int width_; + int height_; +}; + +TEST_P(ExtendBorderTest, ExtendBorder) { + ASSERT_NO_FATAL_FAILURE(RunTest()); +} + +INSTANTIATE_TEST_CASE_P(C, ExtendBorderTest, + ::testing::Values(vp8_yv12_extend_frame_borders_c)); +} // namespace diff --git a/test/test-data.sha1 b/test/test-data.sha1 index 74f7842d4..98ac0e670 100644 --- a/test/test-data.sha1 +++ b/test/test-data.sha1 @@ -669,3 +669,14 @@ c01bb7938f9a9f25e0c37afdec2f2fb73b6cc7fa vp90-2-17-show-existing-frame.webm cc75f351818b9a619818f5cc77b9bc013d0c1e11 vp90-2-17-show-existing-frame.webm.md5 0321d507ce62dedc8a51b4e9011f7a19aed9c3dc vp91-2-04-yuv444.webm 367e423dd41fdb49aa028574a2cfec5c2f325c5c vp91-2-04-yuv444.webm.md5 +eb438c6540eb429f74404eedfa3228d409c57874 desktop_640_360_30.yuv +89e70ebd22c27d275fe14dc2f1a41841a6d8b9ab kirland_640_480_30.yuv +33c533192759e5bb4f07abfbac389dc259db4686 macmarcomoving_640_480_30.yuv +8bfaab121080821b8f03b23467911e59ec59b8fe macmarcostationary_640_480_30.yuv +70894878d916a599842d9ad0dcd24e10c13e5467 niklas_640_480_30.yuv +8784b6df2d8cc946195a90ac00540500d2e522e4 tacomanarrows_640_480_30.yuv +edd86a1f5e62fd9da9a9d46078247759c2638009 tacomasmallcameramovement_640_480_30.yuv +9a70e8b7d14fba9234d0e51dce876635413ce444 thaloundeskmtg_640_480_30.yuv +e7d315dbf4f3928779e0dc624311196d44491d32 niklas_1280_720_30.yuv +c77e4a26616add298a05dd5d12397be22c0e40c5 vp90-2-18-resize.ivf +c77e4a26616add298a05dd5d12397be22c0e40c5 vp90-2-18-resize.ivf diff --git a/test/test.mk b/test/test.mk index e3f3054f6..53d40572a 100644 --- a/test/test.mk +++ b/test/test.mk @@ -69,6 +69,11 @@ ifeq ($(CONFIG_DECODE_PERF_TESTS)$(CONFIG_VP9_DECODER)$(CONFIG_WEBM_IO), \ LIBVPX_TEST_SRCS-yes += decode_perf_test.cc endif +# encode perf tests are vp9 only +ifeq ($(CONFIG_ENCODE_PERF_TESTS)$(CONFIG_VP9_ENCODER), yesyes) +LIBVPX_TEST_SRCS-yes += encode_perf_test.cc +endif + ## ## WHITE BOX TESTS ## @@ -94,6 +99,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc LIBVPX_TEST_SRCS-yes += idct_test.cc LIBVPX_TEST_SRCS-yes += intrapred_test.cc +LIBVPX_TEST_SRCS-yes += scale_border_test.cc LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc endif # VP8 @@ -776,6 +782,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-16-intra-only.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-16-intra-only.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-17-show-existing-frame.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-17-show-existing-frame.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-18-resize.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-18-resize.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm.md5 @@ -838,3 +846,15 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \ vp90-2-tos_1920x800_tile_1x4_fpm_2335kbps.webm endif # CONFIG_DECODE_PERF_TESTS + +ifeq ($(CONFIG_ENCODE_PERF_TESTS),yes) +LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += desktop_640_360_30.yuv +LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += kirland_640_480_30.yuv +LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += macmarcomoving_640_480_30.yuv +LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += macmarcostationary_640_480_30.yuv +LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_640_480_30.yuv +LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += tacomanarrows_640_480_30.yuv +LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += tacomasmallcameramovement_640_480_30.yuv +LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += thaloundeskmtg_640_480_30.yuv +LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_1280_720_30.yuv +endif # CONFIG_ENCODE_PERF_TESTS diff --git a/test/test_vectors.cc b/test/test_vectors.cc index 4ea4b9dab..dbdbdd6f9 100644 --- a/test/test_vectors.cc +++ b/test/test_vectors.cc @@ -181,7 +181,7 @@ const char *const kVP9TestVectors[] = { "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm", "vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm", "vp90-2-16-intra-only.webm", "vp90-2-17-show-existing-frame.webm", - "vp91-2-04-yuv444.webm", + "vp90-2-18-resize.ivf", "vp91-2-04-yuv444.webm", }; const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors); #endif // CONFIG_VP9_DECODER diff --git a/test/variance_test.cc b/test/variance_test.cc index 546977069..9dc7c6a45 100644 --- a/test/variance_test.cc +++ b/test/variance_test.cc @@ -90,14 +90,14 @@ class VarianceTest rnd(ACMRandom::DeterministicSeed()); block_size_ = width_ * height_; - src_ = new uint8_t[block_size_]; + src_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_)); ref_ = new uint8_t[block_size_]; ASSERT_TRUE(src_ != NULL); ASSERT_TRUE(ref_ != NULL); } virtual void TearDown() { - delete[] src_; + vpx_free(src_); delete[] ref_; libvpx_test::ClearSystemState(); } diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 12e5011fe..469d0d6e9 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -1879,6 +1879,13 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) */ cpi->cyclic_refresh_mode_enabled = cpi->oxcf.error_resilient_mode; cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 5; + if (cpi->oxcf.number_of_layers == 1) { + cpi->cyclic_refresh_mode_max_mbs_perframe = + (cpi->common.mb_rows * cpi->common.mb_cols) / 20; + } else if (cpi->oxcf.number_of_layers == 2) { + cpi->cyclic_refresh_mode_max_mbs_perframe = + (cpi->common.mb_rows * cpi->common.mb_cols) / 10; + } cpi->cyclic_refresh_mode_index = 0; cpi->cyclic_refresh_q = 32; diff --git a/vp9/common/vp9_prob.h b/vp9/common/vp9_prob.h index f36148035..3920619d8 100644 --- a/vp9/common/vp9_prob.h +++ b/vp9/common/vp9_prob.h @@ -47,18 +47,9 @@ static INLINE vp9_prob clip_prob(int p) { return (p > 255) ? 255u : (p < 1) ? 1u : p; } -// int64 is not needed for normal frame level calculations. -// However when outputting entropy stats accumulated over many frames -// or even clips we can overflow int math. -#ifdef ENTROPY_STATS static INLINE vp9_prob get_prob(int num, int den) { return (den == 0) ? 128u : clip_prob(((int64_t)num * 256 + (den >> 1)) / den); } -#else -static INLINE vp9_prob get_prob(int num, int den) { - return (den == 0) ? 128u : clip_prob((num * 256 + (den >> 1)) / den); -} -#endif static INLINE vp9_prob get_binary_prob(int n0, int n1) { return get_prob(n0, n0 + n1); diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 28c674a38..a448bd2b4 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -627,9 +627,13 @@ static void resize_context_buffers(VP9_COMMON *cm, int width, int height) { "Width and height beyond allowed size."); #endif if (cm->width != width || cm->height != height) { + const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2); + const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2); + // Change in frame size (assumption: color format does not change). if (cm->width == 0 || cm->height == 0 || - width * height > cm->width * cm->height) { + aligned_width > cm->width || + aligned_width * aligned_height > cm->width * cm->height) { if (vp9_alloc_context_buffers(cm, width, height)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate frame buffers"); @@ -662,6 +666,7 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { int width, height; int found = 0, i; + int has_valid_ref_frame = 0; for (i = 0; i < REFS_PER_FRAME; ++i) { if (vp9_rb_read_bit(rb)) { YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf; @@ -675,15 +680,21 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm, if (!found) vp9_read_frame_size(rb, &width, &height); - // Check that each of the frames that this frame references has valid - // dimensions. + if (width <=0 || height <= 0) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Invalid frame size"); + + // Check to make sure at least one of frames that this frame references + // has valid dimensions. for (i = 0; i < REFS_PER_FRAME; ++i) { RefBuffer *const ref_frame = &cm->frame_refs[i]; - if (!valid_ref_frame_size(ref_frame->buf->y_width, ref_frame->buf->y_height, - width, height)) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Referenced frame has invalid size"); + has_valid_ref_frame |= valid_ref_frame_size(ref_frame->buf->y_crop_width, + ref_frame->buf->y_crop_height, + width, height); } + if (!has_valid_ref_frame) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Referenced frame has invalid size"); resize_context_buffers(cm, width, height); setup_display_size(cm, rb); diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 1afaee1e3..32e80f93b 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -435,6 +435,11 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm, for (ref = 0; ref < 1 + is_compound; ++ref) { const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref]; + const int ref_idx = frame - LAST_FRAME; + if (cm->frame_refs[ref_idx].sf.x_scale_fp == REF_INVALID_SCALE || + cm->frame_refs[ref_idx].sf.y_scale_fp == REF_INVALID_SCALE ) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Reference frame has invalid dimensions"); vp9_find_mv_refs(cm, xd, tile, mi, frame, mbmi->ref_mvs[frame], mi_row, mi_col); } diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h index 6d76914e9..b11a0ae3b 100644 --- a/vp9/encoder/vp9_context_tree.h +++ b/vp9/encoder/vp9_context_tree.h @@ -41,7 +41,7 @@ typedef struct { int64_t tx_rd_diff[TX_MODES]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; -#if CONFIG_DENOISING +#if CONFIG_VP9_TEMPORAL_DENOISING unsigned int newmv_sse; unsigned int zeromv_sse; PREDICTION_MODE best_sse_inter_mode; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index f80c0bc3e..584bcb8f5 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1333,7 +1333,7 @@ static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile, set_offsets(cpi, tile, mi_row, mi_col, bsize); update_state_rt(cpi, ctx, mi_row, mi_col, bsize); -#if CONFIG_DENOISING +#if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && output_enabled) { vp9_denoiser_denoise(&cpi->denoiser, &cpi->mb, mi_row, mi_col, MAX(BLOCK_8X8, bsize), ctx); @@ -2015,17 +2015,10 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, ctx->mic.mbmi.interp_filter; rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, pc_tree->leaf_split[0], best_rd, 0); - if (sum_rate == INT_MAX) { + if (sum_rate == INT_MAX) sum_rd = INT64_MAX; - } else { + else sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - if (sum_rd < best_rd) { - update_state(cpi, pc_tree->leaf_split[0], mi_row, mi_col, subsize, 0); - encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, - pc_tree->leaf_split[0]); - update_partition_context(xd, mi_row, mi_col, subsize, bsize); - } - } } else { for (i = 0; i < 4 && sum_rd < best_rd; ++i) { const int x_idx = (i & 1) * mi_step; diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index b1a5e0888..f8d26110d 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -131,7 +131,8 @@ static void setup_frame(VP9_COMP *cpi) { } if (cm->frame_type == KEY_FRAME) { - cpi->refresh_golden_frame = 1; + if (!(cpi->use_svc && cpi->svc.number_temporal_layers == 1)) + cpi->refresh_golden_frame = 1; cpi->refresh_alt_ref_frame = 1; } else { cm->fc = cm->frame_contexts[cm->frame_context_idx]; @@ -671,7 +672,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { cpi->ext_refresh_frame_flags_pending = 0; cpi->ext_refresh_frame_context_pending = 0; -#if CONFIG_DENOISING +#if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0) { vp9_denoiser_alloc(&(cpi->denoiser), cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, @@ -865,7 +866,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { cpi->mb.nmvsadcost_hp[1] = &cpi->mb.nmvsadcosts_hp[1][MV_MAX]; cal_nmvsadcosts_hp(cpi->mb.nmvsadcost_hp); -#if CONFIG_DENOISING +#if CONFIG_VP9_TEMPORAL_DENOISING #ifdef OUTPUT_YUV_DENOISED yuv_denoised_file = fopen("denoised.yuv", "ab"); #endif @@ -1119,7 +1120,7 @@ void vp9_remove_compressor(VP9_COMP *cpi) { #endif } -#if CONFIG_DENOISING +#if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0) { vp9_denoiser_free(&(cpi->denoiser)); } @@ -1143,7 +1144,7 @@ void vp9_remove_compressor(VP9_COMP *cpi) { vp9_remove_common(&cpi->common); vpx_free(cpi); -#if CONFIG_DENOISING +#if CONFIG_VP9_TEMPORAL_DENOISING #ifdef OUTPUT_YUV_DENOISED fclose(yuv_denoised_file); #endif @@ -1357,7 +1358,7 @@ void vp9_write_yuv_frame(YV12_BUFFER_CONFIG *s, FILE *f) { } #endif -#if CONFIG_DENOISING +#if CONFIG_VP9_TEMPORAL_DENOISING #if defined(OUTPUT_YUV_DENOISED) // The denoiser buffer is allocated as a YUV 440 buffer. This function writes it // as YUV 420. We simply use the top-left pixels of the UV buffers, since we do @@ -1604,7 +1605,7 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { ref_cnt_fb(cm->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx); } -#if CONFIG_DENOISING +#if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0) { vp9_denoiser_update_frame_info(&cpi->denoiser, *cpi->Source, @@ -2001,7 +2002,8 @@ static void get_ref_frame_flags(VP9_COMP *cpi) { if (cpi->gold_is_last) cpi->ref_frame_flags &= ~VP9_GOLD_FLAG; - if (cpi->rc.frames_till_gf_update_due == INT_MAX) + if (cpi->rc.frames_till_gf_update_due == INT_MAX && + !(cpi->use_svc && cpi->svc.number_temporal_layers == 1)) cpi->ref_frame_flags &= ~VP9_GOLD_FLAG; if (cpi->alt_is_last) @@ -2241,7 +2243,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, encode_with_recode_loop(cpi, size, dest, q, bottom_index, top_index); } -#if CONFIG_DENOISING +#if CONFIG_VP9_TEMPORAL_DENOISING #ifdef OUTPUT_YUV_DENOISED if (cpi->oxcf.noise_sensitivity > 0) { vp9_write_yuv_frame_420(&cpi->denoiser.running_avg_y[INTRA_FRAME], diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 1419cf6f1..4b3f2ad56 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -37,7 +37,7 @@ #include "vp9/encoder/vp9_svc_layercontext.h" #include "vp9/encoder/vp9_tokenize.h" #include "vp9/encoder/vp9_variance.h" -#if CONFIG_DENOISING +#if CONFIG_VP9_TEMPORAL_DENOISING #include "vp9/encoder/vp9_denoiser.h" #endif @@ -430,7 +430,7 @@ typedef struct VP9_COMP { int multi_arf_enabled; int multi_arf_last_grp_enabled; -#if CONFIG_DENOISING +#if CONFIG_VP9_TEMPORAL_DENOISING VP9_DENOISER denoiser; #endif } VP9_COMP; diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 0140fb59e..30a0e9d0d 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -602,7 +602,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } -#if CONFIG_DENOISING +#if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0) { vp9_denoiser_update_frame_stats(&cpi->denoiser, mbmi, sse_y, this_mode, ctx); diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index f0f9afcd5..1adbad9cf 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -1238,6 +1238,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame = 1; + cpi->ref_frame_flags &= (~VP9_ALT_FLAG); } if (cpi->pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) { @@ -1252,7 +1253,10 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { lc->is_key_frame = 0; } else { lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame; + if (lc->is_key_frame) + cpi->ref_frame_flags &= (~VP9_LAST_FLAG); } + cpi->ref_frame_flags &= (~VP9_ALT_FLAG); } if (cpi->pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 0195f9bf7..f65ac7b2b 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -434,22 +434,26 @@ static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x, mbmi->tx_size = MIN(max_tx_size, largest_tx_size); txfm_rd_in_plane(x, rate, distortion, skip, - &sse[mbmi->tx_size], ref_best_rd, 0, bs, + sse, ref_best_rd, 0, bs, mbmi->tx_size, cpi->sf.use_fast_coef_costing); cpi->tx_stepdown_count[0]++; } static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, - int (*r)[2], int *rate, - int64_t *d, int64_t *distortion, - int *s, int *skip, + int *rate, + int64_t *distortion, + int *skip, + int64_t *psse, int64_t tx_cache[TX_MODES], + int64_t ref_best_rd, BLOCK_SIZE bs) { const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); + int r[TX_SIZES][2], s[TX_SIZES]; + int64_t d[TX_SIZES], sse[TX_SIZES]; int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}, @@ -466,6 +470,9 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, s1 = vp9_cost_bit(skip_prob, 1); for (n = TX_4X4; n <= max_tx_size; n++) { + txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n], + &sse[n], ref_best_rd, 0, bs, n, + cpi->sf.use_fast_coef_costing); r[n][1] = r[n][0]; if (r[n][0] < INT_MAX) { for (m = 0; m <= n - (n == max_tx_size); m++) { @@ -496,6 +503,7 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, *distortion = d[mbmi->tx_size]; *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT]; *skip = s[mbmi->tx_size]; + *psse = sse[mbmi->tx_size]; tx_cache[ONLY_4X4] = rd[TX_4X4][0]; tx_cache[ALLOW_8X8] = rd[TX_8X8][0]; @@ -522,65 +530,39 @@ static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *psse, BLOCK_SIZE bs, int64_t txfm_cache[TX_MODES], int64_t ref_best_rd) { - int r[TX_SIZES][2], s[TX_SIZES]; - int64_t d[TX_SIZES], sse[TX_SIZES]; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - const TX_SIZE max_tx_size = max_txsize_lookup[bs]; - TX_SIZE tx_size; - assert(bs == mbmi->sb_type); + assert(bs == xd->mi[0]->mbmi.sb_type); vp9_subtract_plane(x, bs, 0); if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) { vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t)); - choose_largest_tx_size(cpi, x, rate, distortion, skip, sse, ref_best_rd, + choose_largest_tx_size(cpi, x, rate, distortion, skip, psse, ref_best_rd, bs); - if (psse) - *psse = sse[mbmi->tx_size]; - return; + } else { + choose_tx_size_from_rd(cpi, x, rate, distortion, skip, psse, + txfm_cache, ref_best_rd, bs); } - - for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) - txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size], &s[tx_size], - &sse[tx_size], ref_best_rd, 0, bs, tx_size, - cpi->sf.use_fast_coef_costing); - choose_tx_size_from_rd(cpi, x, r, rate, d, distortion, s, - skip, txfm_cache, bs); - - if (psse) - *psse = sse[mbmi->tx_size]; } static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skip, - int64_t *psse, BLOCK_SIZE bs, + BLOCK_SIZE bs, int64_t txfm_cache[TX_MODES], int64_t ref_best_rd) { - int64_t sse[TX_SIZES]; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + int64_t sse; - assert(bs == mbmi->sb_type); + assert(bs == xd->mi[0]->mbmi.sb_type); if (cpi->sf.tx_size_search_method != USE_FULL_RD || xd->lossless) { vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t)); - choose_largest_tx_size(cpi, x, rate, distortion, skip, sse, ref_best_rd, + choose_largest_tx_size(cpi, x, rate, distortion, skip, &sse, ref_best_rd, bs); } else { - int r[TX_SIZES][2], s[TX_SIZES]; - int64_t d[TX_SIZES]; - TX_SIZE tx_size; - for (tx_size = TX_4X4; tx_size <= max_txsize_lookup[bs]; ++tx_size) - txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size], - &s[tx_size], &sse[tx_size], - ref_best_rd, 0, bs, tx_size, - cpi->sf.use_fast_coef_costing); - choose_tx_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache, - bs); + choose_tx_size_from_rd(cpi, x, rate, distortion, skip, &sse, + txfm_cache, ref_best_rd, bs); } - if (psse) - *psse = sse[mbmi->tx_size]; } @@ -834,7 +816,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, mic->mbmi.mode = mode; intra_super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, - &s, NULL, bsize, local_tx_cache, best_rd); + &s, bsize, local_tx_cache, best_rd); if (this_rate_tokenonly == INT_MAX) continue; @@ -2722,7 +2704,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (ref_frame == INTRA_FRAME) { TX_SIZE uv_tx; - intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, + intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, bsize, tx_cache, best_rd); if (rate_y == INT_MAX) @@ -3277,13 +3259,10 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf)) continue; - if (comp_pred) { - mode_excluded = mode_excluded ? mode_excluded - : cm->reference_mode == SINGLE_REFERENCE; - } else if (ref_frame != INTRA_FRAME) { - mode_excluded = mode_excluded ? mode_excluded - : cm->reference_mode == COMPOUND_REFERENCE; - } + if (comp_pred) + mode_excluded = cm->reference_mode == SINGLE_REFERENCE; + else if (ref_frame != INTRA_FRAME) + mode_excluded = cm->reference_mode == COMPOUND_REFERENCE; // If the segment reference frame feature is enabled.... // then do nothing if the current ref frame is not allowed.. diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index 3f2f5b9d9..3381cb95a 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -23,8 +23,8 @@ VP9_CX_SRCS-yes += encoder/vp9_context_tree.h VP9_CX_SRCS-yes += encoder/vp9_cost.h VP9_CX_SRCS-yes += encoder/vp9_cost.c VP9_CX_SRCS-yes += encoder/vp9_dct.c -VP9_CX_SRCS-$(CONFIG_DENOISING) += encoder/vp9_denoiser.c -VP9_CX_SRCS-$(CONFIG_DENOISING) += encoder/vp9_denoiser.h +VP9_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING) += encoder/vp9_denoiser.c +VP9_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING) += encoder/vp9_denoiser.h VP9_CX_SRCS-yes += encoder/vp9_encodeframe.c VP9_CX_SRCS-yes += encoder/vp9_encodeframe.h VP9_CX_SRCS-yes += encoder/vp9_encodemb.c diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm deleted file mode 100644 index b2eb9eb0f..000000000 --- a/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm +++ /dev/null @@ -1,308 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_yv12_extend_frame_borders_neon| - ARM - REQUIRE8 - PRESERVE8 - - INCLUDE vpx_scale_asm_offsets.asm - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf); -; we depend on VP8BORDERINPIXELS being 32 - -|vp8_yv12_extend_frame_borders_neon| PROC - push {r4 - r10, lr} - vpush {d8 - d15} - - ; Border = 32 - ldr r3, [r0, #yv12_buffer_config_y_width] ; plane_width - ldr r1, [r0, #yv12_buffer_config_y_buffer] ; src_ptr1 - ldr r4, [r0, #yv12_buffer_config_y_height] ; plane_height - ldr lr, [r0, #yv12_buffer_config_y_stride] ; plane_stride - -; Border copy for Y plane -; copy the left and right most columns out - add r6, r1, r3 ; dest_ptr2 = src_ptr2 + 1 (src_ptr1 + plane_width) - sub r2, r6, #1 ; src_ptr2 = src_ptr1 + plane_width - 1 - sub r5, r1, #32 ; dest_ptr1 = src_ptr1 - Border - - mov r12, r4, lsr #2 ; plane_height / 4 - -copy_left_right_y - vld1.8 {d0[], d1[]}, [r1], lr - vld1.8 {d4[], d5[]}, [r2], lr - vld1.8 {d8[], d9[]}, [r1], lr - vld1.8 {d12[], d13[]}, [r2], lr - vld1.8 {d16[], d17[]}, [r1], lr - vld1.8 {d20[], d21[]}, [r2], lr - vld1.8 {d24[], d25[]}, [r1], lr - vld1.8 {d28[], d29[]}, [r2], lr - - vmov q1, q0 - vmov q3, q2 - vmov q5, q4 - vmov q7, q6 - vmov q9, q8 - vmov q11, q10 - vmov q13, q12 - vmov q15, q14 - - subs r12, r12, #1 - - vst1.8 {q0, q1}, [r5], lr - vst1.8 {q2, q3}, [r6], lr - vst1.8 {q4, q5}, [r5], lr - vst1.8 {q6, q7}, [r6], lr - vst1.8 {q8, q9}, [r5], lr - vst1.8 {q10, q11}, [r6], lr - vst1.8 {q12, q13}, [r5], lr - vst1.8 {q14, q15}, [r6], lr - - bne copy_left_right_y - -;Now copy the top and bottom source lines into each line of the respective borders - ldr r1, [r0, #yv12_buffer_config_y_buffer] ; y_buffer - mul r8, r4, lr ; plane_height * plane_stride - - ; copy width is plane_stride - movs r12, lr, lsr #7 ; plane_stride / 128 - - sub r1, r1, #32 ; src_ptr1 = y_buffer - Border - add r6, r1, r8 ; dest_ptr2 = src_ptr2 - plane_stride (src_ptr1 + (plane_height * plane_stride)) - sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride - sub r5, r1, lr, asl #5 ; dest_ptr1 = src_ptr1 - (Border * plane_stride) - ble extra_y_copy_needed ; plane stride < 128 - -copy_top_bottom_y - vld1.8 {q0, q1}, [r1]! - vld1.8 {q8, q9}, [r2]! - vld1.8 {q2, q3}, [r1]! - vld1.8 {q10, q11}, [r2]! - vld1.8 {q4, q5}, [r1]! - vld1.8 {q12, q13}, [r2]! - vld1.8 {q6, q7}, [r1]! - vld1.8 {q14, q15}, [r2]! - - mov r7, #32 ; Border - -top_bottom_32 - subs r7, r7, #1 - - vst1.8 {q0, q1}, [r5]! - vst1.8 {q8, q9}, [r6]! - vst1.8 {q2, q3}, [r5]! - vst1.8 {q10, q11}, [r6]! - vst1.8 {q4, q5}, [r5]! - vst1.8 {q12, q13}, [r6]! - vst1.8 {q6, q7}, [r5]! - vst1.8 {q14, q15}, [r6]! - - add r5, r5, lr ; dest_ptr1 += plane_stride - sub r5, r5, #128 ; dest_ptr1 -= 128 - add r6, r6, lr ; dest_ptr2 += plane_stride - sub r6, r6, #128 ; dest_ptr2 -= 128 - - bne top_bottom_32 - - sub r5, r1, lr, asl #5 ; src_ptr1 - (Border* plane_stride) - add r6, r2, lr ; src_ptr2 + plane_stride - - subs r12, r12, #1 - bne copy_top_bottom_y - -extra_y_copy_needed - mov r7, lr, lsr #4 ; check to see if extra copy is needed - ands r7, r7, #0x7 - bne extra_top_bottom_y -end_of_border_copy_y - -;Border copy for U, V planes -; Border = 16 - ldr r7, [r0, #yv12_buffer_config_u_buffer] ; src_ptr1 - ldr lr, [r0, #yv12_buffer_config_uv_stride] ; plane_stride - ldr r3, [r0, #yv12_buffer_config_uv_width] ; plane_width - ldr r4, [r0, #yv12_buffer_config_uv_height] ; plane_height - - mov r10, #2 - -;copy the left and right most columns out -border_copy_uv - mov r1, r7 ; src_ptr1 needs to be saved for second half of loop - sub r5, r1, #16 ; dest_ptr1 = src_ptr1 - Border - add r6, r1, r3 ; dest_ptr2 = src_ptr2 + 1 (src_ptr1 + plane_width) - sub r2, r6, #1 ; src_ptr2 = src_ptr1 + plane_width - 1 - - mov r12, r4, lsr #3 ; plane_height / 8 - -copy_left_right_uv - vld1.8 {d0[], d1[]}, [r1], lr - vld1.8 {d2[], d3[]}, [r2], lr - vld1.8 {d4[], d5[]}, [r1], lr - vld1.8 {d6[], d7[]}, [r2], lr - vld1.8 {d8[], d9[]}, [r1], lr - vld1.8 {d10[], d11[]}, [r2], lr - vld1.8 {d12[], d13[]}, [r1], lr - vld1.8 {d14[], d15[]}, [r2], lr - vld1.8 {d16[], d17[]}, [r1], lr - vld1.8 {d18[], d19[]}, [r2], lr - vld1.8 {d20[], d21[]}, [r1], lr - vld1.8 {d22[], d23[]}, [r2], lr - vld1.8 {d24[], d25[]}, [r1], lr - vld1.8 {d26[], d27[]}, [r2], lr - vld1.8 {d28[], d29[]}, [r1], lr - vld1.8 {d30[], d31[]}, [r2], lr - - subs r12, r12, #1 - - vst1.8 {q0}, [r5], lr - vst1.8 {q1}, [r6], lr - vst1.8 {q2}, [r5], lr - vst1.8 {q3}, [r6], lr - vst1.8 {q4}, [r5], lr - vst1.8 {q5}, [r6], lr - vst1.8 {q6}, [r5], lr - vst1.8 {q7}, [r6], lr - vst1.8 {q8}, [r5], lr - vst1.8 {q9}, [r6], lr - vst1.8 {q10}, [r5], lr - vst1.8 {q11}, [r6], lr - vst1.8 {q12}, [r5], lr - vst1.8 {q13}, [r6], lr - vst1.8 {q14}, [r5], lr - vst1.8 {q15}, [r6], lr - - bne copy_left_right_uv - -;Now copy the top and bottom source lines into each line of the respective borders - mov r1, r7 - mul r8, r4, lr ; plane_height * plane_stride - movs r12, lr, lsr #6 ; plane_stride / 64 - - sub r1, r1, #16 ; src_ptr1 = u_buffer - Border - add r6, r1, r8 ; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride) - sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride - sub r5, r1, lr, asl #4 ; dest_ptr1 = src_ptr1 - (Border * plane_stride) - ble extra_uv_copy_needed ; plane_stride < 64 - -copy_top_bottom_uv - vld1.8 {q0, q1}, [r1]! - vld1.8 {q8, q9}, [r2]! - vld1.8 {q2, q3}, [r1]! - vld1.8 {q10, q11}, [r2]! - - mov r7, #16 ; Border - -top_bottom_16 - subs r7, r7, #1 - - vst1.8 {q0, q1}, [r5]! - vst1.8 {q8, q9}, [r6]! - vst1.8 {q2, q3}, [r5]! - vst1.8 {q10, q11}, [r6]! - - add r5, r5, lr ; dest_ptr1 += plane_stride - sub r5, r5, #64 - add r6, r6, lr ; dest_ptr2 += plane_stride - sub r6, r6, #64 - - bne top_bottom_16 - - sub r5, r1, lr, asl #4 ; dest_ptr1 = src_ptr1 - (Border * plane_stride) - add r6, r2, lr ; dest_ptr2 = src_ptr2 + plane_stride - - subs r12, r12, #1 - bne copy_top_bottom_uv -extra_uv_copy_needed - mov r7, lr, lsr #3 ; check to see if extra copy is needed - ands r7, r7, #0x7 - bne extra_top_bottom_uv - -end_of_border_copy_uv - subs r10, r10, #1 - ldrne r7, [r0, #yv12_buffer_config_v_buffer] ; src_ptr1 - bne border_copy_uv - - vpop {d8 - d15} - pop {r4 - r10, pc} - -;;;;;;;;;;;;;;;;;;;;;; -extra_top_bottom_y - vld1.8 {q0}, [r1]! - vld1.8 {q2}, [r2]! - - mov r9, #4 ; 32 >> 3 - -extra_top_bottom_32 - subs r9, r9, #1 - - vst1.8 {q0}, [r5], lr - vst1.8 {q2}, [r6], lr - vst1.8 {q0}, [r5], lr - vst1.8 {q2}, [r6], lr - vst1.8 {q0}, [r5], lr - vst1.8 {q2}, [r6], lr - vst1.8 {q0}, [r5], lr - vst1.8 {q2}, [r6], lr - vst1.8 {q0}, [r5], lr - vst1.8 {q2}, [r6], lr - vst1.8 {q0}, [r5], lr - vst1.8 {q2}, [r6], lr - vst1.8 {q0}, [r5], lr - vst1.8 {q2}, [r6], lr - vst1.8 {q0}, [r5], lr - vst1.8 {q2}, [r6], lr - bne extra_top_bottom_32 - - sub r5, r1, lr, asl #5 ; src_ptr1 - (Border * plane_stride) - add r6, r2, lr ; src_ptr2 + plane_stride - subs r7, r7, #1 - bne extra_top_bottom_y - - b end_of_border_copy_y - -extra_top_bottom_uv - vld1.8 {d0}, [r1]! - vld1.8 {d8}, [r2]! - - mov r9, #2 ; 16 >> 3 - -extra_top_bottom_16 - subs r9, r9, #1 - - vst1.8 {d0}, [r5], lr - vst1.8 {d8}, [r6], lr - vst1.8 {d0}, [r5], lr - vst1.8 {d8}, [r6], lr - vst1.8 {d0}, [r5], lr - vst1.8 {d8}, [r6], lr - vst1.8 {d0}, [r5], lr - vst1.8 {d8}, [r6], lr - vst1.8 {d0}, [r5], lr - vst1.8 {d8}, [r6], lr - vst1.8 {d0}, [r5], lr - vst1.8 {d8}, [r6], lr - vst1.8 {d0}, [r5], lr - vst1.8 {d8}, [r6], lr - vst1.8 {d0}, [r5], lr - vst1.8 {d8}, [r6], lr - bne extra_top_bottom_16 - - sub r5, r1, lr, asl #4 ; src_ptr1 - (Border * plane_stride) - add r6, r2, lr ; src_ptr2 + plane_stride - subs r7, r7, #1 - bne extra_top_bottom_uv - - b end_of_border_copy_uv - - ENDP - END diff --git a/vpx_scale/arm/neon/yv12extend_arm.c b/vpx_scale/arm/neon/yv12extend_arm.c index fac7bbc1b..d408eb311 100644 --- a/vpx_scale/arm/neon/yv12extend_arm.c +++ b/vpx_scale/arm/neon/yv12extend_arm.c @@ -17,5 +17,5 @@ extern void vp8_yv12_copy_frame_func_neon( void vp8_yv12_copy_frame_neon(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc) { vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc); - vp8_yv12_extend_frame_borders_neon(dst_ybc); + vp8_yv12_extend_frame_borders_c(dst_ybc); } diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c index 675d905ae..827bce789 100644 --- a/vpx_scale/generic/yv12config.c +++ b/vpx_scale/generic/yv12config.c @@ -81,6 +81,8 @@ int vp8_yv12_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, ybf->y_height = aligned_height; ybf->y_stride = y_stride; + ybf->uv_crop_width = (width + 1) / 2; + ybf->uv_crop_height = (height + 1) / 2; ybf->uv_width = uv_width; ybf->uv_height = uv_height; ybf->uv_stride = uv_stride; diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c index 614602a03..036a50537 100644 --- a/vpx_scale/generic/yv12extend.c +++ b/vpx_scale/generic/yv12extend.c @@ -56,6 +56,9 @@ static void extend_plane(uint8_t *const src, int src_stride, } void vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) { + const int uv_border = ybf->border / 2; + + assert(ybf->border % 2 == 0); assert(ybf->y_height - ybf->y_crop_height < 16); assert(ybf->y_width - ybf->y_crop_width < 16); assert(ybf->y_height - ybf->y_crop_height >= 0); @@ -68,16 +71,16 @@ void vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) { ybf->border + ybf->y_width - ybf->y_crop_width); extend_plane(ybf->u_buffer, ybf->uv_stride, - (ybf->y_crop_width + 1) / 2, (ybf->y_crop_height + 1) / 2, - ybf->border / 2, ybf->border / 2, - (ybf->border + ybf->y_height - ybf->y_crop_height + 1) / 2, - (ybf->border + ybf->y_width - ybf->y_crop_width + 1) / 2); + ybf->uv_crop_width, ybf->uv_crop_height, + uv_border, uv_border, + uv_border + ybf->uv_height - ybf->uv_crop_height, + uv_border + ybf->uv_width - ybf->uv_crop_width); extend_plane(ybf->v_buffer, ybf->uv_stride, - (ybf->y_crop_width + 1) / 2, (ybf->y_crop_height + 1) / 2, - ybf->border / 2, ybf->border / 2, - (ybf->border + ybf->y_height - ybf->y_crop_height + 1) / 2, - (ybf->border + ybf->y_width - ybf->y_crop_width + 1) / 2); + ybf->uv_crop_width, ybf->uv_crop_height, + uv_border, uv_border, + uv_border + ybf->uv_height - ybf->uv_crop_height, + uv_border + ybf->uv_width - ybf->uv_crop_width); } #if CONFIG_VP9 diff --git a/vpx_scale/vpx_scale.mk b/vpx_scale/vpx_scale.mk index 95e7483e6..1fa41afba 100644 --- a/vpx_scale/vpx_scale.mk +++ b/vpx_scale/vpx_scale.mk @@ -12,7 +12,6 @@ SCALE_SRCS-yes += vpx_scale_rtcd.pl #neon SCALE_SRCS-$(HAVE_NEON_ASM) += arm/neon/vp8_vpxyv12_copyframe_func_neon$(ASM) SCALE_SRCS-$(HAVE_NEON_ASM) += arm/neon/vp8_vpxyv12_copysrcframe_func_neon$(ASM) -SCALE_SRCS-$(HAVE_NEON_ASM) += arm/neon/vp8_vpxyv12_extendframeborders_neon$(ASM) SCALE_SRCS-$(HAVE_NEON_ASM) += arm/neon/yv12extend_arm.c #mips(dspr2) diff --git a/vpx_scale/vpx_scale_rtcd.pl b/vpx_scale/vpx_scale_rtcd.pl index 2e3f1ffbe..5a7f973b2 100644 --- a/vpx_scale/vpx_scale_rtcd.pl +++ b/vpx_scale/vpx_scale_rtcd.pl @@ -17,8 +17,6 @@ if (vpx_config("CONFIG_SPATIAL_RESAMPLING") eq "yes") { } add_proto qw/void vp8_yv12_extend_frame_borders/, "struct yv12_buffer_config *ybf"; -specialize qw/vp8_yv12_extend_frame_borders neon_asm/; -$vp8_yv12_extend_frame_borders_neon_asm=vp8_yv12_extend_frame_borders_neon; add_proto qw/void vp8_yv12_copy_frame/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"; specialize qw/vp8_yv12_copy_frame neon_asm/; |