diff options
-rw-r--r-- | test/i420_video_source.h | 94 | ||||
-rw-r--r-- | test/test-data.mk | 3 | ||||
-rw-r--r-- | test/test-data.sha1 | 3 | ||||
-rw-r--r-- | test/test.mk | 2 | ||||
-rw-r--r-- | test/vp9_end_to_end_test.cc | 155 | ||||
-rw-r--r-- | test/yuv_video_source.h | 151 | ||||
-rw-r--r-- | vp8/encoder/denoising.c | 20 | ||||
-rw-r--r-- | vp9/common/vp9_rtcd_defs.pl | 5 | ||||
-rw-r--r-- | vp9/decoder/vp9_decoder.c | 8 | ||||
-rw-r--r-- | vp9/encoder/vp9_avg.c | 14 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 89 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 37 | ||||
-rw-r--r-- | vp9/encoder/vp9_rd.c | 12 | ||||
-rw-r--r-- | vp9/encoder/vp9_rd.h | 5 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 9 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.c | 6 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.h | 10 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_denoiser_sse2.c | 56 |
18 files changed, 474 insertions, 205 deletions
diff --git a/test/i420_video_source.h b/test/i420_video_source.h index c3315f9ce..0a184805c 100644 --- a/test/i420_video_source.h +++ b/test/i420_video_source.h @@ -13,104 +13,22 @@ #include <cstdlib> #include <string> -#include "test/video_source.h" +#include "test/yuv_video_source.h" namespace libvpx_test { // This class extends VideoSource to allow parsing of raw yv12 // so that we can do actual file encodes. -class I420VideoSource : public VideoSource { +class I420VideoSource : public YUVVideoSource { public: I420VideoSource(const std::string &file_name, unsigned int width, unsigned int height, int rate_numerator, int rate_denominator, unsigned int start, int limit) - : file_name_(file_name), - input_file_(NULL), - img_(NULL), - start_(start), - limit_(limit), - frame_(0), - width_(0), - height_(0), - framerate_numerator_(rate_numerator), - framerate_denominator_(rate_denominator) { - // This initializes raw_sz_, width_, height_ and allocates an img. - SetSize(width, height); - } - - virtual ~I420VideoSource() { - vpx_img_free(img_); - if (input_file_) - fclose(input_file_); - } - - virtual void Begin() { - if (input_file_) - fclose(input_file_); - input_file_ = OpenTestDataFile(file_name_); - ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: " - << file_name_; - if (start_) { - fseek(input_file_, static_cast<unsigned>(raw_sz_) * start_, SEEK_SET); - } - - frame_ = start_; - FillFrame(); - } - - virtual void Next() { - ++frame_; - FillFrame(); - } - - virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL; } - - // Models a stream where Timebase = 1/FPS, so pts == frame. - virtual vpx_codec_pts_t pts() const { return frame_; } - - virtual unsigned long duration() const { return 1; } - - virtual vpx_rational_t timebase() const { - const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ }; - return t; - } - - virtual unsigned int frame() const { return frame_; } - - virtual unsigned int limit() const { return limit_; } - - void SetSize(unsigned int width, unsigned int height) { - if (width != width_ || height != height_) { - vpx_img_free(img_); - img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_I420, width, height, 1); - ASSERT_TRUE(img_ != NULL); - width_ = width; - height_ = height; - raw_sz_ = width * height * 3 / 2; - } - } - - virtual void FillFrame() { - ASSERT_TRUE(input_file_ != NULL); - // Read a frame from input_file. - if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) { - limit_ = frame_; - } - } - - protected: - std::string file_name_; - FILE *input_file_; - vpx_image_t *img_; - size_t raw_sz_; - unsigned int start_; - unsigned int limit_; - unsigned int frame_; - unsigned int width_; - unsigned int height_; - int framerate_numerator_; - int framerate_denominator_; + : YUVVideoSource(file_name, VPX_IMG_FMT_I420, + width, height, + rate_numerator, rate_denominator, + start, limit) {} }; } // namespace libvpx_test diff --git a/test/test-data.mk b/test/test-data.mk index c50b1c746..e4dae3a95 100644 --- a/test/test-data.mk +++ b/test/test-data.mk @@ -7,12 +7,15 @@ LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_420.y4m LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_422.y4m LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_444.y4m +LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_440.yuv LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_420.y4m LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_422.y4m LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_444.y4m +LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_440.yuv LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420.y4m LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_422.y4m LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_444.y4m +LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_440.yuv LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m diff --git a/test/test-data.sha1 b/test/test-data.sha1 index 2dc64d334..69d1d2f25 100644 --- a/test/test-data.sha1 +++ b/test/test-data.sha1 @@ -17,12 +17,15 @@ d637297561dd904eb2c97a9015deeb31c4a1e8d2 invalid-vp90-2-08-tile_1x4_frame_paral a432f96ff0a787268e2f94a8092ab161a18d1b06 park_joy_90p_10_420.y4m 0b194cc312c3a2e84d156a221b0a5eb615dfddc5 park_joy_90p_10_422.y4m ff0e0a21dc2adc95b8c1b37902713700655ced17 park_joy_90p_10_444.y4m +c934da6fb8cc54ee2a8c17c54cf6076dac37ead0 park_joy_90p_10_440.yuv 614c32ae1eca391e867c70d19974f0d62664dd99 park_joy_90p_12_420.y4m c92825f1ea25c5c37855083a69faac6ac4641a9e park_joy_90p_12_422.y4m b592189b885b6cc85db55cc98512a197d73d3b34 park_joy_90p_12_444.y4m +82c1bfcca368c2f22bad7d693d690d5499ecdd11 park_joy_90p_12_440.yuv 4e0eb61e76f0684188d9bc9f3ce61f6b6b77bb2c park_joy_90p_8_420.y4m 7a193ff7dfeb96ba5f82b2afd7afa9e1fe83d947 park_joy_90p_8_422.y4m bdb7856e6bc93599bdda05c2e773a9f22b6c6d03 park_joy_90p_8_444.y4m +81e1f3843748438b8f2e71db484eb22daf72e939 park_joy_90p_8_440.yuv b1f1c3ec79114b9a0651af24ce634afb44a9a419 rush_hour_444.y4m 5184c46ddca8b1fadd16742e8500115bc8f749da vp80-00-comprehensive-001.ivf 65bf1bbbced81b97bd030f376d1b7f61a224793f vp80-00-comprehensive-002.ivf diff --git a/test/test.mk b/test/test.mk index 1bd732d51..30c13a1d5 100644 --- a/test/test.mk +++ b/test/test.mk @@ -23,6 +23,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += datarate_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += y4m_video_source.h +LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += yuv_video_source.h LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += altref_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc @@ -38,6 +39,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += frame_size_tests.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += resize_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_end_to_end_test.cc LIBVPX_TEST_SRCS-yes += decode_test_driver.cc LIBVPX_TEST_SRCS-yes += decode_test_driver.h diff --git a/test/vp9_end_to_end_test.cc b/test/vp9_end_to_end_test.cc new file mode 100644 index 000000000..a8f679342 --- /dev/null +++ b/test/vp9_end_to_end_test.cc @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/y4m_video_source.h" +#include "test/yuv_video_source.h" +#include "test/util.h" +#include "third_party/googletest/src/include/gtest/gtest.h" + +namespace { + +const unsigned int kWidth = 160; +const unsigned int kHeight = 90; +const unsigned int kFramerate = 50; +const unsigned int kFrames = 10; +const int kBitrate = 500; +const int kCpuUsed = 2; +const double psnr_threshold = 35.0; + +typedef struct { + const char *filename; + unsigned int input_bit_depth; + vpx_img_fmt fmt; + vpx_bit_depth_t bit_depth; + unsigned int profile; +} TestVideoParam; + +const TestVideoParam TestVectors[] = { + {"park_joy_90p_8_420.y4m", 8, VPX_IMG_FMT_I420, VPX_BITS_8, 0}, + {"park_joy_90p_8_422.y4m", 8, VPX_IMG_FMT_I422, VPX_BITS_8, 1}, + {"park_joy_90p_8_444.y4m", 8, VPX_IMG_FMT_I444, VPX_BITS_8, 1}, + {"park_joy_90p_8_440.yuv", 8, VPX_IMG_FMT_I440, VPX_BITS_8, 1}, +#if CONFIG_VP9_HIGHBITDEPTH + {"park_joy_90p_10_420.y4m", 10, VPX_IMG_FMT_I42016, VPX_BITS_10, 2}, + {"park_joy_90p_10_422.y4m", 10, VPX_IMG_FMT_I42216, VPX_BITS_10, 3}, + {"park_joy_90p_10_444.y4m", 10, VPX_IMG_FMT_I44416, VPX_BITS_10, 3}, + {"park_joy_90p_10_440.yuv", 10, VPX_IMG_FMT_I44016, VPX_BITS_10, 3}, + {"park_joy_90p_12_420.y4m", 12, VPX_IMG_FMT_I42016, VPX_BITS_12, 2}, + {"park_joy_90p_12_422.y4m", 12, VPX_IMG_FMT_I42216, VPX_BITS_12, 3}, + {"park_joy_90p_12_444.y4m", 12, VPX_IMG_FMT_I44416, VPX_BITS_12, 3}, + {"park_joy_90p_12_440.yuv", 12, VPX_IMG_FMT_I44016, VPX_BITS_12, 3}, +#endif // CONFIG_VP9_HIGHBITDEPTH +}; + +int is_extension_y4m(const char *filename) { + const char *dot = strrchr(filename, '.'); + if (!dot || dot == filename) + return 0; + else + return !strcmp(dot, ".y4m"); +} + +class EndToEndTestLarge + : public ::libvpx_test::EncoderTest, + public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, \ + TestVideoParam> { + protected: + EndToEndTestLarge() + : EncoderTest(GET_PARAM(0)), + psnr_(0.0), + nframes_(0), + encoding_mode_(GET_PARAM(1)) { + } + + virtual ~EndToEndTestLarge() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(encoding_mode_); + if (encoding_mode_ != ::libvpx_test::kRealTime) { + cfg_.g_lag_in_frames = 5; + cfg_.rc_end_usage = VPX_VBR; + } else { + cfg_.g_lag_in_frames = 0; + cfg_.rc_end_usage = VPX_CBR; + } + test_video_param_ = GET_PARAM(2); + } + + virtual void BeginPassHook(unsigned int) { + psnr_ = 0.0; + nframes_ = 0; + } + + virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) { + psnr_ += pkt->data.psnr.psnr[0]; + nframes_++; + } + + virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, + ::libvpx_test::Encoder *encoder) { + if (video->frame() == 1) { + encoder->Control(VP8E_SET_CPUUSED, kCpuUsed); + if (encoding_mode_ != ::libvpx_test::kRealTime) { + encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); + encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7); + encoder->Control(VP8E_SET_ARNR_STRENGTH, 5); + encoder->Control(VP8E_SET_ARNR_TYPE, 3); + } + } + } + + double GetAveragePsnr() const { + if (nframes_) + return psnr_ / nframes_; + return 0.0; + } + + TestVideoParam test_video_param_; + + private: + double psnr_; + unsigned int nframes_; + libvpx_test::TestMode encoding_mode_; +}; + +TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) { + cfg_.rc_target_bitrate = kBitrate; + cfg_.g_error_resilient = 0; + cfg_.g_profile = test_video_param_.profile; + cfg_.g_input_bit_depth = test_video_param_.input_bit_depth; + cfg_.g_bit_depth = test_video_param_.bit_depth; + init_flags_ = VPX_CODEC_USE_PSNR; + + libvpx_test::VideoSource *video; + if (is_extension_y4m(test_video_param_.filename)) { + video = new libvpx_test::Y4mVideoSource(test_video_param_.filename, + 0, kFrames); + } else { + video = new libvpx_test::YUVVideoSource(test_video_param_.filename, + test_video_param_.fmt, + kWidth, kHeight, + kFramerate, 1, 0, kFrames); + } + + ASSERT_NO_FATAL_FAILURE(RunLoop(video)); + const double psnr = GetAveragePsnr(); + EXPECT_GT(psnr, psnr_threshold); + delete(video); +} + +VP9_INSTANTIATE_TEST_CASE( + EndToEndTestLarge, + ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood), + ::testing::ValuesIn(TestVectors)); + +} // namespace diff --git a/test/yuv_video_source.h b/test/yuv_video_source.h new file mode 100644 index 000000000..3c852b242 --- /dev/null +++ b/test/yuv_video_source.h @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef TEST_YUV_VIDEO_SOURCE_H_ +#define TEST_YUV_VIDEO_SOURCE_H_ + +#include <cstdio> +#include <cstdlib> +#include <string> + +#include "test/video_source.h" +#include "vpx/vpx_image.h" + +namespace libvpx_test { + +// This class extends VideoSource to allow parsing of raw YUV +// formats of various color sampling and bit-depths so that we can +// do actual file encodes. +class YUVVideoSource : public VideoSource { + public: + YUVVideoSource(const std::string &file_name, vpx_img_fmt format, + unsigned int width, unsigned int height, + int rate_numerator, int rate_denominator, + unsigned int start, int limit) + : file_name_(file_name), + input_file_(NULL), + img_(NULL), + start_(start), + limit_(limit), + frame_(0), + width_(0), + height_(0), + format_(VPX_IMG_FMT_NONE), + framerate_numerator_(rate_numerator), + framerate_denominator_(rate_denominator) { + // This initializes format_, raw_size_, width_, height_ and allocates img. + SetSize(width, height, format); + } + + virtual ~YUVVideoSource() { + vpx_img_free(img_); + if (input_file_) + fclose(input_file_); + } + + virtual void Begin() { + if (input_file_) + fclose(input_file_); + input_file_ = OpenTestDataFile(file_name_); + ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: " + << file_name_; + if (start_) + fseek(input_file_, static_cast<unsigned>(raw_size_) * start_, SEEK_SET); + + frame_ = start_; + FillFrame(); + } + + virtual void Next() { + ++frame_; + FillFrame(); + } + + virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL; } + + // Models a stream where Timebase = 1/FPS, so pts == frame. + virtual vpx_codec_pts_t pts() const { return frame_; } + + virtual unsigned long duration() const { return 1; } + + virtual vpx_rational_t timebase() const { + const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ }; + return t; + } + + virtual unsigned int frame() const { return frame_; } + + virtual unsigned int limit() const { return limit_; } + + virtual void SetSize(unsigned int width, unsigned int height, + vpx_img_fmt format) { + if (width != width_ || height != height_ || format != format_) { + vpx_img_free(img_); + img_ = vpx_img_alloc(NULL, format, width, height, 1); + ASSERT_TRUE(img_ != NULL); + width_ = width; + height_ = height; + format_ = format; + switch (format) { + case VPX_IMG_FMT_I420: + raw_size_ = width * height * 3 / 2; + break; + case VPX_IMG_FMT_I422: + raw_size_ = width * height * 2; + break; + case VPX_IMG_FMT_I440: + raw_size_ = width * height * 2; + break; + case VPX_IMG_FMT_I444: + raw_size_ = width * height * 3; + break; + case VPX_IMG_FMT_I42016: + raw_size_ = width * height * 3; + break; + case VPX_IMG_FMT_I42216: + raw_size_ = width * height * 4; + break; + case VPX_IMG_FMT_I44016: + raw_size_ = width * height * 4; + break; + case VPX_IMG_FMT_I44416: + raw_size_ = width * height * 6; + break; + default: + ASSERT_TRUE(0); + } + } + } + + virtual void FillFrame() { + ASSERT_TRUE(input_file_ != NULL); + // Read a frame from input_file. + if (fread(img_->img_data, raw_size_, 1, input_file_) == 0) { + limit_ = frame_; + } + } + + protected: + std::string file_name_; + FILE *input_file_; + vpx_image_t *img_; + size_t raw_size_; + unsigned int start_; + unsigned int limit_; + unsigned int frame_; + unsigned int width_; + unsigned int height_; + vpx_img_fmt format_; + int framerate_numerator_; + int framerate_denominator_; +}; + +} // namespace libvpx_test + +#endif // TEST_YUV_VIDEO_SOURCE_H_ diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c index 12f9734a1..75b2a3be4 100644 --- a/vp8/encoder/denoising.c +++ b/vp8/encoder/denoising.c @@ -390,9 +390,9 @@ void vp8_denoiser_set_parameters(VP8_DENOISER *denoiser, int mode) { denoiser->denoise_pars.scale_motion_thresh = 16; denoiser->denoise_pars.scale_increase_filter = 1; denoiser->denoise_pars.denoise_mv_bias = 60; - denoiser->denoise_pars.pickmode_mv_bias = 60; - denoiser->denoise_pars.qp_thresh = 100; - denoiser->denoise_pars.consec_zerolast = 10; + denoiser->denoise_pars.pickmode_mv_bias = 75; + denoiser->denoise_pars.qp_thresh = 85; + denoiser->denoise_pars.consec_zerolast = 15; denoiser->denoise_pars.spatial_blur = 20; } } @@ -453,17 +453,17 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height, // Bitrate thresholds and noise metric (nmse) thresholds for switching to // aggressive mode. // TODO(marpan): Adjust thresholds, including effect on resolution. - denoiser->bitrate_threshold = 200000; // (bits/sec). + denoiser->bitrate_threshold = 300000; // (bits/sec). denoiser->threshold_aggressive_mode = 35; - if (width * height > 640 * 480) { - denoiser->bitrate_threshold = 500000; - denoiser->threshold_aggressive_mode = 100; + if (width * height > 1280 * 720) { + denoiser->bitrate_threshold = 2000000; + denoiser->threshold_aggressive_mode = 1400; } else if (width * height > 960 * 540) { denoiser->bitrate_threshold = 800000; denoiser->threshold_aggressive_mode = 150; - } else if (width * height > 1280 * 720) { - denoiser->bitrate_threshold = 2000000; - denoiser->threshold_aggressive_mode = 1400; + } else if (width * height > 640 * 480) { + denoiser->bitrate_threshold = 500000; + denoiser->threshold_aggressive_mode = 100; } return 0; } diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index de389e7af..d15ddec73 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -1114,6 +1114,11 @@ specialize qw/vp9_get_mb_ss/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p"; specialize qw/vp9_avg_8x8 sse2/; +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + add_proto qw/unsigned int vp9_highbd_avg_8x8/, "const uint8_t *, int p"; + specialize qw/vp9_highbd_avg_8x8/; +} + # ENCODEMB INVOKE add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index 3c9469c4c..baf6ab7ef 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -232,6 +232,8 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, cm->frame_refs[0].buf->corrupted = 1; } + pbi->ready_for_new_data = 0; + // Check if the previous frame was a frame without any references to it. if (cm->new_fb_idx >= 0 && cm->frame_bufs[cm->new_fb_idx].ref_count == 0) cm->release_fb_cb(cm->cb_priv, @@ -279,8 +281,6 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, cm->current_video_frame++; } - pbi->ready_for_new_data = 0; - cm->error.setjmp = 0; return retcode; } @@ -296,12 +296,12 @@ int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, if (pbi->ready_for_new_data == 1) return ret; + pbi->ready_for_new_data = 1; + /* no raw frame to show!!! */ if (!cm->show_frame) return ret; - pbi->ready_for_new_data = 1; - #if CONFIG_VP9_POSTPROC if (!cm->show_existing_frame) { ret = vp9_post_proc_frame(cm, sd, flags); diff --git a/vp9/encoder/vp9_avg.c b/vp9/encoder/vp9_avg.c index 22c6cc4fc..e9810c894 100644 --- a/vp9/encoder/vp9_avg.c +++ b/vp9/encoder/vp9_avg.c @@ -7,6 +7,7 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ +#include "vp9/common/vp9_common.h" #include "vpx_ports/mem.h" unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) { @@ -17,3 +18,16 @@ unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) { return (sum + 32) >> 6; } + +#if CONFIG_VP9_HIGHBITDEPTH +unsigned int vp9_highbd_avg_8x8_c(const uint8_t *s8, int p) { + int i, j; + int sum = 0; + const uint16_t* s = CONVERT_TO_SHORTPTR(s8); + for (i = 0; i < 8; ++i, s+=p) + for (j = 0; j < 8; sum += s[j], ++j) {} + + return (sum + 32) >> 6; +} +#endif // CONFIG_VP9_HIGHBITDEPTH + diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 420ec0b51..6eff8c501 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -533,8 +533,19 @@ static void choose_partitioning(VP9_COMP *cpi, int sum = 0; if (x_idx < pixels_wide && y_idx < pixels_high) { - int s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp); - int d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp); + int s_avg, d_avg; +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + s_avg = vp9_highbd_avg_8x8(s + y_idx * sp + x_idx, sp); + d_avg = vp9_highbd_avg_8x8(d + y_idx * dp + x_idx, dp); + } else { + s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp); + d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp); + } +#else + s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp); + d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp); +#endif sum = s_avg - d_avg; sse = sum * sum; } @@ -1522,9 +1533,7 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, BLOCK_SIZE subsize; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; PARTITION_CONTEXT sl[8], sa[8]; - RD_COST last_part_rdc = {INT_MAX, INT64_MAX, INT64_MAX}; - RD_COST none_rdc = {INT_MAX, INT64_MAX, INT64_MAX}; - RD_COST chosen_rdc = {INT_MAX, INT64_MAX, INT64_MAX}; + RD_COST last_part_rdc, none_rdc, chosen_rdc; BLOCK_SIZE sub_subsize = BLOCK_4X4; int splits_below = 0; BLOCK_SIZE bs_type = mi_8x8[0].src_mi->mbmi.sb_type; @@ -1537,6 +1546,10 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, assert(num_4x4_blocks_wide_lookup[bsize] == num_4x4_blocks_high_lookup[bsize]); + vp9_rd_cost_reset(&last_part_rdc); + vp9_rd_cost_reset(&none_rdc); + vp9_rd_cost_reset(&chosen_rdc); + partition = partition_lookup[bsl][bs_type]; subsize = get_subsize(bsize, partition); @@ -1598,16 +1611,15 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, INT64_MAX); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) { - RD_COST tmp_rdc = {0, 0, 0}; + RD_COST tmp_rdc; PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; + vp9_rd_cost_init(&tmp_rdc); update_state(cpi, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx); rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &tmp_rdc, subsize, &pc_tree->horizontal[1], INT64_MAX); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { - last_part_rdc.rate = INT_MAX; - last_part_rdc.dist = INT64_MAX; - last_part_rdc.rdcost = INT64_MAX; + vp9_rd_cost_reset(&last_part_rdc); break; } last_part_rdc.rate += tmp_rdc.rate; @@ -1620,17 +1632,16 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, subsize, &pc_tree->vertical[0], INT64_MAX); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) { - RD_COST tmp_rdc = {0, 0, 0}; + RD_COST tmp_rdc; PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0]; + vp9_rd_cost_init(&tmp_rdc); update_state(cpi, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx); rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &tmp_rdc, subsize, &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { - last_part_rdc.rate = INT_MAX; - last_part_rdc.dist = INT64_MAX; - last_part_rdc.rdcost = INT64_MAX; + vp9_rd_cost_reset(&last_part_rdc); break; } last_part_rdc.rate += tmp_rdc.rate; @@ -1651,19 +1662,17 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, int x_idx = (i & 1) * (mi_step >> 1); int y_idx = (i >> 1) * (mi_step >> 1); int jj = i >> 1, ii = i & 0x01; - RD_COST tmp_rdc = {0, 0, 0}; - + RD_COST tmp_rdc; if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) continue; + vp9_rd_cost_init(&tmp_rdc); rd_use_partition(cpi, tile, mi_8x8 + jj * bss * mis + ii * bss, tp, mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate, &tmp_rdc.dist, i != 3, pc_tree->split[i]); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { - last_part_rdc.rate = INT_MAX; - last_part_rdc.dist = INT64_MAX; - last_part_rdc.rdcost = INT64_MAX; + vp9_rd_cost_reset(&last_part_rdc); break; } last_part_rdc.rate += tmp_rdc.rate; @@ -1710,15 +1719,12 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); pc_tree->split[i]->partitioning = PARTITION_NONE; rd_pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &tmp_rdc, - split_subsize, &pc_tree->split[i]->none, - INT64_MAX); + split_subsize, &pc_tree->split[i]->none, INT64_MAX); restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { - chosen_rdc.rate = INT_MAX; - chosen_rdc.dist = INT64_MAX; - chosen_rdc.rdcost = INT64_MAX; + vp9_rd_cost_reset(&chosen_rdc); break; } @@ -2123,9 +2129,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, PICK_MODE_CONTEXT *ctx = &pc_tree->none; int i, pl; BLOCK_SIZE subsize; - RD_COST this_rdc = {0, 0, 0}; - RD_COST sum_rdc = {0, 0, 0}; - RD_COST best_rdc = {INT_MAX, INT64_MAX, best_rd}; + RD_COST this_rdc, sum_rdc, best_rdc; int do_split = bsize >= BLOCK_8X8; int do_rect = 1; @@ -2153,6 +2157,11 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, assert(num_8x8_blocks_wide_lookup[bsize] == num_8x8_blocks_high_lookup[bsize]); + vp9_rd_cost_init(&this_rdc); + vp9_rd_cost_init(&sum_rdc); + vp9_rd_cost_reset(&best_rdc); + best_rdc.rdcost = best_rd; + set_offsets(cpi, tile, mi_row, mi_col, bsize); if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode) @@ -2574,16 +2583,15 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, sf->always_this_block_size); rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, cpi->pc_root); - } else if (cpi->partition_search_skippable_frame || - sf->partition_search_type == VAR_BASED_FIXED_PARTITION) { + } else if (cpi->partition_search_skippable_frame) { BLOCK_SIZE bsize; set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col); set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize); rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, cpi->pc_root); - } else if (sf->partition_search_type == VAR_BASED_PARTITION && - cm->frame_type != KEY_FRAME ) { + } else if (sf->partition_search_type == VAR_BASED_PARTITION && + cm->frame_type != KEY_FRAME ) { choose_partitioning(cpi, tile, mi_row, mi_col); rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, cpi->pc_root); @@ -2835,8 +2843,13 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, this_rate += cpi->partition_cost[pl][PARTITION_NONE]; sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist); if (sum_rd < best_rd) { - int64_t stop_thresh = 4096; - int64_t stop_thresh_rd; + int dist_breakout_thr = sf->partition_search_breakout_dist_thr; + int64_t rate_breakout_thr = sf->partition_search_breakout_rate_thr; + + dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] + + b_height_log2_lookup[bsize]); + + rate_breakout_thr *= num_pels_log2_lookup[bsize]; best_rate = this_rate; best_dist = this_dist; @@ -2844,14 +2857,9 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; - // Adjust threshold according to partition size. - stop_thresh >>= 8 - (b_width_log2_lookup[bsize] + - b_height_log2_lookup[bsize]); - - stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh); - // If obtained distortion is very small, choose current partition - // and stop splitting. - if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) { + if (!x->e_mbd.lossless && + this_rate < rate_breakout_thr && + this_dist < dist_breakout_thr) { do_split = 0; do_rect = 0; } @@ -3176,7 +3184,6 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rate, &dummy_dist, cpi->pc_root); break; - case VAR_BASED_FIXED_PARTITION: case FIXED_PARTITION: bsize = sf->partition_search_type == FIXED_PARTITION ? sf->always_this_block_size : diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 96c3e0aa4..f1baf8323 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -66,13 +66,6 @@ static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) { *b = temp; } -static int gfboost_qadjust(int qindex, vpx_bit_depth_t bit_depth) { - const double q = vp9_convert_qindex_to_q(qindex, bit_depth); - return (int)((0.00000828 * q * q * q) + - (-0.0055 * q * q) + - (1.32 * q) + 79.3); -} - // Resets the first pass file to the given position using a relative seek from // the current position. static void reset_fpf_position(TWO_PASS *p, @@ -1317,14 +1310,15 @@ static double calc_frame_boost(VP9_COMP *cpi, double this_frame_mv_in_out, double max_boost) { double frame_boost; - const double lq = vp9_convert_qindex_to_q(cpi->rc.last_q[INTER_FRAME], - cpi->common.bit_depth); - const double q_correction = MIN((0.8 + (lq * 0.001)), 1.0); + const double lq = + vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME], + cpi->common.bit_depth); + const double boost_correction = MIN((0.5 + (lq * 0.015)), 1.5); // Underlying boost factor is based on inter error ratio. frame_boost = (BASELINE_ERR_PER_MB * cpi->common.MBs) / DOUBLE_DIVIDE_CHECK(this_frame->coded_error); - frame_boost = frame_boost * BOOST_FACTOR * q_correction; + frame_boost = frame_boost * BOOST_FACTOR * boost_correction; // Increase boost for frames where new data coming into frame (e.g. zoom out). // Slightly reduce boost if there is a net balance of motion out of the frame @@ -1335,7 +1329,7 @@ static double calc_frame_boost(VP9_COMP *cpi, else frame_boost += frame_boost * (this_frame_mv_in_out / 2.0); - return MIN(frame_boost, max_boost * q_correction); + return MIN(frame_boost, max_boost * boost_correction); } static int calc_arf_boost(VP9_COMP *cpi, int offset, @@ -1874,19 +1868,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err); // Calculate the extra bits to be used for boosted frame(s) - { - int q = rc->last_q[INTER_FRAME]; - int boost = - (rc->gfu_boost * gfboost_qadjust(q, cpi->common.bit_depth)) / 100; - - // Set max and minimum boost and hence minimum allocation. - boost = clamp(boost, MIN_ARF_GF_BOOST, - (rc->baseline_gf_interval + 1) * 200); - - // Calculate the extra bits to be used for boosted frame(s) - gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval, - boost, gf_group_bits); - } + gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval, + rc->gfu_boost, gf_group_bits); // Adjust KF group bits and error remaining. twopass->kf_group_error_left -= (int64_t)gf_group_err; @@ -2380,7 +2363,11 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { section_target_bandwidth); twopass->active_worst_quality = tmp_q; rc->ni_av_qi = tmp_q; + rc->last_q[INTER_FRAME] = tmp_q; rc->avg_q = vp9_convert_qindex_to_q(tmp_q, cm->bit_depth); + rc->avg_frame_qindex[INTER_FRAME] = tmp_q; + rc->last_q[KEY_FRAME] = (tmp_q + cpi->oxcf.best_allowed_q) / 2; + rc->avg_frame_qindex[KEY_FRAME] = rc->last_q[KEY_FRAME]; } vp9_zero(this_frame); if (EOF == input_stats(twopass, &this_frame)) diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c index 75c396433..7f526fc42 100644 --- a/vp9/encoder/vp9_rd.c +++ b/vp9/encoder/vp9_rd.c @@ -44,6 +44,18 @@ // Factor to weigh the rate for switchable interp filters. #define SWITCHABLE_INTERP_RATE_FACTOR 1 +void vp9_rd_cost_reset(RD_COST *rd_cost) { + rd_cost->rate = INT_MAX; + rd_cost->dist = INT64_MAX; + rd_cost->rdcost = INT64_MAX; +} + +void vp9_rd_cost_init(RD_COST *rd_cost) { + rd_cost->rate = 0; + rd_cost->dist = 0; + rd_cost->rdcost = 0; +} + // The baseline rd thresholds for breaking out of the rd loop for // certain modes are assumed to be based on 8x8 blocks. // This table is used to correct for block size. diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h index 33fb4ac94..1aa52663a 100644 --- a/vp9/encoder/vp9_rd.h +++ b/vp9/encoder/vp9_rd.h @@ -123,6 +123,11 @@ typedef struct RD_COST { int64_t rdcost; } RD_COST; +// Reset the rate distortion cost values to maximum (invalid) value. +void vp9_rd_cost_reset(RD_COST *rd_cost); +// Initialize the rate distortion cost values to zero. +void vp9_rd_cost_init(RD_COST *rd_cost); + struct TileInfo; struct VP9_COMP; struct macroblock; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 7565cc5c9..eca8e5880 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1140,12 +1140,14 @@ static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, *sse = 0; *skippable = 1; - for (plane = 1; plane < MAX_MB_PLANE && is_cost_valid; ++plane) { + for (plane = 1; plane < MAX_MB_PLANE; ++plane) { txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd, plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing); - if (pnrate == INT_MAX) + if (pnrate == INT_MAX) { is_cost_valid = 0; + break; + } *rate += pnrate; *distortion += pndist; *sse += pnsse; @@ -3392,6 +3394,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } if (best_mode_index < 0 || best_rd >= best_rd_so_far) { + rd_cost->rate = INT_MAX; rd_cost->rdcost = INT64_MAX; return; } @@ -3562,6 +3565,7 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x, rd_cost->rdcost = this_rd; if (this_rd >= best_rd_so_far) { + rd_cost->rate = INT_MAX; rd_cost->rdcost = INT64_MAX; return; } @@ -4113,6 +4117,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } if (best_rd >= best_rd_so_far) { + rd_cost->rate = INT_MAX; rd_cost->rdcost = INT64_MAX; return; } diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index bec77d71f..9e3ee2c94 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -275,6 +275,12 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO; + + if (MIN(cm->width, cm->height) >= 720) + sf->partition_search_breakout_dist_thr = (1 << 25); + else + sf->partition_search_breakout_dist_thr = (1 << 23); + sf->partition_search_breakout_rate_thr = 200; } if (speed >= 6) { diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index cc6c2e52a..951b4af22 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -149,16 +149,12 @@ typedef enum { typedef enum { // Search partitions using RD/NONRD criterion - SEARCH_PARTITION = 0, + SEARCH_PARTITION, // Always use a fixed size partition - FIXED_PARTITION = 1, + FIXED_PARTITION, - // Use a fixed size partition in every 64X64 SB, where the size is - // determined based on source variance - VAR_BASED_FIXED_PARTITION = 2, - - REFERENCE_PARTITION = 3, + REFERENCE_PARTITION, // Use an arbitrary partitioning scheme based on source variance within // a 64X64 SB diff --git a/vp9/encoder/x86/vp9_denoiser_sse2.c b/vp9/encoder/x86/vp9_denoiser_sse2.c index bf400d38b..bf5fa889f 100644 --- a/vp9/encoder/x86/vp9_denoiser_sse2.c +++ b/vp9/encoder/x86/vp9_denoiser_sse2.c @@ -41,40 +41,40 @@ static INLINE int sum_diff_16x1(__m128i acc_diff) { static INLINE __m128i vp9_denoiser_16x1_sse2(const uint8_t *sig, const uint8_t *mc_running_avg_y, uint8_t *running_avg_y, - const __m128i k_0, - const __m128i k_4, - const __m128i k_8, - const __m128i k_16, - const __m128i l3, - const __m128i l32, - const __m128i l21, + const __m128i *k_0, + const __m128i *k_4, + const __m128i *k_8, + const __m128i *k_16, + const __m128i *l3, + const __m128i *l32, + const __m128i *l21, __m128i acc_diff) { // Calculate differences - const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0])); + const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0])); const __m128i v_mc_running_avg_y = _mm_loadu_si128( - (__m128i *)(&mc_running_avg_y[0])); + (const __m128i *)(&mc_running_avg_y[0])); __m128i v_running_avg_y; const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig); const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y); // Obtain the sign. FF if diff is negative. - const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0); + const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, *k_0); // Clamp absolute difference to 16 to be used to get mask. Doing this // allows us to use _mm_cmpgt_epi8, which operates on signed byte. const __m128i clamped_absdiff = _mm_min_epu8( - _mm_or_si128(pdiff, ndiff), k_16); + _mm_or_si128(pdiff, ndiff), *k_16); // Get masks for l2 l1 and l0 adjustments. - const __m128i mask2 = _mm_cmpgt_epi8(k_16, clamped_absdiff); - const __m128i mask1 = _mm_cmpgt_epi8(k_8, clamped_absdiff); - const __m128i mask0 = _mm_cmpgt_epi8(k_4, clamped_absdiff); + const __m128i mask2 = _mm_cmpgt_epi8(*k_16, clamped_absdiff); + const __m128i mask1 = _mm_cmpgt_epi8(*k_8, clamped_absdiff); + const __m128i mask0 = _mm_cmpgt_epi8(*k_4, clamped_absdiff); // Get adjustments for l2, l1, and l0. - __m128i adj2 = _mm_and_si128(mask2, l32); - const __m128i adj1 = _mm_and_si128(mask1, l21); + __m128i adj2 = _mm_and_si128(mask2, *l32); + const __m128i adj1 = _mm_and_si128(mask1, *l21); const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff); __m128i adj, padj, nadj; // Combine the adjustments and get absolute adjustments. adj2 = _mm_add_epi8(adj2, adj1); - adj = _mm_sub_epi8(l3, adj2); + adj = _mm_sub_epi8(*l3, adj2); adj = _mm_andnot_si128(mask0, adj); adj = _mm_or_si128(adj, adj0); @@ -103,9 +103,9 @@ static INLINE __m128i vp9_denoiser_adj_16x1_sse2(const uint8_t *sig, __m128i acc_diff) { __m128i v_running_avg_y = _mm_loadu_si128((__m128i *)(&running_avg_y[0])); // Calculate differences. - const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0])); + const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0])); const __m128i v_mc_running_avg_y = - _mm_loadu_si128((__m128i *)(&mc_running_avg_y[0])); + _mm_loadu_si128((const __m128i *)(&mc_running_avg_y[0])); const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig); const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y); // Obtain the sign. FF if diff is negative. @@ -178,8 +178,8 @@ static int vp9_denoiser_4xM_sse2(const uint8_t *sig, int sig_stride, acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r], mc_running_buffer[r], running_buffer[r], - k_0, k_4, k_8, k_16, - l3, l32, l21, acc_diff); + &k_0, &k_4, &k_8, &k_16, + &l3, &l32, &l21, acc_diff); vpx_memcpy(running_avg_y, running_buffer[r], 4); vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + 4, 4); vpx_memcpy(running_avg_y + avg_y_stride * 2, @@ -279,8 +279,8 @@ static int vp9_denoiser_8xM_sse2(const uint8_t *sig, int sig_stride, acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r], mc_running_buffer[r], running_buffer[r], - k_0, k_4, k_8, k_16, - l3, l32, l21, acc_diff); + &k_0, &k_4, &k_8, &k_16, + &l3, &l32, &l21, acc_diff); vpx_memcpy(running_avg_y, running_buffer[r], 8); vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + 8, 8); // Update pointers for next iteration. @@ -357,9 +357,9 @@ static int vp9_denoiser_64_32_16xM_sse2(const uint8_t *sig, int sig_stride, const __m128i l21 = _mm_set1_epi8(1); int sum_diff = 0; - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - acc_diff[i][j] = _mm_setzero_si128(); + for (c = 0; c < 4; ++c) { + for (r = 0; r < 4; ++r) { + acc_diff[c][r] = _mm_setzero_si128(); } } @@ -368,8 +368,8 @@ static int vp9_denoiser_64_32_16xM_sse2(const uint8_t *sig, int sig_stride, acc_diff[c>>4][r>>4] = vp9_denoiser_16x1_sse2( sig, mc_running_avg_y, running_avg_y, - k_0, k_4, k_8, k_16, - l3, l32, l21, acc_diff[c>>4][r>>4]); + &k_0, &k_4, &k_8, &k_16, + &l3, &l32, &l21, acc_diff[c>>4][r>>4]); // Update pointers for next iteration. sig += 16; mc_running_avg_y += 16; |