-rw-r--r--  README | 1
-rw-r--r--  build/make/configure.sh | 11
-rwxr-xr-x  build/make/iosbuild.sh | 5
-rwxr-xr-x  configure | 21
-rw-r--r--  examples/vp9_spatial_svc_encoder.c | 7
-rw-r--r--  ivfdec.c | 8
-rw-r--r--  libs.mk | 24
-rw-r--r--  test/bench.cc | 38
-rw-r--r--  test/bench.h | 30
-rw-r--r--  test/cpu_speed_test.cc | 2
-rw-r--r--  test/dct_test.cc | 10
-rw-r--r--  test/encode_perf_test.cc | 2
-rw-r--r--  test/encode_test_driver.cc | 2
-rw-r--r--  test/encode_test_driver.h | 2
-rw-r--r--  test/invalid_file_test.cc | 1
-rw-r--r--  test/ivf_video_source.h | 2
-rw-r--r--  test/pp_filter_test.cc | 198
-rw-r--r--  test/svc_datarate_test.cc | 110
-rw-r--r--  test/test-data.mk | 2
-rw-r--r--  test/test-data.sha1 | 2
-rw-r--r--  test/test.mk | 2
-rw-r--r--  test/variance_test.cc | 21
-rw-r--r--  test/vp9_datarate_test.cc | 13
-rw-r--r--  test/vp9_ethread_test.cc | 2
-rw-r--r--  test/vp9_quantize_test.cc | 118
-rw-r--r--  third_party/libwebm/Android.mk | 2
-rw-r--r--  third_party/libwebm/README.libvpx | 14
-rw-r--r--  third_party/libwebm/common/file_util.cc | 15
-rw-r--r--  third_party/libwebm/common/file_util.h | 5
-rw-r--r--  third_party/libwebm/common/hdr_util.cc | 8
-rw-r--r--  third_party/libwebm/common/hdr_util.h | 10
-rw-r--r--  third_party/libwebm/mkvmuxer/mkvmuxer.cc | 25
-rw-r--r--  third_party/libwebm/mkvparser/mkvparser.cc | 35
-rw-r--r--  tools/tiny_ssim.c | 43
-rw-r--r--  vp8/common/postproc.c | 2
-rw-r--r--  vp8/decoder/decodeframe.c | 6
-rw-r--r--  vp9/encoder/vp9_aq_cyclicrefresh.c | 22
-rw-r--r--  vp9/encoder/vp9_aq_cyclicrefresh.h | 2
-rw-r--r--  vp9/encoder/vp9_encodeframe.c | 53
-rw-r--r--  vp9/encoder/vp9_encoder.c | 145
-rw-r--r--  vp9/encoder/vp9_encoder.h | 3
-rw-r--r--  vp9/encoder/vp9_firstpass.c | 64
-rw-r--r--  vp9/encoder/vp9_firstpass.h | 12
-rw-r--r--  vp9/encoder/vp9_pickmode.c | 95
-rw-r--r--  vp9/encoder/vp9_ratectrl.c | 211
-rw-r--r--  vp9/encoder/vp9_ratectrl.h | 8
-rw-r--r--  vp9/encoder/vp9_rd.h | 4
-rw-r--r--  vp9/encoder/vp9_rdopt.c | 14
-rw-r--r--  vp9/encoder/vp9_speed_features.c | 25
-rw-r--r--  vp9/encoder/vp9_speed_features.h | 22
-rw-r--r--  vp9/encoder/vp9_svc_layercontext.c | 66
-rw-r--r--  vp9/encoder/vp9_svc_layercontext.h | 12
-rw-r--r--  vp9/vp9_cx_iface.c | 6
-rw-r--r--  vpx/vp8cx.h | 6
-rw-r--r--  vpx_dsp/arm/avg_pred_neon.c | 42
-rw-r--r--  vpx_dsp/arm/subtract_neon.c | 83
-rw-r--r--  vpx_dsp/ppc/deblock_vsx.c | 378
-rw-r--r--  vpx_dsp/ppc/inv_txfm_vsx.c | 86
-rw-r--r--  vpx_dsp/ppc/quantize_vsx.c | 263
-rw-r--r--  vpx_dsp/ppc/types_vsx.h | 14
-rw-r--r--  vpx_dsp/ppc/variance_vsx.c | 174
-rw-r--r--  vpx_dsp/vpx_dsp.mk | 1
-rw-r--r--  vpx_dsp/vpx_dsp_rtcd_defs.pl | 48
-rw-r--r--  vpx_ports/config.h | 16
64 files changed, 2070 insertions(+), 604 deletions(-)
diff --git a/README b/README
index a900c8077..49407ed9f 100644
--- a/README
+++ b/README
@@ -76,7 +76,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
armv8-linux-gcc
mips32-linux-gcc
mips64-linux-gcc
- ppc64-linux-gcc
ppc64le-linux-gcc
sparc-solaris-gcc
x86-android-gcc
diff --git a/build/make/configure.sh b/build/make/configure.sh
index c4e3b5141..480b2d0ea 100644
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -719,11 +719,8 @@ process_common_toolchain() {
*sparc*)
tgt_isa=sparc
;;
- power*64*-*)
- tgt_isa=ppc64
- ;;
- power*)
- tgt_isa=ppc
+ power*64le*-*)
+ tgt_isa=ppc64le
;;
*mips64el*)
tgt_isa=mips64
@@ -835,7 +832,7 @@ process_common_toolchain() {
IOS_VERSION_MIN="8.0"
else
IOS_VERSION_OPTIONS=""
- IOS_VERSION_MIN="6.0"
+ IOS_VERSION_MIN="7.0"
fi
# Handle darwin variants. Newer SDKs allow targeting older
@@ -1221,7 +1218,7 @@ EOF
check_add_asflags -march=${tgt_isa}
check_add_asflags -KPIC
;;
- ppc*)
+ ppc64le*)
link_with_cc=gcc
setup_gnu_toolchain
check_gcc_machine_option "vsx"
diff --git a/build/make/iosbuild.sh b/build/make/iosbuild.sh
index 365a8c013..2442a282d 100755
--- a/build/make/iosbuild.sh
+++ b/build/make/iosbuild.sh
@@ -132,7 +132,8 @@ create_vpx_framework_config_shim() {
done
# Consume the last line of output from the loop: We don't want it.
- sed -i '' -e '$d' "${config_file}"
+ sed -i.bak -e '$d' "${config_file}"
+ rm "${config_file}.bak"
printf "#endif\n\n" >> "${config_file}"
printf "#endif // ${include_guard}" >> "${config_file}"
@@ -350,7 +351,7 @@ if [ "$ENABLE_SHARED" = "yes" ]; then
IOS_VERSION_MIN="8.0"
else
IOS_VERSION_OPTIONS=""
- IOS_VERSION_MIN="6.0"
+ IOS_VERSION_MIN="7.0"
fi
if [ "${VERBOSE}" = "yes" ]; then
diff --git a/configure b/configure
index 702483f13..6c0adc7f9 100755
--- a/configure
+++ b/configure
@@ -116,7 +116,6 @@ all_platforms="${all_platforms} armv7s-darwin-gcc"
all_platforms="${all_platforms} armv8-linux-gcc"
all_platforms="${all_platforms} mips32-linux-gcc"
all_platforms="${all_platforms} mips64-linux-gcc"
-all_platforms="${all_platforms} ppc64-linux-gcc"
all_platforms="${all_platforms} ppc64le-linux-gcc"
all_platforms="${all_platforms} sparc-solaris-gcc"
all_platforms="${all_platforms} x86-android-gcc"
@@ -328,6 +327,7 @@ CONFIG_LIST="
multi_res_encoding
temporal_denoising
vp9_temporal_denoising
+ consistent_recode
coefficient_range_checking
vp9_highbitdepth
better_hw_compatibility
@@ -389,6 +389,7 @@ CMDLINE_SELECT="
multi_res_encoding
temporal_denoising
vp9_temporal_denoising
+ consistent_recode
coefficient_range_checking
better_hw_compatibility
vp9_highbitdepth
@@ -521,7 +522,7 @@ process_detect() {
# here rather than at option parse time because the target auto-detect
# magic happens after the command line has been parsed.
case "${tgt_os}" in
- linux|os2|darwin*|iphonesimulator*)
+ linux|os2|solaris|darwin*|iphonesimulator*)
# Supported platforms
;;
*)
@@ -593,6 +594,10 @@ EOF
check_header unistd.h # for sysconf(3) and friends.
check_header vpx/vpx_integer.h -I${source_path} && enable_feature vpx_ports
+
+ if enabled neon && ! enabled external_build; then
+ check_header arm_neon.h || die "Unable to find arm_neon.h"
+ fi
}
process_toolchain() {
@@ -699,7 +704,7 @@ process_toolchain() {
soft_enable libyuv
;;
*-android-*)
- soft_enable webm_io
+ check_add_cxxflags -std=c++11 && soft_enable webm_io
soft_enable libyuv
# GTestLog must be modified to use Android logging utilities.
;;
@@ -708,7 +713,7 @@ process_toolchain() {
# x86 targets.
;;
*-iphonesimulator-*)
- soft_enable webm_io
+ check_add_cxxflags -std=c++11 && soft_enable webm_io
soft_enable libyuv
;;
*-win*)
@@ -718,9 +723,7 @@ process_toolchain() {
check_cxx "$@" <<EOF && soft_enable unit_tests
int z;
EOF
- check_cxx "$@" <<EOF && soft_enable webm_io
-int z;
-EOF
+ check_add_cxxflags -std=c++11 && soft_enable webm_io
check_cxx "$@" <<EOF && soft_enable libyuv
int z;
EOF
@@ -729,9 +732,7 @@ EOF
enabled pthread_h && check_cxx "$@" <<EOF && soft_enable unit_tests
int z;
EOF
- check_cxx "$@" <<EOF && soft_enable webm_io
-int z;
-EOF
+ check_add_cxxflags -std=c++11 && soft_enable webm_io
check_cxx "$@" <<EOF && soft_enable libyuv
int z;
EOF
diff --git a/examples/vp9_spatial_svc_encoder.c b/examples/vp9_spatial_svc_encoder.c
index 091c6954d..3fd961bdc 100644
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -622,6 +622,7 @@ int main(int argc, const char **argv) {
vpx_codec_ctx_t codec;
vpx_codec_enc_cfg_t enc_cfg;
SvcContext svc_ctx;
+ vpx_svc_frame_drop_t svc_drop_frame;
uint32_t i;
uint32_t frame_cnt = 0;
vpx_image_t raw;
@@ -732,6 +733,12 @@ int main(int argc, const char **argv) {
vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);
+ svc_drop_frame.framedrop_mode = FULL_SUPERFRAME_DROP;
+ for (sl = 0; sl < (unsigned int)svc_ctx.spatial_layers; ++sl)
+ svc_drop_frame.framedrop_thresh[sl] = enc_cfg.rc_dropframe_thresh;
+ svc_drop_frame.max_consec_drop = INT_MAX;
+ vpx_codec_control(&codec, VP9E_SET_SVC_FRAME_DROP_LAYER, &svc_drop_frame);
+
// Encode frames
while (!end_of_stream) {
vpx_codec_iter_t iter = NULL;
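With VP9E_SET_SVC_FRAME_DROP_LAYER the example now selects whole-superframe dropping and applies the configured drop threshold to every spatial layer; setting max_consec_drop to INT_MAX effectively leaves the number of consecutive drops uncapped. The control pattern, condensed from the hunk above (needs <limits.h> for INT_MAX):

    vpx_svc_frame_drop_t svc_drop_frame;
    svc_drop_frame.framedrop_mode = FULL_SUPERFRAME_DROP;  // drop all layers or none
    for (sl = 0; sl < (unsigned int)svc_ctx.spatial_layers; ++sl)
      svc_drop_frame.framedrop_thresh[sl] = enc_cfg.rc_dropframe_thresh;
    svc_drop_frame.max_consec_drop = INT_MAX;  // no cap on consecutive drops
    vpx_codec_control(&codec, VP9E_SET_SVC_FRAME_DROP_LAYER, &svc_drop_frame);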
diff --git a/ivfdec.c b/ivfdec.c
index f64e594ab..3e179bc6e 100644
--- a/ivfdec.c
+++ b/ivfdec.c
@@ -76,12 +76,12 @@ int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read,
size_t frame_size = 0;
if (fread(raw_header, IVF_FRAME_HDR_SZ, 1, infile) != 1) {
- if (!feof(infile)) warn("Failed to read frame size\n");
+ if (!feof(infile)) warn("Failed to read frame size");
} else {
frame_size = mem_get_le32(raw_header);
if (frame_size > 256 * 1024 * 1024) {
- warn("Read invalid frame size (%u)\n", (unsigned int)frame_size);
+ warn("Read invalid frame size (%u)", (unsigned int)frame_size);
frame_size = 0;
}
@@ -92,7 +92,7 @@ int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read,
*buffer = new_buffer;
*buffer_size = 2 * frame_size;
} else {
- warn("Failed to allocate compressed data buffer\n");
+ warn("Failed to allocate compressed data buffer");
frame_size = 0;
}
}
@@ -100,7 +100,7 @@ int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read,
if (!feof(infile)) {
if (fread(*buffer, 1, frame_size, infile) != frame_size) {
- warn("Failed to read full frame\n");
+ warn("Failed to read full frame");
return 1;
}
diff --git a/libs.mk b/libs.mk
index 899d380a8..31eb98f1e 100644
--- a/libs.mk
+++ b/libs.mk
@@ -282,18 +282,6 @@ $(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm
$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(SO_VERSION_MAJOR)
$(BUILD_PFX)$(LIBVPX_SO): EXPORTS_FILE = $(EXPORT_FILE)
-libvpx.ver: $(call enabled,CODEC_EXPORTS)
- @echo " [CREATE] $@"
- $(qexec)echo "{ global:" > $@
- $(qexec)for f in $?; do awk '{print $$2";"}' < $$f >>$@; done
- $(qexec)echo "local: *; };" >> $@
-CLEAN-OBJS += libvpx.ver
-
-libvpx.syms: $(call enabled,CODEC_EXPORTS)
- @echo " [CREATE] $@"
- $(qexec)awk '{print "_"$$2}' $^ >$@
-CLEAN-OBJS += libvpx.syms
-
libvpx.def: $(call enabled,CODEC_EXPORTS)
@echo " [CREATE] $@"
$(qexec)echo LIBRARY $(LIBVPX_SO:.dll=) INITINSTANCE TERMINSTANCE > $@
@@ -353,6 +341,18 @@ INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc
CLEAN-OBJS += vpx.pc
endif
+libvpx.ver: $(call enabled,CODEC_EXPORTS)
+ @echo " [CREATE] $@"
+ $(qexec)echo "{ global:" > $@
+ $(qexec)for f in $?; do awk '{print $$2";"}' < $$f >>$@; done
+ $(qexec)echo "local: *; };" >> $@
+CLEAN-OBJS += libvpx.ver
+
+libvpx.syms: $(call enabled,CODEC_EXPORTS)
+ @echo " [CREATE] $@"
+ $(qexec)awk '{print "_"$$2}' $^ >$@
+CLEAN-OBJS += libvpx.syms
+
#
# Rule to make assembler configuration file from C configuration file
#
diff --git a/test/bench.cc b/test/bench.cc
new file mode 100644
index 000000000..281b7411d
--- /dev/null
+++ b/test/bench.cc
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+#include <algorithm>
+
+#include "test/bench.h"
+#include "vpx_ports/vpx_timer.h"
+
+void AbstractBench::runNTimes(int n) {
+ for (int r = 0; r < VPX_BENCH_ROBUST_ITER; r++) {
+ vpx_usec_timer timer;
+ vpx_usec_timer_start(&timer);
+ for (int j = 0; j < n; ++j) {
+ run();
+ }
+ vpx_usec_timer_mark(&timer);
+ times[r] = static_cast<int>(vpx_usec_timer_elapsed(&timer));
+ }
+}
+
+void AbstractBench::printMedian(const char *title) {
+ std::sort(times, times + VPX_BENCH_ROBUST_ITER);
+ const int med = times[VPX_BENCH_ROBUST_ITER >> 1];
+ int sad = 0;
+ for (int t = 0; t < VPX_BENCH_ROBUST_ITER; t++) {
+ sad += abs(times[t] - med);
+ }
+ printf("[%10s] %s %.1f ms ( ±%.1f ms )\n", "BENCH ", title, med / 1000.0,
+ sad / (VPX_BENCH_ROBUST_ITER * 1000.0));
+}
diff --git a/test/bench.h b/test/bench.h
new file mode 100644
index 000000000..0b0cf10a4
--- /dev/null
+++ b/test/bench.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef TEST_BENCH_H_
+#define TEST_BENCH_H_
+
+// Number of iterations used to compute median run time.
+#define VPX_BENCH_ROBUST_ITER 15
+
+class AbstractBench {
+ public:
+ void runNTimes(int n);
+ void printMedian(const char *title);
+
+ protected:
+ // Implement this method and put the code to benchmark in it.
+ virtual void run() = 0;
+
+ private:
+ int times[VPX_BENCH_ROBUST_ITER];
+};
+
+#endif // TEST_BENCH_H_
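The AbstractBench helper declared above drives all of the new DISABLED_Speed tests: a fixture derives from it, implements run(), and reports a median over VPX_BENCH_ROBUST_ITER timing passes. A minimal sketch of the pattern (MemsetBench is hypothetical, for illustration only; it is not part of this change):

    #include <cstring>
    #include "test/bench.h"

    class MemsetBench : public AbstractBench {
     protected:
      // The code under measurement goes here; runNTimes() calls it in a loop.
      virtual void run() { memset(buf_, 0, sizeof(buf_)); }

     private:
      unsigned char buf_[16 * 16];
    };

    // Typical use inside a DISABLED_Speed test:
    //   MemsetBench bench;
    //   bench.runNTimes(100000);
    //   bench.printMedian("memset 16x16");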
diff --git a/test/cpu_speed_test.cc b/test/cpu_speed_test.cc
index 404b5b44f..34e35b065 100644
--- a/test/cpu_speed_test.cc
+++ b/test/cpu_speed_test.cc
@@ -152,5 +152,5 @@ VP9_INSTANTIATE_TEST_CASE(CpuSpeedTest,
::testing::Values(::libvpx_test::kTwoPassGood,
::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime),
- ::testing::Range(0, 9));
+ ::testing::Range(0, 10));
} // namespace
diff --git a/test/dct_test.cc b/test/dct_test.cc
index 10062150f..e8ad0cd5d 100644
--- a/test/dct_test.cc
+++ b/test/dct_test.cc
@@ -725,4 +725,14 @@ INSTANTIATE_TEST_CASE_P(SSE2, TransWHT,
::testing::Values(make_tuple(0, &wht_sse2_func_info, 0,
VPX_BITS_8)));
#endif // HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_VSX && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
+static const FuncInfo wht_vsx_func_info = {
+ &fdct_wrapper<vp9_fwht4x4_c>, &idct_wrapper<vpx_iwht4x4_16_add_vsx>, 4, 1
+};
+
+INSTANTIATE_TEST_CASE_P(VSX, TransWHT,
+ ::testing::Values(make_tuple(0, &wht_vsx_func_info, 0,
+ VPX_BITS_8)));
+#endif // HAVE_VSX && !CONFIG_EMULATE_HARDWARE
} // namespace
diff --git a/test/encode_perf_test.cc b/test/encode_perf_test.cc
index 0bb435502..142d9e2da 100644
--- a/test/encode_perf_test.cc
+++ b/test/encode_perf_test.cc
@@ -48,7 +48,7 @@ const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = {
EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470),
};
-const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8 };
+const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8, 9 };
const int kEncodePerfTestThreads[] = { 1, 2, 4 };
#define NELEMENTS(x) (sizeof((x)) / sizeof((x)[0]))
diff --git a/test/encode_test_driver.cc b/test/encode_test_driver.cc
index 63e972a00..b2cbc3f05 100644
--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@@ -201,7 +201,7 @@ void EncoderTest::RunLoop(VideoSource *video) {
PreEncodeFrameHook(video, encoder.get());
encoder->EncodeFrame(video, frame_flags_);
- PostEncodeFrameHook();
+ PostEncodeFrameHook(encoder.get());
CxDataIterator iter = encoder->GetCxData();
diff --git a/test/encode_test_driver.h b/test/encode_test_driver.h
index a301e21cc..03624d110 100644
--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@@ -226,7 +226,7 @@ class EncoderTest {
virtual void PreEncodeFrameHook(VideoSource * /*video*/,
Encoder * /*encoder*/) {}
- virtual void PostEncodeFrameHook() {}
+ virtual void PostEncodeFrameHook(Encoder * /*encoder*/) {}
// Hook to be called on every compressed data packet.
virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {}
diff --git a/test/invalid_file_test.cc b/test/invalid_file_test.cc
index 43a4c6929..9cfaa1f1f 100644
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -124,6 +124,7 @@ TEST_P(InvalidFileTest, ReturnCode) { RunTest(); }
const DecodeParam kVP8InvalidFileTests[] = {
{ 1, "invalid-bug-1443.ivf" },
{ 1, "invalid-token-partition.ivf" },
+ { 1, "invalid-vp80-00-comprehensive-s17661_r01-05_b6-.ivf" },
};
VP8_INSTANTIATE_TEST_CASE(InvalidFileTest,
diff --git a/test/ivf_video_source.h b/test/ivf_video_source.h
index 5862d2649..4b5d55469 100644
--- a/test/ivf_video_source.h
+++ b/test/ivf_video_source.h
@@ -16,7 +16,7 @@
#include "test/video_source.h"
namespace libvpx_test {
-const unsigned int kCodeBufferSize = 256 * 1024;
+const unsigned int kCodeBufferSize = 256 * 1024 * 1024;
const unsigned int kIvfFileHdrSize = 32;
const unsigned int kIvfFrameHdrSize = 12;
diff --git a/test/pp_filter_test.cc b/test/pp_filter_test.cc
index 5a2ade1ef..1fe0348fc 100644
--- a/test/pp_filter_test.cc
+++ b/test/pp_filter_test.cc
@@ -11,6 +11,7 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
+#include "test/bench.h"
#include "test/buffer.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
@@ -32,7 +33,6 @@ typedef void (*VpxMbPostProcDownFunc)(unsigned char *dst, int pitch, int rows,
int cols, int flimit);
namespace {
-
// Compute the filter level used in post proc from the loop filter strength
int q2mbl(int x) {
if (x < 20) x = 20;
@@ -42,18 +42,36 @@ int q2mbl(int x) {
}
class VpxPostProcDownAndAcrossMbRowTest
- : public ::testing::TestWithParam<VpxPostProcDownAndAcrossMbRowFunc> {
+ : public AbstractBench,
+ public ::testing::TestWithParam<VpxPostProcDownAndAcrossMbRowFunc> {
public:
+ VpxPostProcDownAndAcrossMbRowTest() : mbPostProcDownAndAcross(GetParam()) {}
virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+ const VpxPostProcDownAndAcrossMbRowFunc mbPostProcDownAndAcross;
+ // Size of the underlying data block that will be filtered.
+ int block_width;
+ int block_height;
+ Buffer<uint8_t> *src_image;
+ Buffer<uint8_t> *dst_image;
+ uint8_t *flimits;
+ void run();
};
+void VpxPostProcDownAndAcrossMbRowTest::run() {
+ mbPostProcDownAndAcross(src_image->TopLeftPixel(), dst_image->TopLeftPixel(),
+ src_image->stride(), dst_image->stride(), block_width,
+ flimits, 16);
+}
+
// Test routine for the VPx post-processing function
// vpx_post_proc_down_and_across_mb_row_c.
TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckFilterOutput) {
// Size of the underlying data block that will be filtered.
- const int block_width = 16;
- const int block_height = 16;
+ block_width = 16;
+ block_height = 16;
// 5-tap filter needs 2 padding rows above and below the block in the input.
Buffer<uint8_t> src_image = Buffer<uint8_t>(block_width, block_height, 2);
@@ -66,8 +84,7 @@ TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckFilterOutput) {
Buffer<uint8_t>(block_width, block_height, 8, 16, 8, 8);
ASSERT_TRUE(dst_image.Init());
- uint8_t *const flimits =
- reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
+ flimits = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
(void)memset(flimits, 255, block_width);
// Initialize pixels in the input:
@@ -79,13 +96,12 @@ TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckFilterOutput) {
// Initialize pixels in the output to 99.
dst_image.Set(99);
- ASM_REGISTER_STATE_CHECK(GetParam()(
+ ASM_REGISTER_STATE_CHECK(mbPostProcDownAndAcross(
src_image.TopLeftPixel(), dst_image.TopLeftPixel(), src_image.stride(),
dst_image.stride(), block_width, flimits, 16));
- static const uint8_t kExpectedOutput[block_height] = {
- 4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4
- };
+ static const uint8_t kExpectedOutput[] = { 4, 3, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 3, 4 };
uint8_t *pixel_ptr = dst_image.TopLeftPixel();
for (int i = 0; i < block_height; ++i) {
@@ -103,8 +119,8 @@ TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckCvsAssembly) {
// Size of the underlying data block that will be filtered.
// Y blocks are always a multiple of 16 wide and exactly 16 high. U and V
// blocks are always a multiple of 8 wide and exactly 8 high.
- const int block_width = 136;
- const int block_height = 16;
+ block_width = 136;
+ block_height = 16;
// 5-tap filter needs 2 padding rows above and below the block in the input.
// SSE2 reads in blocks of 16. Pad an extra 8 in case the width is not %16.
@@ -127,8 +143,7 @@ TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckCvsAssembly) {
// can have a different filter. SSE2 assembly reads flimits in blocks of 16 so
// it must be padded out.
const int flimits_width = block_width % 16 ? block_width + 8 : block_width;
- uint8_t *const flimits =
- reinterpret_cast<uint8_t *>(vpx_memalign(16, flimits_width));
+ flimits = reinterpret_cast<uint8_t *>(vpx_memalign(16, flimits_width));
ACMRandom rnd;
rnd.Reset(ACMRandom::DeterministicSeed());
@@ -143,7 +158,6 @@ TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckCvsAssembly) {
for (int f = 0; f < 255; f++) {
(void)memset(flimits + blocks, f, sizeof(*flimits) * 8);
-
dst_image.Set(0);
dst_image_ref.Set(0);
@@ -151,10 +165,10 @@ TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckCvsAssembly) {
src_image.TopLeftPixel(), dst_image_ref.TopLeftPixel(),
src_image.stride(), dst_image_ref.stride(), block_width, flimits,
block_height);
- ASM_REGISTER_STATE_CHECK(
- GetParam()(src_image.TopLeftPixel(), dst_image.TopLeftPixel(),
- src_image.stride(), dst_image.stride(), block_width,
- flimits, block_height));
+ ASM_REGISTER_STATE_CHECK(mbPostProcDownAndAcross(
+ src_image.TopLeftPixel(), dst_image.TopLeftPixel(),
+ src_image.stride(), dst_image.stride(), block_width, flimits,
+ block_height));
ASSERT_TRUE(dst_image.CheckValues(dst_image_ref));
}
@@ -163,12 +177,58 @@ TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckCvsAssembly) {
vpx_free(flimits);
}
+TEST_P(VpxPostProcDownAndAcrossMbRowTest, DISABLED_Speed) {
+ // Size of the underlying data block that will be filtered.
+ block_width = 16;
+ block_height = 16;
+
+ // 5-tap filter needs 2 padding rows above and below the block in the input.
+ Buffer<uint8_t> src_image = Buffer<uint8_t>(block_width, block_height, 2);
+ ASSERT_TRUE(src_image.Init());
+ this->src_image = &src_image;
+
+ // Filter extends output block by 8 samples at left and right edges.
+ // Though the left padding is only 8 bytes, the assembly code tries to
+ // read 16 bytes before the pointer.
+ Buffer<uint8_t> dst_image =
+ Buffer<uint8_t>(block_width, block_height, 8, 16, 8, 8);
+ ASSERT_TRUE(dst_image.Init());
+ this->dst_image = &dst_image;
+
+ flimits = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
+ (void)memset(flimits, 255, block_width);
+
+ // Initialize pixels in the input:
+ // block pixels to value 1,
+ // border pixels to value 10.
+ src_image.SetPadding(10);
+ src_image.Set(1);
+
+ // Initialize pixels in the output to 99.
+ dst_image.Set(99);
+
+ runNTimes(INT16_MAX);
+ printMedian("16x16");
+
+ vpx_free(flimits);
+};
+
class VpxMbPostProcAcrossIpTest
- : public ::testing::TestWithParam<VpxMbPostProcAcrossIpFunc> {
+ : public AbstractBench,
+ public ::testing::TestWithParam<VpxMbPostProcAcrossIpFunc> {
public:
+ VpxMbPostProcAcrossIpTest()
+ : rows(16), cols(16), mbPostProcAcrossIp(GetParam()),
+ src(Buffer<uint8_t>(rows, cols, 8, 8, 17, 8)) {}
virtual void TearDown() { libvpx_test::ClearSystemState(); }
protected:
+ const int rows;
+ const int cols;
+ const VpxMbPostProcAcrossIpFunc mbPostProcAcrossIp;
+ Buffer<uint8_t> src;
+ void run();
+
void SetCols(unsigned char *s, int rows, int cols, int src_width) {
for (int r = 0; r < rows; r++) {
for (int c = 0; c < cols; c++) {
@@ -197,11 +257,11 @@ class VpxMbPostProcAcrossIpTest
}
};
-TEST_P(VpxMbPostProcAcrossIpTest, CheckLowFilterOutput) {
- const int rows = 16;
- const int cols = 16;
+void VpxMbPostProcAcrossIpTest::run() {
+ mbPostProcAcrossIp(src.TopLeftPixel(), src.stride(), rows, cols, q2mbl(0));
+}
- Buffer<uint8_t> src = Buffer<uint8_t>(cols, rows, 8, 8, 17, 8);
+TEST_P(VpxMbPostProcAcrossIpTest, CheckLowFilterOutput) {
ASSERT_TRUE(src.Init());
src.SetPadding(10);
SetCols(src.TopLeftPixel(), rows, cols, src.stride());
@@ -215,15 +275,11 @@ TEST_P(VpxMbPostProcAcrossIpTest, CheckLowFilterOutput) {
}
TEST_P(VpxMbPostProcAcrossIpTest, CheckMediumFilterOutput) {
- const int rows = 16;
- const int cols = 16;
-
- Buffer<uint8_t> src = Buffer<uint8_t>(cols, rows, 8, 8, 17, 8);
ASSERT_TRUE(src.Init());
src.SetPadding(10);
SetCols(src.TopLeftPixel(), rows, cols, src.stride());
- static const unsigned char kExpectedOutput[cols] = {
+ static const unsigned char kExpectedOutput[] = {
2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13
};
@@ -232,15 +288,11 @@ TEST_P(VpxMbPostProcAcrossIpTest, CheckMediumFilterOutput) {
}
TEST_P(VpxMbPostProcAcrossIpTest, CheckHighFilterOutput) {
- const int rows = 16;
- const int cols = 16;
-
- Buffer<uint8_t> src = Buffer<uint8_t>(cols, rows, 8, 8, 17, 8);
ASSERT_TRUE(src.Init());
src.SetPadding(10);
SetCols(src.TopLeftPixel(), rows, cols, src.stride());
- static const unsigned char kExpectedOutput[cols] = {
+ static const unsigned char kExpectedOutput[] = {
2, 2, 3, 4, 4, 5, 6, 7, 8, 9, 10, 11, 11, 12, 13, 13
};
@@ -254,9 +306,6 @@ TEST_P(VpxMbPostProcAcrossIpTest, CheckHighFilterOutput) {
}
TEST_P(VpxMbPostProcAcrossIpTest, CheckCvsAssembly) {
- const int rows = 16;
- const int cols = 16;
-
Buffer<uint8_t> c_mem = Buffer<uint8_t>(cols, rows, 8, 8, 17, 8);
ASSERT_TRUE(c_mem.Init());
Buffer<uint8_t> asm_mem = Buffer<uint8_t>(cols, rows, 8, 8, 17, 8);
@@ -279,12 +328,33 @@ TEST_P(VpxMbPostProcAcrossIpTest, CheckCvsAssembly) {
}
}
+TEST_P(VpxMbPostProcAcrossIpTest, DISABLED_Speed) {
+ ASSERT_TRUE(src.Init());
+ src.SetPadding(10);
+
+ SetCols(src.TopLeftPixel(), rows, cols, src.stride());
+
+ runNTimes(100000);
+ printMedian("16x16");
+}
+
class VpxMbPostProcDownTest
- : public ::testing::TestWithParam<VpxMbPostProcDownFunc> {
+ : public AbstractBench,
+ public ::testing::TestWithParam<VpxMbPostProcDownFunc> {
public:
+ VpxMbPostProcDownTest()
+ : rows(16), cols(16), mbPostProcDown(GetParam()),
+ src_c(Buffer<uint8_t>(rows, cols, 8, 8, 8, 17)) {}
+
virtual void TearDown() { libvpx_test::ClearSystemState(); }
protected:
+ const int rows;
+ const int cols;
+ const VpxMbPostProcDownFunc mbPostProcDown;
+ Buffer<uint8_t> src_c;
+ void run();
+
void SetRows(unsigned char *src_c, int rows, int cols, int src_width) {
for (int r = 0; r < rows; r++) {
memset(src_c, r, cols);
@@ -306,22 +376,22 @@ class VpxMbPostProcDownTest
void RunFilterLevel(unsigned char *s, int rows, int cols, int src_width,
int filter_level, const unsigned char *expected_output) {
ASM_REGISTER_STATE_CHECK(
- GetParam()(s, src_width, rows, cols, filter_level));
+ mbPostProcDown(s, src_width, rows, cols, filter_level));
RunComparison(expected_output, s, rows, cols, src_width);
}
};
-TEST_P(VpxMbPostProcDownTest, CheckHighFilterOutput) {
- const int rows = 16;
- const int cols = 16;
+void VpxMbPostProcDownTest::run() {
+ mbPostProcDown(src_c.TopLeftPixel(), src_c.stride(), rows, cols, q2mbl(0));
+}
- Buffer<uint8_t> src_c = Buffer<uint8_t>(cols, rows, 8, 8, 8, 17);
+TEST_P(VpxMbPostProcDownTest, CheckHighFilterOutput) {
ASSERT_TRUE(src_c.Init());
src_c.SetPadding(10);
SetRows(src_c.TopLeftPixel(), rows, cols, src_c.stride());
- static const unsigned char kExpectedOutput[rows * cols] = {
+ static const unsigned char kExpectedOutput[] = {
2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2,
2, 3, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 3, 4, 4, 3, 3, 3,
@@ -348,16 +418,12 @@ TEST_P(VpxMbPostProcDownTest, CheckHighFilterOutput) {
}
TEST_P(VpxMbPostProcDownTest, CheckMediumFilterOutput) {
- const int rows = 16;
- const int cols = 16;
-
- Buffer<uint8_t> src_c = Buffer<uint8_t>(cols, rows, 8, 8, 8, 17);
ASSERT_TRUE(src_c.Init());
src_c.SetPadding(10);
SetRows(src_c.TopLeftPixel(), rows, cols, src_c.stride());
- static const unsigned char kExpectedOutput[rows * cols] = {
+ static const unsigned char kExpectedOutput[] = {
2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2,
2, 3, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
@@ -379,10 +445,6 @@ TEST_P(VpxMbPostProcDownTest, CheckMediumFilterOutput) {
}
TEST_P(VpxMbPostProcDownTest, CheckLowFilterOutput) {
- const int rows = 16;
- const int cols = 16;
-
- Buffer<uint8_t> src_c = Buffer<uint8_t>(cols, rows, 8, 8, 8, 17);
ASSERT_TRUE(src_c.Init());
src_c.SetPadding(10);
@@ -399,13 +461,9 @@ TEST_P(VpxMbPostProcDownTest, CheckLowFilterOutput) {
}
TEST_P(VpxMbPostProcDownTest, CheckCvsAssembly) {
- const int rows = 16;
- const int cols = 16;
-
ACMRandom rnd;
rnd.Reset(ACMRandom::DeterministicSeed());
- Buffer<uint8_t> src_c = Buffer<uint8_t>(cols, rows, 8, 8, 8, 17);
ASSERT_TRUE(src_c.Init());
Buffer<uint8_t> src_asm = Buffer<uint8_t>(cols, rows, 8, 8, 8, 17);
ASSERT_TRUE(src_asm.Init());
@@ -418,7 +476,7 @@ TEST_P(VpxMbPostProcDownTest, CheckCvsAssembly) {
vpx_mbpost_proc_down_c(src_c.TopLeftPixel(), src_c.stride(), rows, cols,
q2mbl(level));
- ASM_REGISTER_STATE_CHECK(GetParam()(
+ ASM_REGISTER_STATE_CHECK(mbPostProcDown(
src_asm.TopLeftPixel(), src_asm.stride(), rows, cols, q2mbl(level)));
ASSERT_TRUE(src_asm.CheckValues(src_c));
@@ -429,12 +487,22 @@ TEST_P(VpxMbPostProcDownTest, CheckCvsAssembly) {
vpx_mbpost_proc_down_c(src_c.TopLeftPixel(), src_c.stride(), rows, cols,
q2mbl(level));
- ASM_REGISTER_STATE_CHECK(GetParam()(
+ ASM_REGISTER_STATE_CHECK(mbPostProcDown(
src_asm.TopLeftPixel(), src_asm.stride(), rows, cols, q2mbl(level)));
ASSERT_TRUE(src_asm.CheckValues(src_c));
}
}
+TEST_P(VpxMbPostProcDownTest, DISABLED_Speed) {
+ ASSERT_TRUE(src_c.Init());
+ src_c.SetPadding(10);
+
+ SetRows(src_c.TopLeftPixel(), rows, cols, src_c.stride());
+
+ runNTimes(100000);
+ printMedian("16x16");
+}
+
INSTANTIATE_TEST_CASE_P(
C, VpxPostProcDownAndAcrossMbRowTest,
::testing::Values(vpx_post_proc_down_and_across_mb_row_c));
@@ -481,4 +549,16 @@ INSTANTIATE_TEST_CASE_P(MSA, VpxMbPostProcDownTest,
::testing::Values(vpx_mbpost_proc_down_msa));
#endif // HAVE_MSA
+#if HAVE_VSX
+INSTANTIATE_TEST_CASE_P(
+ VSX, VpxPostProcDownAndAcrossMbRowTest,
+ ::testing::Values(vpx_post_proc_down_and_across_mb_row_vsx));
+
+INSTANTIATE_TEST_CASE_P(VSX, VpxMbPostProcAcrossIpTest,
+ ::testing::Values(vpx_mbpost_proc_across_ip_vsx));
+
+INSTANTIATE_TEST_CASE_P(VSX, VpxMbPostProcDownTest,
+ ::testing::Values(vpx_mbpost_proc_down_vsx));
+#endif // HAVE_VSX
+
} // namespace
diff --git a/test/svc_datarate_test.cc b/test/svc_datarate_test.cc
index be3a1969c..b9fbd8f4f 100644
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -115,6 +115,8 @@ class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest {
key_frame_spacing_ = 9999;
num_nonref_frames_ = 0;
layer_framedrop_ = 0;
+ force_key_ = 0;
+ force_key_test_ = 0;
}
virtual void BeginPassHook(unsigned int /*pass*/) {}
@@ -203,6 +205,7 @@ class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest {
svc_drop_frame.framedrop_mode = LAYER_DROP;
for (i = 0; i < number_spatial_layers_; i++)
svc_drop_frame.framedrop_thresh[i] = 30;
+ svc_drop_frame.max_consec_drop = 30;
encoder->Control(VP9E_SET_SVC_FRAME_DROP_LAYER, &svc_drop_frame);
}
}
@@ -268,7 +271,7 @@ class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest {
}
if (dynamic_drop_layer_) {
- if (video->frame() == 50) {
+ if (video->frame() == 0) {
// Change layer bitrates to set top layers to 0. This will trigger skip
// encoding/dropping of top two spatial layers.
cfg_.rc_target_bitrate -=
@@ -278,7 +281,25 @@ class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest {
cfg_.layer_target_bitrate[1] = 0;
cfg_.layer_target_bitrate[2] = 0;
encoder->Config(&cfg_);
+ } else if (video->frame() == 50) {
+ // Change layer bitrates to non-zero on two top spatial layers.
+ // This will resume encoding of the top two spatial layers.
+ cfg_.layer_target_bitrate[1] = middle_bitrate_;
+ cfg_.layer_target_bitrate[2] = top_bitrate_;
+ cfg_.rc_target_bitrate +=
+ cfg_.layer_target_bitrate[2] + cfg_.layer_target_bitrate[1];
+ encoder->Config(&cfg_);
} else if (video->frame() == 100) {
+ // Change layer bitrates to set top layers to 0. This will trigger skip
+ // encoding/dropping of top two spatial layers.
+ cfg_.rc_target_bitrate -=
+ (cfg_.layer_target_bitrate[1] + cfg_.layer_target_bitrate[2]);
+ middle_bitrate_ = cfg_.layer_target_bitrate[1];
+ top_bitrate_ = cfg_.layer_target_bitrate[2];
+ cfg_.layer_target_bitrate[1] = 0;
+ cfg_.layer_target_bitrate[2] = 0;
+ encoder->Config(&cfg_);
+ } else if (video->frame() == 150) {
// Change layer bitrate on second layer to non-zero to start
// encoding it again.
cfg_.layer_target_bitrate[1] = middle_bitrate_;
@@ -292,12 +313,21 @@ class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest {
encoder->Config(&cfg_);
}
}
+
+ if (force_key_test_ && force_key_)
+ frame_flags_ = VPX_EFLAG_FORCE_KF;
+ else
+ frame_flags_ = 0;
+
const vpx_rational_t tb = video->timebase();
timebase_ = static_cast<double>(tb.num) / tb.den;
duration_ = 0;
}
- virtual void PostEncodeFrameHook() {
+ virtual void PostEncodeFrameHook(::libvpx_test::Encoder *encoder) {
+ vpx_svc_layer_id_t layer_id;
+ encoder->Control(VP9E_GET_SVC_LAYER_ID, &layer_id);
+ temporal_layer_id_ = layer_id.temporal_layer_id;
for (int sl = 0; sl < number_spatial_layers_; ++sl) {
for (int tl = temporal_layer_id_; tl < number_temporal_layers_; ++tl) {
const int layer = sl * number_temporal_layers_ + tl;
@@ -366,13 +396,19 @@ class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest {
// In the constrained frame drop mode, if a given spatial layer is dropped,
// all upper layers must be dropped too.
if (!layer_framedrop_) {
+ int num_layers_dropped = 0;
for (int sl = 0; sl < number_spatial_layers_; ++sl) {
if (!pkt->data.frame.spatial_layer_encoded[sl]) {
// Check that all upper layers are dropped.
+ num_layers_dropped++;
for (int sl2 = sl + 1; sl2 < number_spatial_layers_; ++sl2)
ASSERT_EQ(pkt->data.frame.spatial_layer_encoded[sl2], 0);
}
}
+ if (num_layers_dropped == number_spatial_layers_ - 1)
+ force_key_ = 1;
+ else
+ force_key_ = 0;
}
// Keep track of number of non-reference frames, needed for mismatch check.
// Non-reference frames are top spatial and temporal layer frames,
@@ -461,6 +497,8 @@ class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest {
int key_frame_spacing_;
unsigned int num_nonref_frames_;
int layer_framedrop_;
+ int force_key_;
+ int force_key_test_;
};
// Params: speed setting.
@@ -528,6 +566,53 @@ TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc2SL1TLScreenContent1) {
}
// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
+// 3 temporal layers, with force key frame after frame drop
+TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL3TLForceKey) {
+ cfg_.rc_buf_initial_sz = 500;
+ cfg_.rc_buf_optimal_sz = 500;
+ cfg_.rc_buf_sz = 1000;
+ cfg_.rc_min_quantizer = 0;
+ cfg_.rc_max_quantizer = 63;
+ cfg_.rc_end_usage = VPX_CBR;
+ cfg_.g_lag_in_frames = 0;
+ cfg_.ss_number_layers = 3;
+ cfg_.ts_number_layers = 3;
+ cfg_.ts_rate_decimator[0] = 4;
+ cfg_.ts_rate_decimator[1] = 2;
+ cfg_.ts_rate_decimator[2] = 1;
+ cfg_.g_error_resilient = 1;
+ cfg_.g_threads = 1;
+ cfg_.temporal_layering_mode = 3;
+ svc_params_.scaling_factor_num[0] = 72;
+ svc_params_.scaling_factor_den[0] = 288;
+ svc_params_.scaling_factor_num[1] = 144;
+ svc_params_.scaling_factor_den[1] = 288;
+ svc_params_.scaling_factor_num[2] = 288;
+ svc_params_.scaling_factor_den[2] = 288;
+ cfg_.rc_dropframe_thresh = 30;
+ cfg_.kf_max_dist = 9999;
+ number_spatial_layers_ = cfg_.ss_number_layers;
+ number_temporal_layers_ = cfg_.ts_number_layers;
+ ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+ 0, 400);
+ top_sl_width_ = 640;
+ top_sl_height_ = 480;
+ cfg_.rc_target_bitrate = 100;
+ ResetModel();
+ AssignLayerBitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
+ cfg_.ts_number_layers, cfg_.temporal_layering_mode,
+ layer_target_avg_bandwidth_, bits_in_buffer_model_);
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
+ number_temporal_layers_, file_datarate_, 0.78, 1.25);
+#if CONFIG_VP9_DECODER
+ // The non-reference frames are expected to be mismatched frames as the
+ // encoder will avoid loopfilter on these frames.
+ EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());
+#endif
+}
+
+// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
// 3 temporal layers. Run CIF clip with 1 thread.
TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL3TL) {
cfg_.rc_buf_initial_sz = 500;
@@ -711,9 +796,9 @@ TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL_DisableEnableLayers) {
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- // Don't check rate targeting on top spatial layer since it will be skipped
- // for part of the sequence.
- CheckLayerRateTargeting(&cfg_, number_spatial_layers_ - 1,
+ // Don't check rate targeting on the two top spatial layers since they
+ // will be skipped for part of the sequence.
+ CheckLayerRateTargeting(&cfg_, number_spatial_layers_ - 2,
number_temporal_layers_, file_datarate_, 0.78, 1.15);
#if CONFIG_VP9_DECODER
// The non-reference frames are expected to be mismatched frames as the
@@ -848,7 +933,7 @@ TEST_P(DatarateOnePassCbrSvcFrameDropMultiBR, OnePassCbrSvc2SL3TL4Threads) {
layer_target_avg_bandwidth_, bits_in_buffer_model_);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
- number_temporal_layers_, file_datarate_, 0.75, 1.2);
+ number_temporal_layers_, file_datarate_, 0.75, 1.45);
#if CONFIG_VP9_DECODER
// The non-reference frames are expected to be mismatched frames as the
// encoder will avoid loopfilter on these frames.
@@ -1147,20 +1232,21 @@ TEST_P(DatarateOnePassCbrSvcSmallKF, OnePassCbrSvc2SL3TLSmallKf) {
}
VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcSingleBR,
- ::testing::Range(5, 9));
+ ::testing::Range(5, 10));
-VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcMultiBR, ::testing::Range(5, 9),
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcMultiBR, ::testing::Range(5, 10),
::testing::Range(0, 3));
VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcFrameDropMultiBR,
- ::testing::Range(5, 9), ::testing::Range(0, 2),
+ ::testing::Range(5, 10), ::testing::Range(0, 2),
::testing::Range(0, 3));
#if CONFIG_VP9_TEMPORAL_DENOISING
-VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcDenoiser, ::testing::Range(5, 9),
- ::testing::Range(1, 3), ::testing::Range(0, 3));
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcDenoiser,
+ ::testing::Range(5, 10), ::testing::Range(1, 3),
+ ::testing::Range(0, 3));
#endif
-VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcSmallKF, ::testing::Range(5, 9),
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcSmallKF, ::testing::Range(5, 10),
::testing::Range(32, 36));
} // namespace
diff --git a/test/test-data.mk b/test/test-data.mk
index 7ca11bc9c..4be6c66ff 100644
--- a/test/test-data.mk
+++ b/test/test-data.mk
@@ -738,6 +738,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-token-partition.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-token-partition.ivf.res
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-s17661_r01-05_b6-.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-s17661_r01-05_b6-.ivf.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02-v2.webm
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index 3a23ff5db..9cb9d5864 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -856,3 +856,5 @@ fd3020fa6e9ca5966206738654c97dec313b0a95 *invalid-bug-1443.ivf.res
90a8a95e7024f015b87f5483a65036609b3d1b74 *invalid-token-partition.ivf.res
17696cd21e875f1d6e5d418cbf89feab02c8850a *vp90-2-22-svc_1280x720_1.webm
e2f9e1e47a791b4e939a9bdc50bf7a25b3761f77 *vp90-2-22-svc_1280x720_1.webm.md5
+a0fbbbc5dd50fd452096f4455a58c1a8c9f66697 *invalid-vp80-00-comprehensive-s17661_r01-05_b6-.ivf
+a61774cf03fc584bd9f0904fc145253bb8ea6c4c *invalid-vp80-00-comprehensive-s17661_r01-05_b6-.ivf.res
diff --git a/test/test.mk b/test/test.mk
index 3e5739e21..224ac4e8f 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -1,4 +1,6 @@
LIBVPX_TEST_SRCS-yes += acm_random.h
+LIBVPX_TEST_SRCS-yes += bench.h
+LIBVPX_TEST_SRCS-yes += bench.cc
LIBVPX_TEST_SRCS-yes += buffer.h
LIBVPX_TEST_SRCS-yes += clear_system_state.h
LIBVPX_TEST_SRCS-yes += codec_factory.h
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 725821ae6..fce7a1475 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -1533,6 +1533,27 @@ INSTANTIATE_TEST_CASE_P(VSX, SumOfSquaresTest,
INSTANTIATE_TEST_CASE_P(VSX, VpxSseTest,
::testing::Values(SseParams(2, 2,
&vpx_get4x4sse_cs_vsx)));
+INSTANTIATE_TEST_CASE_P(VSX, VpxMseTest,
+ ::testing::Values(MseParams(4, 4, &vpx_mse16x16_vsx),
+ MseParams(4, 3, &vpx_mse16x8_vsx),
+ MseParams(3, 4, &vpx_mse8x16_vsx),
+ MseParams(3, 3, &vpx_mse8x8_vsx)));
+
+INSTANTIATE_TEST_CASE_P(
+ VSX, VpxVarianceTest,
+ ::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_vsx),
+ VarianceParams(6, 5, &vpx_variance64x32_vsx),
+ VarianceParams(5, 6, &vpx_variance32x64_vsx),
+ VarianceParams(5, 5, &vpx_variance32x32_vsx),
+ VarianceParams(5, 4, &vpx_variance32x16_vsx),
+ VarianceParams(4, 5, &vpx_variance16x32_vsx),
+ VarianceParams(4, 4, &vpx_variance16x16_vsx),
+ VarianceParams(4, 3, &vpx_variance16x8_vsx),
+ VarianceParams(3, 4, &vpx_variance8x16_vsx),
+ VarianceParams(3, 3, &vpx_variance8x8_vsx),
+ VarianceParams(3, 2, &vpx_variance8x4_vsx),
+ VarianceParams(2, 3, &vpx_variance4x8_vsx),
+ VarianceParams(2, 2, &vpx_variance4x4_vsx)));
#endif // HAVE_VSX
#if HAVE_MMI
diff --git a/test/vp9_datarate_test.cc b/test/vp9_datarate_test.cc
index c4dbcacbe..a8bcc2a43 100644
--- a/test/vp9_datarate_test.cc
+++ b/test/vp9_datarate_test.cc
@@ -266,7 +266,7 @@ TEST_P(DatarateTestVP9Large, BasicRateTargetingVBRLagZero) {
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75)
<< " The datarate for the file is lower than target by too much!";
- ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.35)
+ ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.36)
<< " The datarate for the file is greater than target by too much!";
}
@@ -294,7 +294,7 @@ TEST_P(DatarateTestVP9Large, BasicRateTargetingVBRLagNonZero) {
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75)
<< " The datarate for the file is lower than target by too much!";
- ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.30)
+ ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.35)
<< " The datarate for the file is greater than target by too much!";
}
@@ -824,16 +824,17 @@ TEST_P(DatarateTestVP9LargeDenoiser, DenoiserOffOn) {
VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large,
::testing::Values(::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime),
- ::testing::Range(2, 9), ::testing::Range(0, 4));
+ ::testing::Range(2, 10), ::testing::Range(0, 4));
VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeOneBR,
::testing::Values(::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime),
- ::testing::Range(2, 9));
+ ::testing::Range(2, 10));
-VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9RealTime, ::testing::Range(5, 9));
+VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9RealTime, ::testing::Range(5, 10));
#if CONFIG_VP9_TEMPORAL_DENOISING
-VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeDenoiser, ::testing::Range(5, 9));
+VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeDenoiser,
+ ::testing::Range(5, 10));
#endif
} // namespace
diff --git a/test/vp9_ethread_test.cc b/test/vp9_ethread_test.cc
index 6b7e51211..44659904f 100644
--- a/test/vp9_ethread_test.cc
+++ b/test/vp9_ethread_test.cc
@@ -409,7 +409,7 @@ INSTANTIATE_TEST_CASE_P(
::testing::Values(::libvpx_test::kTwoPassGood,
::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime),
- ::testing::Range(3, 9), // cpu_used
+ ::testing::Range(3, 10), // cpu_used
::testing::Range(0, 3), // tile_columns
::testing::Range(2, 5))); // threads
diff --git a/test/vp9_quantize_test.cc b/test/vp9_quantize_test.cc
index f0bbedbfa..c39267faa 100644
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -18,6 +18,7 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
+#include "test/bench.h"
#include "test/buffer.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
@@ -67,10 +68,13 @@ void QuantFPWrapper(const tran_low_t *coeff, intptr_t count, int skip_block,
scan, iscan);
}
-class VP9QuantizeBase {
+class VP9QuantizeBase : public AbstractBench {
public:
VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
- : bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp) {
+ : bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp),
+ coeff(Buffer<tran_low_t>(max_size_, max_size_, 0, 16)),
+ qcoeff(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)),
+ dqcoeff(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)) {
max_value_ = (1 << bit_depth_) - 1;
zbin_ptr_ =
reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
@@ -86,6 +90,9 @@ class VP9QuantizeBase {
vpx_memalign(16, 8 * sizeof(*quant_shift_ptr_)));
dequant_ptr_ = reinterpret_cast<int16_t *>(
vpx_memalign(16, 8 * sizeof(*dequant_ptr_)));
+
+ r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
+ q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
}
~VP9QuantizeBase() {
@@ -118,6 +125,15 @@ class VP9QuantizeBase {
int max_value_;
const int max_size_;
const bool is_fp_;
+ Buffer<tran_low_t> coeff;
+ Buffer<tran_low_t> qcoeff;
+ Buffer<tran_low_t> dqcoeff;
+ int16_t *r_ptr;
+ int16_t *q_ptr;
+ int count;
+ int skip_block;
+ const scan_order *scan;
+ uint16_t eob;
};
class VP9QuantizeTest : public VP9QuantizeBase,
@@ -128,10 +144,17 @@ class VP9QuantizeTest : public VP9QuantizeBase,
quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {}
protected:
+ void run();
const QuantizeFunc quantize_op_;
const QuantizeFunc ref_quantize_op_;
};
+void VP9QuantizeTest::run() {
+ quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
+ quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
+ dequant_ptr_, &eob, scan->scan, scan->iscan);
+}
+
// This quantizer compares the AC coefficients to the quantization step size to
// determine if further multiplication operations are needed.
// Based on vp9_quantize_fp_sse2().
@@ -269,11 +292,8 @@ void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
TEST_P(VP9QuantizeTest, OperationCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
- Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
ASSERT_TRUE(coeff.Init());
- Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
ASSERT_TRUE(qcoeff.Init());
- Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
ASSERT_TRUE(dqcoeff.Init());
Buffer<tran_low_t> ref_qcoeff =
Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
@@ -281,7 +301,8 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
Buffer<tran_low_t> ref_dqcoeff =
Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
ASSERT_TRUE(ref_dqcoeff.Init());
- uint16_t eob, ref_eob;
+ uint16_t ref_eob = 0;
+ eob = 0;
for (int i = 0; i < number_of_iterations; ++i) {
// Test skip block for the first three iterations to catch all the different
@@ -294,23 +315,21 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
sz = TX_32X32;
}
const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
- const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
- const int count = (4 << sz) * (4 << sz);
+ scan = &vp9_scan_orders[sz][tx_type];
+ count = (4 << sz) * (4 << sz);
coeff.Set(&rnd, -max_value_, max_value_);
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
quant_fp_ptr_);
- int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
- int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
- scan_order->scan, scan_order->iscan);
+ scan->scan, scan->iscan);
ASM_REGISTER_STATE_CHECK(quantize_op_(
coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
- dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
+ dequant_ptr_, &eob, scan->scan, scan->iscan));
EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
@@ -328,11 +347,8 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
TEST_P(VP9QuantizeTest, EOBCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
- Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
ASSERT_TRUE(coeff.Init());
- Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
ASSERT_TRUE(qcoeff.Init());
- Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
ASSERT_TRUE(dqcoeff.Init());
Buffer<tran_low_t> ref_qcoeff =
Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
@@ -340,10 +356,12 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
Buffer<tran_low_t> ref_dqcoeff =
Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
ASSERT_TRUE(ref_dqcoeff.Init());
- uint16_t eob, ref_eob;
+ uint16_t ref_eob = 0;
+ eob = 0;
+ const uint32_t max_index = max_size_ * max_size_ - 1;
for (int i = 0; i < number_of_iterations; ++i) {
- const int skip_block = 0;
+ skip_block = 0;
TX_SIZE sz;
if (max_size_ == 16) {
sz = static_cast<TX_SIZE>(i % 3); // TX_4X4, TX_8X8 TX_16X16
@@ -351,28 +369,26 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
sz = TX_32X32;
}
const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
- const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
- int count = (4 << sz) * (4 << sz);
+ scan = &vp9_scan_orders[sz][tx_type];
+ count = (4 << sz) * (4 << sz);
// Two random entries
coeff.Set(0);
- coeff.TopLeftPixel()[rnd(count)] =
+ coeff.TopLeftPixel()[rnd.RandRange(count) & max_index] =
static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
- coeff.TopLeftPixel()[rnd(count)] =
+ coeff.TopLeftPixel()[rnd.RandRange(count) & max_index] =
static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
quant_fp_ptr_);
- int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
- int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
- scan_order->scan, scan_order->iscan);
+ scan->scan, scan->iscan);
ASM_REGISTER_STATE_CHECK(quantize_op_(
coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
- dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
+ dequant_ptr_, &eob, scan->scan, scan->iscan));
EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
@@ -390,13 +406,9 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
TEST_P(VP9QuantizeTest, DISABLED_Speed) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
- Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
ASSERT_TRUE(coeff.Init());
- Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
ASSERT_TRUE(qcoeff.Init());
- Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
ASSERT_TRUE(dqcoeff.Init());
- uint16_t eob;
TX_SIZE starting_sz, ending_sz;
if (max_size_ == 16) {
@@ -410,18 +422,16 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) {
// zbin > coeff, zbin < coeff.
for (int i = 0; i < 2; ++i) {
- const int skip_block = 0;
+ skip_block = 0;
// TX_TYPE defines the scan order. That is not relevant to the speed test.
// Pick the first one.
const TX_TYPE tx_type = DCT_DCT;
- const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
- const int count = (4 << sz) * (4 << sz);
+ count = (4 << sz) * (4 << sz);
+ scan = &vp9_scan_orders[sz][tx_type];
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
quant_fp_ptr_);
- int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
- int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
if (i == 0) {
// When |coeff values| are less than zbin the results are 0.
@@ -438,22 +448,15 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
coeff.Set(&rnd, -500, 500);
}
- vpx_usec_timer timer;
- vpx_usec_timer_start(&timer);
- for (int j = 0; j < 100000000 / count; ++j) {
- quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
- q_ptr, quant_shift_ptr_, qcoeff.TopLeftPixel(),
- dqcoeff.TopLeftPixel(), dequant_ptr_, &eob,
- scan_order->scan, scan_order->iscan);
- }
- vpx_usec_timer_mark(&timer);
- const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
- if (i == 0) printf("Bypass calculations.\n");
- if (i == 1) printf("Full calculations.\n");
- printf("Quantize %dx%d time: %5d ms\n", 4 << sz, 4 << sz,
- elapsed_time / 1000);
+ runNTimes(10000000 / count);
+ const char *type =
+ (i == 0) ? "Bypass calculations " : "Full calculations ";
+ char block_size[16];
+ snprintf(block_size, sizeof(block_size), "%dx%d", 4 << sz, 4 << sz);
+ char title[100];
+ snprintf(title, sizeof(title), "%25s %8s ", type, block_size);
+ printMedian(title);
}
- printf("\n");
}
}
@@ -557,6 +560,16 @@ INSTANTIATE_TEST_CASE_P(
VPX_BITS_8, 32, true)));
#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
+#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(VSX, VP9QuantizeTest,
+ ::testing::Values(make_tuple(&vpx_quantize_b_vsx,
+ &vpx_quantize_b_c,
+ VPX_BITS_8, 16, false),
+ make_tuple(&vpx_quantize_b_32x32_vsx,
+ &vpx_quantize_b_32x32_c,
+ VPX_BITS_8, 32, false)));
+#endif // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
+
// Only useful to compare "Speed" test results.
INSTANTIATE_TEST_CASE_P(
DISABLED_C, VP9QuantizeTest,
@@ -575,10 +588,3 @@ INSTANTIATE_TEST_CASE_P(
&QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
true)));
} // namespace
-
-#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(VSX, VP9QuantizeTest,
- ::testing::Values(make_tuple(&vpx_quantize_b_vsx,
- &vpx_quantize_b_c,
- VPX_BITS_8, 16, false)));
-#endif // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
diff --git a/third_party/libwebm/Android.mk b/third_party/libwebm/Android.mk
index 8149a083f..b46ba101d 100644
--- a/third_party/libwebm/Android.mk
+++ b/third_party/libwebm/Android.mk
@@ -3,7 +3,7 @@ LOCAL_PATH:= $(call my-dir)
include $(CLEAR_VARS)
LOCAL_MODULE:= libwebm
LOCAL_CPPFLAGS:=-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS
-LOCAL_CPPFLAGS+=-D__STDC_LIMIT_MACROS -Wno-extern-c-compat
+LOCAL_CPPFLAGS+=-D__STDC_LIMIT_MACROS -std=c++11
LOCAL_C_INCLUDES:= $(LOCAL_PATH)
LOCAL_EXPORT_C_INCLUDES:= $(LOCAL_PATH)
diff --git a/third_party/libwebm/README.libvpx b/third_party/libwebm/README.libvpx
index ebb5ff2f4..6d8b0b4cc 100644
--- a/third_party/libwebm/README.libvpx
+++ b/third_party/libwebm/README.libvpx
@@ -1,5 +1,5 @@
URL: https://chromium.googlesource.com/webm/libwebm
-Version: 0ae757087f5e6eb01dfea16cc09205b2425cfb74
+Version: af81f26025b7435fa9a14ad07c58b44cf9280430
License: BSD
License File: LICENSE.txt
@@ -7,4 +7,14 @@ Description:
libwebm is used to handle WebM container I/O.
Local Changes:
-* <none>
+Only keep:
+ - Android.mk
+ - AUTHORS.TXT
+ - common/
+ file_util.cc/h
+ hdr_util.cc/h
+ webmids.h
+ - LICENSE.TXT
+ - mkvmuxer/
+ - mkvparser/
+ - PATENTS.TXT
diff --git a/third_party/libwebm/common/file_util.cc b/third_party/libwebm/common/file_util.cc
index 6dab146dd..618ffc087 100644
--- a/third_party/libwebm/common/file_util.cc
+++ b/third_party/libwebm/common/file_util.cc
@@ -17,6 +17,7 @@
#include <cstring>
#include <fstream>
#include <ios>
+#include <string>
namespace libwebm {
@@ -41,7 +42,12 @@ std::string GetTempFileName() {
return temp_file_name;
#else
char tmp_file_name[_MAX_PATH];
+#if defined _MSC_VER || defined MINGW_HAS_SECURE_API
errno_t err = tmpnam_s(tmp_file_name);
+#else
+ char* fname_pointer = tmpnam(tmp_file_name);
+ errno_t err = (fname_pointer == &tmp_file_name[0]) ? 0 : -1;
+#endif
if (err == 0) {
return std::string(tmp_file_name);
}
@@ -65,6 +71,15 @@ uint64_t GetFileSize(const std::string& file_name) {
return file_size;
}
+bool GetFileContents(const std::string& file_name, std::string* contents) {
+ std::ifstream file(file_name.c_str());
+ *contents = std::string(static_cast<size_t>(GetFileSize(file_name)), 0);
+ if (file.good() && contents->size()) {
+ file.read(&(*contents)[0], contents->size());
+ }
+ return !file.fail();
+}
+
TempFileDeleter::TempFileDeleter() { file_name_ = GetTempFileName(); }
TempFileDeleter::~TempFileDeleter() {
diff --git a/third_party/libwebm/common/file_util.h b/third_party/libwebm/common/file_util.h
index 0e71eac11..a87373464 100644
--- a/third_party/libwebm/common/file_util.h
+++ b/third_party/libwebm/common/file_util.h
@@ -22,6 +22,9 @@ std::string GetTempFileName();
// Returns size of file specified by |file_name|, or 0 upon failure.
uint64_t GetFileSize(const std::string& file_name);
+// Gets the contents of |file_name| as a string. Returns false on error.
+bool GetFileContents(const std::string& file_name, std::string* contents);
+
// Manages life of temporary file specified at time of construction. Deletes
// file upon destruction.
class TempFileDeleter {
@@ -38,4 +41,4 @@ class TempFileDeleter {
} // namespace libwebm
-#endif // LIBWEBM_COMMON_FILE_UTIL_H_ \ No newline at end of file
+#endif // LIBWEBM_COMMON_FILE_UTIL_H_
diff --git a/third_party/libwebm/common/hdr_util.cc b/third_party/libwebm/common/hdr_util.cc
index e1618ce75..916f7170b 100644
--- a/third_party/libwebm/common/hdr_util.cc
+++ b/third_party/libwebm/common/hdr_util.cc
@@ -36,10 +36,10 @@ bool CopyMasteringMetadata(const mkvparser::MasteringMetadata& parser_mm,
if (MasteringMetadataValuePresent(parser_mm.luminance_min))
muxer_mm->set_luminance_min(parser_mm.luminance_min);
- PrimaryChromaticityPtr r_ptr(NULL);
- PrimaryChromaticityPtr g_ptr(NULL);
- PrimaryChromaticityPtr b_ptr(NULL);
- PrimaryChromaticityPtr wp_ptr(NULL);
+ PrimaryChromaticityPtr r_ptr(nullptr);
+ PrimaryChromaticityPtr g_ptr(nullptr);
+ PrimaryChromaticityPtr b_ptr(nullptr);
+ PrimaryChromaticityPtr wp_ptr(nullptr);
if (parser_mm.r) {
if (!CopyPrimaryChromaticity(*parser_mm.r, &r_ptr))
diff --git a/third_party/libwebm/common/hdr_util.h b/third_party/libwebm/common/hdr_util.h
index 3ef5388fd..78e2eeb70 100644
--- a/third_party/libwebm/common/hdr_util.h
+++ b/third_party/libwebm/common/hdr_util.h
@@ -47,15 +47,7 @@ struct Vp9CodecFeatures {
int chroma_subsampling;
};
-// disable deprecation warnings for auto_ptr
-#if defined(__GNUC__) && __GNUC__ >= 5
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-#endif
-typedef std::auto_ptr<mkvmuxer::PrimaryChromaticity> PrimaryChromaticityPtr;
-#if defined(__GNUC__) && __GNUC__ >= 5
-#pragma GCC diagnostic pop
-#endif
+typedef std::unique_ptr<mkvmuxer::PrimaryChromaticity> PrimaryChromaticityPtr;
bool CopyPrimaryChromaticity(const mkvparser::PrimaryChromaticity& parser_pc,
PrimaryChromaticityPtr* muxer_pc);
diff --git a/third_party/libwebm/mkvmuxer/mkvmuxer.cc b/third_party/libwebm/mkvmuxer/mkvmuxer.cc
index 15b9a908d..481771db2 100644
--- a/third_party/libwebm/mkvmuxer/mkvmuxer.cc
+++ b/third_party/libwebm/mkvmuxer/mkvmuxer.cc
@@ -8,6 +8,8 @@
#include "mkvmuxer/mkvmuxer.h"
+#include <stdint.h>
+
#include <cfloat>
#include <climits>
#include <cstdio>
@@ -24,11 +26,6 @@
#include "mkvmuxer/mkvwriter.h"
#include "mkvparser/mkvparser.h"
-// disable deprecation warnings for auto_ptr
-#if defined(__GNUC__) && __GNUC__ >= 5
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-#endif
-
namespace mkvmuxer {
const float PrimaryChromaticity::kChromaticityMin = 0.0f;
@@ -72,7 +69,7 @@ bool StrCpy(const char* src, char** dst_ptr) {
return true;
}
-typedef std::auto_ptr<PrimaryChromaticity> PrimaryChromaticityPtr;
+typedef std::unique_ptr<PrimaryChromaticity> PrimaryChromaticityPtr;
bool CopyChromaticity(const PrimaryChromaticity* src,
PrimaryChromaticityPtr* dst) {
if (!dst)
@@ -1057,22 +1054,22 @@ bool MasteringMetadata::Write(IMkvWriter* writer) const {
bool MasteringMetadata::SetChromaticity(
const PrimaryChromaticity* r, const PrimaryChromaticity* g,
const PrimaryChromaticity* b, const PrimaryChromaticity* white_point) {
- PrimaryChromaticityPtr r_ptr(NULL);
+ PrimaryChromaticityPtr r_ptr(nullptr);
if (r) {
if (!CopyChromaticity(r, &r_ptr))
return false;
}
- PrimaryChromaticityPtr g_ptr(NULL);
+ PrimaryChromaticityPtr g_ptr(nullptr);
if (g) {
if (!CopyChromaticity(g, &g_ptr))
return false;
}
- PrimaryChromaticityPtr b_ptr(NULL);
+ PrimaryChromaticityPtr b_ptr(nullptr);
if (b) {
if (!CopyChromaticity(b, &b_ptr))
return false;
}
- PrimaryChromaticityPtr wp_ptr(NULL);
+ PrimaryChromaticityPtr wp_ptr(nullptr);
if (white_point) {
if (!CopyChromaticity(white_point, &wp_ptr))
return false;
@@ -1238,7 +1235,7 @@ bool Colour::Write(IMkvWriter* writer) const {
}
bool Colour::SetMasteringMetadata(const MasteringMetadata& mastering_metadata) {
- std::auto_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata());
+ std::unique_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata());
if (!mm_ptr.get())
return false;
@@ -1546,7 +1543,7 @@ bool VideoTrack::Write(IMkvWriter* writer) const {
}
bool VideoTrack::SetColour(const Colour& colour) {
- std::auto_ptr<Colour> colour_ptr(new Colour());
+ std::unique_ptr<Colour> colour_ptr(new Colour());
if (!colour_ptr.get())
return false;
@@ -1574,7 +1571,7 @@ bool VideoTrack::SetColour(const Colour& colour) {
}
bool VideoTrack::SetProjection(const Projection& projection) {
- std::auto_ptr<Projection> projection_ptr(new Projection());
+ std::unique_ptr<Projection> projection_ptr(new Projection());
if (!projection_ptr.get())
return false;
@@ -2666,7 +2663,7 @@ bool Cluster::QueueOrWriteFrame(const Frame* const frame) {
// and write it if it is okay to do so (i.e.) no other track has an held back
// frame with timestamp <= the timestamp of the frame in question.
std::vector<std::list<Frame*>::iterator> frames_to_erase;
- for (std::list<Frame *>::iterator
+ for (std::list<Frame*>::iterator
current_track_iterator = stored_frames_[track_number].begin(),
end = --stored_frames_[track_number].end();
current_track_iterator != end; ++current_track_iterator) {
diff --git a/third_party/libwebm/mkvparser/mkvparser.cc b/third_party/libwebm/mkvparser/mkvparser.cc
index 37f230d0a..e7b76f7da 100644
--- a/third_party/libwebm/mkvparser/mkvparser.cc
+++ b/third_party/libwebm/mkvparser/mkvparser.cc
@@ -22,12 +22,8 @@
#include "common/webmids.h"
-// disable deprecation warnings for auto_ptr
-#if defined(__GNUC__) && __GNUC__ >= 5
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-#endif
-
namespace mkvparser {
+const long long kStringElementSizeLimit = 20 * 1000 * 1000;
const float MasteringMetadata::kValueNotPresent = FLT_MAX;
const long long Colour::kValueNotPresent = LLONG_MAX;
const float Projection::kValueNotPresent = FLT_MAX;
@@ -330,7 +326,7 @@ long UnserializeString(IMkvReader* pReader, long long pos, long long size,
delete[] str;
str = NULL;
- if (size >= LONG_MAX || size < 0)
+ if (size >= LONG_MAX || size < 0 || size > kStringElementSizeLimit)
return E_FILE_FORMAT_INVALID;
// +1 for '\0' terminator
@@ -5015,7 +5011,7 @@ bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start,
if (!reader || *mm)
return false;
- std::auto_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata());
+ std::unique_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata());
if (!mm_ptr.get())
return false;
@@ -5035,6 +5031,10 @@ bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start,
double value = 0;
const long long value_parse_status =
UnserializeFloat(reader, read_pos, child_size, value);
+ if (value < -FLT_MAX || value > FLT_MAX ||
+ (value > 0.0 && value < FLT_MIN)) {
+ return false;
+ }
mm_ptr->luminance_max = static_cast<float>(value);
if (value_parse_status < 0 || mm_ptr->luminance_max < 0.0 ||
mm_ptr->luminance_max > 9999.99) {
@@ -5044,6 +5044,10 @@ bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start,
double value = 0;
const long long value_parse_status =
UnserializeFloat(reader, read_pos, child_size, value);
+ if (value < -FLT_MAX || value > FLT_MAX ||
+ (value > 0.0 && value < FLT_MIN)) {
+ return false;
+ }
mm_ptr->luminance_min = static_cast<float>(value);
if (value_parse_status < 0 || mm_ptr->luminance_min < 0.0 ||
mm_ptr->luminance_min > 999.9999) {
@@ -5096,7 +5100,7 @@ bool Colour::Parse(IMkvReader* reader, long long colour_start,
if (!reader || *colour)
return false;
- std::auto_ptr<Colour> colour_ptr(new Colour());
+ std::unique_ptr<Colour> colour_ptr(new Colour());
if (!colour_ptr.get())
return false;
@@ -5194,7 +5198,7 @@ bool Projection::Parse(IMkvReader* reader, long long start, long long size,
if (!reader || *projection)
return false;
- std::auto_ptr<Projection> projection_ptr(new Projection());
+ std::unique_ptr<Projection> projection_ptr(new Projection());
if (!projection_ptr.get())
return false;
@@ -7903,6 +7907,10 @@ long Block::Parse(const Cluster* pCluster) {
return E_FILE_FORMAT_INVALID;
curr.len = static_cast<long>(frame_size);
+ // Check if size + curr.len could overflow.
+ if (size > LLONG_MAX - curr.len) {
+ return E_FILE_FORMAT_INVALID;
+ }
size += curr.len; // contribution of this frame
--frame_count;
@@ -7964,6 +7972,11 @@ long long Block::GetTimeCode(const Cluster* pCluster) const {
const long long tc0 = pCluster->GetTimeCode();
assert(tc0 >= 0);
+ // Check if tc0 + m_timecode would overflow.
+ if (tc0 < 0 || LLONG_MAX - tc0 < m_timecode) {
+ return -1;
+ }
+
const long long tc = tc0 + m_timecode;
return tc; // unscaled timecode units
@@ -7981,6 +7994,10 @@ long long Block::GetTime(const Cluster* pCluster) const {
const long long scale = pInfo->GetTimeCodeScale();
assert(scale >= 1);
+ // Check if tc * scale could overflow.
+ if (tc != 0 && scale > LLONG_MAX / tc) {
+ return -1;
+ }
const long long ns = tc * scale;
return ns;
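
All three mkvparser guards above use the same idiom: compare against LLONG_MAX before doing the arithmetic, since signed overflow is undefined behavior in C/C++. The two checks in isolation, assuming non-negative operands as in the parser:

    #include <limits.h>

    /* Returns 1 if a + b would overflow long long; a and b assumed >= 0. */
    static int add_would_overflow(long long a, long long b) {
      return b > LLONG_MAX - a;
    }

    /* Returns 1 if a * b would overflow long long; a and b assumed >= 0. */
    static int mul_would_overflow(long long a, long long b) {
      return a != 0 && b > LLONG_MAX / a;
    }

These mirror the size > LLONG_MAX - curr.len and tc != 0 && scale > LLONG_MAX / tc tests in the hunks above.
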
diff --git a/tools/tiny_ssim.c b/tools/tiny_ssim.c
index 5e8ca02b4..6c1d784d3 100644
--- a/tools/tiny_ssim.c
+++ b/tools/tiny_ssim.c
@@ -91,6 +91,7 @@ typedef struct input_file {
int w;
int h;
int bit_depth;
+ int frame_size;
} input_file_t;
// Open a file and determine if its y4m or raw. If y4m get the header.
@@ -119,10 +120,12 @@ static int open_input_file(const char *file_name, input_file_t *input, int w,
fseek(input->file, 0, SEEK_SET);
input->w = w;
input->h = h;
- if (bit_depth < 9)
- input->buf = malloc(w * h * 3 / 2);
- else
- input->buf = malloc(w * h * 3);
+    // Handle odd frame sizes: chroma planes are ceil(w/2) x ceil(h/2).
+    input->frame_size = w * h + ((w + 1) / 2) * ((h + 1) / 2) * 2;
+    if (bit_depth > 8) {
+      input->frame_size *= 2;
+    }
+    input->buf = malloc(input->frame_size);
break;
}
}
@@ -150,15 +153,15 @@ static size_t read_input_file(input_file_t *in, unsigned char **y,
break;
case RAW_YUV:
if (bd < 9) {
- r1 = fread(in->buf, in->w * in->h * 3 / 2, 1, in->file);
+ r1 = fread(in->buf, in->frame_size, 1, in->file);
*y = in->buf;
*u = in->buf + in->w * in->h;
- *v = in->buf + 5 * in->w * in->h / 4;
+      *v = *u + ((in->w + 1) / 2) * ((in->h + 1) / 2);
} else {
- r1 = fread(in->buf, in->w * in->h * 3, 1, in->file);
+ r1 = fread(in->buf, in->frame_size, 1, in->file);
*y = in->buf;
- *u = in->buf + in->w * in->h / 2;
- *v = *u + in->w * in->h / 2;
+ *u = in->buf + (in->w * in->h) * 2;
+        *v = *u + 2 * ((in->w + 1) / 2) * ((in->h + 1) / 2);
}
break;
}
@@ -325,7 +328,8 @@ static double highbd_ssim2(const uint8_t *img1, const uint8_t *img2,
// (n*sum(xi*xi)-sum(xi)*sum(xi)+n*sum(yi*yi)-sum(yi)*sum(yi)+n*n*c2))
//
// Replace c1 with n*n * c1 for the final step that leads to this code:
-// The final step scales by 12 bits so we don't lose precision in the constants.
+// The final step scales by 12 bits so we don't lose precision in the
+// constants.
static double ssimv_similarity(const Ssimv *sv, int64_t n) {
// Scale the constants by number of pixels.
@@ -628,9 +632,10 @@ int main(int argc, char *argv[]) {
goto clean_up;
}
- // Number of frames to skip from file1.yuv for every frame used. Normal values
- // 0, 1 and 3 correspond to TL2, TL1 and TL0 respectively for a 3TL encoding
- // in mode 10. 7 would be reasonable for comparing TL0 of a 4-layer encoding.
+ // Number of frames to skip from file1.yuv for every frame used. Normal
+ // values 0, 1 and 3 correspond to TL2, TL1 and TL0 respectively for a 3TL
+ // encoding in mode 10. 7 would be reasonable for comparing TL0 of a 4-layer
+ // encoding.
if (argc > 4) {
sscanf(argv[4], "%d", &tl_skip);
if (argc > 5) {
@@ -644,12 +649,6 @@ int main(int argc, char *argv[]) {
}
}
- if (w & 1 || h & 1) {
- fprintf(stderr, "Invalid size %dx%d\n", w, h);
- return_value = 1;
- goto clean_up;
- }
-
while (1) {
size_t r1, r2;
unsigned char *y[2], *u[2], *v[2];
@@ -703,8 +702,10 @@ int main(int argc, char *argv[]) {
psnrv = realloc(psnrv, allocated_frames * sizeof(*psnrv));
}
psnr_and_ssim(ssimy[n_frames], psnry[n_frames], y[0], y[1], w, h);
- psnr_and_ssim(ssimu[n_frames], psnru[n_frames], u[0], u[1], w / 2, h / 2);
- psnr_and_ssim(ssimv[n_frames], psnrv[n_frames], v[0], v[1], w / 2, h / 2);
+ psnr_and_ssim(ssimu[n_frames], psnru[n_frames], u[0], u[1], (w + 1) / 2,
+ (h + 1) / 2);
+ psnr_and_ssim(ssimv[n_frames], psnrv[n_frames], v[0], v[1], (w + 1) / 2,
+ (h + 1) / 2);
n_frames++;
}
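
The tiny_ssim changes size each plane separately, rounding the chroma dimensions up so that odd widths and heights are handled. A sketch of the 8-bit I420 layout those offsets assume (struct and function names are illustrative):

    #include <stddef.h>

    typedef struct {
      size_t y_size, uv_size, frame_size;
      size_t u_offset, v_offset;
    } I420Layout;

    /* Plane sizes and offsets for 8-bit I420 with possibly odd w and h;
     * each chroma plane is ceil(w/2) x ceil(h/2). */
    static I420Layout i420_layout(int w, int h) {
      I420Layout l;
      const size_t cw = (size_t)(w + 1) / 2; /* chroma width */
      const size_t ch = (size_t)(h + 1) / 2; /* chroma height */
      l.y_size = (size_t)w * h;
      l.uv_size = cw * ch;
      l.frame_size = l.y_size + 2 * l.uv_size;
      l.u_offset = l.y_size;
      l.v_offset = l.y_size + l.uv_size;
      return l;
    }

For high bit depth every sample is two bytes, which is why the code above simply doubles frame_size and the plane offsets.
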
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index d67ee8a57..8c292d616 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -65,7 +65,7 @@ void vp8_deblock(VP8_COMMON *cm, YV12_BUFFER_CONFIG *source,
double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
int ppl = (int)(level + .5);
- const MODE_INFO *mode_info_context = cm->show_frame_mi;
+ const MODE_INFO *mode_info_context = cm->mi;
int mbr, mbc;
/* The pixel thresholds are adjusted according to if or not the macroblock
diff --git a/vp8/decoder/decodeframe.c b/vp8/decoder/decodeframe.c
index 8bfd3cea3..0d54a9442 100644
--- a/vp8/decoder/decodeframe.c
+++ b/vp8/decoder/decodeframe.c
@@ -686,6 +686,12 @@ static unsigned int read_available_partition_size(
const unsigned char *partition_size_ptr = token_part_sizes + i * 3;
unsigned int partition_size = 0;
ptrdiff_t bytes_left = fragment_end - fragment_start;
+ if (bytes_left < 0) {
+ vpx_internal_error(
+ &pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Truncated packet or corrupt partition. No bytes left %d.",
+ (int)bytes_left);
+ }
/* Calculate the length of this partition. The last partition
* size is implicit. If the partition size can't be read, then
* either use the remaining data in the buffer (for EC mode)
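
The new bytes_left check errors out when the fragment pointers have crossed, before any partition size is read. A hedged sketch of the same defensive read for VP8's 3-byte little-endian partition sizes (the helper name is hypothetical):

    #include <stddef.h>
    #include <stdint.h>

    /* Read a 3-byte little-endian partition size, or return -1 when fewer
     * than 3 bytes remain between cursor and end. */
    static int32_t read_partition_size3(const uint8_t *cursor,
                                        const uint8_t *end) {
      const ptrdiff_t bytes_left = end - cursor;
      if (bytes_left < 3) return -1;
      return (int32_t)cursor[0] | ((int32_t)cursor[1] << 8) |
             ((int32_t)cursor[2] << 16);
    }
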
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index b47840795..3b4fee7cf 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -428,7 +428,9 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
double weight_segment = 0;
int thresh_low_motion = (cm->width < 720) ? 55 : 20;
cr->apply_cyclic_refresh = 1;
- if (cm->frame_type == KEY_FRAME || cpi->svc.temporal_layer_id > 0 ||
+ // TODO(jianj): Look into issue of cyclic refresh with high bitdepth.
+ if (cm->bit_depth > 8 || cm->frame_type == KEY_FRAME ||
+ cpi->svc.temporal_layer_id > 0 ||
(cpi->use_svc &&
cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) ||
(!cpi->use_svc && rc->avg_frame_low_motion < thresh_low_motion &&
@@ -457,6 +459,15 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
cr->rate_boost_fac = 13;
}
}
+  // For screen-content: keep rate_ratio_qdelta at 2.0 (segment#1 boost) and
+  // percent_refresh (refresh rate) at 10, but reduce the rate boost for
+  // segment#2 (rate_boost_fac = 10 disables segment#2).
+ // TODO(marpan): Consider increasing refresh rate after slide change.
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) {
+ cr->percent_refresh = 10;
+ cr->rate_ratio_qdelta = 2.0;
+ cr->rate_boost_fac = 10;
+ }
// Adjust some parameters for low resolutions.
if (cm->width <= 352 && cm->height <= 288) {
if (rc->avg_frame_bandwidth < 3000) {
@@ -587,3 +598,12 @@ void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) {
cpi->refresh_golden_frame = 1;
cpi->refresh_alt_ref_frame = 1;
}
+
+void vp9_cyclic_refresh_limit_q(const VP9_COMP *cpi, int *q) {
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+  // For now, apply a hard limit to the frame-level decrease in q if cyclic
+  // refresh is active (percent_refresh > 0).
+ if (cr->percent_refresh > 0 && cpi->rc.q_1_frame - *q > 8) {
+ *q = cpi->rc.q_1_frame - 8;
+ }
+}
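
vp9_cyclic_refresh_limit_q caps how far q may fall below the previous frame's q while refresh is active, which keeps the segment deltas from compounding a large frame-level drop. The clamp in isolation (the helper name is illustrative; the constant mirrors the 8 used above):

    /* Allow q to decrease by at most max_decrease relative to the previous
     * frame's q. */
    static int limit_q_decrease(int q, int q_prev_frame, int max_decrease) {
      if (q_prev_frame - q > max_decrease) return q_prev_frame - max_decrease;
      return q;
    }
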
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.h b/vp9/encoder/vp9_aq_cyclicrefresh.h
index 77fa67c9e..f59f193f6 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.h
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.h
@@ -139,6 +139,8 @@ static INLINE int cyclic_refresh_segment_id(int segment_id) {
return CR_SEGMENT_ID_BASE;
}
+void vp9_cyclic_refresh_limit_q(const struct VP9_COMP *cpi, int *q);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index c96dc3fbd..0b3eef7b3 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -556,6 +556,7 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q,
} else {
thresholds[1] = (5 * threshold_base) >> 1;
}
+ if (cpi->sf.disable_16x16part_nonkey) thresholds[2] = INT64_MAX;
}
}
@@ -4877,6 +4878,9 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
for (i = 0; i < BLOCK_SIZES; ++i) {
for (j = 0; j < MAX_MODES; ++j) {
tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT;
+#if CONFIG_CONSISTENT_RECODE
+ tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT;
+#endif
tile_data->mode_map[i][j] = j;
}
}
@@ -5001,7 +5005,9 @@ static void encode_frame_internal(VP9_COMP *cpi) {
x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4;
#endif // CONFIG_VP9_HIGHBITDEPTH
x->inv_txfm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
-
+#if CONFIG_CONSISTENT_RECODE
+ x->optimize = sf->optimize_coefficients == 1 && cpi->oxcf.pass != 1;
+#endif
if (xd->lossless) x->optimize = 0;
cm->tx_mode = select_tx_mode(cpi, xd);
@@ -5126,9 +5132,48 @@ static int compute_frame_aq_offset(struct VP9_COMP *cpi) {
return sum_delta / (cm->mi_rows * cm->mi_cols);
}
+#if CONFIG_CONSISTENT_RECODE
+static void restore_encode_params(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ const int tile_cols = 1 << cm->log2_tile_cols;
+ const int tile_rows = 1 << cm->log2_tile_rows;
+ int tile_col, tile_row;
+ int i, j;
+ RD_OPT *rd_opt = &cpi->rd;
+ for (i = 0; i < MAX_REF_FRAMES; i++) {
+ for (j = 0; j < REFERENCE_MODES; j++)
+ rd_opt->prediction_type_threshes[i][j] =
+ rd_opt->prediction_type_threshes_prev[i][j];
+
+ for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++)
+ rd_opt->filter_threshes[i][j] = rd_opt->filter_threshes_prev[i][j];
+ }
+
+ if (cpi->tile_data != NULL) {
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row)
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+ TileDataEnc *tile_data =
+ &cpi->tile_data[tile_row * tile_cols + tile_col];
+ for (i = 0; i < BLOCK_SIZES; ++i) {
+ for (j = 0; j < MAX_MODES; ++j) {
+ tile_data->thresh_freq_fact[i][j] =
+ tile_data->thresh_freq_fact_prev[i][j];
+ }
+ }
+ }
+ }
+
+ cm->interp_filter = cpi->sf.default_interp_filter;
+}
+#endif
+
void vp9_encode_frame(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
+#if CONFIG_CONSISTENT_RECODE
+ restore_encode_params(cpi);
+#endif
+
// In the longer term the encoder should be generalized to match the
// decoder such that we allow compound where one of the 3 buffers has a
// different sign bias and that buffer is then the fixed ref. However, this
@@ -5404,7 +5449,11 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])];
if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize);
- if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0)
+ if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0 &&
+ (!cpi->use_svc ||
+ (cpi->use_svc &&
+ !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
+ cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)))
update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize);
}
}
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 3384de7ea..fca8f331d 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -3024,23 +3024,28 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
SVC *const svc = &cpi->svc;
if (cm->frame_type == KEY_FRAME) {
int i;
- svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe;
- svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe;
- svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe;
// On key frame update all reference frame slots.
for (i = 0; i < REF_FRAMES; i++) {
+ svc->fb_idx_spatial_layer_id[i] = svc->spatial_layer_id;
+ svc->fb_idx_temporal_layer_id[i] = svc->temporal_layer_id;
// LAST/GOLDEN/ALTREF is already updated above.
if (i != cpi->lst_fb_idx && i != cpi->gld_fb_idx &&
i != cpi->alt_fb_idx)
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[i], cm->new_fb_idx);
}
} else {
- if (cpi->refresh_last_frame)
- svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe;
- if (cpi->refresh_golden_frame)
- svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe;
- if (cpi->refresh_alt_ref_frame)
- svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe;
+ if (cpi->refresh_last_frame) {
+ svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] = svc->spatial_layer_id;
+ svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] = svc->temporal_layer_id;
+ }
+ if (cpi->refresh_golden_frame) {
+ svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] = svc->spatial_layer_id;
+ svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] = svc->temporal_layer_id;
+ }
+ if (cpi->refresh_alt_ref_frame) {
+ svc->fb_idx_spatial_layer_id[cpi->alt_fb_idx] = svc->spatial_layer_id;
+ svc->fb_idx_temporal_layer_id[cpi->alt_fb_idx] = svc->temporal_layer_id;
+ }
}
// Copy flags from encoder to SVC struct.
vp9_copy_flags_ref_update_idx(cpi);
@@ -3574,8 +3579,41 @@ static void set_frame_size(VP9_COMP *cpi) {
set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
}
-static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
- uint8_t *dest) {
+#if CONFIG_CONSISTENT_RECODE
+static void save_encode_params(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ const int tile_cols = 1 << cm->log2_tile_cols;
+ const int tile_rows = 1 << cm->log2_tile_rows;
+ int tile_col, tile_row;
+ int i, j;
+ RD_OPT *rd_opt = &cpi->rd;
+ for (i = 0; i < MAX_REF_FRAMES; i++) {
+ for (j = 0; j < REFERENCE_MODES; j++)
+ rd_opt->prediction_type_threshes_prev[i][j] =
+ rd_opt->prediction_type_threshes[i][j];
+
+ for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++)
+ rd_opt->filter_threshes_prev[i][j] = rd_opt->filter_threshes[i][j];
+ }
+
+ if (cpi->tile_data != NULL) {
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row)
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+ TileDataEnc *tile_data =
+ &cpi->tile_data[tile_row * tile_cols + tile_col];
+ for (i = 0; i < BLOCK_SIZES; ++i) {
+ for (j = 0; j < MAX_MODES; ++j) {
+ tile_data->thresh_freq_fact_prev[i][j] =
+ tile_data->thresh_freq_fact[i][j];
+ }
+ }
+ }
+ }
+}
+#endif
+
+static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
+ uint8_t *dest) {
VP9_COMMON *const cm = &cpi->common;
int q = 0, bottom_index = 0, top_index = 0; // Dummy variables.
const INTERP_FILTER filter_scaler =
@@ -3686,12 +3724,23 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
if (cm->show_frame && cpi->oxcf.mode == REALTIME &&
(cpi->oxcf.rc_mode == VPX_VBR ||
cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
- (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8 && !cpi->use_svc)))
+ (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8)))
vp9_scene_detection_onepass(cpi);
if (cpi->svc.spatial_layer_id == 0)
cpi->svc.high_source_sad_superframe = cpi->rc.high_source_sad;
+ // For 1 pass CBR, check if we are dropping this frame.
+ // Never drop on key frame, or if base layer is key for svc.
+ // Don't drop on scene change.
+ if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
+ cm->frame_type != KEY_FRAME && !cpi->rc.high_source_sad &&
+ !cpi->svc.high_source_sad_superframe &&
+ (!cpi->use_svc ||
+ !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
+ if (vp9_rc_drop_frame(cpi)) return 0;
+ }
+
// For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame
// when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can
// avoid this frame-level upsampling (for non intra_only frames).
@@ -3715,7 +3764,8 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
}
}
- if (cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 &&
+ // TODO(jianj): Look into issue of skin detection with high bitdepth.
+ if (cm->bit_depth == 8 && cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 &&
cpi->oxcf.rc_mode == VPX_CBR &&
cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
@@ -3729,10 +3779,12 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
suppress_active_map(cpi);
- // For SVC on non-zero spatial layer: check for disabling inter-layer
- // prediction.
- if (cpi->use_svc && cpi->svc.spatial_layer_id > 0)
- vp9_svc_constrain_inter_layer_pred(cpi);
+ if (cpi->use_svc) {
+ // On non-zero spatial layer, check for disabling inter-layer
+ // prediction.
+ if (cpi->svc.spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi);
+ vp9_svc_assert_constraints_pattern(cpi);
+ }
// Variance adaptive and in frame q adjustment experiments are mutually
// exclusive.
@@ -3795,6 +3847,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
// seen in the last encoder iteration.
// update_base_skip_probs(cpi);
vpx_clear_system_state();
+ return 1;
}
#define MAX_QSTEP_ADJ 4
@@ -4485,11 +4538,21 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
cpi->oxcf.target_bandwidth == 0) {
cpi->svc.skip_enhancement_layer = 1;
vp9_rc_postencode_update_drop_frame(cpi);
- vp9_inc_frame_in_layer(cpi);
cpi->ext_refresh_frame_flags_pending = 0;
cpi->last_frame_dropped = 1;
cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
+ if (cpi->svc.framedrop_mode == LAYER_DROP ||
+ cpi->svc.drop_spatial_layer[0] == 0) {
+      // In the constrained drop mode, when the base layer is dropped
+      // (drop_spatial_layer[0] == 1) the full superframe is dropped, and
+      // we don't increment the svc frame counters. In particular, the
+      // temporal layer counter (incremented in vp9_inc_frame_in_layer())
+      // is not incremented, so on a dropped frame we retry the same
+      // temporal_layer_id on the next incoming frame. This avoids a
+      // temporal alignment issue with full superframe dropping.
+ vp9_inc_frame_in_layer(cpi);
+ }
return;
}
@@ -4538,55 +4601,19 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
}
}
- // For 1 pass CBR, check if we are dropping this frame.
- // Never drop on key frame, or if base layer is key for svc.
- if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR &&
- cm->frame_type != KEY_FRAME &&
- (!cpi->use_svc ||
- !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
- int svc_prev_layer_dropped = 0;
- // In the contrained framedrop mode for svc (framedrop_mode =
- // CONSTRAINED_LAYER_DROP), if the previous spatial layer was dropped, drop
- // the current spatial layer.
- if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
- cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id - 1])
- svc_prev_layer_dropped = 1;
- if ((svc_prev_layer_dropped &&
- cpi->svc.framedrop_mode == CONSTRAINED_LAYER_DROP) ||
- vp9_rc_drop_frame(cpi)) {
- vp9_rc_postencode_update_drop_frame(cpi);
- cpi->ext_refresh_frame_flags_pending = 0;
- cpi->last_frame_dropped = 1;
- if (cpi->use_svc) {
- cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
- cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
- vp9_inc_frame_in_layer(cpi);
- cpi->svc.skip_enhancement_layer = 1;
- if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) {
- int i;
- int all_layers_drop = 1;
- for (i = 0; i < cpi->svc.spatial_layer_id; i++) {
- if (cpi->svc.drop_spatial_layer[i] == 0) {
- all_layers_drop = 0;
- break;
- }
- }
- if (all_layers_drop == 1) cpi->svc.skip_enhancement_layer = 0;
- }
- }
- return;
- }
- }
-
vpx_clear_system_state();
#if CONFIG_INTERNAL_STATS
memset(cpi->mode_chosen_counts, 0,
MAX_MODES * sizeof(*cpi->mode_chosen_counts));
#endif
+#if CONFIG_CONSISTENT_RECODE
+  // Back up encode parameters to ensure consistency between recodes.
+ save_encode_params(cpi);
+#endif
if (cpi->sf.recode_loop == DISALLOW_RECODE) {
- encode_without_recode_loop(cpi, size, dest);
+ if (!encode_without_recode_loop(cpi, size, dest)) return;
} else {
encode_with_recode_loop(cpi, size, dest);
}
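
restore_encode_params (vp9_encodeframe.c) and save_encode_params above are a matched pair: every persistent threshold array gains a *_prev shadow that is saved before the first pass over a frame and restored before each recode, so all recode passes start from identical state. A minimal C sketch of that snapshot pattern (type and sizes are illustrative):

    #include <string.h>

    #define NUM_THRESHES 8

    typedef struct {
      long long threshes[NUM_THRESHES];
      long long threshes_prev[NUM_THRESHES]; /* shadow copy for recodes */
    } EncodeParams;

    /* Snapshot the current state before encoding a frame. */
    static void save_params(EncodeParams *p) {
      memcpy(p->threshes_prev, p->threshes, sizeof(p->threshes));
    }

    /* Rewind to the snapshot so a recode starts from the same state. */
    static void restore_params(EncodeParams *p) {
      memcpy(p->threshes, p->threshes_prev, sizeof(p->threshes));
    }
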
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 1e0ed70fb..f66c13046 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -282,6 +282,9 @@ static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
typedef struct TileDataEnc {
TileInfo tile_info;
int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
+#if CONFIG_CONSISTENT_RECODE
+ int thresh_freq_fact_prev[BLOCK_SIZES][MAX_MODES];
+#endif
int8_t mode_map[BLOCK_SIZES][MAX_MODES];
FIRSTPASS_DATA fp_data;
VP9RowMTSync row_mt_sync;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 453879fb8..c76dfd351 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -2201,8 +2201,7 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
// Define middle frame
mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1;
- normal_frames =
- rc->baseline_gf_interval - (key_frame || rc->source_alt_ref_pending);
+ normal_frames = (rc->baseline_gf_interval - rc->source_alt_ref_pending);
if (normal_frames > 1)
normal_frame_bits = (int)(total_group_bits / normal_frames);
else
@@ -2441,9 +2440,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
// Monitor for static sections.
- if ((rc->frames_since_key + i - 1) > 1) {
- zero_motion_accumulator *= get_zero_motion_factor(cpi, &next_frame);
- }
+ zero_motion_accumulator = VPXMIN(
+ zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
// Break clause to detect very still sections after motion. For example,
// a static image after a fade or other transition.
@@ -2464,18 +2462,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
// Break out conditions.
- // Break at maximum of active_max_gf_interval unless almost totally static.
- //
- // Note that the addition of a test of rc->source_alt_ref_active is
- // deliberate. The effect of this is that after a normal altref group even
- // if the material is static there will be one normal length GF group
- // before allowing longer GF groups. The reason for this is that in cases
- // such as slide shows where slides are separated by a complex transition
- // such as a fade, the arf group spanning the transition may not be coded
- // at a very high quality and hence this frame (with its overlay) is a
- // poor golden frame to use for an extended group.
- if (((i >= active_max_gf_interval) &&
- ((zero_motion_accumulator < 0.995) || (rc->source_alt_ref_active))) ||
+ if (
+ // Break at active_max_gf_interval unless almost totally static.
+ ((i >= active_max_gf_interval) && (zero_motion_accumulator < 0.995)) ||
(
// Don't break out with a very short interval.
(i >= active_min_gf_interval) &&
@@ -2495,8 +2484,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
// Should we use the alternate reference frame.
- if ((zero_motion_accumulator < 0.995) && allow_alt_ref &&
- (i < cpi->oxcf.lag_in_frames) && (i >= rc->min_gf_interval)) {
+ if (allow_alt_ref && (i < cpi->oxcf.lag_in_frames) &&
+ (i >= rc->min_gf_interval)) {
const int forward_frames = (rc->frames_to_key - i >= i - 1)
? i - 1
: VPXMAX(0, rc->frames_to_key - i);
@@ -2523,11 +2512,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->gfu_boost = VPXMIN((int)rc->gfu_boost, i * 200);
#endif
- rc->baseline_gf_interval =
- ((twopass->kf_zeromotion_pct >= STATIC_KF_GROUP_THRESH) &&
- (i >= rc->frames_to_key))
- ? i
- : (i - (is_key_frame || rc->source_alt_ref_pending));
+ // Set the interval until the next gf.
+ rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending);
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
@@ -2774,7 +2760,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
vp9_zero(next_frame);
cpi->common.frame_type = KEY_FRAME;
- rc->frames_since_key = 0;
// Reset the GF group data structures.
vp9_zero(*gf_group);
@@ -2919,22 +2904,13 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
for (i = 0; i < (rc->frames_to_key - 1); ++i) {
if (EOF == input_stats(twopass, &next_frame)) break;
- // The zero motion test here insures that if we mark a kf group as static
- // it is static throughout not just the first KF_BOOST_SCAN_MAX_FRAMES.
- // It also allows for a larger boost on long static groups.
- if ((i <= KF_BOOST_SCAN_MAX_FRAMES) || (zero_motion_accumulator >= 0.99)) {
+ if (i <= KF_BOOST_SCAN_MAX_FRAMES) {
double frame_boost;
double zm_factor;
// Monitor for static sections.
- // First frame in kf group the second ref indicator is invalid.
- if (i > 0) {
- zero_motion_accumulator = VPXMIN(
- zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
- } else {
- zero_motion_accumulator =
- next_frame.pcnt_inter - next_frame.pcnt_motion;
- }
+ zero_motion_accumulator = VPXMIN(
+ zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
// Factor 0.75-1.25 based on how much of frame is static.
zm_factor = (0.75 + (zero_motion_accumulator / 2.0));
@@ -2971,16 +2947,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->section_intra_rating = calculate_section_intra_ratio(
start_position, twopass->stats_in_end, rc->frames_to_key);
- // Special case for static / slide show content but dont apply
- // if the kf group is very short.
- if ((zero_motion_accumulator > 0.99) && (rc->frames_to_key > 8)) {
- rc->kf_boost = VPXMAX((rc->frames_to_key * 100), MAX_KF_TOT_BOOST);
- } else {
- // Apply various clamps for min and max boost
- rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3));
- rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST);
- rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST);
- }
+ // Apply various clamps for min and max boost
+ rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3));
+ rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST);
+ rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST);
// Work out how many bits to allocate for the key frame itself.
kf_bits = calculate_boost_bits((rc->frames_to_key - 1), rc->kf_boost,
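
Both first-pass loops now track static content as a running minimum of the per-frame zero-motion factor, and the kf boost path keeps only the generic clamps. The two small patterns in isolation (names are illustrative):

    /* A group is only as static as its least static frame, so accumulate
     * the zero-motion factor as a running minimum. */
    static double accumulate_zero_motion(double acc, double frame_factor) {
      return frame_factor < acc ? frame_factor : acc;
    }

    /* Clamp the key frame boost: the floor grows with the distance to the
     * next key frame, then absolute min/max bounds apply. */
    static int clamp_kf_boost(int boost_score, int frames_to_key,
                              int min_boost, int max_boost) {
      int boost = boost_score;
      if (boost < frames_to_key * 3) boost = frames_to_key * 3;
      if (boost < min_boost) boost = min_boost;
      if (boost > max_boost) boost = max_boost;
      return boost;
    }
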
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index aa497e3da..000ecd779 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -120,12 +120,12 @@ typedef enum {
typedef struct {
unsigned char index;
unsigned char first_inter_index;
- RATE_FACTOR_LEVEL rf_level[MAX_STATIC_GF_GROUP_LENGTH + 1];
- FRAME_UPDATE_TYPE update_type[MAX_STATIC_GF_GROUP_LENGTH + 1];
- unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 1];
- unsigned char arf_update_idx[MAX_STATIC_GF_GROUP_LENGTH + 1];
- unsigned char arf_ref_idx[MAX_STATIC_GF_GROUP_LENGTH + 1];
- int bit_allocation[MAX_STATIC_GF_GROUP_LENGTH + 1];
+ RATE_FACTOR_LEVEL rf_level[(MAX_LAG_BUFFERS * 2) + 1];
+ FRAME_UPDATE_TYPE update_type[(MAX_LAG_BUFFERS * 2) + 1];
+ unsigned char arf_src_offset[(MAX_LAG_BUFFERS * 2) + 1];
+ unsigned char arf_update_idx[(MAX_LAG_BUFFERS * 2) + 1];
+ unsigned char arf_ref_idx[(MAX_LAG_BUFFERS * 2) + 1];
+ int bit_allocation[(MAX_LAG_BUFFERS * 2) + 1];
} GF_GROUP;
typedef struct {
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 1ba518af8..60d5c89b1 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -224,6 +224,14 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
if (rv && search_subpel) {
int subpel_force_stop = cpi->sf.mv.subpel_force_stop;
if (use_base_mv && cpi->sf.base_mv_aggressive) subpel_force_stop = 2;
+ if (cpi->sf.mv.enable_adaptive_subpel_force_stop) {
+ int mv_thresh = cpi->sf.mv.adapt_subpel_force_stop.mv_thresh;
+ if (abs(tmp_mv->as_mv.row) >= mv_thresh ||
+ abs(tmp_mv->as_mv.col) >= mv_thresh)
+ subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_above;
+ else
+ subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_below;
+ }
cpi->find_fractional_mv_step(
x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop,
@@ -1421,7 +1429,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
VP9_COMMON *const cm = &cpi->common;
SPEED_FEATURES *const sf = &cpi->sf;
- const SVC *const svc = &cpi->svc;
+ SVC *const svc = &cpi->svc;
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mi = xd->mi[0];
struct macroblockd_plane *const pd = &xd->plane[0];
@@ -1495,7 +1503,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
#endif
INTERP_FILTER filter_gf_svc = EIGHTTAP;
MV_REFERENCE_FRAME best_second_ref_frame = NONE;
- MV_REFERENCE_FRAME spatial_ref = GOLDEN_FRAME;
+ MV_REFERENCE_FRAME inter_layer_ref = GOLDEN_FRAME;
const struct segmentation *const seg = &cm->seg;
int comp_modes = 0;
int num_inter_modes = (cpi->use_svc) ? RT_INTER_MODES_SVC : RT_INTER_MODES;
@@ -1504,25 +1512,25 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
int svc_mv_row = 0;
int no_scaling = 0;
unsigned int thresh_svc_skip_golden = 500;
- if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) {
- int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id - 1,
- cpi->svc.temporal_layer_id,
- cpi->svc.number_temporal_layers);
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
+ if (cpi->use_svc && svc->spatial_layer_id > 0) {
+ int layer =
+ LAYER_IDS_TO_IDX(svc->spatial_layer_id - 1, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
if (lc->scaling_factor_num == lc->scaling_factor_den) no_scaling = 1;
}
- if (cpi->svc.spatial_layer_id > 0 &&
- (cpi->svc.high_source_sad_superframe || no_scaling))
+ if (svc->spatial_layer_id > 0 &&
+ (svc->high_source_sad_superframe || no_scaling))
thresh_svc_skip_golden = 0;
// Lower the skip threshold if lower spatial layer is better quality relative
// to current layer.
- else if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex > 150 &&
- cm->base_qindex > cpi->svc.lower_layer_qindex + 15)
+ else if (svc->spatial_layer_id > 0 && cm->base_qindex > 150 &&
+ cm->base_qindex > svc->lower_layer_qindex + 15)
thresh_svc_skip_golden = 100;
// Increase skip threshold if lower spatial layer is lower quality relative
// to current layer.
- else if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex < 140 &&
- cm->base_qindex < cpi->svc.lower_layer_qindex - 20)
+ else if (svc->spatial_layer_id > 0 && cm->base_qindex < 140 &&
+ cm->base_qindex < svc->lower_layer_qindex - 20)
thresh_svc_skip_golden = 1000;
init_ref_frame_cost(cm, xd, ref_frame_cost);
@@ -1585,10 +1593,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0) {
if (cpi->use_svc) {
- int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
- cpi->svc.temporal_layer_id,
- cpi->svc.number_temporal_layers);
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
+ int layer =
+ LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *lc = &svc->layer_context[layer];
denoise_svc_pickmode = denoise_svc(cpi) && !lc->is_key_frame;
}
if (cpi->denoiser.denoising_level > kDenLowLow && denoise_svc_pickmode)
@@ -1623,19 +1631,19 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// For svc mode, on spatial_layer_id > 0: if the reference has different scale
// constrain the inter mode to only test zero motion.
if (cpi->use_svc && svc->force_zero_mode_spatial_ref &&
- cpi->svc.spatial_layer_id > 0) {
+ svc->spatial_layer_id > 0) {
if (cpi->ref_frame_flags & flag_list[LAST_FRAME]) {
struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf;
if (vp9_is_scaled(sf)) {
svc_force_zero_mode[LAST_FRAME - 1] = 1;
- spatial_ref = LAST_FRAME;
+ inter_layer_ref = LAST_FRAME;
}
}
if (cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) {
struct scale_factors *const sf = &cm->frame_refs[GOLDEN_FRAME - 1].sf;
if (vp9_is_scaled(sf)) {
svc_force_zero_mode[GOLDEN_FRAME - 1] = 1;
- spatial_ref = GOLDEN_FRAME;
+ inter_layer_ref = GOLDEN_FRAME;
}
}
}
@@ -1652,6 +1660,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
}
}
+ if (sf->disable_golden_ref && (x->content_state_sb != kVeryHighSad ||
+ cpi->rc.avg_frame_low_motion < 60))
+ usable_ref_frame = LAST_FRAME;
+
if (!((cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) &&
!svc_force_zero_mode[GOLDEN_FRAME - 1] && !force_skip_low_temp_var))
use_golden_nonzeromv = 0;
@@ -1677,6 +1689,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
}
for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
+    // Skip find_predictors if the reference frame is not in the
+    // ref_frame_flags (i.e., not used as a reference for this frame).
+ skip_ref_find_pred[ref_frame] =
+ !(cpi->ref_frame_flags & flag_list[ref_frame]);
if (!skip_ref_find_pred[ref_frame]) {
find_predictors(cpi, x, ref_frame, frame_mv, const_motion,
&ref_frame_skip_mask, flag_list, tile_data, mi_row,
@@ -1692,9 +1708,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// an averaging filter for downsampling (phase = 8). If so, we will test
// a nonzero motion mode on the spatial reference.
// The nonzero motion is half pixel shifted to left and top (-4, -4).
- if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
- svc_force_zero_mode[spatial_ref - 1] &&
- cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id - 1] == 8) {
+ if (cpi->use_svc && svc->spatial_layer_id > 0 &&
+ svc_force_zero_mode[inter_layer_ref - 1] &&
+ svc->downsample_filter_phase[svc->spatial_layer_id - 1] == 8) {
svc_mv_col = -4;
svc_mv_row = -4;
flag_svc_subpel = 1;
@@ -1713,7 +1729,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
int inter_mv_mode = 0;
int skip_this_mv = 0;
int comp_pred = 0;
- int force_gf_mv = 0;
+ int force_mv_inter_layer = 0;
PREDICTION_MODE this_mode;
second_ref_frame = NONE;
@@ -1743,8 +1759,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
continue;
- if (flag_svc_subpel && ref_frame == spatial_ref) {
- force_gf_mv = 1;
+ if (flag_svc_subpel && ref_frame == inter_layer_ref) {
+ force_mv_inter_layer = 1;
// Only test mode if NEARESTMV/NEARMV is (svc_mv_col, svc_mv_row),
// otherwise set NEWMV to (svc_mv_col, svc_mv_row).
if (this_mode == NEWMV) {
@@ -1771,8 +1787,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
sse_zeromv_normalized < thresh_svc_skip_golden)
continue;
+ if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue;
+
if (sf->short_circuit_flat_blocks && x->source_variance == 0 &&
- this_mode != NEARESTMV) {
+ frame_mv[this_mode][ref_frame].as_int != 0) {
continue;
}
@@ -1802,14 +1820,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
continue;
}
- if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue;
-
if (const_motion[ref_frame] && this_mode == NEARMV) continue;
// Skip non-zeromv mode search for golden frame if force_skip_low_temp_var
// is set. If nearestmv for golden frame is 0, zeromv mode will be skipped
// later.
- if (!force_gf_mv && force_skip_low_temp_var && ref_frame == GOLDEN_FRAME &&
+ if (!force_mv_inter_layer && force_skip_low_temp_var &&
+ ref_frame == GOLDEN_FRAME &&
frame_mv[this_mode][ref_frame].as_int != 0) {
continue;
}
@@ -1823,7 +1840,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
}
if (cpi->use_svc) {
- if (!force_gf_mv && svc_force_zero_mode[ref_frame - 1] &&
+ if (!force_mv_inter_layer && svc_force_zero_mode[ref_frame - 1] &&
frame_mv[this_mode][ref_frame].as_int != 0)
continue;
}
@@ -1883,9 +1900,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
(!cpi->sf.adaptive_rd_thresh_row_mt &&
rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
&rd_thresh_freq_fact[mode_index])))
- continue;
+ if (frame_mv[this_mode][ref_frame].as_int != 0) continue;
- if (this_mode == NEWMV && !force_gf_mv) {
+ if (this_mode == NEWMV && !force_mv_inter_layer) {
if (ref_frame > LAST_FRAME && !cpi->use_svc &&
cpi->oxcf.rc_mode == VPX_CBR) {
int tmp_sad;
@@ -1931,7 +1948,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// Exit NEWMV search if base_mv is (0,0) && bsize < BLOCK_16x16,
// for SVC encoding.
- if (cpi->use_svc && cpi->svc.use_base_mv && bsize < BLOCK_16X16 &&
+ if (cpi->use_svc && svc->use_base_mv && bsize < BLOCK_16X16 &&
frame_mv[NEWMV][ref_frame].as_mv.row == 0 &&
frame_mv[NEWMV][ref_frame].as_mv.col == 0)
continue;
@@ -2028,7 +2045,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
pred_filter_search &&
(ref_frame == LAST_FRAME ||
- (ref_frame == GOLDEN_FRAME && !force_gf_mv &&
+ (ref_frame == GOLDEN_FRAME && !force_mv_inter_layer &&
(cpi->use_svc || cpi->oxcf.rc_mode == VPX_VBR))) &&
(((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) {
int pf_rate[3];
@@ -2254,12 +2271,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// layer is chosen as the reference. Always perform intra prediction if
// LAST is the only reference, or is_key_frame is set, or on base
// temporal layer.
- if (cpi->svc.spatial_layer_id) {
+ if (svc->spatial_layer_id) {
perform_intra_pred =
- cpi->svc.temporal_layer_id == 0 ||
- cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame ||
+ svc->temporal_layer_id == 0 ||
+ svc->layer_context[svc->temporal_layer_id].is_key_frame ||
!(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) ||
- (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
+ (!svc->layer_context[svc->temporal_layer_id].is_key_frame &&
svc_force_zero_mode[best_ref_frame - 1]);
inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;
}
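
The new enable_adaptive_subpel_force_stop speed feature in combined_motion_search picks the subpel stopping precision from the full-pel motion magnitude: large motion gets a coarser, cheaper stop. A sketch of that selection, with fields mirroring the hunk above:

    typedef struct {
      int mv_thresh;        /* full-pel magnitude threshold */
      int force_stop_above; /* coarser stop for large motion */
      int force_stop_below; /* finer stop for small motion */
    } AdaptSubpelForceStop;

    static int abs_int(int v) { return v < 0 ? -v : v; }

    /* Choose the subpel force-stop level from the motion vector magnitude. */
    static int select_subpel_force_stop(const AdaptSubpelForceStop *cfg,
                                        int mv_row, int mv_col) {
      if (abs_int(mv_row) >= cfg->mv_thresh ||
          abs_int(mv_col) >= cfg->mv_thresh)
        return cfg->force_stop_above;
      return cfg->force_stop_below;
    }
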
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 599337f80..11547fb2e 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -273,6 +273,14 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
const VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
+  // On a dropped frame, don't update the buffer if it's currently stable
+  // (above the optimal level). This can cause issues when the full superframe
+  // can be dropped (framedrop_mode != LAYER_DROP), since QP is adjusted
+  // downwards on buffer overflow, which can cause more frame drops.
+ if (cpi->svc.framedrop_mode != LAYER_DROP && encoded_frame_size == 0 &&
+ rc->buffer_level > rc->optimal_buffer_level)
+ return;
+
// Non-viewable frames are a special case and are treated as pure overhead.
if (!cm->show_frame) {
rc->bits_off_target -= encoded_frame_size;
@@ -390,7 +398,31 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2;
}
-static int check_buffer(VP9_COMP *cpi, int drop_mark) {
+static int check_buffer_above_thresh(VP9_COMP *cpi, int drop_mark) {
+ SVC *svc = &cpi->svc;
+ if (!cpi->use_svc || cpi->svc.framedrop_mode != FULL_SUPERFRAME_DROP) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ return (rc->buffer_level > drop_mark);
+ } else {
+ int i;
+    // For SVC in FULL_SUPERFRAME_DROP mode: the condition on the
+    // buffer (if it's above threshold, so no drop) is checked on the current
+    // and upper spatial layers. If any spatial layer is not above threshold,
+    // we return 0.
+ for (i = svc->spatial_layer_id; i < svc->number_spatial_layers; ++i) {
+ const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *lc = &svc->layer_context[layer];
+ RATE_CONTROL *lrc = &lc->rc;
+ const int drop_mark_layer =
+ (int)(cpi->svc.framedrop_thresh[i] * lrc->optimal_buffer_level / 100);
+ if (!(lrc->buffer_level > drop_mark_layer)) return 0;
+ }
+ return 1;
+ }
+}
+
+static int check_buffer_below_thresh(VP9_COMP *cpi, int drop_mark) {
SVC *svc = &cpi->svc;
if (!cpi->use_svc || cpi->svc.framedrop_mode == LAYER_DROP) {
RATE_CONTROL *const rc = &cpi->rc;
@@ -398,8 +430,10 @@ static int check_buffer(VP9_COMP *cpi, int drop_mark) {
} else {
int i;
// For SVC in the constrained framedrop mode (svc->framedrop_mode =
- // CONSTRAINED_LAYER_DROP): the condition on buffer (to drop frame) is
- // checked on current and upper spatial layers.
+    // CONSTRAINED_LAYER_DROP or FULL_SUPERFRAME_DROP): the condition on the
+    // buffer (if it's below threshold, so drop the frame) is checked on the
+    // current and upper spatial layers. In FULL_SUPERFRAME_DROP mode, if any
+    // spatial layer is <= threshold, then we return 1 (drop).
for (i = svc->spatial_layer_id; i < svc->number_spatial_layers; ++i) {
const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id,
svc->number_temporal_layers);
@@ -407,23 +441,42 @@ static int check_buffer(VP9_COMP *cpi, int drop_mark) {
RATE_CONTROL *lrc = &lc->rc;
const int drop_mark_layer =
(int)(cpi->svc.framedrop_thresh[i] * lrc->optimal_buffer_level / 100);
- if (!(lrc->buffer_level <= drop_mark_layer)) return 0;
+ if (cpi->svc.framedrop_mode == FULL_SUPERFRAME_DROP) {
+ if (lrc->buffer_level <= drop_mark_layer) return 1;
+ } else {
+ if (!(lrc->buffer_level <= drop_mark_layer)) return 0;
+ }
}
- return 1;
+ if (cpi->svc.framedrop_mode == FULL_SUPERFRAME_DROP)
+ return 0;
+ else
+ return 1;
}
}
-int vp9_rc_drop_frame(VP9_COMP *cpi) {
+static int drop_frame(VP9_COMP *cpi) {
const VP9EncoderConfig *oxcf = &cpi->oxcf;
RATE_CONTROL *const rc = &cpi->rc;
+ SVC *svc = &cpi->svc;
int drop_frames_water_mark = oxcf->drop_frames_water_mark;
- if (cpi->use_svc)
- drop_frames_water_mark =
- cpi->svc.framedrop_thresh[cpi->svc.spatial_layer_id];
- if (!drop_frames_water_mark) {
+ if (cpi->use_svc) {
+    // If we have already dropped max_consec_drop frames in a row, don't
+    // drop this spatial layer, and reset the counter to 0.
+ if (svc->drop_count[svc->spatial_layer_id] == svc->max_consec_drop) {
+ svc->drop_count[svc->spatial_layer_id] = 0;
+ return 0;
+ } else {
+ drop_frames_water_mark = svc->framedrop_thresh[svc->spatial_layer_id];
+ }
+ }
+ if (!drop_frames_water_mark ||
+ (svc->spatial_layer_id > 0 &&
+ svc->framedrop_mode == FULL_SUPERFRAME_DROP)) {
return 0;
} else {
- if (rc->buffer_level < 0) {
+ if ((rc->buffer_level < 0 && svc->framedrop_mode != FULL_SUPERFRAME_DROP) ||
+ (check_buffer_below_thresh(cpi, -1) &&
+ svc->framedrop_mode == FULL_SUPERFRAME_DROP)) {
// Always drop if buffer is below 0.
return 1;
} else {
@@ -431,9 +484,11 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) {
// (starting with the next frame) until it increases back over drop_mark.
int drop_mark =
(int)(drop_frames_water_mark * rc->optimal_buffer_level / 100);
- if ((rc->buffer_level > drop_mark) && (rc->decimation_factor > 0)) {
+ if (check_buffer_above_thresh(cpi, drop_mark) &&
+ (rc->decimation_factor > 0)) {
--rc->decimation_factor;
- } else if (check_buffer(cpi, drop_mark) && rc->decimation_factor == 0) {
+ } else if (check_buffer_below_thresh(cpi, drop_mark) &&
+ rc->decimation_factor == 0) {
rc->decimation_factor = 1;
}
if (rc->decimation_factor > 0) {
@@ -452,6 +507,75 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) {
}
}
+int vp9_rc_drop_frame(VP9_COMP *cpi) {
+ SVC *svc = &cpi->svc;
+ int svc_prev_layer_dropped = 0;
+ // In the constrained or full_superframe framedrop mode for svc
+ // (framedrop_mode != LAYER_DROP), if the previous spatial layer was
+ // dropped, drop the current spatial layer.
+ if (cpi->use_svc && svc->spatial_layer_id > 0 &&
+ svc->drop_spatial_layer[svc->spatial_layer_id - 1])
+ svc_prev_layer_dropped = 1;
+ if ((svc_prev_layer_dropped && svc->framedrop_mode != LAYER_DROP) ||
+ drop_frame(cpi)) {
+ vp9_rc_postencode_update_drop_frame(cpi);
+ cpi->ext_refresh_frame_flags_pending = 0;
+ cpi->last_frame_dropped = 1;
+ if (cpi->use_svc) {
+ svc->last_layer_dropped[svc->spatial_layer_id] = 1;
+ svc->drop_spatial_layer[svc->spatial_layer_id] = 1;
+ svc->drop_count[svc->spatial_layer_id]++;
+ svc->skip_enhancement_layer = 1;
+ if (svc->framedrop_mode == LAYER_DROP ||
+ svc->drop_spatial_layer[0] == 0) {
+      // In the constrained drop mode, when the base layer is dropped
+      // (drop_spatial_layer[0] == 1) the full superframe is dropped, and
+      // we don't increment the svc frame counters. In particular, the
+      // temporal layer counter (incremented in vp9_inc_frame_in_layer())
+      // is not incremented, so on a dropped frame we retry the same
+      // temporal_layer_id on the next incoming frame. This avoids a
+      // temporal alignment issue with full superframe dropping.
+ vp9_inc_frame_in_layer(cpi);
+ }
+ if (svc->spatial_layer_id == svc->number_spatial_layers - 1) {
+ int i;
+ int all_layers_drop = 1;
+ for (i = 0; i < svc->spatial_layer_id; i++) {
+ if (svc->drop_spatial_layer[i] == 0) {
+ all_layers_drop = 0;
+ break;
+ }
+ }
+ if (all_layers_drop == 1) svc->skip_enhancement_layer = 0;
+ }
+ }
+ return 1;
+ }
+ return 0;
+}
+
+static int adjust_q_cbr(const VP9_COMP *cpi, int q) {
+ // This makes sure q is between oscillating Qs to prevent resonance.
+ if (!cpi->rc.reset_high_source_sad &&
+ (!cpi->oxcf.gf_cbr_boost_pct ||
+ !(cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)) &&
+ (cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) &&
+ cpi->rc.q_1_frame != cpi->rc.q_2_frame) {
+ int qclamp = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame),
+ VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame));
+ // If the previous frame had overshoot and the current q needs to increase
+ // above the clamped value, reduce the clamp for faster reaction to
+ // overshoot.
+ if (cpi->rc.rc_1_frame == -1 && q > qclamp)
+ q = (q + qclamp) >> 1;
+ else
+ q = qclamp;
+ }
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN)
+ vp9_cyclic_refresh_limit_q(cpi, &q);
+ return q;
+}
+
static double get_rate_correction_factor(const VP9_COMP *cpi) {
const RATE_CONTROL *const rc = &cpi->rc;
double rcf;
@@ -610,22 +734,9 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
}
} while (++i <= active_worst_quality);
- // In CBR mode, this makes sure q is between oscillating Qs to prevent
- // resonance.
- if (cpi->oxcf.rc_mode == VPX_CBR && !cpi->rc.reset_high_source_sad &&
- (!cpi->oxcf.gf_cbr_boost_pct ||
- !(cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)) &&
- (cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) &&
- cpi->rc.q_1_frame != cpi->rc.q_2_frame) {
- int qclamp = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame),
- VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame));
- // If the previous had overshoot and the current q needs to increase above
- // the clamped value, reduce the clamp for faster reaction to overshoot.
- if (cpi->rc.rc_1_frame == -1 && q > qclamp)
- q = (q + qclamp) >> 1;
- else
- q = qclamp;
- }
+ // Adjustment to q for CBR mode.
+ if (cpi->oxcf.rc_mode == VPX_CBR) return adjust_q_cbr(cpi, q);
+
return q;
}
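
adjust_q_cbr, factored out above, damps q oscillation: when the last two rate-correction directions disagree (rc_1_frame * rc_2_frame == -1) and the last two q values differ, q is clamped between them, with a half-step escape after overshoot. A compact sketch of the clamp (helper names are illustrative):

    static int clamp_int(int v, int lo, int hi) {
      return v < lo ? lo : (v > hi ? hi : v);
    }

    /* Keep q between the previous two frames' q values to damp oscillation.
     * rc_1_frame == -1 means the last frame overshot its rate target. */
    static int damp_q_oscillation(int q, int q_1_frame, int q_2_frame,
                                  int rc_1_frame) {
      const int lo = q_1_frame < q_2_frame ? q_1_frame : q_2_frame;
      const int hi = q_1_frame < q_2_frame ? q_2_frame : q_1_frame;
      const int qclamp = clamp_int(q, lo, hi);
      /* After overshoot, move q halfway toward its unclamped value for a
       * faster reaction. */
      if (rc_1_frame == -1 && q > qclamp) return (q + qclamp) >> 1;
      return qclamp;
    }
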
@@ -730,8 +841,10 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
active_worst_quality = VPXMIN(rc->worst_quality, ambient_qp * 5 >> 2);
if (rc->buffer_level > rc->optimal_buffer_level) {
// Adjust down.
- // Maximum limit for down adjustment, ~30%.
+ // Maximum limit for down adjustment ~30%; make it lower for screen content.
int max_adjustment_down = active_worst_quality / 3;
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN)
+ max_adjustment_down = active_worst_quality >> 3;
if (max_adjustment_down) {
buff_lvl_step = ((rc->maximum_buffer_size - rc->optimal_buffer_level) /
max_adjustment_down);
@@ -1118,9 +1231,6 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
// Baseline value derived from cpi->active_worst_quality and kf boost.
active_best_quality =
get_kf_active_quality(rc, active_worst_quality, cm->bit_depth);
- if (cpi->twopass.kf_zeromotion_pct >= STATIC_KF_GROUP_THRESH) {
- active_best_quality /= 4;
- }
// Allow somewhat lower kf minq with small image formats.
if ((cm->width * cm->height) <= (352 * 288)) {
@@ -1500,7 +1610,11 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
}
if (oxcf->pass == 0) {
- if (cm->frame_type != KEY_FRAME) {
+ if (cm->frame_type != KEY_FRAME &&
+ (!cpi->use_svc ||
+ (cpi->use_svc &&
+ !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
+ cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1))) {
compute_frame_low_motion(cpi);
if (cpi->sf.use_altref_onepass) update_altref_usage(cpi);
}
@@ -1867,8 +1981,13 @@ void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi,
rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
cpi->framerate, rc->min_gf_interval);
- // Extended max interval for genuinely static scenes like slide shows.
- rc->static_scene_max_gf_interval = MAX_STATIC_GF_GROUP_LENGTH;
+  // Extended max interval for genuinely static scenes.
+ rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
+
+ if (is_altref_enabled(cpi)) {
+ if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
+ rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
+ }
if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
rc->max_gf_interval = rc->static_scene_max_gf_interval;
@@ -2426,6 +2545,19 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
if (cm->frame_type != KEY_FRAME && rc->reset_high_source_sad)
rc->this_frame_target = rc->avg_frame_bandwidth;
}
+ // For SVC, the new (updated) avg_source_sad[0] for the current superframe
+ // is propagated to all layers.
+ if (cpi->use_svc) {
+ int sl, tl;
+ SVC *const svc = &cpi->svc;
+ for (sl = 0; sl < svc->number_spatial_layers; ++sl)
+ for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
+ int layer = LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ lrc->avg_source_sad[0] = rc->avg_source_sad[0];
+ }
+ }
// For VBR, under scene change/high content change, force golden refresh.
if (cpi->oxcf.rc_mode == VPX_VBR && cm->frame_type != KEY_FRAME &&
rc->high_source_sad && rc->frames_to_key > 3 &&
@@ -2459,8 +2591,11 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
- int thresh_qp = 3 * (rc->worst_quality >> 2);
- int thresh_rate = rc->avg_frame_bandwidth * 10;
+ int thresh_qp = 7 * (rc->worst_quality >> 3);
+ int thresh_rate = rc->avg_frame_bandwidth << 3;
+ // Lower rate threshold for video.
+ if (cpi->oxcf.content != VP9E_CONTENT_SCREEN)
+ thresh_rate = rc->avg_frame_bandwidth << 2;
if (cm->base_qindex < thresh_qp && frame_size > thresh_rate) {
double rate_correction_factor =
cpi->rc.rate_correction_factors[INTER_NORMAL];
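
To make the retuned thresholds concrete, a small self-contained model (all input values below are hypothetical, not taken from the encoder):

#include <stdio.h>

// Sketch of the overshoot trigger with the updated thresholds.
static int overshoot_triggered(int base_qindex, int frame_size,
                               int worst_quality, int avg_frame_bandwidth,
                               int is_screen_content) {
  const int thresh_qp = 7 * (worst_quality >> 3);
  const int thresh_rate = is_screen_content ? avg_frame_bandwidth << 3
                                            : avg_frame_bandwidth << 2;
  return base_qindex < thresh_qp && frame_size > thresh_rate;
}

int main(void) {
  // worst_quality = 63 -> thresh_qp = 7 * 7 = 49 (was 3 * 15 = 45);
  // avg_frame_bandwidth = 20000 -> thresh_rate = 80000 for video (4x).
  printf("%d\n", overshoot_triggered(30, 100000, 63, 20000, 0));  // prints 1
  return 0;
}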
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index 3a40e0138..c1b210677 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -34,14 +34,6 @@ extern "C" {
#define FRAME_OVERHEAD_BITS 200
-// Threshold used to define a KF group as static (e.g. a slide show).
-// Essentially this means that no frame in the group has more than 1% of MBs
-// that are not marked as coded with 0,0 motion in the first pass.
-#define STATIC_KF_GROUP_THRESH 99
-
-// The maximum duration of a GF group that is static (for example a slide show).
-#define MAX_STATIC_GF_GROUP_LENGTH 250
-
typedef enum {
INTER_NORMAL = 0,
INTER_HIGH = 1,
diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h
index 59022c106..8201bba70 100644
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -108,7 +108,11 @@ typedef struct RD_OPT {
int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES];
int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
+#if CONFIG_CONSISTENT_RECODE
+ int64_t prediction_type_threshes_prev[MAX_REF_FRAMES][REFERENCE_MODES];
+ int64_t filter_threshes_prev[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
+#endif
int RDMULT;
int RDDIV;
} RD_OPT;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index e39df033a..e3672edf5 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -847,7 +847,7 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
{ INT64_MAX, INT64_MAX } };
int n;
int s0, s1;
- int64_t best_rd = INT64_MAX;
+ int64_t best_rd = ref_best_rd;
TX_SIZE best_tx = max_tx_size;
int start_tx, end_tx;
const int tx_size_ctx = get_tx_size_context(xd);
@@ -868,8 +868,8 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
for (n = start_tx; n >= end_tx; n--) {
const int r_tx_size = cpi->tx_size_cost[max_tx_size - 1][tx_size_ctx][n];
- txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], ref_best_rd, 0,
- bs, n, cpi->sf.use_fast_coef_costing);
+ txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], best_rd, 0, bs, n,
+ cpi->sf.use_fast_coef_costing);
r[n][1] = r[n][0];
if (r[n][0] < INT_MAX) {
r[n][1] += r_tx_size;
@@ -3612,9 +3612,13 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
}
if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
- // If adaptive interp filter is enabled, then the current leaf node of 8x8
- // data is needed for sub8x8. Hence preserve the context.
+// If adaptive interp filter is enabled, then the current leaf node of 8x8
+// data is needed for sub8x8. Hence preserve the context.
+#if CONFIG_CONSISTENT_RECODE
+ if (bsize == BLOCK_8X8) ctx->mic = *xd->mi[0];
+#else
if (cpi->row_mt && bsize == BLOCK_8X8) ctx->mic = *xd->mi[0];
+#endif
rd_cost->rate = INT_MAX;
rd_cost->rdcost = INT64_MAX;
return;
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 90da68726..d2842697d 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -375,6 +375,8 @@ static void set_rt_speed_feature_framesize_independent(
sf->nonrd_keyframe = 0;
sf->svc_use_lowres_part = 0;
sf->re_encode_overshoot_rt = 0;
+ sf->disable_16x16part_nonkey = 0;
+ sf->disable_golden_ref = 0;
if (speed >= 1) {
sf->allow_txfm_domain_distortion = 1;
@@ -537,8 +539,14 @@ static void set_rt_speed_feature_framesize_independent(
if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) sf->nonrd_keyframe = 1;
if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG &&
- cpi->oxcf.content == VP9E_CONTENT_SCREEN)
+ (cpi->use_svc || cpi->oxcf.content == VP9E_CONTENT_SCREEN)) {
sf->re_encode_overshoot_rt = 1;
+ }
+ if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 &&
+ cm->width <= 1280 && cm->height <= 720) {
+ sf->use_altref_onepass = 1;
+ sf->use_compound_nonrd_pickmode = 1;
+ }
}
if (speed >= 6) {
@@ -661,6 +669,21 @@ static void set_rt_speed_feature_framesize_independent(
sf->limit_newmv_early_exit = 0;
sf->use_simple_block_yrd = 1;
}
+
+ if (speed >= 9) {
+ sf->mv.enable_adaptive_subpel_force_stop = 1;
+ sf->mv.adapt_subpel_force_stop.mv_thresh = 2;
+ if (cpi->rc.avg_frame_low_motion < 40)
+ sf->mv.adapt_subpel_force_stop.mv_thresh = 1;
+ sf->mv.adapt_subpel_force_stop.force_stop_below = 1;
+ sf->mv.adapt_subpel_force_stop.force_stop_above = 2;
+ // Disable partition blocks below 16x16, except at low resolutions.
+ if (cm->frame_type != KEY_FRAME && cm->width >= 320 && cm->height >= 240)
+ sf->disable_16x16part_nonkey = 1;
+ // Allow for disabling GOLDEN reference, for CBR mode.
+ if (cpi->oxcf.rc_mode == VPX_CBR) sf->disable_golden_ref = 1;
+ }
+
if (sf->use_altref_onepass) {
if (cpi->rc.is_src_frame_alt_ref && cm->frame_type != KEY_FRAME) {
sf->partition_search_type = FIXED_PARTITION;
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 946bf0545..251cfdbcd 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -161,6 +161,17 @@ typedef enum {
ONE_LOOP_REDUCED = 1
} FAST_COEFF_UPDATE;
+typedef struct ADAPT_SUBPEL_FORCE_STOP {
+ // Threshold for the full pixel motion vector.
+ int mv_thresh;
+
+ // subpel_force_stop to use if the full pixel MV is below the threshold.
+ int force_stop_below;
+
+ // subpel_force_stop to use if the full pixel MV is equal to or above the
+ // threshold.
+ int force_stop_above;
+} ADAPT_SUBPEL_FORCE_STOP;
+
typedef struct MV_SPEED_FEATURES {
// Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
SEARCH_METHODS search_method;
@@ -189,6 +200,11 @@ typedef struct MV_SPEED_FEATURES {
// 3: Stop at full pixel.
int subpel_force_stop;
+ // If enabled, the subpel_force_stop used depends on the full pixel MV.
+ int enable_adaptive_subpel_force_stop;
+
+ ADAPT_SUBPEL_FORCE_STOP adapt_subpel_force_stop;
+
// This variable sets the step_param used in full pel motion search.
int fullpel_search_step_param;
} MV_SPEED_FEATURES;
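
The code that consumes ADAPT_SUBPEL_FORCE_STOP is not part of this diff; a plausible selection helper, sketched under that assumption (pick_subpel_force_stop and its calling convention are hypothetical):

// Pick the subpel force-stop level for one block from the full pixel MV
// magnitude, mirroring the mv_thresh / force_stop_below / force_stop_above
// fields of ADAPT_SUBPEL_FORCE_STOP.
static int pick_subpel_force_stop(int abs_mv_row, int abs_mv_col,
                                  int mv_thresh, int force_stop_below,
                                  int force_stop_above) {
  if (abs_mv_row < mv_thresh && abs_mv_col < mv_thresh)
    return force_stop_below;
  return force_stop_above;
}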
@@ -515,6 +531,12 @@ typedef struct SPEED_FEATURES {
// Enable re-encoding on scene change with potential high overshoot,
// for real-time encoding flow.
int re_encode_overshoot_rt;
+
+ // Disable partitioning of 16x16 blocks on non-key frames.
+ int disable_16x16part_nonkey;
+
+ // Allow for disabling golden reference.
+ int disable_golden_ref;
} SPEED_FEATURES;
struct VP9_COMP;
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 07d1995a8..d745ae0df 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -41,7 +41,10 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
svc->disable_inter_layer_pred = INTER_LAYER_PRED_ON;
svc->framedrop_mode = CONSTRAINED_LAYER_DROP;
- for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1;
+ for (i = 0; i < REF_FRAMES; ++i) {
+ svc->fb_idx_spatial_layer_id[i] = -1;
+ svc->fb_idx_temporal_layer_id[i] = -1;
+ }
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
svc->last_layer_dropped[sl] = 0;
svc->drop_spatial_layer[sl] = 0;
@@ -52,7 +55,10 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
svc->downsample_filter_type[sl] = BILINEAR;
svc->downsample_filter_phase[sl] = 8; // Set to 8 for averaging filter.
svc->framedrop_thresh[sl] = oxcf->drop_frames_water_mark;
+ svc->fb_idx_upd_tl0[sl] = -1;
+ svc->drop_count[sl] = 0;
}
+ svc->max_consec_drop = INT_MAX;
if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img, SMALL_FRAME_WIDTH,
@@ -787,7 +793,9 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
if (cpi->svc.spatial_layer_id == 0) cpi->svc.high_source_sad_superframe = 0;
if (cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
- cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id]) {
+ cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] &&
+ cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] != -1 &&
+ !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) {
// For fixed/non-flexible mode, if the previous frame (same spatial layer
// from previous superframe) was dropped, make sure the lst_fb_idx
// for this frame corresponds to the buffer index updated on (last) encoded
@@ -903,12 +911,11 @@ void vp9_svc_constrain_inter_layer_pred(VP9_COMP *const cpi) {
}
}
}
- // Check for disabling inter-layer prediction if
- // INTER_LAYER_PRED_ON_CONSTRAINED is enabled.
- // If the reference for inter-layer prediction (the reference that is scaled)
- // is not the previous spatial layer from the same superframe, then we
- // disable inter-layer prediction.
- if (cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_ON_CONSTRAINED) {
+ // Disable inter-layer prediction if the reference for inter-layer prediction
+ // (the reference that is scaled) is not the previous spatial layer from the
+ // same superframe.
+ // Only need to check when inter-layer prediction is not set to OFF mode.
+ if (cpi->svc.disable_inter_layer_pred != INTER_LAYER_PRED_OFF) {
// We only use LAST and GOLDEN for prediction in real-time mode, so we
// check both here.
MV_REFERENCE_FRAME ref_frame;
@@ -940,3 +947,46 @@ void vp9_svc_constrain_inter_layer_pred(VP9_COMP *const cpi) {
}
}
}
+
+void vp9_svc_assert_constraints_pattern(VP9_COMP *const cpi) {
+ SVC *const svc = &cpi->svc;
+ // For fixed/non-flexible mode, the following constraints are expected,
+ // when inter-layer prediction is on (default).
+ if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+ svc->disable_inter_layer_pred == INTER_LAYER_PRED_ON &&
+ svc->framedrop_mode != LAYER_DROP) {
+ if (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) {
+ // On non-key frames: LAST is always temporal reference, GOLDEN is
+ // spatial reference.
+ if (svc->temporal_layer_id == 0)
+ // Base temporal only predicts from base temporal.
+ assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] == 0);
+ else
+ // Non-base temporal only predicts from lower temporal layer.
+ assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] <
+ svc->temporal_layer_id);
+ if (svc->spatial_layer_id > 0) {
+ // Non-base spatial only predicts from lower spatial layer with same
+ // temporal_id.
+ assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] ==
+ svc->spatial_layer_id - 1);
+ assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] ==
+ svc->temporal_layer_id);
+ }
+ } else if (svc->spatial_layer_id > 0) {
+ // Only 1 reference for a frame whose base is a key frame; the reference
+ // may be LAST or GOLDEN, so we check both.
+ if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
+ assert(svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] ==
+ svc->spatial_layer_id - 1);
+ assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] ==
+ svc->temporal_layer_id);
+ } else if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
+ assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] ==
+ svc->spatial_layer_id - 1);
+ assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] ==
+ svc->temporal_layer_id);
+ }
+ }
+ }
+}
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index 617717049..9be5bb7ea 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -96,7 +96,6 @@ typedef struct SVC {
int lst_fb_idx[VPX_MAX_LAYERS];
int gld_fb_idx[VPX_MAX_LAYERS];
int alt_fb_idx[VPX_MAX_LAYERS];
- int ref_frame_index[REF_FRAMES];
int force_zero_mode_spatial_ref;
int current_superframe;
int non_reference_frame;
@@ -122,6 +121,8 @@ typedef struct SVC {
int last_layer_dropped[VPX_MAX_LAYERS];
int drop_spatial_layer[VPX_MAX_LAYERS];
int framedrop_thresh[VPX_MAX_LAYERS];
+ int drop_count[VPX_MAX_LAYERS];
+ int max_consec_drop;
SVC_LAYER_DROP_MODE framedrop_mode;
INTER_LAYER_PRED disable_inter_layer_pred;
@@ -141,7 +142,12 @@ typedef struct SVC {
// Keep track of the frame buffer index updated/refreshed on the base
// temporal superframe.
- uint8_t fb_idx_upd_tl0[VPX_SS_MAX_LAYERS];
+ int fb_idx_upd_tl0[VPX_SS_MAX_LAYERS];
+
+ // Keep track of the spatial and temporal layer id of the frame that last
+ // updated the frame buffer index.
+ uint8_t fb_idx_spatial_layer_id[REF_FRAMES];
+ uint8_t fb_idx_temporal_layer_id[REF_FRAMES];
} SVC;
struct VP9_COMP;
@@ -201,6 +207,8 @@ void vp9_svc_check_reset_layer_rc_flag(struct VP9_COMP *const cpi);
void vp9_svc_constrain_inter_layer_pred(struct VP9_COMP *const cpi);
+void vp9_svc_assert_constraints_pattern(struct VP9_COMP *const cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 5eaa7a18a..2758314fb 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -248,7 +248,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK(extra_cfg, row_mt, 0, 1);
RANGE_CHECK(extra_cfg, motion_vector_unit_test, 0, 2);
RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2);
- RANGE_CHECK(extra_cfg, cpu_used, -8, 8);
+ RANGE_CHECK(extra_cfg, cpu_used, -9, 9);
RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6);
RANGE_CHECK(extra_cfg, tile_columns, 0, 6);
RANGE_CHECK(extra_cfg, tile_rows, 0, 2);
@@ -709,6 +709,8 @@ static vpx_codec_err_t ctrl_set_noise_sensitivity(vpx_codec_alg_priv_t *ctx,
va_list args) {
struct vp9_extracfg extra_cfg = ctx->extra_cfg;
extra_cfg.noise_sensitivity = CAST(VP9E_SET_NOISE_SENSITIVITY, args);
+ // TODO(jianj): Look into issue of noise estimation with high bitdepth.
+ if (ctx->cfg.g_bit_depth > 8) extra_cfg.noise_sensitivity = 0;
return update_extra_cfg(ctx, &extra_cfg);
}
@@ -1536,6 +1538,8 @@ static vpx_codec_err_t ctrl_set_svc_frame_drop_layer(vpx_codec_alg_priv_t *ctx,
cpi->svc.framedrop_mode = data->framedrop_mode;
for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl)
cpi->svc.framedrop_thresh[sl] = data->framedrop_thresh[sl];
+ // Don't allow max_consec_drop values below 1.
+ cpi->svc.max_consec_drop = VPXMAX(1, data->max_consec_drop);
return VPX_CODEC_OK;
}
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index b201d96f4..44519e063 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -786,7 +786,8 @@ typedef struct vpx_svc_ref_frame_config {
typedef enum {
CONSTRAINED_LAYER_DROP,
/**< Upper layers are constrained to drop if current layer drops. */
- LAYER_DROP, /**< Any spatial layer can drop. */
+ LAYER_DROP, /**< Any spatial layer can drop. */
+ FULL_SUPERFRAME_DROP, /**< Only full superframe can drop. */
} SVC_LAYER_DROP_MODE;
/*!\brief vp9 svc frame dropping parameters.
@@ -799,7 +800,8 @@ typedef enum {
typedef struct vpx_svc_frame_drop {
int framedrop_thresh[VPX_SS_MAX_LAYERS]; /**< Frame drop thresholds */
SVC_LAYER_DROP_MODE
- framedrop_mode; /**< Layer-based or constrained dropping. */
+ framedrop_mode; /**< Layer-based or constrained dropping. */
+ int max_consec_drop; /**< Maximum consecutive drops, for any layer. */
} vpx_svc_frame_drop_t;
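
A usage sketch for the extended struct; the control id VP9E_SET_SVC_FRAME_DROP_LAYER is inferred from the ctrl_set_svc_frame_drop_layer handler in vp9_cx_iface.c, and the threshold values are arbitrary:

#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"

// Enable full-superframe dropping with a 30% water mark per layer and cap
// consecutive drops at 2 (values below 1 are clamped to 1 by the encoder).
static vpx_codec_err_t set_svc_drop(vpx_codec_ctx_t *enc, int num_sl) {
  vpx_svc_frame_drop_t drop;
  int sl;
  for (sl = 0; sl < num_sl; ++sl) drop.framedrop_thresh[sl] = 30;
  drop.framedrop_mode = FULL_SUPERFRAME_DROP;
  drop.max_consec_drop = 2;
  return vpx_codec_control(enc, VP9E_SET_SVC_FRAME_DROP_LAYER, &drop);
}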
/*!\cond */
diff --git a/vpx_dsp/arm/avg_pred_neon.c b/vpx_dsp/arm/avg_pred_neon.c
index 1370ec2d2..5afdece0a 100644
--- a/vpx_dsp/arm/avg_pred_neon.c
+++ b/vpx_dsp/arm/avg_pred_neon.c
@@ -17,8 +17,8 @@
void vpx_comp_avg_pred_neon(uint8_t *comp, const uint8_t *pred, int width,
int height, const uint8_t *ref, int ref_stride) {
if (width > 8) {
- int x, y;
- for (y = 0; y < height; ++y) {
+ int x, y = height;
+ do {
for (x = 0; x < width; x += 16) {
const uint8x16_t p = vld1q_u8(pred + x);
const uint8x16_t r = vld1q_u8(ref + x);
@@ -28,28 +28,38 @@ void vpx_comp_avg_pred_neon(uint8_t *comp, const uint8_t *pred, int width,
comp += width;
pred += width;
ref += ref_stride;
- }
+ } while (--y);
+ } else if (width == 8) {
+ int i = width * height;
+ do {
+ const uint8x16_t p = vld1q_u8(pred);
+ uint8x16_t r;
+ const uint8x8_t r_0 = vld1_u8(ref);
+ const uint8x8_t r_1 = vld1_u8(ref + ref_stride);
+ r = vcombine_u8(r_0, r_1);
+ ref += 2 * ref_stride;
+ r = vrhaddq_u8(r, p);
+ vst1q_u8(comp, r);
+
+ pred += 16;
+ comp += 16;
+ i -= 16;
+ } while (i);
} else {
- int i;
- for (i = 0; i < width * height; i += 16) {
+ int i = width * height;
+ assert(width == 4);
+ do {
const uint8x16_t p = vld1q_u8(pred);
uint8x16_t r;
- if (width == 4) {
- r = load_unaligned_u8q(ref, ref_stride);
- ref += 4 * ref_stride;
- } else {
- const uint8x8_t r_0 = vld1_u8(ref);
- const uint8x8_t r_1 = vld1_u8(ref + ref_stride);
- assert(width == 8);
- r = vcombine_u8(r_0, r_1);
- ref += 2 * ref_stride;
- }
+ r = load_unaligned_u8q(ref, ref_stride);
+ ref += 4 * ref_stride;
r = vrhaddq_u8(r, p);
vst1q_u8(comp, r);
pred += 16;
comp += 16;
- }
+ i -= 16;
+ } while (i);
}
}
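
For reference, vrhaddq_u8 is a lane-wise rounding average, so every width path above computes the same scalar result; a model of the whole kernel:

#include <stdint.h>

// Scalar model of vpx_comp_avg_pred_neon: each output pixel is the rounded
// average (pred + ref + 1) >> 1, with pred/comp packed at `width` stride.
static void comp_avg_pred_scalar(uint8_t *comp, const uint8_t *pred, int width,
                                 int height, const uint8_t *ref,
                                 int ref_stride) {
  int x, y;
  for (y = 0; y < height; ++y) {
    for (x = 0; x < width; ++x)
      comp[x] = (uint8_t)((pred[x] + ref[x] + 1) >> 1);
    comp += width;
    pred += width;
    ref += ref_stride;
  }
}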
diff --git a/vpx_dsp/arm/subtract_neon.c b/vpx_dsp/arm/subtract_neon.c
index ce81fb630..eef123368 100644
--- a/vpx_dsp/arm/subtract_neon.c
+++ b/vpx_dsp/arm/subtract_neon.c
@@ -9,71 +9,72 @@
*/
#include <arm_neon.h>
+#include <assert.h>
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
+#include "vpx_dsp/arm/mem_neon.h"
void vpx_subtract_block_neon(int rows, int cols, int16_t *diff,
ptrdiff_t diff_stride, const uint8_t *src,
ptrdiff_t src_stride, const uint8_t *pred,
ptrdiff_t pred_stride) {
- int r, c;
+ int r = rows, c;
if (cols > 16) {
- for (r = 0; r < rows; ++r) {
+ do {
for (c = 0; c < cols; c += 32) {
- const uint8x16_t v_src_00 = vld1q_u8(&src[c + 0]);
- const uint8x16_t v_src_16 = vld1q_u8(&src[c + 16]);
- const uint8x16_t v_pred_00 = vld1q_u8(&pred[c + 0]);
- const uint8x16_t v_pred_16 = vld1q_u8(&pred[c + 16]);
- const uint16x8_t v_diff_lo_00 =
- vsubl_u8(vget_low_u8(v_src_00), vget_low_u8(v_pred_00));
- const uint16x8_t v_diff_hi_00 =
- vsubl_u8(vget_high_u8(v_src_00), vget_high_u8(v_pred_00));
- const uint16x8_t v_diff_lo_16 =
- vsubl_u8(vget_low_u8(v_src_16), vget_low_u8(v_pred_16));
- const uint16x8_t v_diff_hi_16 =
- vsubl_u8(vget_high_u8(v_src_16), vget_high_u8(v_pred_16));
- vst1q_s16(&diff[c + 0], vreinterpretq_s16_u16(v_diff_lo_00));
- vst1q_s16(&diff[c + 8], vreinterpretq_s16_u16(v_diff_hi_00));
- vst1q_s16(&diff[c + 16], vreinterpretq_s16_u16(v_diff_lo_16));
- vst1q_s16(&diff[c + 24], vreinterpretq_s16_u16(v_diff_hi_16));
+ const uint8x16_t s0 = vld1q_u8(&src[c + 0]);
+ const uint8x16_t s1 = vld1q_u8(&src[c + 16]);
+ const uint8x16_t p0 = vld1q_u8(&pred[c + 0]);
+ const uint8x16_t p1 = vld1q_u8(&pred[c + 16]);
+ const uint16x8_t d0 = vsubl_u8(vget_low_u8(s0), vget_low_u8(p0));
+ const uint16x8_t d1 = vsubl_u8(vget_high_u8(s0), vget_high_u8(p0));
+ const uint16x8_t d2 = vsubl_u8(vget_low_u8(s1), vget_low_u8(p1));
+ const uint16x8_t d3 = vsubl_u8(vget_high_u8(s1), vget_high_u8(p1));
+ vst1q_s16(&diff[c + 0], vreinterpretq_s16_u16(d0));
+ vst1q_s16(&diff[c + 8], vreinterpretq_s16_u16(d1));
+ vst1q_s16(&diff[c + 16], vreinterpretq_s16_u16(d2));
+ vst1q_s16(&diff[c + 24], vreinterpretq_s16_u16(d3));
}
diff += diff_stride;
pred += pred_stride;
src += src_stride;
- }
+ } while (--r);
} else if (cols > 8) {
- for (r = 0; r < rows; ++r) {
- const uint8x16_t v_src = vld1q_u8(&src[0]);
- const uint8x16_t v_pred = vld1q_u8(&pred[0]);
- const uint16x8_t v_diff_lo =
- vsubl_u8(vget_low_u8(v_src), vget_low_u8(v_pred));
- const uint16x8_t v_diff_hi =
- vsubl_u8(vget_high_u8(v_src), vget_high_u8(v_pred));
- vst1q_s16(&diff[0], vreinterpretq_s16_u16(v_diff_lo));
- vst1q_s16(&diff[8], vreinterpretq_s16_u16(v_diff_hi));
+ do {
+ const uint8x16_t s = vld1q_u8(&src[0]);
+ const uint8x16_t p = vld1q_u8(&pred[0]);
+ const uint16x8_t d0 = vsubl_u8(vget_low_u8(s), vget_low_u8(p));
+ const uint16x8_t d1 = vsubl_u8(vget_high_u8(s), vget_high_u8(p));
+ vst1q_s16(&diff[0], vreinterpretq_s16_u16(d0));
+ vst1q_s16(&diff[8], vreinterpretq_s16_u16(d1));
diff += diff_stride;
pred += pred_stride;
src += src_stride;
- }
+ } while (--r);
} else if (cols > 4) {
- for (r = 0; r < rows; ++r) {
- const uint8x8_t v_src = vld1_u8(&src[0]);
- const uint8x8_t v_pred = vld1_u8(&pred[0]);
- const uint16x8_t v_diff = vsubl_u8(v_src, v_pred);
+ do {
+ const uint8x8_t s = vld1_u8(&src[0]);
+ const uint8x8_t p = vld1_u8(&pred[0]);
+ const uint16x8_t v_diff = vsubl_u8(s, p);
vst1q_s16(&diff[0], vreinterpretq_s16_u16(v_diff));
diff += diff_stride;
pred += pred_stride;
src += src_stride;
- }
+ } while (--r);
} else {
- for (r = 0; r < rows; ++r) {
- for (c = 0; c < cols; ++c) diff[c] = src[c] - pred[c];
-
- diff += diff_stride;
- pred += pred_stride;
- src += src_stride;
- }
+ assert(cols == 4);
+ do {
+ const uint8x8_t s = load_unaligned_u8(src, (int)src_stride);
+ const uint8x8_t p = load_unaligned_u8(pred, (int)pred_stride);
+ const uint16x8_t d = vsubl_u8(s, p);
+ vst1_s16(diff + 0 * diff_stride, vreinterpret_s16_u16(vget_low_u16(d)));
+ vst1_s16(diff + 1 * diff_stride, vreinterpret_s16_u16(vget_high_u16(d)));
+ diff += 2 * diff_stride;
+ pred += 2 * pred_stride;
+ src += 2 * src_stride;
+ r -= 2;
+ } while (r);
}
}
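
The removed generic tail loop doubled as documentation for the kernel; reconstructed from those lines, this is the element-wise operation every path above vectorizes:

#include <stddef.h>
#include <stdint.h>

// Scalar model: diff = src - pred, element-wise, with independent strides.
static void subtract_block_scalar(int rows, int cols, int16_t *diff,
                                  ptrdiff_t diff_stride, const uint8_t *src,
                                  ptrdiff_t src_stride, const uint8_t *pred,
                                  ptrdiff_t pred_stride) {
  int r, c;
  for (r = 0; r < rows; ++r) {
    for (c = 0; c < cols; ++c) diff[c] = (int16_t)(src[c] - pred[c]);
    diff += diff_stride;
    src += src_stride;
    pred += pred_stride;
  }
}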
diff --git a/vpx_dsp/ppc/deblock_vsx.c b/vpx_dsp/ppc/deblock_vsx.c
new file mode 100644
index 000000000..4329081ee
--- /dev/null
+++ b/vpx_dsp/ppc/deblock_vsx.c
@@ -0,0 +1,378 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/ppc/types_vsx.h"
+
+extern const int16_t vpx_rv[];
+
+static const uint8x16_t load_merge = { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A,
+ 0x0C, 0x0E, 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F };
+
+static const uint8x16_t st8_perm = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+ 0x06, 0x07, 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F };
+
+static INLINE uint8x16_t vec_abd_s8(uint8x16_t a, uint8x16_t b) {
+ return vec_sub(vec_max(a, b), vec_min(a, b));
+}
+
+static INLINE uint8x16_t apply_filter(uint8x16_t ctx[4], uint8x16_t v,
+ uint8x16_t filter) {
+ const uint8x16_t k1 = vec_avg(ctx[0], ctx[1]);
+ const uint8x16_t k2 = vec_avg(ctx[3], ctx[2]);
+ const uint8x16_t k3 = vec_avg(k1, k2);
+ const uint8x16_t f_a = vec_max(vec_abd_s8(v, ctx[0]), vec_abd_s8(v, ctx[1]));
+ const uint8x16_t f_b = vec_max(vec_abd_s8(v, ctx[2]), vec_abd_s8(v, ctx[3]));
+ const bool8x16_t mask = vec_cmplt(vec_max(f_a, f_b), filter);
+ return vec_sel(v, vec_avg(k3, v), mask);
+}
+
+static INLINE void vert_ctx(uint8x16_t ctx[4], int col, uint8_t *src,
+ int stride) {
+ ctx[0] = vec_vsx_ld(col - 2 * stride, src);
+ ctx[1] = vec_vsx_ld(col - stride, src);
+ ctx[2] = vec_vsx_ld(col + stride, src);
+ ctx[3] = vec_vsx_ld(col + 2 * stride, src);
+}
+
+static INLINE void horz_ctx(uint8x16_t ctx[4], uint8x16_t left_ctx,
+ uint8x16_t v, uint8x16_t right_ctx) {
+ static const uint8x16_t l2_perm = { 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
+ 0x1A, 0x1B, 0x1C, 0x1D };
+
+ static const uint8x16_t l1_perm = { 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14,
+ 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A,
+ 0x1B, 0x1C, 0x1D, 0x1E };
+
+ static const uint8x16_t r1_perm = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
+ 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C,
+ 0x0D, 0x0E, 0x0F, 0x10 };
+
+ static const uint8x16_t r2_perm = { 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+ 0x0E, 0x0F, 0x10, 0x11 };
+ ctx[0] = vec_perm(left_ctx, v, l2_perm);
+ ctx[1] = vec_perm(left_ctx, v, l1_perm);
+ ctx[2] = vec_perm(v, right_ctx, r1_perm);
+ ctx[3] = vec_perm(v, right_ctx, r2_perm);
+}
+
+void vpx_post_proc_down_and_across_mb_row_vsx(unsigned char *src_ptr,
+ unsigned char *dst_ptr,
+ int src_pixels_per_line,
+ int dst_pixels_per_line, int cols,
+ unsigned char *f, int size) {
+ int row, col;
+ uint8x16_t ctx[4], out, v, left_ctx;
+
+ for (row = 0; row < size; row++) {
+ for (col = 0; col < cols - 8; col += 16) {
+ const uint8x16_t filter = vec_vsx_ld(col, f);
+ v = vec_vsx_ld(col, src_ptr);
+ vert_ctx(ctx, col, src_ptr, src_pixels_per_line);
+ vec_vsx_st(apply_filter(ctx, v, filter), col, dst_ptr);
+ }
+
+ if (col != cols) {
+ const uint8x16_t filter = vec_vsx_ld(col, f);
+ v = vec_vsx_ld(col, src_ptr);
+ vert_ctx(ctx, col, src_ptr, src_pixels_per_line);
+ out = apply_filter(ctx, v, filter);
+ vec_vsx_st(vec_perm(out, v, st8_perm), col, dst_ptr);
+ }
+
+ /* now post_proc_across */
+ left_ctx = vec_splats(dst_ptr[0]);
+ v = vec_vsx_ld(0, dst_ptr);
+ for (col = 0; col < cols - 8; col += 16) {
+ const uint8x16_t filter = vec_vsx_ld(col, f);
+ const uint8x16_t right_ctx = (col + 16 == cols)
+ ? vec_splats(dst_ptr[cols - 1])
+ : vec_vsx_ld(col, dst_ptr + 16);
+ horz_ctx(ctx, left_ctx, v, right_ctx);
+ vec_vsx_st(apply_filter(ctx, v, filter), col, dst_ptr);
+ left_ctx = v;
+ v = right_ctx;
+ }
+
+ if (col != cols) {
+ const uint8x16_t filter = vec_vsx_ld(col, f);
+ const uint8x16_t right_ctx = vec_splats(dst_ptr[cols - 1]);
+ horz_ctx(ctx, left_ctx, v, right_ctx);
+ out = apply_filter(ctx, v, filter);
+ vec_vsx_st(vec_perm(out, v, st8_perm), col, dst_ptr);
+ }
+
+ src_ptr += src_pixels_per_line;
+ dst_ptr += dst_pixels_per_line;
+ }
+}
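
A scalar model of apply_filter() as used above (it mirrors the VSX code, not the original VP8 C kernel, which is organized differently): the center pixel is blended with a rounded average of its four context pixels only when no context pixel deviates from it by flimit or more.

#include <stdint.h>
#include <stdlib.h>

// ctx holds the 4 neighbors (two per side, vertical or horizontal pass),
// v is the center pixel; vec_avg rounds, hence the +1 in each average.
static uint8_t apply_filter_scalar(const uint8_t ctx[4], uint8_t v,
                                   uint8_t flimit) {
  const int k1 = (ctx[0] + ctx[1] + 1) >> 1;
  const int k2 = (ctx[3] + ctx[2] + 1) >> 1;
  const int k3 = (k1 + k2 + 1) >> 1;
  int i, max_diff = 0;
  for (i = 0; i < 4; ++i) {
    const int d = abs((int)v - (int)ctx[i]);
    if (d > max_diff) max_diff = d;
  }
  return (max_diff < flimit) ? (uint8_t)((k3 + v + 1) >> 1) : v;
}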
+
+// C: s[c + 7]
+static INLINE int16x8_t next7l_s16(uint8x16_t c) {
+ static const uint8x16_t next7_perm = {
+ 0x07, 0x10, 0x08, 0x11, 0x09, 0x12, 0x0A, 0x13,
+ 0x0B, 0x14, 0x0C, 0x15, 0x0D, 0x16, 0x0E, 0x17,
+ };
+ return (int16x8_t)vec_perm(c, vec_zeros_u8, next7_perm);
+}
+
+// Slide across window and add.
+static INLINE int16x8_t slide_sum_s16(int16x8_t x) {
+ // x = A B C D E F G H
+ //
+ // 0 A B C D E F G
+ const int16x8_t sum1 = vec_add(x, vec_slo(x, vec_splats((int8_t)(2 << 3))));
+ // 0 0 A B C D E F
+ const int16x8_t sum2 = vec_add(vec_slo(x, vec_splats((int8_t)(4 << 3))),
+ // 0 0 0 A B C D E
+ vec_slo(x, vec_splats((int8_t)(6 << 3))));
+ // 0 0 0 0 A B C D
+ const int16x8_t sum3 = vec_add(vec_slo(x, vec_splats((int8_t)(8 << 3))),
+ // 0 0 0 0 0 A B C
+ vec_slo(x, vec_splats((int8_t)(10 << 3))));
+ // 0 0 0 0 0 0 A B
+ const int16x8_t sum4 = vec_add(vec_slo(x, vec_splats((int8_t)(12 << 3))),
+ // 0 0 0 0 0 0 0 A
+ vec_slo(x, vec_splats((int8_t)(14 << 3))));
+ return vec_add(vec_add(sum1, sum2), vec_add(sum3, sum4));
+}
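
The shifted adds above leave lane i holding the sum of lanes 0 through i (a prefix sum over the window); a scalar model:

#include <stdint.h>

// Scalar model of slide_sum_s16: out[i] = x[0] + x[1] + ... + x[i].
static void slide_sum_scalar(const int16_t x[8], int16_t out[8]) {
  int i;
  int32_t acc = 0;
  for (i = 0; i < 8; ++i) {
    acc += x[i];
    out[i] = (int16_t)acc;
  }
}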
+
+// Slide across window and add.
+static INLINE int32x4_t slide_sumsq_s32(int32x4_t xsq_even, int32x4_t xsq_odd) {
+ // 0 A C E
+ // + 0 B D F
+ int32x4_t sumsq_1 = vec_add(vec_slo(xsq_even, vec_splats((int8_t)(4 << 3))),
+ vec_slo(xsq_odd, vec_splats((int8_t)(4 << 3))));
+ // 0 0 A C
+ // + 0 0 B D
+ int32x4_t sumsq_2 = vec_add(vec_slo(xsq_even, vec_splats((int8_t)(8 << 3))),
+ vec_slo(xsq_odd, vec_splats((int8_t)(8 << 3))));
+ // 0 0 0 A
+ // + 0 0 0 B
+ int32x4_t sumsq_3 = vec_add(vec_slo(xsq_even, vec_splats((int8_t)(12 << 3))),
+ vec_slo(xsq_odd, vec_splats((int8_t)(12 << 3))));
+ sumsq_1 = vec_add(sumsq_1, xsq_even);
+ sumsq_2 = vec_add(sumsq_2, sumsq_3);
+ return vec_add(sumsq_1, sumsq_2);
+}
+
+// C: (b + sum + val) >> 4
+static INLINE int16x8_t filter_s16(int16x8_t b, int16x8_t sum, int16x8_t val) {
+ return vec_sra(vec_add(vec_add(b, sum), val), vec_splats((uint16_t)4));
+}
+
+// C: sumsq * 15 - sum * sum
+static INLINE bool16x8_t mask_s16(int32x4_t sumsq_even, int32x4_t sumsq_odd,
+ int16x8_t sum, int32x4_t lim) {
+ static const uint8x16_t mask_merge = { 0x00, 0x01, 0x10, 0x11, 0x04, 0x05,
+ 0x14, 0x15, 0x08, 0x09, 0x18, 0x19,
+ 0x0C, 0x0D, 0x1C, 0x1D };
+ const int32x4_t sumsq_odd_scaled =
+ vec_mul(sumsq_odd, vec_splats((int32_t)15));
+ const int32x4_t sumsq_even_scaled =
+ vec_mul(sumsq_even, vec_splats((int32_t)15));
+ const int32x4_t thres_odd = vec_sub(sumsq_odd_scaled, vec_mulo(sum, sum));
+ const int32x4_t thres_even = vec_sub(sumsq_even_scaled, vec_mule(sum, sum));
+
+ const bool32x4_t mask_odd = vec_cmplt(thres_odd, lim);
+ const bool32x4_t mask_even = vec_cmplt(thres_even, lim);
+ return vec_perm((bool16x8_t)mask_even, (bool16x8_t)mask_odd, mask_merge);
+}
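
The quantity compared against flimit is 15 * sumsq - sum * sum, which is 15^2 times the variance of the 15-sample window, so the mask selects flat regions for filtering; in scalar form:

#include <stdint.h>

// Scalar model of the gate in mask_s16: since var = sumsq / 15 - (sum / 15)^2,
// 15 * sumsq - sum * sum == 225 * var, and the filter applies only where this
// scaled variance stays under flimit.
static int low_variance(int32_t sumsq, int32_t sum, int32_t flimit) {
  return 15 * sumsq - sum * sum < flimit;
}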
+
+void vpx_mbpost_proc_across_ip_vsx(unsigned char *src, int pitch, int rows,
+ int cols, int flimit) {
+ int row, col;
+ const int32x4_t lim = vec_splats(flimit);
+
+ // 8 columns are processed at a time.
+ assert(cols % 8 == 0);
+
+ for (row = 0; row < rows; row++) {
+ // The sum is signed and requires at most 13 bits.
+ // (8 bits + sign) * 15 (4 bits)
+ int16x8_t sum;
+ // The sum of squares requires at most 20 bits.
+ // (16 bits + sign) * 15 (4 bits)
+ int32x4_t sumsq_even, sumsq_odd;
+
+ // Fill left context with first col.
+ int16x8_t left_ctx = vec_splats((int16_t)src[0]);
+ int16_t s = src[0] * 9;
+ int32_t ssq = src[0] * src[0] * 9 + 16;
+
+ // Fill the next 6 columns of the sliding window with cols 2 to 7.
+ for (col = 1; col <= 6; ++col) {
+ s += src[col];
+ ssq += src[col] * src[col];
+ }
+ // Set this sum to every element in the window.
+ sum = vec_splats(s);
+ sumsq_even = vec_splats(ssq);
+ sumsq_odd = vec_splats(ssq);
+
+ for (col = 0; col < cols; col += 8) {
+ bool16x8_t mask;
+ int16x8_t filtered, masked;
+ uint8x16_t out;
+
+ const uint8x16_t val = vec_vsx_ld(0, src + col);
+ const int16x8_t val_high = unpack_to_s16_h(val);
+
+ // C: s[c + 7]
+ const int16x8_t right_ctx = (col + 8 == cols)
+ ? vec_splats((int16_t)src[col + 7])
+ : next7l_s16(val);
+
+ // C: x = s[c + 7] - s[c - 8];
+ const int16x8_t x = vec_sub(right_ctx, left_ctx);
+ const int32x4_t xsq_even =
+ vec_sub(vec_mule(right_ctx, right_ctx), vec_mule(left_ctx, left_ctx));
+ const int32x4_t xsq_odd =
+ vec_sub(vec_mulo(right_ctx, right_ctx), vec_mulo(left_ctx, left_ctx));
+
+ const int32x4_t sumsq_tmp = slide_sumsq_s32(xsq_even, xsq_odd);
+ // A C E G
+ // 0 B D F
+ // 0 A C E
+ // 0 0 B D
+ // 0 0 A C
+ // 0 0 0 B
+ // 0 0 0 A
+ sumsq_even = vec_add(sumsq_even, sumsq_tmp);
+ // B D F G
+ // A C E G
+ // 0 B D F
+ // 0 A C E
+ // 0 0 B D
+ // 0 0 A C
+ // 0 0 0 B
+ // 0 0 0 A
+ sumsq_odd = vec_add(sumsq_odd, vec_add(sumsq_tmp, xsq_odd));
+
+ sum = vec_add(sum, slide_sum_s16(x));
+
+ // C: (8 + sum + s[c]) >> 4
+ filtered = filter_s16(vec_splats((int16_t)8), sum, val_high);
+ // C: sumsq * 15 - sum * sum
+ mask = mask_s16(sumsq_even, sumsq_odd, sum, lim);
+ masked = vec_sel(val_high, filtered, mask);
+
+ out = vec_perm((uint8x16_t)masked, vec_vsx_ld(0, src + col), load_merge);
+ vec_vsx_st(out, 0, src + col);
+
+ // Update window sum and square sum
+ sum = vec_splat(sum, 7);
+ sumsq_even = vec_splat(sumsq_odd, 3);
+ sumsq_odd = vec_splat(sumsq_odd, 3);
+
+ // C: s[c - 8] (for next iteration)
+ left_ctx = val_high;
+ }
+ src += pitch;
+ }
+}
+
+void vpx_mbpost_proc_down_vsx(uint8_t *dst, int pitch, int rows, int cols,
+ int flimit) {
+ int col, row, i;
+ int16x8_t window[16];
+ const int32x4_t lim = vec_splats(flimit);
+
+ // 8 columns are processed at a time.
+ assert(cols % 8 == 0);
+ // If rows is less than 8 the bottom border extension fails.
+ assert(rows >= 8);
+
+ for (col = 0; col < cols; col += 8) {
+ // The sum is signed and requires at most 13 bits.
+ // (8 bits + sign) * 15 (4 bits)
+ int16x8_t r1, sum;
+ // The sum of squares requires at most 20 bits.
+ // (16 bits + sign) * 15 (4 bits)
+ int32x4_t sumsq_even, sumsq_odd;
+
+ r1 = unpack_to_s16_h(vec_vsx_ld(0, dst));
+ // Fill sliding window with first row.
+ for (i = 0; i <= 8; i++) {
+ window[i] = r1;
+ }
+ // First 9 rows of the sliding window are the same.
+ // sum = r1 * 9
+ sum = vec_mladd(r1, vec_splats((int16_t)9), vec_zeros_s16);
+
+ // sumsq = r1 * r1 * 9
+ sumsq_even = vec_mule(sum, r1);
+ sumsq_odd = vec_mulo(sum, r1);
+
+ // Fill the next 6 rows of the sliding window with rows 2 to 7.
+ for (i = 1; i <= 6; ++i) {
+ const int16x8_t next_row = unpack_to_s16_h(vec_vsx_ld(i * pitch, dst));
+ window[i + 8] = next_row;
+ sum = vec_add(sum, next_row);
+ sumsq_odd = vec_add(sumsq_odd, vec_mulo(next_row, next_row));
+ sumsq_even = vec_add(sumsq_even, vec_mule(next_row, next_row));
+ }
+
+ for (row = 0; row < rows; row++) {
+ int32x4_t d15_even, d15_odd, d0_even, d0_odd;
+ bool16x8_t mask;
+ int16x8_t filtered, masked;
+ uint8x16_t out;
+
+ const int16x8_t rv = vec_vsx_ld(0, vpx_rv + (row & 127));
+
+ // Move the sliding window
+ if (row + 7 < rows) {
+ window[15] = unpack_to_s16_h(vec_vsx_ld((row + 7) * pitch, dst));
+ } else {
+ window[15] = window[14];
+ }
+
+ // C: sum += s[7 * pitch] - s[-8 * pitch];
+ sum = vec_add(sum, vec_sub(window[15], window[0]));
+
+ // C: sumsq += s[7 * pitch] * s[7 * pitch] - s[-8 * pitch] * s[-8 *
+ // pitch];
+ // Optimization Note: Caching a squared-window for odd and even is
+ // slower than just repeating the multiplies.
+ d15_odd = vec_mulo(window[15], window[15]);
+ d15_even = vec_mule(window[15], window[15]);
+ d0_odd = vec_mulo(window[0], window[0]);
+ d0_even = vec_mule(window[0], window[0]);
+ sumsq_odd = vec_add(sumsq_odd, vec_sub(d15_odd, d0_odd));
+ sumsq_even = vec_add(sumsq_even, vec_sub(d15_even, d0_even));
+
+ // C: (vpx_rv[(r & 127) + (c & 7)] + sum + s[0]) >> 4
+ filtered = filter_s16(rv, sum, window[8]);
+
+ // C: sumsq * 15 - sum * sum
+ mask = mask_s16(sumsq_even, sumsq_odd, sum, lim);
+ masked = vec_sel(window[8], filtered, mask);
+
+ // TODO(ltrudeau) If cols % 16 == 0, we could just process 16 per
+ // iteration
+ out = vec_perm((uint8x16_t)masked, vec_vsx_ld(0, dst + row * pitch),
+ load_merge);
+ vec_vsx_st(out, 0, dst + row * pitch);
+
+ // Optimization Note: Turns out that the following loop is faster than
+ // using pointers to manage the sliding window.
+ for (i = 1; i < 16; i++) {
+ window[i - 1] = window[i];
+ }
+ }
+ dst += 8;
+ }
+}
diff --git a/vpx_dsp/ppc/inv_txfm_vsx.c b/vpx_dsp/ppc/inv_txfm_vsx.c
index f095cb0a4..6603b85ac 100644
--- a/vpx_dsp/ppc/inv_txfm_vsx.c
+++ b/vpx_dsp/ppc/inv_txfm_vsx.c
@@ -76,6 +76,8 @@ static int16x8_t cospi29_v = { 2404, 2404, 2404, 2404, 2404, 2404, 2404, 2404 };
static int16x8_t cospi30_v = { 1606, 1606, 1606, 1606, 1606, 1606, 1606, 1606 };
static int16x8_t cospi31_v = { 804, 804, 804, 804, 804, 804, 804, 804 };
+static uint8x16_t mask1 = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 };
#define ROUND_SHIFT_INIT \
const int32x4_t shift = vec_sl(vec_splat_s32(1), vec_splat_u32(13)); \
const uint32x4_t shift14 = vec_splat_u32(14);
@@ -107,6 +109,15 @@ static int16x8_t cospi31_v = { 804, 804, 804, 804, 804, 804, 804, 804 };
out1 = vec_sub(step0, step1); \
out1 = vec_perm(out1, out1, mask0);
+#define PACK_STORE(v0, v1) \
+ tmp16_0 = vec_add(vec_perm(d_u0, d_u1, mask1), v0); \
+ tmp16_1 = vec_add(vec_perm(d_u2, d_u3, mask1), v1); \
+ output_v = vec_packsu(tmp16_0, tmp16_1); \
+ \
+ vec_vsx_st(output_v, 0, tmp_dest); \
+ for (i = 0; i < 4; i++) \
+ for (j = 0; j < 4; j++) dest[j * stride + i] = tmp_dest[j * 4 + i];
+
void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest,
int stride) {
int i, j;
@@ -114,13 +125,10 @@ void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest,
int16x8_t step0, step1, tmp16_0, tmp16_1, t_out0, t_out1;
uint8x16_t mask0 = { 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF,
0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 };
- uint8x16_t mask1 = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
- 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 };
int16x8_t v0 = load_tran_low(0, input);
int16x8_t v1 = load_tran_low(8 * sizeof(*input), input);
int16x8_t t0 = vec_mergeh(v0, v1);
int16x8_t t1 = vec_mergel(v0, v1);
-
uint8x16_t dest0 = vec_vsx_ld(0, dest);
uint8x16_t dest1 = vec_vsx_ld(stride, dest);
uint8x16_t dest2 = vec_vsx_ld(2 * stride, dest);
@@ -130,6 +138,7 @@ void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest,
int16x8_t d_u1 = (int16x8_t)vec_mergeh(dest1, zerov);
int16x8_t d_u2 = (int16x8_t)vec_mergeh(dest2, zerov);
int16x8_t d_u3 = (int16x8_t)vec_mergeh(dest3, zerov);
+
uint8x16_t output_v;
uint8_t tmp_dest[16];
ROUND_SHIFT_INIT
@@ -148,13 +157,8 @@ void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest,
PIXEL_ADD4(v0, t_out0);
PIXEL_ADD4(v1, t_out1);
- tmp16_0 = vec_add(vec_perm(d_u0, d_u1, mask1), v0);
- tmp16_1 = vec_add(vec_perm(d_u2, d_u3, mask1), v1);
- output_v = vec_packsu(tmp16_0, tmp16_1);
- vec_vsx_st(output_v, 0, tmp_dest);
- for (i = 0; i < 4; i++)
- for (j = 0; j < 4; j++) dest[j * stride + i] = tmp_dest[j * 4 + i];
+ PACK_STORE(v0, v1);
}
#define TRANSPOSE8x8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \
@@ -1062,3 +1066,67 @@ void vpx_idct32x32_1024_add_vsx(const tran_low_t *input, uint8_t *dest,
ADD_STORE_BLOCK(src2, 16);
ADD_STORE_BLOCK(src3, 24);
}
+
+#define TRANSFORM_COLS \
+ v32_a = vec_add(v32_a, v32_c); \
+ v32_d = vec_sub(v32_d, v32_b); \
+ v32_e = vec_sub(v32_a, v32_d); \
+ v32_e = vec_sra(v32_e, one); \
+ v32_b = vec_sub(v32_e, v32_b); \
+ v32_c = vec_sub(v32_e, v32_c); \
+ v32_a = vec_sub(v32_a, v32_b); \
+ v32_d = vec_add(v32_d, v32_c); \
+ v_a = vec_packs(v32_a, v32_b); \
+ v_c = vec_packs(v32_c, v32_d);
+
+#define TRANSPOSE_WHT \
+ tmp_a = vec_mergeh(v_a, v_c); \
+ tmp_c = vec_mergel(v_a, v_c); \
+ v_a = vec_mergeh(tmp_a, tmp_c); \
+ v_c = vec_mergel(tmp_a, tmp_c);
+
+void vpx_iwht4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest,
+ int stride) {
+ int16x8_t v_a = load_tran_low(0, input);
+ int16x8_t v_c = load_tran_low(8 * sizeof(*input), input);
+ int16x8_t tmp_a, tmp_c;
+ uint16x8_t two = vec_splat_u16(2);
+ uint32x4_t one = vec_splat_u32(1);
+ int16x8_t tmp16_0, tmp16_1;
+ int32x4_t v32_a, v32_c, v32_d, v32_b, v32_e;
+ uint8x16_t dest0 = vec_vsx_ld(0, dest);
+ uint8x16_t dest1 = vec_vsx_ld(stride, dest);
+ uint8x16_t dest2 = vec_vsx_ld(2 * stride, dest);
+ uint8x16_t dest3 = vec_vsx_ld(3 * stride, dest);
+ int16x8_t d_u0 = (int16x8_t)unpack_to_u16_h(dest0);
+ int16x8_t d_u1 = (int16x8_t)unpack_to_u16_h(dest1);
+ int16x8_t d_u2 = (int16x8_t)unpack_to_u16_h(dest2);
+ int16x8_t d_u3 = (int16x8_t)unpack_to_u16_h(dest3);
+ uint8x16_t output_v;
+ uint8_t tmp_dest[16];
+ int i, j;
+
+ v_a = vec_sra(v_a, two);
+ v_c = vec_sra(v_c, two);
+
+ TRANSPOSE_WHT;
+
+ v32_a = vec_unpackh(v_a);
+ v32_c = vec_unpackl(v_a);
+
+ v32_d = vec_unpackh(v_c);
+ v32_b = vec_unpackl(v_c);
+
+ TRANSFORM_COLS;
+
+ TRANSPOSE_WHT;
+
+ v32_a = vec_unpackh(v_a);
+ v32_c = vec_unpackl(v_a);
+ v32_d = vec_unpackh(v_c);
+ v32_b = vec_unpackl(v_c);
+
+ TRANSFORM_COLS;
+
+ PACK_STORE(v_a, v_c);
+}
diff --git a/vpx_dsp/ppc/quantize_vsx.c b/vpx_dsp/ppc/quantize_vsx.c
index e037f89e3..3a9092f64 100644
--- a/vpx_dsp/ppc/quantize_vsx.c
+++ b/vpx_dsp/ppc/quantize_vsx.c
@@ -20,31 +20,70 @@ static INLINE int16x8_t vec_sign(int16x8_t a, int16x8_t b) {
return vec_xor(vec_add(a, mask), mask);
}
+// Sets the value of each 32-bit integer to 1 when the corresponding value in a
+// is negative.
+static INLINE int32x4_t vec_is_neg(int32x4_t a) {
+ return vec_sr(a, vec_shift_sign_s32);
+}
+
// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit
// integers, and return the high 16 bits of the intermediate integers.
+// (a * b) >> 16
static INLINE int16x8_t vec_mulhi(int16x8_t a, int16x8_t b) {
// madds does ((A * B) >>15) + C, we need >> 16, so we perform an extra right
// shift.
- return vec_sra(vec_madds(a, b, vec_zeros_s16), vec_ones_s16);
+ return vec_sra(vec_madds(a, b, vec_zeros_s16), vec_ones_u16);
}
+// Quantization function used for 4x4, 8x8 and 16x16 blocks.
static INLINE int16x8_t quantize_coeff(int16x8_t coeff, int16x8_t coeff_abs,
int16x8_t round, int16x8_t quant,
int16x8_t quant_shift, bool16x8_t mask) {
- int16x8_t rounded, qcoeff;
- rounded = vec_vaddshs(coeff_abs, round);
- qcoeff = vec_mulhi(rounded, quant);
+ const int16x8_t rounded = vec_vaddshs(coeff_abs, round);
+ int16x8_t qcoeff = vec_mulhi(rounded, quant);
qcoeff = vec_add(qcoeff, rounded);
qcoeff = vec_mulhi(qcoeff, quant_shift);
qcoeff = vec_sign(qcoeff, coeff);
return vec_and(qcoeff, mask);
}
+// Quantization function used for 32x32 blocks.
+static INLINE int16x8_t quantize_coeff_32(int16x8_t coeff, int16x8_t coeff_abs,
+ int16x8_t round, int16x8_t quant,
+ int16x8_t quant_shift,
+ bool16x8_t mask) {
+ const int16x8_t rounded = vec_vaddshs(coeff_abs, round);
+ int16x8_t qcoeff = vec_mulhi(rounded, quant);
+ qcoeff = vec_add(qcoeff, rounded);
+ // 32x32 blocks require an extra multiplication by 2. This compensates for
+ // the extra right shift added in vec_mulhi, so vec_madds can be used
+ // directly instead of vec_mulhi: (((a * b) >> 15) >> 1) << 1 == (a * b) >> 15.
+ qcoeff = vec_madds(qcoeff, quant_shift, vec_zeros_s16);
+ qcoeff = vec_sign(qcoeff, coeff);
+ return vec_and(qcoeff, mask);
+}
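
In scalar terms the 32x32 path differs from quantize_coeff only in the width of the final shift, which is what the vec_madds substitution exploits; a sketch (the saturating add of vec_vaddshs and the sign/mask steps are omitted):

#include <stdint.h>

// Magnitude part of the 32x32 quantizer: same as the 4x4/8x8/16x16 path
// except the last shift is 15 instead of 16, so the result is twice as big.
// vec_madds computes ((a * b) >> 15), i.e. exactly this final step.
static int16_t quantize_32_scalar(int16_t abs_coeff, int16_t round,
                                  int16_t quant, int16_t quant_shift) {
  const int32_t rounded = abs_coeff + round;
  int32_t tmp = (rounded * quant) >> 16;
  tmp = ((tmp + rounded) * quant_shift) >> 15;  // >> 16 on non-32x32 blocks
  return (int16_t)tmp;
}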
+
+// DeQuantization function used for 32x32 blocks. Quantized coeff of 32x32
+// blocks are twice as big as for other block sizes. As such, using
+// vec_mladd results in overflow.
+static INLINE int16x8_t dequantize_coeff_32(int16x8_t qcoeff,
+ int16x8_t dequant) {
+ int16x8_t dqcoeff;
+ int32x4_t dqcoeffe = vec_mule(qcoeff, dequant);
+ int32x4_t dqcoeffo = vec_mulo(qcoeff, dequant);
+ // Add 1 if negative to round towards zero, because the C reference uses
+ // division.
+ dqcoeffe = vec_add(dqcoeffe, vec_is_neg(dqcoeffe));
+ dqcoeffo = vec_add(dqcoeffo, vec_is_neg(dqcoeffo));
+ dqcoeffe = vec_sra(dqcoeffe, vec_ones_u32);
+ dqcoeffo = vec_sra(dqcoeffo, vec_ones_u32);
+ dqcoeff = vec_pack(dqcoeffe, dqcoeffo);
+ return vec_perm(dqcoeff, dqcoeff, vec_perm_merge);
+}
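
The add-one-if-negative step implements truncating division by two: an arithmetic right shift alone rounds toward negative infinity, so odd negative values need the +1 correction to match C's x / 2. A quick check:

#include <assert.h>
#include <stdint.h>

// (x + (x < 0)) >> 1 == x / 2 for int32_t, assuming arithmetic right shift
// (which vec_sra guarantees in the VSX code above).
static int32_t half_toward_zero(int32_t x) { return (x + (x < 0)) >> 1; }

int main(void) {
  assert(half_toward_zero(-7) == -7 / 2);  // -3, not -4
  assert(half_toward_zero(7) == 7 / 2);    // 3
  return 0;
}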
+
static INLINE int16x8_t nonzero_scanindex(int16x8_t qcoeff, bool16x8_t mask,
- const int16_t *iscan_ptr) {
- bool16x8_t zero_coeff;
- int16x8_t scan = vec_vsx_ld(0, iscan_ptr);
- zero_coeff = vec_cmpeq(qcoeff, vec_zeros_s16);
+ const int16_t *iscan_ptr, int index) {
+ int16x8_t scan = vec_vsx_ld(index, iscan_ptr);
+ bool16x8_t zero_coeff = vec_cmpeq(qcoeff, vec_zeros_s16);
scan = vec_sub(scan, mask);
return vec_andc(scan, zero_coeff);
}
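
Since mask lanes are all-ones (-1) exactly where a coefficient passed the zbin check, vec_sub(scan, mask) adds 1 to those scan indices, and the andc then zeroes lanes whose quantized value is 0; the running vec_max over these lanes yields the usual end-of-block value. Scalar equivalent:

#include <stdint.h>

// eob is 1 + the largest inverse-scan position holding a nonzero quantized
// coefficient, or 0 when everything quantizes to zero.
static uint16_t compute_eob(const int16_t *qcoeff, const int16_t *iscan,
                            int n) {
  int i;
  uint16_t eob = 0;
  for (i = 0; i < n; ++i) {
    const uint16_t candidate = (uint16_t)(iscan[i] + 1);
    if (qcoeff[i] != 0 && candidate > eob) eob = candidate;
  }
  return eob;
}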
@@ -64,7 +103,8 @@ void vpx_quantize_b_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan_ptr,
const int16_t *iscan_ptr) {
- int16x8_t qcoeff, dqcoeff, eob;
+ int16x8_t qcoeff0, qcoeff1, dqcoeff0, dqcoeff1, eob;
+ bool16x8_t zero_mask0, zero_mask1;
// First set of 8 coeff starts with DC + 7 AC
int16x8_t zbin = vec_vsx_ld(0, zbin_ptr);
@@ -73,51 +113,194 @@ void vpx_quantize_b_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int16x8_t dequant = vec_vsx_ld(0, dequant_ptr);
int16x8_t quant_shift = vec_vsx_ld(0, quant_shift_ptr);
- int16x8_t coeff = vec_vsx_ld(0, coeff_ptr);
- int16x8_t coeff_abs = vec_abs(coeff);
- bool16x8_t zero_mask = vec_cmpge(coeff_abs, zbin);
+ int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr);
+ int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr);
+
+ int16x8_t coeff0_abs = vec_abs(coeff0);
+ int16x8_t coeff1_abs = vec_abs(coeff1);
+
+ zero_mask0 = vec_cmpge(coeff0_abs, zbin);
+ zbin = vec_splat(zbin, 1);
+ zero_mask1 = vec_cmpge(coeff1_abs, zbin);
(void)scan_ptr;
(void)skip_block;
assert(!skip_block);
- qcoeff =
- quantize_coeff(coeff, coeff_abs, round, quant, quant_shift, zero_mask);
- vec_vsx_st(qcoeff, 0, qcoeff_ptr);
-
- dqcoeff = vec_mladd(qcoeff, dequant, vec_zeros_s16);
- vec_vsx_st(dqcoeff, 0, dqcoeff_ptr);
-
- eob = nonzero_scanindex(qcoeff, zero_mask, iscan_ptr);
-
- // All other sets of 8 coeffs will only contain AC
- zbin = vec_splat(zbin, 1);
+ qcoeff0 =
+ quantize_coeff(coeff0, coeff0_abs, round, quant, quant_shift, zero_mask0);
+ vec_vsx_st(qcoeff0, 0, qcoeff_ptr);
round = vec_splat(round, 1);
quant = vec_splat(quant, 1);
- dequant = vec_splat(dequant, 1);
quant_shift = vec_splat(quant_shift, 1);
+ qcoeff1 =
+ quantize_coeff(coeff1, coeff1_abs, round, quant, quant_shift, zero_mask1);
+ vec_vsx_st(qcoeff1, 16, qcoeff_ptr);
+
+ dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16);
+ vec_vsx_st(dqcoeff0, 0, dqcoeff_ptr);
+ dequant = vec_splat(dequant, 1);
+ dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16);
+ vec_vsx_st(dqcoeff1, 16, dqcoeff_ptr);
+
+ eob = vec_max(nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, 0),
+ nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, 16));
+
+ if (n_coeffs > 16) {
+ int index = 16;
+ int off0 = 32;
+ int off1 = 48;
+ int off2 = 64;
+ do {
+ int16x8_t coeff2, coeff2_abs, qcoeff2, dqcoeff2, eob2;
+ bool16x8_t zero_mask2;
+ coeff0 = vec_vsx_ld(off0, coeff_ptr);
+ coeff1 = vec_vsx_ld(off1, coeff_ptr);
+ coeff2 = vec_vsx_ld(off2, coeff_ptr);
+ coeff0_abs = vec_abs(coeff0);
+ coeff1_abs = vec_abs(coeff1);
+ coeff2_abs = vec_abs(coeff2);
+ zero_mask0 = vec_cmpge(coeff0_abs, zbin);
+ zero_mask1 = vec_cmpge(coeff1_abs, zbin);
+ zero_mask2 = vec_cmpge(coeff2_abs, zbin);
+ qcoeff0 = quantize_coeff(coeff0, coeff0_abs, round, quant, quant_shift,
+ zero_mask0);
+ qcoeff1 = quantize_coeff(coeff1, coeff1_abs, round, quant, quant_shift,
+ zero_mask1);
+ qcoeff2 = quantize_coeff(coeff2, coeff2_abs, round, quant, quant_shift,
+ zero_mask2);
+ vec_vsx_st(qcoeff0, off0, qcoeff_ptr);
+ vec_vsx_st(qcoeff1, off1, qcoeff_ptr);
+ vec_vsx_st(qcoeff2, off2, qcoeff_ptr);
+
+ dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16);
+ dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16);
+ dqcoeff2 = vec_mladd(qcoeff2, dequant, vec_zeros_s16);
+
+ vec_vsx_st(dqcoeff0, off0, dqcoeff_ptr);
+ vec_vsx_st(dqcoeff1, off1, dqcoeff_ptr);
+ vec_vsx_st(dqcoeff2, off2, dqcoeff_ptr);
+
+ eob =
+ vec_max(eob, nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, off0));
+ eob2 = vec_max(nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, off1),
+ nonzero_scanindex(qcoeff2, zero_mask2, iscan_ptr, off2));
+ eob = vec_max(eob, eob2);
+
+ index += 24;
+ off0 += 48;
+ off1 += 48;
+ off2 += 48;
+ } while (index < n_coeffs);
+ }
+
+ eob = vec_max_across(eob);
+ *eob_ptr = eob[0];
+}
+
+void vpx_quantize_b_32x32_vsx(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan_ptr, const int16_t *iscan_ptr) {
+ // In stage 1, we quantize 16 coeffs (DC + 15 AC)
+ // In stage 2, we loop 42 times and quantize 24 coeffs per iteration
+ // (32 * 32 - 16) / 24 = 42
+ int num_itr = 42;
+ // Offsets are in bytes, 16 coeffs = 32 bytes
+ int off0 = 32;
+ int off1 = 48;
+ int off2 = 64;
+
+ int16x8_t qcoeff0, qcoeff1, eob;
+ bool16x8_t zero_mask0, zero_mask1;
+
+ int16x8_t zbin = vec_vsx_ld(0, zbin_ptr);
+ int16x8_t round = vec_vsx_ld(0, round_ptr);
+ int16x8_t quant = vec_vsx_ld(0, quant_ptr);
+ int16x8_t dequant = vec_vsx_ld(0, dequant_ptr);
+ int16x8_t quant_shift = vec_vsx_ld(0, quant_shift_ptr);
+
+ int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr);
+ int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr);
+
+ int16x8_t coeff0_abs = vec_abs(coeff0);
+ int16x8_t coeff1_abs = vec_abs(coeff1);
+
+ (void)scan_ptr;
+ (void)skip_block;
+ (void)n_coeffs;
+ assert(!skip_block);
+
+ // 32x32 quantization requires that zbin and round be divided by 2
+ zbin = vec_sra(vec_add(zbin, vec_ones_s16), vec_ones_u16);
+ round = vec_sra(vec_add(round, vec_ones_s16), vec_ones_u16);
+
+ zero_mask0 = vec_cmpge(coeff0_abs, zbin);
+ zbin = vec_splat(zbin, 1); // remove DC from zbin
+ zero_mask1 = vec_cmpge(coeff1_abs, zbin);
+
+ qcoeff0 = quantize_coeff_32(coeff0, coeff0_abs, round, quant, quant_shift,
+ zero_mask0);
+ round = vec_splat(round, 1); // remove DC from round
+ quant = vec_splat(quant, 1); // remove DC from quant
+ quant_shift = vec_splat(quant_shift, 1); // remove DC from quant_shift
+ qcoeff1 = quantize_coeff_32(coeff1, coeff1_abs, round, quant, quant_shift,
+ zero_mask1);
+
+ vec_vsx_st(qcoeff0, 0, qcoeff_ptr);
+ vec_vsx_st(qcoeff1, 16, qcoeff_ptr);
+
+ vec_vsx_st(dequantize_coeff_32(qcoeff0, dequant), 0, dqcoeff_ptr);
+ dequant = vec_splat(dequant, 1); // remove DC from dequant
+ vec_vsx_st(dequantize_coeff_32(qcoeff1, dequant), 16, dqcoeff_ptr);
+
+ eob = vec_max(nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, 0),
+ nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, 16));
- n_coeffs -= 8;
do {
- coeff_ptr += 8;
- qcoeff_ptr += 8;
- dqcoeff_ptr += 8;
- iscan_ptr += 8;
+ int16x8_t coeff2, coeff2_abs, qcoeff2, eob2;
+ bool16x8_t zero_mask2;
+
+ coeff0 = vec_vsx_ld(off0, coeff_ptr);
+ coeff1 = vec_vsx_ld(off1, coeff_ptr);
+ coeff2 = vec_vsx_ld(off2, coeff_ptr);
+
+ coeff0_abs = vec_abs(coeff0);
+ coeff1_abs = vec_abs(coeff1);
+ coeff2_abs = vec_abs(coeff2);
+
+ zero_mask0 = vec_cmpge(coeff0_abs, zbin);
+ zero_mask1 = vec_cmpge(coeff1_abs, zbin);
+ zero_mask2 = vec_cmpge(coeff2_abs, zbin);
+
+ qcoeff0 = quantize_coeff_32(coeff0, coeff0_abs, round, quant, quant_shift,
+ zero_mask0);
+ qcoeff1 = quantize_coeff_32(coeff1, coeff1_abs, round, quant, quant_shift,
+ zero_mask1);
+ qcoeff2 = quantize_coeff_32(coeff2, coeff2_abs, round, quant, quant_shift,
+ zero_mask2);
- coeff = vec_vsx_ld(0, coeff_ptr);
- coeff_abs = vec_abs(coeff);
- zero_mask = vec_cmpge(coeff_abs, zbin);
- qcoeff =
- quantize_coeff(coeff, coeff_abs, round, quant, quant_shift, zero_mask);
- vec_vsx_st(qcoeff, 0, qcoeff_ptr);
+ vec_vsx_st(qcoeff0, off0, qcoeff_ptr);
+ vec_vsx_st(qcoeff1, off1, qcoeff_ptr);
+ vec_vsx_st(qcoeff2, off2, qcoeff_ptr);
- dqcoeff = vec_mladd(qcoeff, dequant, vec_zeros_s16);
- vec_vsx_st(dqcoeff, 0, dqcoeff_ptr);
+ vec_vsx_st(dequantize_coeff_32(qcoeff0, dequant), off0, dqcoeff_ptr);
+ vec_vsx_st(dequantize_coeff_32(qcoeff1, dequant), off1, dqcoeff_ptr);
+ vec_vsx_st(dequantize_coeff_32(qcoeff2, dequant), off2, dqcoeff_ptr);
- eob = vec_max(eob, nonzero_scanindex(qcoeff, zero_mask, iscan_ptr));
+ eob = vec_max(eob, nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, off0));
+ eob2 = vec_max(nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, off1),
+ nonzero_scanindex(qcoeff2, zero_mask2, iscan_ptr, off2));
+ eob = vec_max(eob, eob2);
- n_coeffs -= 8;
- } while (n_coeffs > 0);
+ // 24 int16_t is 48 bytes
+ off0 += 48;
+ off1 += 48;
+ off2 += 48;
+ num_itr--;
+ } while (num_itr != 0);
eob = vec_max_across(eob);
*eob_ptr = eob[0];
diff --git a/vpx_dsp/ppc/types_vsx.h b/vpx_dsp/ppc/types_vsx.h
index e2af55463..c6c7ce9f1 100644
--- a/vpx_dsp/ppc/types_vsx.h
+++ b/vpx_dsp/ppc/types_vsx.h
@@ -19,7 +19,9 @@ typedef vector signed short int16x8_t;
typedef vector unsigned short uint16x8_t;
typedef vector signed int int32x4_t;
typedef vector unsigned int uint32x4_t;
+typedef vector bool char bool8x16_t;
typedef vector bool short bool16x8_t;
+typedef vector bool int bool32x4_t;
#ifdef __clang__
static const uint8x16_t xxpermdi0_perm = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
@@ -66,9 +68,15 @@ static const uint8x16_t xxpermdi3_perm = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
#endif
#endif
+static const uint8x16_t vec_zeros_u8 = { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 };
static const int16x8_t vec_zeros_s16 = { 0, 0, 0, 0, 0, 0, 0, 0 };
-static const uint16x8_t vec_ones_s16 = { 1, 1, 1, 1, 1, 1, 1, 1 };
+static const int16x8_t vec_ones_s16 = { 1, 1, 1, 1, 1, 1, 1, 1 };
+static const uint16x8_t vec_ones_u16 = { 1, 1, 1, 1, 1, 1, 1, 1 };
+static const uint32x4_t vec_ones_u32 = { 1, 1, 1, 1 };
+static const int32x4_t vec_zeros_s32 = { 0, 0, 0, 0 };
static const uint16x8_t vec_shift_sign_s16 = { 15, 15, 15, 15, 15, 15, 15, 15 };
+static const uint32x4_t vec_shift_sign_s32 = { 31, 31, 31, 31 };
static const uint8x16_t vec_perm64 = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
0x0E, 0x0F, 0x00, 0x01, 0x02, 0x03,
0x04, 0x05, 0x06, 0x07 };
@@ -79,4 +87,8 @@ static const uint8x16_t vec_perm16 = { 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0A, 0x0B, 0x0E, 0x0D,
0x0E, 0x0F, 0x00, 0x01 };
+static const uint8x16_t vec_perm_merge = { 0x00, 0x01, 0x08, 0x09, 0x02, 0x03,
+ 0x0A, 0x0B, 0x04, 0x05, 0x0C, 0x0D,
+ 0x06, 0x07, 0x0E, 0x0F };
+
#endif // VPX_DSP_PPC_TYPES_VSX_H_
diff --git a/vpx_dsp/ppc/variance_vsx.c b/vpx_dsp/ppc/variance_vsx.c
index 1efe2f005..d3f257b63 100644
--- a/vpx_dsp/ppc/variance_vsx.c
+++ b/vpx_dsp/ppc/variance_vsx.c
@@ -10,10 +10,11 @@
#include <assert.h>
+#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/ppc/types_vsx.h"
-static inline uint8x16_t read4x2(const uint8_t *a, int stride) {
+static INLINE uint8x16_t read4x2(const uint8_t *a, int stride) {
const uint32x4_t a0 = (uint32x4_t)vec_vsx_ld(0, a);
const uint32x4_t a1 = (uint32x4_t)vec_vsx_ld(0, a + stride);
@@ -101,3 +102,174 @@ void vpx_comp_avg_pred_vsx(uint8_t *comp_pred, const uint8_t *pred, int width,
}
}
}
+
+static INLINE void variance_inner_32(const uint8_t *a, const uint8_t *b,
+ int32x4_t *sum_squared, int32x4_t *sum) {
+ int32x4_t s = *sum;
+ int32x4_t ss = *sum_squared;
+
+ const uint8x16_t va0 = vec_vsx_ld(0, a);
+ const uint8x16_t vb0 = vec_vsx_ld(0, b);
+ const uint8x16_t va1 = vec_vsx_ld(16, a);
+ const uint8x16_t vb1 = vec_vsx_ld(16, b);
+
+ const int16x8_t a0 = unpack_to_s16_h(va0);
+ const int16x8_t b0 = unpack_to_s16_h(vb0);
+ const int16x8_t a1 = unpack_to_s16_l(va0);
+ const int16x8_t b1 = unpack_to_s16_l(vb0);
+ const int16x8_t a2 = unpack_to_s16_h(va1);
+ const int16x8_t b2 = unpack_to_s16_h(vb1);
+ const int16x8_t a3 = unpack_to_s16_l(va1);
+ const int16x8_t b3 = unpack_to_s16_l(vb1);
+ const int16x8_t d0 = vec_sub(a0, b0);
+ const int16x8_t d1 = vec_sub(a1, b1);
+ const int16x8_t d2 = vec_sub(a2, b2);
+ const int16x8_t d3 = vec_sub(a3, b3);
+
+ s = vec_sum4s(d0, s);
+ ss = vec_msum(d0, d0, ss);
+ s = vec_sum4s(d1, s);
+ ss = vec_msum(d1, d1, ss);
+ s = vec_sum4s(d2, s);
+ ss = vec_msum(d2, d2, ss);
+ s = vec_sum4s(d3, s);
+ ss = vec_msum(d3, d3, ss);
+ *sum = s;
+ *sum_squared = ss;
+}
+
+static INLINE void variance(const uint8_t *a, int a_stride, const uint8_t *b,
+ int b_stride, int w, int h, uint32_t *sse,
+ int *sum) {
+ int i;
+
+ int32x4_t s = vec_splat_s32(0);
+ int32x4_t ss = vec_splat_s32(0);
+
+ switch (w) {
+ case 4:
+ for (i = 0; i < h / 2; ++i) {
+ const int16x8_t a0 = unpack_to_s16_h(read4x2(a, a_stride));
+ const int16x8_t b0 = unpack_to_s16_h(read4x2(b, b_stride));
+ const int16x8_t d = vec_sub(a0, b0);
+ s = vec_sum4s(d, s);
+ ss = vec_msum(d, d, ss);
+ a += a_stride * 2;
+ b += b_stride * 2;
+ }
+ break;
+ case 8:
+ for (i = 0; i < h; ++i) {
+ const int16x8_t a0 = unpack_to_s16_h(vec_vsx_ld(0, a));
+ const int16x8_t b0 = unpack_to_s16_h(vec_vsx_ld(0, b));
+ const int16x8_t d = vec_sub(a0, b0);
+
+ s = vec_sum4s(d, s);
+ ss = vec_msum(d, d, ss);
+ a += a_stride;
+ b += b_stride;
+ }
+ break;
+ case 16:
+ for (i = 0; i < h; ++i) {
+ const uint8x16_t va = vec_vsx_ld(0, a);
+ const uint8x16_t vb = vec_vsx_ld(0, b);
+ const int16x8_t a0 = unpack_to_s16_h(va);
+ const int16x8_t b0 = unpack_to_s16_h(vb);
+ const int16x8_t a1 = unpack_to_s16_l(va);
+ const int16x8_t b1 = unpack_to_s16_l(vb);
+ const int16x8_t d0 = vec_sub(a0, b0);
+ const int16x8_t d1 = vec_sub(a1, b1);
+
+ s = vec_sum4s(d0, s);
+ ss = vec_msum(d0, d0, ss);
+ s = vec_sum4s(d1, s);
+ ss = vec_msum(d1, d1, ss);
+
+ a += a_stride;
+ b += b_stride;
+ }
+ break;
+ case 32:
+ for (i = 0; i < h; ++i) {
+ variance_inner_32(a, b, &ss, &s);
+ a += a_stride;
+ b += b_stride;
+ }
+ break;
+ case 64:
+ for (i = 0; i < h; ++i) {
+ variance_inner_32(a, b, &ss, &s);
+ variance_inner_32(a + 32, b + 32, &ss, &s);
+
+ a += a_stride;
+ b += b_stride;
+ }
+ break;
+ }
+
+ s = vec_splat(vec_sums(s, vec_splat_s32(0)), 3);
+
+ vec_ste(s, 0, sum);
+
+ ss = vec_splat(vec_sums(ss, vec_splat_s32(0)), 3);
+
+ vec_ste((uint32x4_t)ss, 0, sse);
+}
+
+/* Identical to the variance call except it takes an additional parameter, sum,
+ * and returns that value via pass-by-reference instead of returning
+ * sse - sum^2 / (w * h).
+ */
+#define GET_VAR(W, H) \
+ void vpx_get##W##x##H##var_vsx(const uint8_t *a, int a_stride, \
+ const uint8_t *b, int b_stride, \
+ uint32_t *sse, int *sum) { \
+ variance(a, a_stride, b, b_stride, W, H, sse, sum); \
+ }
+
+/* Identical to the variance call except it does not calculate
+ * sse - sum^2 / (w * h) and returns sse in addition to modifying the
+ * passed-in variable.
+ */
+#define MSE(W, H) \
+ uint32_t vpx_mse##W##x##H##_vsx(const uint8_t *a, int a_stride, \
+ const uint8_t *b, int b_stride, \
+ uint32_t *sse) { \
+ int sum; \
+ variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
+ return *sse; \
+ }
+
+#define VAR(W, H) \
+ uint32_t vpx_variance##W##x##H##_vsx(const uint8_t *a, int a_stride, \
+ const uint8_t *b, int b_stride, \
+ uint32_t *sse) { \
+ int sum; \
+ variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
+ return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
+ }
+
+#define VARIANCES(W, H) VAR(W, H)
+
+VARIANCES(64, 64)
+VARIANCES(64, 32)
+VARIANCES(32, 64)
+VARIANCES(32, 32)
+VARIANCES(32, 16)
+VARIANCES(16, 32)
+VARIANCES(16, 16)
+VARIANCES(16, 8)
+VARIANCES(8, 16)
+VARIANCES(8, 8)
+VARIANCES(8, 4)
+VARIANCES(4, 8)
+VARIANCES(4, 4)
+
+GET_VAR(16, 16)
+GET_VAR(8, 8)
+
+MSE(16, 16)
+MSE(16, 8)
+MSE(8, 16)
+MSE(8, 8)
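
[Editor's note: for reference, the per-pixel arithmetic that the vec_sum4s/vec_msum loops above vectorize is equivalent to the following scalar sketch. The _ref names are illustrative only, not part of libvpx; the library's own portable fallback lives in vpx_dsp/variance.c.]

/* Scalar sketch of the accumulation performed by the VSX kernels above. */
#include <stdint.h>

static void variance_ref(const uint8_t *a, int a_stride, const uint8_t *b,
                         int b_stride, int w, int h, uint32_t *sse, int *sum) {
  int i, j;
  *sum = 0;
  *sse = 0;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];  /* signed pixel difference */
      *sum += diff;                  /* accumulated by vec_sum4s above */
      *sse += diff * diff;           /* accumulated by vec_msum above */
    }
    a += a_stride;
    b += b_stride;
  }
}

/* VAR subtracts the squared-mean term sum^2 / (w * h); MSE returns *sse
 * unchanged; GET_VAR hands both accumulators back to the caller. */
static uint32_t variance16x16_ref(const uint8_t *a, int a_stride,
                                  const uint8_t *b, int b_stride,
                                  uint32_t *sse) {
  int sum;
  variance_ref(a, a_stride, b, b_stride, 16, 16, sse, &sum);
  return *sse - (uint32_t)(((int64_t)sum * sum) / (16 * 16));
}
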
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk
index cb06a476f..573d6fef1 100644
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -69,6 +69,7 @@ DSP_SRCS-$(HAVE_MSA) += mips/deblock_msa.c
DSP_SRCS-$(HAVE_NEON) += arm/deblock_neon.c
DSP_SRCS-$(HAVE_SSE2) += x86/add_noise_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/deblock_sse2.asm
+DSP_SRCS-$(HAVE_VSX) += ppc/deblock_vsx.c
endif # CONFIG_POSTPROC
DSP_SRCS-$(HAVE_NEON_ASM) += arm/intrapred_neon_asm$(ASM)
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 93ecd7c19..9661f3bd8 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -626,7 +626,7 @@ if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/;
specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/;
specialize qw/vpx_idct32x32_1_add neon sse2/;
- specialize qw/vpx_iwht4x4_16_add sse2/;
+ specialize qw/vpx_iwht4x4_16_add sse2 vsx/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") {
# Note that these specializations are appended to the above ones.
@@ -702,7 +702,7 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
specialize qw/vpx_quantize_b neon sse2 ssse3 avx vsx/;
add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vpx_quantize_b_32x32 neon ssse3 avx/;
+ specialize qw/vpx_quantize_b_32x32 neon ssse3 avx vsx/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
@@ -1082,64 +1082,64 @@ if (vpx_config("CONFIG_ENCODERS") eq "yes" || vpx_config("CONFIG_POSTPROC") eq "
# Variance
#
add_proto qw/unsigned int vpx_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance64x64 sse2 avx2 neon msa mmi/;
+ specialize qw/vpx_variance64x64 sse2 avx2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance64x32 sse2 avx2 neon msa mmi/;
+ specialize qw/vpx_variance64x32 sse2 avx2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance32x64 sse2 avx2 neon msa mmi/;
+ specialize qw/vpx_variance32x64 sse2 avx2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance32x32 sse2 avx2 neon msa mmi/;
+ specialize qw/vpx_variance32x32 sse2 avx2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance32x16 sse2 avx2 neon msa mmi/;
+ specialize qw/vpx_variance32x16 sse2 avx2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance16x32 sse2 avx2 neon msa mmi/;
+ specialize qw/vpx_variance16x32 sse2 avx2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance16x16 sse2 avx2 neon msa mmi/;
+ specialize qw/vpx_variance16x16 sse2 avx2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance16x8 sse2 avx2 neon msa mmi/;
+ specialize qw/vpx_variance16x8 sse2 avx2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance8x16 sse2 neon msa mmi/;
+ specialize qw/vpx_variance8x16 sse2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance8x8 sse2 neon msa mmi/;
+ specialize qw/vpx_variance8x8 sse2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance8x4 sse2 neon msa mmi/;
+ specialize qw/vpx_variance8x4 sse2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance4x8 sse2 neon msa mmi/;
+ specialize qw/vpx_variance4x8 sse2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance4x4 sse2 neon msa mmi/;
+ specialize qw/vpx_variance4x4 sse2 neon msa mmi vsx/;
#
# Specialty Variance
#
add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vpx_get16x16var sse2 avx2 neon msa/;
+ specialize qw/vpx_get16x16var sse2 avx2 neon msa vsx/;
add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vpx_get8x8var sse2 neon msa/;
+ specialize qw/vpx_get8x8var sse2 neon msa vsx/;
add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse16x16 sse2 avx2 neon msa mmi/;
+ specialize qw/vpx_mse16x16 sse2 avx2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse16x8 sse2 avx2 msa mmi/;
+ specialize qw/vpx_mse16x8 sse2 avx2 msa mmi vsx/;
add_proto qw/unsigned int vpx_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse8x16 sse2 msa mmi/;
+ specialize qw/vpx_mse8x16 sse2 msa mmi vsx/;
add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse8x8 sse2 msa mmi/;
+ specialize qw/vpx_mse8x8 sse2 msa mmi vsx/;
add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *";
specialize qw/vpx_get_mb_ss sse2 msa vsx/;
@@ -1598,13 +1598,13 @@ if (vpx_config("CONFIG_POSTPROC") eq "yes" || vpx_config("CONFIG_VP9_POSTPROC")
specialize qw/vpx_plane_add_noise sse2 msa/;
add_proto qw/void vpx_mbpost_proc_down/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
- specialize qw/vpx_mbpost_proc_down sse2 neon msa/;
+ specialize qw/vpx_mbpost_proc_down sse2 neon msa vsx/;
add_proto qw/void vpx_mbpost_proc_across_ip/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
- specialize qw/vpx_mbpost_proc_across_ip sse2 neon msa/;
+ specialize qw/vpx_mbpost_proc_across_ip sse2 neon msa vsx/;
add_proto qw/void vpx_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size";
- specialize qw/vpx_post_proc_down_and_across_mb_row sse2 neon msa/;
+ specialize qw/vpx_post_proc_down_and_across_mb_row sse2 neon msa vsx/;
}
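
[Editor's note: each `specialize qw/... vsx/` line above feeds the RTCD generator, which emits one function pointer per symbol plus a setup routine that picks the best implementation at run time. Below is a hedged sketch of the dispatch generated for a single symbol; the real header is produced at build time from this .pl file, so exact names and layout may differ.]

/* Illustrative sketch of RTCD dispatch for one specialized symbol. */
#include "vpx_ports/ppc.h"  /* ppc_simd_caps() and the HAS_VSX flag */

unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int ref_stride,
                                 unsigned int *sse);
unsigned int vpx_variance16x16_vsx(const uint8_t *src_ptr, int source_stride,
                                   const uint8_t *ref_ptr, int ref_stride,
                                   unsigned int *sse);

/* The generated header declares a pointer with the public name. */
unsigned int (*vpx_variance16x16)(const uint8_t *, int, const uint8_t *, int,
                                  unsigned int *);

static void setup_rtcd_internal(void) {
  const int flags = ppc_simd_caps();
  vpx_variance16x16 = vpx_variance16x16_c;  /* portable fallback */
  if (flags & HAS_VSX) vpx_variance16x16 = vpx_variance16x16_vsx;
}
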
diff --git a/vpx_ports/config.h b/vpx_ports/config.h
deleted file mode 100644
index 3c1ab99f4..000000000
--- a/vpx_ports/config.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VPX_PORTS_CONFIG_H_
-#define VPX_PORTS_CONFIG_H_
-
-#include "vpx_config.h"
-
-#endif // VPX_PORTS_CONFIG_H_