summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xbuild/make/rtcd.sh9
-rw-r--r--example_xma.c191
-rw-r--r--examples.mk13
-rw-r--r--examples/vp8_multi_resolution_encoder.c (renamed from vp8_multi_resolution_encoder.c)0
-rw-r--r--test/codec_factory.h2
-rw-r--r--test/datarate_test.cc33
-rw-r--r--test/idct8x8_test.cc2
-rw-r--r--test/idct_test.cc2
-rw-r--r--test/pp_filter_test.cc2
-rw-r--r--test/resize_test.cc2
-rw-r--r--test/sad_test.cc2
-rw-r--r--test/sixtap_predict_test.cc2
-rw-r--r--test/test-data.sha11
-rw-r--r--test/test.mk3
-rw-r--r--test/test_vector_test.cc2
-rw-r--r--test/tile_independence_test.cc2
-rw-r--r--test/vp8_fdct4x4_test.cc2
-rw-r--r--test/vp9_lossless_test.cc23
-rw-r--r--test/y4m_video_source.h107
-rw-r--r--vp8/common/onyx.h4
-rw-r--r--vp8/encoder/arm/neon/denoising_neon.c6
-rw-r--r--vp9/common/arm/neon/vp9_reconintra_neon.asm73
-rw-r--r--vp9/common/vp9_alloccommon.c6
-rw-r--r--vp9/common/vp9_blockd.h4
-rw-r--r--vp9/common/vp9_entropy.h4
-rw-r--r--vp9/common/vp9_mvref_common.h6
-rw-r--r--vp9/common/vp9_reconinter.c43
-rw-r--r--vp9/common/vp9_rtcd_defs.sh8
-rw-r--r--vp9/common/x86/vp9_asm_stubs.c100
-rw-r--r--vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c591
-rw-r--r--vp9/decoder/vp9_decodeframe.c7
-rw-r--r--vp9/decoder/vp9_decodemv.c36
-rw-r--r--vp9/encoder/vp9_bitstream.c12
-rw-r--r--vp9/encoder/vp9_encodeframe.c108
-rw-r--r--vp9/encoder/vp9_encodemv.c8
-rw-r--r--vp9/encoder/vp9_firstpass.c264
-rw-r--r--vp9/encoder/vp9_lookahead.c2
-rw-r--r--vp9/encoder/vp9_mcomp.c164
-rw-r--r--vp9/encoder/vp9_mcomp.h4
-rw-r--r--vp9/encoder/vp9_onyx_if.c77
-rw-r--r--vp9/encoder/vp9_picklpf.c93
-rw-r--r--vp9/encoder/vp9_pickmode.c2
-rw-r--r--vp9/encoder/vp9_ratectrl.c97
-rw-r--r--vp9/encoder/vp9_rdopt.c47
-rw-r--r--vp9/encoder/vp9_rdopt.h2
-rw-r--r--vp9/encoder/vp9_resize.c418
-rw-r--r--vp9/encoder/vp9_resize.h67
-rw-r--r--vp9/encoder/vp9_sad_c.c695
-rw-r--r--vp9/encoder/vp9_sadmxn.h38
-rw-r--r--vp9/encoder/vp9_temporal_filter.c2
-rw-r--r--vp9/vp9_common.mk3
-rw-r--r--vp9/vp9_cx_iface.c9
-rw-r--r--vp9/vp9_iface_common.h4
-rw-r--r--vp9/vp9cx.mk3
-rw-r--r--vpx_scale/yv12config.h9
-rw-r--r--vpxdec.c287
-rw-r--r--vpxenc.c4
-rw-r--r--y4menc.c30
-rw-r--r--y4menc.h27
59 files changed, 1646 insertions, 2118 deletions
diff --git a/build/make/rtcd.sh b/build/make/rtcd.sh
index 2967b5aed..ed037132a 100755
--- a/build/make/rtcd.sh
+++ b/build/make/rtcd.sh
@@ -209,6 +209,10 @@ common_top() {
#define RTCD_EXTERN extern
#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+
$(process_forward_decls)
$(declare_function_pointers c $ALL_ARCHS)
@@ -219,6 +223,11 @@ EOF
common_bottom() {
cat <<EOF
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif
EOF
}
diff --git a/example_xma.c b/example_xma.c
deleted file mode 100644
index 7aa879810..000000000
--- a/example_xma.c
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/* This is a simple program showing how to initialize the decoder in XMA mode */
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#define VPX_CODEC_DISABLE_COMPAT 1
-#include "vpx_config.h"
-#include "vpx/vpx_decoder.h"
-#include "vpx/vpx_integer.h"
-#if CONFIG_VP9_DECODER
-#include "vpx/vp8dx.h"
-#endif
-
-static char *exec_name;
-static int verbose = 0;
-
-static const struct {
- const char *name;
- const vpx_codec_iface_t *iface;
-} ifaces[] = {
-#if CONFIG_VP9_DECODER
- {"vp9", &vpx_codec_vp8_dx_algo},
-#endif
-};
-
-static void usage_exit(void) {
- int i;
-
- printf("Usage: %s <options>\n\n"
- "Options:\n"
- "\t--codec <name>\tCodec to use (default=%s)\n"
- "\t-h <height>\tHeight of the simulated video frame, in pixels\n"
- "\t-w <width> \tWidth of the simulated video frame, in pixels\n"
- "\t-v \tVerbose mode (show individual segment sizes)\n"
- "\t--help \tShow this message\n"
- "\n"
- "Included decoders:\n"
- "\n",
- exec_name,
- ifaces[0].name);
-
- for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
- printf(" %-6s - %s\n",
- ifaces[i].name,
- vpx_codec_iface_name(ifaces[i].iface));
-
- exit(EXIT_FAILURE);
-}
-
-static void usage_error(const char *fmt, ...) {
- va_list ap;
- va_start(ap, fmt);
- vprintf(fmt, ap);
- printf("\n");
- usage_exit();
-}
-
-void my_mem_dtor(vpx_codec_mmap_t *mmap) {
- if (verbose)
- printf("freeing segment %d\n", mmap->id);
-
- free(mmap->priv);
-}
-
-int main(int argc, char **argv) {
- vpx_codec_ctx_t decoder;
- vpx_codec_iface_t *iface = ifaces[0].iface;
- vpx_codec_iter_t iter;
- vpx_codec_dec_cfg_t cfg;
- vpx_codec_err_t res = VPX_CODEC_OK;
- unsigned int alloc_sz = 0;
- unsigned int w = 352;
- unsigned int h = 288;
- int i;
-
- exec_name = argv[0];
-
- for (i = 1; i < argc; i++) {
- if (!strcmp(argv[i], "--codec")) {
- if (i + 1 < argc) {
- int j, k = -1;
-
- i++;
-
- for (j = 0; j < sizeof(ifaces) / sizeof(ifaces[0]); j++)
- if (!strcmp(ifaces[j].name, argv[i]))
- k = j;
-
- if (k >= 0)
- iface = ifaces[k].iface;
- else
- usage_error("Error: Unrecognized argument (%s) to --codec\n",
- argv[i]);
- } else
- usage_error("Error: Option --codec requires argument.\n");
- } else if (!strcmp(argv[i], "-v"))
- verbose = 1;
- else if (!strcmp(argv[i], "-h"))
- if (i + 1 < argc) {
- h = atoi(argv[++i]);
- } else
- usage_error("Error: Option -h requires argument.\n");
- else if (!strcmp(argv[i], "-w"))
- if (i + 1 < argc) {
- w = atoi(argv[++i]);
- } else
- usage_error("Error: Option -w requires argument.\n");
- else if (!strcmp(argv[i], "--help"))
- usage_exit();
- else
- usage_error("Error: Unrecognized option %s\n\n", argv[i]);
- }
-
- if (argc == 1)
- printf("Using built-in defaults. For options, rerun with --help\n\n");
-
- /* XMA mode is not supported on all decoders! */
- if (!(vpx_codec_get_caps(iface) & VPX_CODEC_CAP_XMA)) {
- printf("%s does not support XMA mode!\n", vpx_codec_iface_name(iface));
- return EXIT_FAILURE;
- }
-
- /* The codec knows how much memory to allocate based on the size of the
- * encoded frames. This data can be parsed from the bitstream with
- * vpx_codec_peek_stream_info() if a bitstream is available. Otherwise,
- * a fixed size can be used that will be the upper limit on the frame
- * size the decoder can decode.
- */
- cfg.w = w;
- cfg.h = h;
-
- /* Initialize the decoder in XMA mode. */
- if (vpx_codec_dec_init(&decoder, iface, &cfg, VPX_CODEC_USE_XMA)) {
- printf("Failed to initialize decoder in XMA mode: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
-
- /* Iterate through the list of memory maps, allocating them with the
- * requested alignment.
- */
- iter = NULL;
-
- do {
- vpx_codec_mmap_t mmap;
- unsigned int align;
-
- res = vpx_codec_get_mem_map(&decoder, &mmap, &iter);
- align = mmap.align ? mmap.align - 1 : 0;
-
- if (!res) {
- if (verbose)
- printf("Allocating segment %u, size %lu, align %u %s\n",
- mmap.id, mmap.sz, mmap.align,
- mmap.flags & VPX_CODEC_MEM_ZERO ? "(ZEROED)" : "");
-
- if (mmap.flags & VPX_CODEC_MEM_ZERO)
- mmap.priv = calloc(1, mmap.sz + align);
- else
- mmap.priv = malloc(mmap.sz + align);
-
- mmap.base = (void *)((((uintptr_t)mmap.priv) + align) & ~(uintptr_t)align);
- mmap.dtor = my_mem_dtor;
- alloc_sz += mmap.sz + align;
-
- if (vpx_codec_set_mem_map(&decoder, &mmap, 1)) {
- printf("Failed to set mmap: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
- } else if (res != VPX_CODEC_LIST_END) {
- printf("Failed to get mmap: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
- } while (res != VPX_CODEC_LIST_END);
-
- printf("%s\n %d bytes external memory required for %dx%d.\n",
- decoder.name, alloc_sz, cfg.w, cfg.h);
- vpx_codec_destroy(&decoder);
- return EXIT_SUCCESS;
-
-}
diff --git a/examples.mk b/examples.mk
index 66b719ca0..b29ab9c34 100644
--- a/examples.mk
+++ b/examples.mk
@@ -26,6 +26,7 @@ vpxdec.SRCS += args.c args.h
vpxdec.SRCS += ivfdec.c ivfdec.h
vpxdec.SRCS += tools_common.c tools_common.h
vpxdec.SRCS += webmdec.c webmdec.h
+vpxdec.SRCS += y4menc.c y4menc.h
vpxdec.SRCS += nestegg/halloc/halloc.h
vpxdec.SRCS += nestegg/halloc/src/align.h
vpxdec.SRCS += nestegg/halloc/src/halloc.c
@@ -109,11 +110,13 @@ GEN_EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8cx_set_ref.c
vp8cx_set_ref.GUID = C5E31F7F-96F6-48BD-BD3E-10EBF6E8057A
vp8cx_set_ref.DESCRIPTION = VP8 set encoder reference frame
-# C file is provided, not generated automatically.
-UTILS-$(CONFIG_MULTI_RES_ENCODING) += vp8_multi_resolution_encoder.c
-vp8_multi_resolution_encoder.SRCS += $(LIBYUV_SRCS)
-vp8_multi_resolution_encoder.GUID = 04f8738e-63c8-423b-90fa-7c2703a374de
-vp8_multi_resolution_encoder.DESCRIPTION = VP8 Multiple-resolution Encoding
+
+ifeq ($(CONFIG_MULTI_RES_ENCODING),yes)
+GEN_EXAMPLES-$(CONFIG_VP8_DECODER) += vp8_multi_resolution_encoder.c
+vp8_multi_resolution_encoder.SRCS += $(LIBYUV_SRCS)
+vp8_multi_resolution_encoder.GUID = 04f8738e-63c8-423b-90fa-7c2703a374de
+vp8_multi_resolution_encoder.DESCRIPTION = VP8 Multiple-resolution Encoding
+endif
# Handle extra library flags depending on codec configuration
diff --git a/vp8_multi_resolution_encoder.c b/examples/vp8_multi_resolution_encoder.c
index 4c29056e5..4c29056e5 100644
--- a/vp8_multi_resolution_encoder.c
+++ b/examples/vp8_multi_resolution_encoder.c
diff --git a/test/codec_factory.h b/test/codec_factory.h
index 2ca6ff086..c060e86dc 100644
--- a/test/codec_factory.h
+++ b/test/codec_factory.h
@@ -10,7 +10,6 @@
#ifndef TEST_CODEC_FACTORY_H_
#define TEST_CODEC_FACTORY_H_
-extern "C" {
#include "./vpx_config.h"
#include "vpx/vpx_decoder.h"
#include "vpx/vpx_encoder.h"
@@ -20,7 +19,6 @@ extern "C" {
#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
#include "vpx/vp8dx.h"
#endif
-}
#include "test/decode_test_driver.h"
#include "test/encode_test_driver.h"
diff --git a/test/datarate_test.cc b/test/datarate_test.cc
index 0b4ddaece..db7dfdb53 100644
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -7,11 +7,13 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "./vpx_config.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/util.h"
+#include "test/y4m_video_source.h"
namespace {
@@ -286,6 +288,37 @@ TEST_P(DatarateTestVP9, BasicRateTargeting) {
}
}
+#if CONFIG_NON420
+// Check basic rate targeting using the rush_hour_444.y4m test clip.
+TEST_P(DatarateTestVP9, BasicRateTargeting444) {
+ ::libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140);
+
+ cfg_.g_profile = 1;
+ cfg_.g_timebase = video.timebase();
+
+ cfg_.rc_buf_initial_sz = 500;
+ cfg_.rc_buf_optimal_sz = 500;
+ cfg_.rc_buf_sz = 1000;
+ cfg_.rc_dropframe_thresh = 1;
+ cfg_.rc_min_quantizer = 0;
+ cfg_.rc_max_quantizer = 63;
+ cfg_.rc_end_usage = VPX_CBR;
+
+ for (int i = 250; i < 900; i += 200) {
+ cfg_.rc_target_bitrate = i;
+ ResetModel();
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ ASSERT_GE(static_cast<double>(cfg_.rc_target_bitrate),
+ effective_datarate_ * 0.85)
+ << " The datarate for the file exceeds the target by too much!";
+ ASSERT_LE(static_cast<double>(cfg_.rc_target_bitrate),
+ effective_datarate_ * 1.15)
+ << " The datarate for the file missed the target!"
+ << cfg_.rc_target_bitrate << " "<< effective_datarate_;
+ }
+}
+#endif
+
// Check that (1) the first dropped frame gets earlier and earlier
// as the drop frame threshold is increased, and (2) that the total number of
// frame drops does not decrease as we increase frame drop threshold.
diff --git a/test/idct8x8_test.cc b/test/idct8x8_test.cc
index d8c61ffb2..5f4c33a81 100644
--- a/test/idct8x8_test.cc
+++ b/test/idct8x8_test.cc
@@ -14,9 +14,7 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
-extern "C" {
#include "./vp9_rtcd.h"
-}
#include "test/acm_random.h"
#include "vpx/vpx_integer.h"
diff --git a/test/idct_test.cc b/test/idct_test.cc
index 2c7fa0ef8..1bbf80a0a 100644
--- a/test/idct_test.cc
+++ b/test/idct_test.cc
@@ -8,10 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-extern "C" {
#include "./vpx_config.h"
#include "./vp8_rtcd.h"
-}
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
diff --git a/test/pp_filter_test.cc b/test/pp_filter_test.cc
index e5ac9db2b..ff7bb08e3 100644
--- a/test/pp_filter_test.cc
+++ b/test/pp_filter_test.cc
@@ -10,12 +10,10 @@
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
-extern "C" {
#include "./vpx_config.h"
#include "./vp8_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
-}
typedef void (*post_proc_func_t)(unsigned char *src_ptr,
unsigned char *dst_ptr,
diff --git a/test/resize_test.cc b/test/resize_test.cc
index e8c2c825b..1963453fd 100644
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -208,7 +208,7 @@ class ResizeInternalTest : public ResizeTest {
virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
if (!frame0_psnr_)
frame0_psnr_ = pkt->data.psnr.psnr[0];
- EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 1.5);
+ EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.0);
}
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
diff --git a/test/sad_test.cc b/test/sad_test.cc
index 453b3a84e..4a91b0b60 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -13,7 +13,6 @@
#include <limits.h>
#include <stdio.h>
-extern "C" {
#include "./vpx_config.h"
#if CONFIG_VP8_ENCODER
#include "./vp8_rtcd.h"
@@ -22,7 +21,6 @@ extern "C" {
#include "./vp9_rtcd.h"
#endif
#include "vpx_mem/vpx_mem.h"
-}
#include "test/acm_random.h"
#include "test/clear_system_state.h"
diff --git a/test/sixtap_predict_test.cc b/test/sixtap_predict_test.cc
index 0f5c0a5e8..3434662fb 100644
--- a/test/sixtap_predict_test.cc
+++ b/test/sixtap_predict_test.cc
@@ -16,12 +16,10 @@
#include "test/register_state_check.h"
#include "test/util.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
-extern "C" {
#include "./vpx_config.h"
#include "./vp8_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
-}
namespace {
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index 492705948..6daf69e63 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -1,5 +1,6 @@
d5dfb0151c9051f8c85999255645d7a23916d3c0 hantro_collage_w352h288.yuv
b87815bf86020c592ccc7a846ba2e28ec8043902 hantro_odd.yuv
+b1f1c3ec79114b9a0651af24ce634afb44a9a419 rush_hour_444.y4m
5184c46ddca8b1fadd16742e8500115bc8f749da vp80-00-comprehensive-001.ivf
65bf1bbbced81b97bd030f376d1b7f61a224793f vp80-00-comprehensive-002.ivf
906b4c1e99eb734504c504b3f1ad8052137ce672 vp80-00-comprehensive-003.ivf
diff --git a/test/test.mk b/test/test.mk
index 5a1d39de5..178b16210 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -24,6 +24,8 @@ LIBVPX_TEST_SRCS-yes += encode_test_driver.cc
LIBVPX_TEST_SRCS-yes += encode_test_driver.h
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += y4m_video_source.h
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += ../y4minput.h ../y4minput.c
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += resize_test.cc
@@ -118,6 +120,7 @@ endif
##
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.yuv
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv
+LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf
diff --git a/test/test_vector_test.cc b/test/test_vector_test.cc
index 6d93bb88f..4adf9af91 100644
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@@ -19,9 +19,7 @@
#include "test/test_vectors.h"
#include "test/util.h"
#include "test/webm_video_source.h"
-extern "C" {
#include "vpx_mem/vpx_mem.h"
-}
namespace {
diff --git a/test/tile_independence_test.cc b/test/tile_independence_test.cc
index 863a3669a..d7144522b 100644
--- a/test/tile_independence_test.cc
+++ b/test/tile_independence_test.cc
@@ -17,9 +17,7 @@
#include "test/i420_video_source.h"
#include "test/util.h"
#include "test/md5_helper.h"
-extern "C" {
#include "vpx_mem/vpx_mem.h"
-}
namespace {
class TileIndependenceTest : public ::libvpx_test::EncoderTest,
diff --git a/test/vp8_fdct4x4_test.cc b/test/vp8_fdct4x4_test.cc
index 25465c53c..e3c292ea1 100644
--- a/test/vp8_fdct4x4_test.cc
+++ b/test/vp8_fdct4x4_test.cc
@@ -15,9 +15,7 @@
#include <string.h>
#include <sys/types.h>
-extern "C" {
#include "./vp8_rtcd.h"
-}
#include "test/acm_random.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
diff --git a/test/vp9_lossless_test.cc b/test/vp9_lossless_test.cc
index 03b89f8df..2282687dc 100644
--- a/test/vp9_lossless_test.cc
+++ b/test/vp9_lossless_test.cc
@@ -7,12 +7,13 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-
+#include "./vpx_config.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/util.h"
+#include "test/y4m_video_source.h"
namespace {
@@ -71,5 +72,25 @@ TEST_P(LossLessTest, TestLossLessEncoding) {
const double psnr_lossless = GetMinPsnr();
EXPECT_GE(psnr_lossless, kMaxPsnr);
}
+
+#if CONFIG_NON420
+TEST_P(LossLessTest, TestLossLessEncoding444) {
+ libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 10);
+
+ cfg_.g_profile = 1;
+ cfg_.g_timebase = video.timebase();
+ cfg_.rc_target_bitrate = 2000;
+ cfg_.g_lag_in_frames = 25;
+ cfg_.rc_min_quantizer = 0;
+ cfg_.rc_max_quantizer = 0;
+
+ init_flags_ = VPX_CODEC_USE_PSNR;
+
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ const double psnr_lossless = GetMinPsnr();
+ EXPECT_GE(psnr_lossless, kMaxPsnr);
+}
+#endif
+
VP9_INSTANTIATE_TEST_CASE(LossLessTest, ALL_TEST_MODES);
} // namespace
diff --git a/test/y4m_video_source.h b/test/y4m_video_source.h
new file mode 100644
index 000000000..bd86c2c04
--- /dev/null
+++ b/test/y4m_video_source.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef TEST_Y4M_VIDEO_SOURCE_H_
+#define TEST_Y4M_VIDEO_SOURCE_H_
+#include <string>
+
+#include "test/video_source.h"
+extern "C" {
+#include "./y4minput.h"
+}
+
+namespace libvpx_test {
+
+// This class extends VideoSource to allow parsing of y4m files
+// so that we can do actual file encodes.
+class Y4mVideoSource : public VideoSource {
+ public:
+ Y4mVideoSource(const std::string &file_name,
+ unsigned int start, int limit)
+ : file_name_(file_name),
+ input_file_(NULL),
+ img_(new vpx_image_t()),
+ start_(start),
+ limit_(limit),
+ frame_(0),
+ framerate_numerator_(0),
+ framerate_denominator_(0),
+ y4m_() {
+ }
+
+ virtual ~Y4mVideoSource() {
+ vpx_img_free(img_.get());
+ y4m_input_close(&y4m_);
+ if (input_file_)
+ fclose(input_file_);
+ }
+
+ virtual void Begin() {
+ if (input_file_)
+ fclose(input_file_);
+ input_file_ = OpenTestDataFile(file_name_);
+ ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
+ << file_name_;
+
+ y4m_input_open(&y4m_, input_file_, NULL, 0, 0);
+ framerate_numerator_ = y4m_.fps_n;
+ framerate_denominator_ = y4m_.fps_d;
+
+ frame_ = 0;
+ for (unsigned int i = 0; i < start_; i++) {
+ Next();
+ }
+
+ FillFrame();
+ }
+
+ virtual void Next() {
+ ++frame_;
+ FillFrame();
+ }
+
+ virtual vpx_image_t *img() const {
+ return (frame_ < limit_) ? img_.get() : NULL;
+ }
+
+ // Models a stream where Timebase = 1/FPS, so pts == frame.
+ virtual vpx_codec_pts_t pts() const { return frame_; }
+
+ virtual unsigned long duration() const { return 1; }
+
+ virtual vpx_rational_t timebase() const {
+ const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ };
+ return t;
+ }
+
+ virtual unsigned int frame() const { return frame_; }
+
+ virtual unsigned int limit() const { return limit_; }
+
+ virtual void FillFrame() {
+ ASSERT_TRUE(input_file_ != NULL);
+ // Read a frame from input_file.
+ y4m_input_fetch_frame(&y4m_, input_file_, img_.get());
+ }
+
+ protected:
+ std::string file_name_;
+ FILE *input_file_;
+ testing::internal::scoped_ptr<vpx_image_t> img_;
+ unsigned int start_;
+ unsigned int limit_;
+ unsigned int frame_;
+ int framerate_numerator_;
+ int framerate_denominator_;
+ y4m_input y4m_;
+};
+
+} // namespace libvpx_test
+
+#endif // TEST_Y4M_VIDEO_SOURCE_H_
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h
index 209a25d82..119e40cdc 100644
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -39,8 +39,8 @@ extern "C"
typedef enum
{
- USAGE_STREAM_FROM_SERVER = 0x0,
- USAGE_LOCAL_FILE_PLAYBACK = 0x1,
+ USAGE_LOCAL_FILE_PLAYBACK = 0x0,
+ USAGE_STREAM_FROM_SERVER = 0x1,
USAGE_CONSTRAINED_QUALITY = 0x2,
USAGE_CONSTANT_QUALITY = 0x3
} END_USAGE;
diff --git a/vp8/encoder/arm/neon/denoising_neon.c b/vp8/encoder/arm/neon/denoising_neon.c
index d517dfa37..3f8539759 100644
--- a/vp8/encoder/arm/neon/denoising_neon.c
+++ b/vp8/encoder/arm/neon/denoising_neon.c
@@ -119,8 +119,10 @@ int vp8_denoiser_filter_neon(YV12_BUFFER_CONFIG *mc_running_avg,
v_abs_adjustment);
v_running_avg_y = vqaddq_u8(v_sig, v_pos_adjustment);
v_running_avg_y = vqsubq_u8(v_running_avg_y, v_neg_adjustment);
- v_sum_diff = vqaddq_s8(v_sum_diff, (int8x16_t)v_pos_adjustment);
- v_sum_diff = vqsubq_s8(v_sum_diff, (int8x16_t)v_neg_adjustment);
+ v_sum_diff = vqaddq_s8(v_sum_diff,
+ vreinterpretq_s8_u8(v_pos_adjustment));
+ v_sum_diff = vqsubq_s8(v_sum_diff,
+ vreinterpretq_s8_u8(v_neg_adjustment));
/* Store results. */
vst1q_u8(running_avg_y, v_running_avg_y);
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.asm b/vp9/common/arm/neon/vp9_reconintra_neon.asm
index 98619bb30..71bf24c9f 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon.asm
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.asm
@@ -17,6 +17,7 @@
EXPORT |vp9_h_predictor_16x16_neon|
EXPORT |vp9_h_predictor_32x32_neon|
EXPORT |vp9_tm_predictor_4x4_neon|
+ EXPORT |vp9_tm_predictor_8x8_neon|
ARM
REQUIRE8
PRESERVE8
@@ -328,8 +329,78 @@ loop_h
vqshrun.s16 d1, q2, #0
vst1.32 {d0[0]}, [r0], r1
vst1.32 {d1[0]}, [r0], r1
-
bx lr
ENDP ; |vp9_tm_predictor_4x4_neon|
+;void vp9_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride,
+; const uint8_t *above,
+; const uint8_t *left)
+; r0 uint8_t *dst
+; r1 ptrdiff_t y_stride
+; r2 const uint8_t *above
+; r3 const uint8_t *left
+
+|vp9_tm_predictor_8x8_neon| PROC
+ ; Load ytop_left = above[-1];
+ sub r12, r2, #1
+ ldrb r12, [r12]
+ vdup.u8 d0, r12
+
+ ; Load above 8 pixels
+ vld1.64 {d2}, [r2]
+
+ ; Compute above - ytop_left
+ vsubl.u8 q3, d2, d0
+
+ ; Load left row by row and compute left + (above - ytop_left)
+ ; 1st row and 2nd row
+ ldrb r12, [r3], #1
+ ldrb r2, [r3], #1
+ vdup.u16 q1, r12
+ vdup.u16 q2, r2
+ vadd.s16 q1, q1, q3
+ vadd.s16 q2, q2, q3
+ vqshrun.s16 d0, q1, #0
+ vqshrun.s16 d1, q2, #0
+ vst1.64 {d0}, [r0], r1
+ vst1.64 {d1}, [r0], r1
+
+ ; 3rd row and 4th row
+ ldrb r12, [r3], #1
+ ldrb r2, [r3], #1
+ vdup.u16 q1, r12
+ vdup.u16 q2, r2
+ vadd.s16 q1, q1, q3
+ vadd.s16 q2, q2, q3
+ vqshrun.s16 d0, q1, #0
+ vqshrun.s16 d1, q2, #0
+ vst1.64 {d0}, [r0], r1
+ vst1.64 {d1}, [r0], r1
+
+ ; 5th row and 6th row
+ ldrb r12, [r3], #1
+ ldrb r2, [r3], #1
+ vdup.u16 q1, r12
+ vdup.u16 q2, r2
+ vadd.s16 q1, q1, q3
+ vadd.s16 q2, q2, q3
+ vqshrun.s16 d0, q1, #0
+ vqshrun.s16 d1, q2, #0
+ vst1.64 {d0}, [r0], r1
+ vst1.64 {d1}, [r0], r1
+
+ ; 7th row and 8th row
+ ldrb r12, [r3], #1
+ ldrb r2, [r3], #1
+ vdup.u16 q1, r12
+ vdup.u16 q2, r2
+ vadd.s16 q1, q1, q3
+ vadd.s16 q2, q2, q3
+ vqshrun.s16 d0, q1, #0
+ vqshrun.s16 d1, q2, #0
+ vst1.64 {d0}, [r0], r1
+ vst1.64 {d1}, [r0], r1
+ bx lr
+ ENDP ; |vp9_tm_predictor_8x8_neon|
+
END
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index ff20553d6..ca42090c1 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -85,7 +85,7 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) {
int mi_size;
if (vp9_realloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
- VP9BORDERINPIXELS, NULL, NULL, NULL) < 0)
+ VP9_DEC_BORDER_IN_PIXELS, NULL, NULL, NULL) < 0)
goto fail;
set_mb_mi(cm, aligned_width, aligned_height);
@@ -154,7 +154,7 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
for (i = 0; i < cm->fb_count; i++) {
cm->fb_idx_ref_cnt[i] = 0;
if (vp9_alloc_frame_buffer(&cm->yv12_fb[i], width, height, ss_x, ss_y,
- VP9BORDERINPIXELS) < 0)
+ VP9_ENC_BORDER_IN_PIXELS) < 0)
goto fail;
}
@@ -167,7 +167,7 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
}
if (vp9_alloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
- VP9BORDERINPIXELS) < 0)
+ VP9_ENC_BORDER_IN_PIXELS) < 0)
goto fail;
set_mb_mi(cm, aligned_width, aligned_height);
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 21e2b16a4..ad78b0dc4 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -122,7 +122,6 @@ typedef struct {
TX_SIZE tx_size;
int_mv mv[2]; // for each reference frame used
int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
- int_mv best_mv[2];
uint8_t mode_context[MAX_REF_FRAMES];
@@ -242,6 +241,9 @@ typedef struct macroblockd {
/* pointer to current frame */
const YV12_BUFFER_CONFIG *cur_buf;
+ /* mc buffer */
+ DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]);
+
int lossless;
/* Inverse transform function pointers. */
void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob);
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index f43a85f14..ba162fd20 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -112,8 +112,8 @@ static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) {
// This macro is currently unused but may be used by certain implementations
#define MAXBAND_INDEX 21
-extern const uint8_t vp9_coefband_trans_8x8plus[1024];
-extern const uint8_t vp9_coefband_trans_4x4[16];
+extern DECLARE_ALIGNED(16, const uint8_t, vp9_coefband_trans_8x8plus[1024]);
+extern DECLARE_ALIGNED(16, const uint8_t, vp9_coefband_trans_4x4[16]);
static const uint8_t *get_band_translate(TX_SIZE tx_size) {
return tx_size == TX_4X4 ? vp9_coefband_trans_4x4
diff --git a/vp9/common/vp9_mvref_common.h b/vp9/common/vp9_mvref_common.h
index 06adbabaa..cd89390d5 100644
--- a/vp9/common/vp9_mvref_common.h
+++ b/vp9/common/vp9_mvref_common.h
@@ -32,8 +32,10 @@ static INLINE void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
mv_ref_list, -1, mi_row, mi_col);
}
-#define LEFT_TOP_MARGIN ((VP9BORDERINPIXELS - VP9_INTERP_EXTEND) << 3)
-#define RIGHT_BOTTOM_MARGIN ((VP9BORDERINPIXELS - VP9_INTERP_EXTEND) << 3)
+#define LEFT_TOP_MARGIN ((VP9_ENC_BORDER_IN_PIXELS \
+ - VP9_INTERP_EXTEND) << 3)
+#define RIGHT_BOTTOM_MARGIN ((VP9_ENC_BORDER_IN_PIXELS \
+ - VP9_INTERP_EXTEND) << 3)
// check a list of motion vectors by sad score using a number rows of pixels
// above and a number cols of pixels in the left to select the one with best
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index 397f446f3..b5a9248c3 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -20,15 +20,16 @@
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
-static void build_mc_border(const uint8_t *src, uint8_t *dst, int stride,
- int x, int y, int b_w, int b_h, int w, int h) {
+static void build_mc_border(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ int x, int y, int b_w, int b_h, int w, int h) {
// Get a pointer to the start of the real data for this row.
- const uint8_t *ref_row = src - x - y * stride;
+ const uint8_t *ref_row = src - x - y * src_stride;
if (y >= h)
- ref_row += (h - 1) * stride;
+ ref_row += (h - 1) * src_stride;
else if (y > 0)
- ref_row += y * stride;
+ ref_row += y * src_stride;
do {
int right = 0, copy;
@@ -49,16 +50,16 @@ static void build_mc_border(const uint8_t *src, uint8_t *dst, int stride,
memset(dst, ref_row[0], left);
if (copy)
- memmove(dst + left, ref_row + x + left, copy);
+ memcpy(dst + left, ref_row + x + left, copy);
if (right)
memset(dst + left + copy, ref_row[w - 1], right);
- dst += stride;
+ dst += dst_stride;
++y;
if (y > 0 && y < h)
- ref_row += stride;
+ ref_row += src_stride;
} while (--b_h);
}
@@ -281,7 +282,7 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
MV32 scaled_mv;
int xs, ys, x0, y0, x0_16, y0_16, x1, y1, frame_width,
- frame_height, subpel_x, subpel_y;
+ frame_height, subpel_x, subpel_y, buf_stride;
uint8_t *ref_frame, *buf_ptr;
const YV12_BUFFER_CONFIG *ref_buf = xd->block_refs[ref]->buf;
@@ -308,7 +309,7 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
xs = sf->x_step_q4;
ys = sf->y_step_q4;
- // Get block position in the scaled reference frame.
+ // Map the top left corner of the block into the reference frame.
x0 = sf->scale_value_x(x0, sf);
y0 = sf->scale_value_y(y0, sf);
x0_16 = sf->scale_value_x(x0_16, sf);
@@ -321,7 +322,7 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
subpel_x = scaled_mv.col & SUBPEL_MASK;
subpel_y = scaled_mv.row & SUBPEL_MASK;
- // Get reference block top left coordinate.
+ // Calculate the top left corner of the best matching block in the reference frame.
x0 += scaled_mv.col >> SUBPEL_BITS;
y0 += scaled_mv.row >> SUBPEL_BITS;
x0_16 += scaled_mv.col;
@@ -329,24 +330,28 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
// Get reference block bottom right coordinate.
x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1;
- y1 = ((y0_16 + (h - 1) * xs) >> SUBPEL_BITS) + 1;
+ y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1;
// Get reference block pointer.
buf_ptr = ref_frame + y0 * pre_buf->stride + x0;
+ buf_stride = pre_buf->stride;
- // Do border extension if there is motion or
+ // Do border extension if there is motion or the
// width/height is not a multiple of 8 pixels.
if (scaled_mv.col || scaled_mv.row ||
(frame_width & 0x7) || (frame_height & 0x7)) {
+ int x_pad = 0, y_pad = 0;
- if (subpel_x) {
+ if (subpel_x || (sf->x_step_q4 & SUBPEL_MASK)) {
x0 -= VP9_INTERP_EXTEND - 1;
x1 += VP9_INTERP_EXTEND;
+ x_pad = 1;
}
- if (subpel_y) {
+ if (subpel_y || (sf->y_step_q4 & SUBPEL_MASK)) {
y0 -= VP9_INTERP_EXTEND - 1;
y1 += VP9_INTERP_EXTEND;
+ y_pad = 1;
}
// Skip border extension if block is inside the frame.
@@ -354,12 +359,14 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) {
uint8_t *buf_ptr1 = ref_frame + y0 * pre_buf->stride + x0;
// Extend the border.
- build_mc_border(buf_ptr1, buf_ptr1, pre_buf->stride, x0, y0, x1 - x0,
- y1 - y0, frame_width, frame_height);
+ build_mc_border(buf_ptr1, pre_buf->stride, xd->mc_buf, x1 - x0,
+ x0, y0, x1 - x0, y1 - y0, frame_width, frame_height);
+ buf_stride = x1 - x0;
+ buf_ptr = xd->mc_buf + y_pad * 3 * buf_stride + x_pad * 3;
}
}
- inter_predictor(buf_ptr, pre_buf->stride, dst, dst_buf->stride, subpel_x,
+ inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
subpel_y, sf, w, h, ref, &xd->subpix, xs, ys);
}
}
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 8f858f47c..caa6947b3 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -96,7 +96,7 @@ prototype void vp9_v_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint
specialize vp9_v_predictor_8x8 $sse_x86inc neon
prototype void vp9_tm_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_tm_predictor_8x8 $sse2_x86inc dspr2
+specialize vp9_tm_predictor_8x8 $sse2_x86inc neon dspr2
prototype void vp9_dc_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_dc_predictor_8x8 $sse_x86inc dspr2
@@ -742,7 +742,7 @@ specialize vp9_full_search_sad sse3 sse4_1
vp9_full_search_sad_sse3=vp9_full_search_sadx3
vp9_full_search_sad_sse4_1=vp9_full_search_sadx8
-prototype int vp9_refining_search_sad "struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
+prototype int vp9_refining_search_sad "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
specialize vp9_refining_search_sad sse3
vp9_refining_search_sad_sse3=vp9_refining_search_sadx4
@@ -756,9 +756,5 @@ specialize vp9_full_range_search
prototype void vp9_temporal_filter_apply "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"
specialize vp9_temporal_filter_apply sse2
-prototype void vp9_yv12_copy_partial_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc, int fraction"
-specialize vp9_yv12_copy_partial_frame
-
-
fi
# end encoder functions
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
index f4f758297..f95423678 100644
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -23,105 +23,20 @@ typedef void filter8_1dfunction (
const short *filter
);
-#if (HAVE_SSSE3)
+#if HAVE_SSSE3
+filter8_1dfunction vp9_filter_block1d16_v8_ssse3;
+filter8_1dfunction vp9_filter_block1d16_h8_ssse3;
+filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
+filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3;
-#if (ARCH_X86_64)
-filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
-
-void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- /* Ensure the filter can be compressed to int16_t. */
- if (x_step_q4 == 16 && filter_x[3] != 128) {
- while (w >= 16) {
- vp9_filter_block1d16_h8_intrin_ssse3(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 16;
- dst += 16;
- w -= 16;
- }
- while (w >= 8) {
- vp9_filter_block1d8_h8_intrin_ssse3(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 8;
- dst += 8;
- w -= 8;
- }
- while (w >= 4) {
- vp9_filter_block1d4_h8_intrin_ssse3(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 4;
- dst += 4;
- w -= 4;
- }
- }
- if (w) {
- vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- }
-}
-void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- if (y_step_q4 == 16 && filter_y[3] != 128) {
- while (w >= 16) {
- vp9_filter_block1d16_v8_intrin_ssse3(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 16;
- dst += 16;
- w -= 16;
- }
- while (w >= 8) {
- vp9_filter_block1d8_v8_intrin_ssse3(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 8;
- dst += 8;
- w -= 8;
- }
- while (w >= 4) {
- vp9_filter_block1d4_v8_intrin_ssse3(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 4;
- dst += 4;
- w -= 4;
- }
- }
- if (w) {
- vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- }
-}
-
-#else
-filter8_1dfunction vp9_filter_block1d16_v8_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h8_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4,
@@ -198,7 +113,6 @@ void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
w, h);
}
}
-#endif
void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
diff --git a/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c b/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
deleted file mode 100644
index 303fced3b..000000000
--- a/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
+++ /dev/null
@@ -1,591 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <tmmintrin.h>
-#include "vpx_ports/mem.h"
-#include "vpx_ports/emmintrin_compat.h"
-
-
-// filters only for the 4_h8 convolution
-DECLARE_ALIGNED(16, const unsigned char,
-filt1_4_h8[16])= {0, 1, 1, 2, 2, 3, 3, 4, 2, 3, 3, 4, 4, 5, 5, 6};
-
-DECLARE_ALIGNED(16, const unsigned char,
-filt2_4_h8[16])= {4, 5, 5, 6, 6, 7, 7, 8, 6, 7, 7, 8, 8, 9, 9, 10};
-
-// filters for 8_h8 and 16_h8
-DECLARE_ALIGNED(16, const unsigned char,
-filt1_global[16])= {0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8};
-
-DECLARE_ALIGNED(16, const unsigned char,
-filt2_global[16])= {2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10};
-
-DECLARE_ALIGNED(16, const unsigned char,
-filt3_global[16])= {4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12};
-
-DECLARE_ALIGNED(16, const unsigned char,
-filt4_global[16])= {6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14};
-
-
-
-void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr,
- unsigned int src_pixels_per_line,
- unsigned char *output_ptr,
- unsigned int output_pitch,
- unsigned int output_height,
- int16_t *filter) {
- __m128i firstFilters, secondFilters, thirdFilters, forthFilters;
- __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4;
- __m128i addFilterReg64, filtersReg, srcReg, minReg;
- unsigned int i;
-
- // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
- addFilterReg64 =_mm_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((__m128i *)filter);
- // converting the 16 bit (short) to 8 bit (byte) and have the same data
- // in both lanes of 128 bit register.
- filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
-
- // duplicate only the first 16 bits in the filter into the first lane
- firstFilters = _mm_shufflelo_epi16(filtersReg, 0);
- // duplicate only the third 16 bit in the filter into the first lane
- secondFilters = _mm_shufflelo_epi16(filtersReg, 0xAAu);
- // duplicate only the seconds 16 bits in the filter into the second lane
- firstFilters = _mm_shufflehi_epi16(firstFilters, 0x55u);
- // duplicate only the forth 16 bits in the filter into the second lane
- secondFilters = _mm_shufflehi_epi16(secondFilters, 0xFFu);
-
- // loading the local filters
- thirdFilters =_mm_load_si128((__m128i const *)filt1_4_h8);
- forthFilters = _mm_load_si128((__m128i const *)filt2_4_h8);
-
- for (i = 0; i < output_height; i++) {
- srcReg = _mm_loadu_si128((__m128i *)(src_ptr-3));
-
- // filter the source buffer
- srcRegFilt1= _mm_shuffle_epi8(srcReg, thirdFilters);
- srcRegFilt2= _mm_shuffle_epi8(srcReg, forthFilters);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
- srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters);
-
- // extract the higher half of the lane
- srcRegFilt3 = _mm_srli_si128(srcRegFilt1, 8);
- srcRegFilt4 = _mm_srli_si128(srcRegFilt2, 8);
-
- minReg = _mm_min_epi16(srcRegFilt3, srcRegFilt2);
-
- // add and saturate all the results together
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4);
-
- srcRegFilt3 = _mm_max_epi16(srcRegFilt3, srcRegFilt2);
-
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg);
-
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt3);
-
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
-
- // shift by 7 bit each 16 bits
- srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
-
- // shrink to 8 bit each 16 bits
- srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1);
-
- src_ptr+=src_pixels_per_line;
-
- // save only 4 bytes
- *((int*)&output_ptr[0])= _mm_cvtsi128_si32(srcRegFilt1);
-
- output_ptr+=output_pitch;
- }
-}
-
-
-void vp9_filter_block1d8_h8_intrin_ssse3(unsigned char *src_ptr,
- unsigned int src_pixels_per_line,
- unsigned char *output_ptr,
- unsigned int output_pitch,
- unsigned int output_height,
- int16_t *filter) {
- __m128i firstFilters, secondFilters, thirdFilters, forthFilters, srcReg;
- __m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg;
- __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4;
- __m128i addFilterReg64, filtersReg, minReg;
- unsigned int i;
-
- // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
- addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((__m128i *)filter);
- // converting the 16 bit (short) to 8 bit (byte) and have the same data
- // in both lanes of 128 bit register.
- filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
-
- // duplicate only the first 16 bits (first and second byte)
- // across 128 bit register
- firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u));
- // duplicate only the second 16 bits (third and forth byte)
- // across 128 bit register
- secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u));
- // duplicate only the third 16 bits (fifth and sixth byte)
- // across 128 bit register
- thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u));
- // duplicate only the forth 16 bits (seventh and eighth byte)
- // across 128 bit register
- forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u));
-
- filt1Reg = _mm_load_si128((__m128i const *)filt1_global);
- filt2Reg = _mm_load_si128((__m128i const *)filt2_global);
- filt3Reg = _mm_load_si128((__m128i const *)filt3_global);
- filt4Reg = _mm_load_si128((__m128i const *)filt4_global);
-
- for (i = 0; i < output_height; i++) {
- srcReg = _mm_loadu_si128((__m128i *)(src_ptr-3));
-
- // filter the source buffer
- srcRegFilt1= _mm_shuffle_epi8(srcReg, filt1Reg);
- srcRegFilt2= _mm_shuffle_epi8(srcReg, filt2Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
- srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters);
-
- // filter the source buffer
- srcRegFilt3= _mm_shuffle_epi8(srcReg, filt3Reg);
- srcRegFilt4= _mm_shuffle_epi8(srcReg, filt4Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, thirdFilters);
- srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, forthFilters);
-
- // add and saturate all the results together
- minReg = _mm_min_epi16(srcRegFilt4, srcRegFilt3);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2);
-
- srcRegFilt4= _mm_max_epi16(srcRegFilt4, srcRegFilt3);
-
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg);
-
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4);
-
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
-
- // shift by 7 bit each 16 bits
- srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
-
- // shrink to 8 bit each 16 bits
- srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1);
-
- src_ptr+=src_pixels_per_line;
-
- // save only 8 bytes
- _mm_storel_epi64((__m128i*)&output_ptr[0], srcRegFilt1);
-
- output_ptr+=output_pitch;
- }
-}
-
-void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr,
- unsigned int src_pixels_per_line,
- unsigned char *output_ptr,
- unsigned int output_pitch,
- unsigned int output_height,
- int16_t *filter) {
- __m128i addFilterReg64, filtersReg, srcReg1, srcReg2;
- __m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg;
- __m128i firstFilters, secondFilters, thirdFilters, forthFilters;
- __m128i srcRegFilt1_1, srcRegFilt2_1, srcRegFilt2, srcRegFilt3;
- unsigned int i;
-
- // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
- addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((__m128i *)filter);
- // converting the 16 bit (short) to 8 bit (byte) and have the same data
- // in both lanes of 128 bit register.
- filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
-
- // duplicate only the first 16 bits (first and second byte)
- // across 128 bit register
- firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u));
- // duplicate only the second 16 bits (third and forth byte)
- // across 128 bit register
- secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u));
- // duplicate only the third 16 bits (fifth and sixth byte)
- // across 128 bit register
- thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u));
- // duplicate only the forth 16 bits (seventh and eighth byte)
- // across 128 bit register
- forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u));
-
- filt1Reg = _mm_load_si128((__m128i const *)filt1_global);
- filt2Reg = _mm_load_si128((__m128i const *)filt2_global);
- filt3Reg = _mm_load_si128((__m128i const *)filt3_global);
- filt4Reg = _mm_load_si128((__m128i const *)filt4_global);
-
- for (i = 0; i < output_height; i++) {
- srcReg1 = _mm_loadu_si128((__m128i *)(src_ptr-3));
-
- // filter the source buffer
- srcRegFilt1_1= _mm_shuffle_epi8(srcReg1, filt1Reg);
- srcRegFilt2= _mm_shuffle_epi8(srcReg1, filt2Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt1_1 = _mm_maddubs_epi16(srcRegFilt1_1, firstFilters);
- srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters);
-
- // add and saturate the results together
- srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2);
-
- // filter the source buffer
- srcRegFilt3= _mm_shuffle_epi8(srcReg1, filt4Reg);
- srcRegFilt2= _mm_shuffle_epi8(srcReg1, filt3Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, forthFilters);
- srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters);
-
- // add and saturate the results together
- srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1,
- _mm_min_epi16(srcRegFilt3, srcRegFilt2));
-
- // reading the next 16 bytes.
- // (part of it was being read by earlier read)
- srcReg2 = _mm_loadu_si128((__m128i *)(src_ptr+5));
-
- // add and saturate the results together
- srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1,
- _mm_max_epi16(srcRegFilt3, srcRegFilt2));
-
- // filter the source buffer
- srcRegFilt2_1= _mm_shuffle_epi8(srcReg2, filt1Reg);
- srcRegFilt2= _mm_shuffle_epi8(srcReg2, filt2Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt2_1 = _mm_maddubs_epi16(srcRegFilt2_1, firstFilters);
- srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters);
-
- // add and saturate the results together
- srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, srcRegFilt2);
-
- // filter the source buffer
- srcRegFilt3= _mm_shuffle_epi8(srcReg2, filt4Reg);
- srcRegFilt2= _mm_shuffle_epi8(srcReg2, filt3Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, forthFilters);
- srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters);
-
- // add and saturate the results together
- srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1,
- _mm_min_epi16(srcRegFilt3, srcRegFilt2));
- srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1,
- _mm_max_epi16(srcRegFilt3, srcRegFilt2));
-
- srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, addFilterReg64);
-
- srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, addFilterReg64);
-
- // shift by 7 bit each 16 bit
- srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 7);
- srcRegFilt2_1 = _mm_srai_epi16(srcRegFilt2_1, 7);
-
- // shrink to 8 bit each 16 bits, the first lane contain the first
- // convolve result and the second lane contain the second convolve
- // result
- srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, srcRegFilt2_1);
-
- src_ptr+=src_pixels_per_line;
-
- // save 16 bytes
- _mm_store_si128((__m128i*)output_ptr, srcRegFilt1_1);
-
- output_ptr+=output_pitch;
- }
-}
-
-
-
-void vp9_filter_block1d4_v8_intrin_ssse3(unsigned char *src_ptr,
- unsigned int src_pitch,
- unsigned char *output_ptr,
- unsigned int out_pitch,
- unsigned int output_height,
- int16_t *filter) {
- __m128i addFilterReg64, filtersReg, firstFilters, secondFilters;
- __m128i minReg, srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4;
- unsigned int i;
-
- // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
- addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((__m128i *)filter);
- // converting the 16 bit (short) to 8 bit (byte) and have the same data
- // in both lanes of 128 bit register.
- filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
-
- // duplicate only the first 16 bits in the filter into the first lane
- firstFilters = _mm_shufflelo_epi16(filtersReg, 0);
- // duplicate only the second 16 bits in the filter into the second lane
- firstFilters = _mm_shufflehi_epi16(firstFilters, 0x55u);
- // duplicate only the third 16 bits in the filter into the first lane
- secondFilters = _mm_shufflelo_epi16(filtersReg, 0xAAu);
- // duplicate only the forth 16 bits in the filter into the second lane
- secondFilters = _mm_shufflehi_epi16(secondFilters, 0xFFu);
-
- for (i = 0; i < output_height; i++) {
- // load the first 4 byte
- srcRegFilt1 = _mm_cvtsi32_si128(*((int*)&src_ptr[0]));
- // load the next 4 bytes in stride of src_pitch
- srcRegFilt2 = _mm_cvtsi32_si128(*((int*)&(src_ptr+src_pitch)[0]));
-
- // merge the result together
- srcRegFilt1 = _mm_unpacklo_epi8(srcRegFilt1, srcRegFilt2);
-
-
- srcRegFilt2 = _mm_cvtsi32_si128(*((int*)&(src_ptr+src_pitch*2)[0]));
- srcRegFilt3 = _mm_cvtsi32_si128(*((int*)&(src_ptr+src_pitch*3)[0]));
-
- // merge the result together
- srcRegFilt2 = _mm_unpacklo_epi8(srcRegFilt2, srcRegFilt3);
-
- srcRegFilt3 = _mm_cvtsi32_si128(*((int*)&(src_ptr+src_pitch*4)[0]));
- srcRegFilt4 = _mm_cvtsi32_si128(*((int*)&(src_ptr+src_pitch*5)[0]));
-
- // merge the result together
- srcRegFilt3 = _mm_unpacklo_epi8(srcRegFilt3, srcRegFilt4);
- srcRegFilt1 = _mm_unpacklo_epi64(srcRegFilt1, srcRegFilt2);
-
- srcRegFilt4 = _mm_cvtsi32_si128(*((int*)&(src_ptr+src_pitch*6)[0]));
- srcRegFilt2 = _mm_cvtsi32_si128(*((int*)&(src_ptr+src_pitch*7)[0]));
-
- // merge the result together
- srcRegFilt4 = _mm_unpacklo_epi8(srcRegFilt4, srcRegFilt2);
- srcRegFilt3 = _mm_unpacklo_epi64(srcRegFilt3, srcRegFilt4);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
- srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters);
-
- // extract the second lane of the 128 bit register
- srcRegFilt2 = _mm_srli_si128(srcRegFilt1, 8);
-
- // add and saturate the results together
- minReg = _mm_min_epi16(srcRegFilt2, srcRegFilt3);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1,
- _mm_srli_si128(srcRegFilt3, 8));
- srcRegFilt2 = _mm_max_epi16(srcRegFilt2, srcRegFilt3);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
-
- // shift by 7 bit each 16 bit
- srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
-
- // shrink to 8 bit each 16 bits
- srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1);
-
- src_ptr+=src_pitch;
-
- // save only 4 bytes convolve result
- *((int*)&output_ptr[0])= _mm_cvtsi128_si32(srcRegFilt1);
-
- output_ptr+=out_pitch;
- }
-}
-
-void vp9_filter_block1d8_v8_intrin_ssse3(unsigned char *src_ptr,
- unsigned int src_pitch,
- unsigned char *output_ptr,
- unsigned int out_pitch,
- unsigned int output_height,
- int16_t *filter) {
- __m128i addFilterReg64, filtersReg, minReg, srcRegFilt6;
- __m128i firstFilters, secondFilters, thirdFilters, forthFilters;
- __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4, srcRegFilt5;
- unsigned int i;
-
- // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
- addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((__m128i *)filter);
- // converting the 16 bit (short) to 8 bit (byte) and have the same data
- // in both lanes of 128 bit register.
- filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
-
- // duplicate only the first 16 bits in the filter
- firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u));
- // duplicate only the second 16 bits in the filter
- secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u));
- // duplicate only the third 16 bits in the filter
- thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u));
- // duplicate only the forth 16 bits in the filter
- forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u));
-
- for (i = 0; i < output_height; i++) {
- // load the first 8 bytes
- srcRegFilt1 = _mm_loadl_epi64((__m128i *)&src_ptr[0]);
- // load the next 8 bytes in stride of src_pitch
- srcRegFilt2 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch)[0]);
- srcRegFilt3 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*2)[0]);
- srcRegFilt4 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*3)[0]);
-
- // merge the result together
- srcRegFilt1 = _mm_unpacklo_epi8(srcRegFilt1, srcRegFilt2);
- srcRegFilt3 = _mm_unpacklo_epi8(srcRegFilt3, srcRegFilt4);
-
- // load the next 8 bytes in stride of src_pitch
- srcRegFilt2 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*4)[0]);
- srcRegFilt4 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*5)[0]);
- srcRegFilt5 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*6)[0]);
- srcRegFilt6 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*7)[0]);
-
- // merge the result together
- srcRegFilt2 = _mm_unpacklo_epi8(srcRegFilt2, srcRegFilt4);
- srcRegFilt5 = _mm_unpacklo_epi8(srcRegFilt5, srcRegFilt6);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
- srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters);
- srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters);
- srcRegFilt5 = _mm_maddubs_epi16(srcRegFilt5, forthFilters);
-
- // add and saturate the results together
- minReg = _mm_min_epi16(srcRegFilt2, srcRegFilt3);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt5);
- srcRegFilt2 = _mm_max_epi16(srcRegFilt2, srcRegFilt3);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
-
- // shift by 7 bit each 16 bit
- srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
-
- // shrink to 8 bit each 16 bits
- srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1);
-
- src_ptr+=src_pitch;
-
- // save only 8 bytes convolve result
- _mm_storel_epi64((__m128i*)&output_ptr[0], srcRegFilt1);
-
- output_ptr+=out_pitch;
- }
-}
-
-
-void vp9_filter_block1d16_v8_intrin_ssse3(unsigned char *src_ptr,
- unsigned int src_pitch,
- unsigned char *output_ptr,
- unsigned int out_pitch,
- unsigned int output_height,
- int16_t *filter) {
- __m128i addFilterReg64, filtersReg, srcRegFilt1, srcRegFilt2, srcRegFilt3;
- __m128i firstFilters, secondFilters, thirdFilters, forthFilters;
- __m128i srcRegFilt4, srcRegFilt5, srcRegFilt6, srcRegFilt7, srcRegFilt8;
- unsigned int i;
-
- // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
- addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((__m128i *)filter);
- // converting the 16 bit (short) to 8 bit (byte) and have the same data
- // in both lanes of 128 bit register.
- filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
-
- // duplicate only the first 16 bits in the filter
- firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u));
- // duplicate only the second 16 bits in the filter
- secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u));
- // duplicate only the third 16 bits in the filter
- thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u));
- // duplicate only the forth 16 bits in the filter
- forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u));
-
-
- for (i = 0; i < output_height; i++) {
- // load the first 16 bytes
- srcRegFilt1 = _mm_loadu_si128((__m128i *)(src_ptr));
- // load the next 16 bytes in stride of src_pitch
- srcRegFilt2 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch));
- srcRegFilt3 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*6));
- srcRegFilt4 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*7));
-
- // merge the result together
- srcRegFilt5 = _mm_unpacklo_epi8(srcRegFilt1, srcRegFilt2);
- srcRegFilt6 = _mm_unpacklo_epi8(srcRegFilt3, srcRegFilt4);
- srcRegFilt1 = _mm_unpackhi_epi8(srcRegFilt1, srcRegFilt2);
- srcRegFilt3 = _mm_unpackhi_epi8(srcRegFilt3, srcRegFilt4);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt5 = _mm_maddubs_epi16(srcRegFilt5, firstFilters);
- srcRegFilt6 = _mm_maddubs_epi16(srcRegFilt6, forthFilters);
- srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
- srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, forthFilters);
-
-
- // add and saturate the results together
- srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, srcRegFilt6);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt3);
-
- // load the next 16 bytes in stride of two/three src_pitch
- srcRegFilt2 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*2));
- srcRegFilt3 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*3));
-
- // merge the result together
- srcRegFilt4 = _mm_unpacklo_epi8(srcRegFilt2, srcRegFilt3);
- srcRegFilt6 = _mm_unpackhi_epi8(srcRegFilt2, srcRegFilt3);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, secondFilters);
- srcRegFilt6 = _mm_maddubs_epi16(srcRegFilt6, secondFilters);
-
- // load the next 16 bytes in stride of four/five src_pitch
- srcRegFilt2 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*4));
- srcRegFilt3 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*5));
-
- // merge the result together
- srcRegFilt7 = _mm_unpacklo_epi8(srcRegFilt2, srcRegFilt3);
- srcRegFilt8 = _mm_unpackhi_epi8(srcRegFilt2, srcRegFilt3);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt7 = _mm_maddubs_epi16(srcRegFilt7, thirdFilters);
- srcRegFilt8 = _mm_maddubs_epi16(srcRegFilt8, thirdFilters);
-
-
- // add and saturate the results together
- srcRegFilt5 = _mm_adds_epi16(srcRegFilt5,
- _mm_min_epi16(srcRegFilt4, srcRegFilt7));
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1,
- _mm_min_epi16(srcRegFilt6, srcRegFilt8));
-
-
- // add and saturate the results together
- srcRegFilt5 = _mm_adds_epi16(srcRegFilt5,
- _mm_max_epi16(srcRegFilt4, srcRegFilt7));
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1,
- _mm_max_epi16(srcRegFilt6, srcRegFilt8));
- srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, addFilterReg64);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
-
- // shift by 7 bit each 16 bit
- srcRegFilt5 = _mm_srai_epi16(srcRegFilt5, 7);
- srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
-
- // shrink to 8 bit each 16 bits, the first lane contain the first
- // convolve result and the second lane contain the second convolve
- // result
- srcRegFilt1 = _mm_packus_epi16(srcRegFilt5, srcRegFilt1);
-
- src_ptr+=src_pitch;
-
- // save 16 bytes convolve result
- _mm_store_si128((__m128i*)output_ptr, srcRegFilt1);
-
- output_ptr+=out_pitch;
- }
-}
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 59faccdf7..d66ee2730 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -704,7 +704,7 @@ static void apply_frame_size(VP9D_COMP *pbi, int width, int height) {
if (vp9_realloc_frame_buffer(get_frame_new_buffer(cm),
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
- VP9BORDERINPIXELS, ext_fb,
+ VP9_DEC_BORDER_IN_PIXELS, ext_fb,
cm->realloc_fb_cb, cm->user_priv)) {
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate external frame buffer");
@@ -712,7 +712,7 @@ static void apply_frame_size(VP9D_COMP *pbi, int width, int height) {
} else {
vp9_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
- VP9BORDERINPIXELS, NULL, NULL, NULL);
+ VP9_DEC_BORDER_IN_PIXELS, NULL, NULL, NULL);
}
}
@@ -1129,11 +1129,12 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
cm->show_existing_frame = vp9_rb_read_bit(rb);
if (cm->show_existing_frame) {
- // show an existing frame directly
+ // Show an existing frame directly.
int frame_to_show = cm->ref_frame_map[vp9_rb_read_literal(rb, 3)];
ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->new_fb_idx, frame_to_show);
pbi->refresh_frame_flags = 0;
cm->lf.filter_level = 0;
+ cm->show_frame = 1;
return 0;
}
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index c81378153..2eb99ea15 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -357,9 +357,9 @@ static void read_intra_block_mode_info(VP9_COMMON *const cm, MODE_INFO *mi,
}
static INLINE int assign_mv(VP9_COMMON *cm, MB_PREDICTION_MODE mode,
- int_mv mv[2], int_mv best_mv[2],
- int_mv nearest_mv[2], int_mv near_mv[2],
- int is_compound, int allow_hp, vp9_reader *r) {
+ int_mv mv[2], int_mv ref_mv[2],
+ int_mv nearest_mv[2], int_mv near_mv[2],
+ int is_compound, int allow_hp, vp9_reader *r) {
int i;
int ret = 1;
@@ -367,10 +367,10 @@ static INLINE int assign_mv(VP9_COMMON *cm, MB_PREDICTION_MODE mode,
case NEWMV: {
nmv_context_counts *const mv_counts = cm->frame_parallel_decoding_mode ?
NULL : &cm->counts.mv;
- read_mv(r, &mv[0].as_mv, &best_mv[0].as_mv,
+ read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv,
&cm->fc.nmvc, mv_counts, allow_hp);
if (is_compound)
- read_mv(r, &mv[1].as_mv, &best_mv[1].as_mv,
+ read_mv(r, &mv[1].as_mv, &ref_mv[1].as_mv,
&cm->fc.nmvc, mv_counts, allow_hp);
for (i = 0; i < 1 + is_compound; ++i) {
ret = ret && mv[i].as_mv.row < MV_UPP && mv[i].as_mv.row > MV_LOW;
@@ -380,17 +380,20 @@ static INLINE int assign_mv(VP9_COMMON *cm, MB_PREDICTION_MODE mode,
}
case NEARESTMV: {
mv[0].as_int = nearest_mv[0].as_int;
- if (is_compound) mv[1].as_int = nearest_mv[1].as_int;
+ if (is_compound)
+ mv[1].as_int = nearest_mv[1].as_int;
break;
}
case NEARMV: {
mv[0].as_int = near_mv[0].as_int;
- if (is_compound) mv[1].as_int = near_mv[1].as_int;
+ if (is_compound)
+ mv[1].as_int = near_mv[1].as_int;
break;
}
case ZEROMV: {
mv[0].as_int = 0;
- if (is_compound) mv[1].as_int = 0;
+ if (is_compound)
+ mv[1].as_int = 0;
break;
}
default: {
@@ -423,7 +426,7 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm,
const BLOCK_SIZE bsize = mbmi->sb_type;
const int allow_hp = cm->allow_high_precision_mv;
- int_mv nearest[2], nearmv[2], best[2];
+ int_mv nearestmv[2], nearmv[2];
int inter_mode_ctx, ref, is_compound;
read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame);
@@ -452,8 +455,7 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm,
if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) {
for (ref = 0; ref < 1 + is_compound; ++ref) {
vp9_find_best_ref_mvs(xd, allow_hp, mbmi->ref_mvs[mbmi->ref_frame[ref]],
- &nearest[ref], &nearmv[ref]);
- best[ref].as_int = nearest[ref].as_int;
+ &nearestmv[ref], &nearmv[ref]);
}
}
@@ -466,6 +468,7 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm,
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2
int idx, idy;
int b_mode;
+ int_mv nearest_sub8x8[2], near_sub8x8[2];
for (idy = 0; idy < 2; idy += num_4x4_h) {
for (idx = 0; idx < 2; idx += num_4x4_w) {
int_mv block[2];
@@ -475,9 +478,11 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm,
if (b_mode == NEARESTMV || b_mode == NEARMV)
for (ref = 0; ref < 1 + is_compound; ++ref)
vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, j, ref, mi_row, mi_col,
- &nearest[ref], &nearmv[ref]);
+ &nearest_sub8x8[ref],
+ &near_sub8x8[ref]);
- if (!assign_mv(cm, b_mode, block, best, nearest, nearmv,
+ if (!assign_mv(cm, b_mode, block, nearestmv,
+ nearest_sub8x8, near_sub8x8,
is_compound, allow_hp, r)) {
xd->corrupted |= 1;
break;
@@ -499,9 +504,8 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm,
mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
} else {
- xd->corrupted |= !assign_mv(cm, mbmi->mode, mbmi->mv,
- best, nearest, nearmv,
- is_compound, allow_hp, r);
+ xd->corrupted |= !assign_mv(cm, mbmi->mode, mbmi->mv, nearestmv,
+ nearestmv, nearmv, is_compound, allow_hp, r);
}
}
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index ec4dc14f4..7188d7674 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -260,6 +260,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
struct segmentation *seg = &cm->seg;
MB_MODE_INFO *const mi = &m->mbmi;
const MV_REFERENCE_FRAME rf = mi->ref_frame[0];
+ const MV_REFERENCE_FRAME sec_rf = mi->ref_frame[1];
const MB_PREDICTION_MODE mode = mi->mode;
const int segment_id = mi->segment_id;
int skip_coeff;
@@ -355,11 +356,11 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
active_section = 11;
#endif
vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[0].as_mv,
- &mi->best_mv[0].as_mv, nmvc, allow_hp);
+ &mi->ref_mvs[rf][0].as_mv, nmvc, allow_hp);
if (has_second_ref(mi))
vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[1].as_mv,
- &mi->best_mv[1].as_mv, nmvc, allow_hp);
+ &mi->ref_mvs[sec_rf][0].as_mv, nmvc, allow_hp);
}
}
}
@@ -368,11 +369,11 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
active_section = 5;
#endif
vp9_encode_mv(cpi, bc, &mi->mv[0].as_mv,
- &mi->best_mv[0].as_mv, nmvc, allow_hp);
+ &mi->ref_mvs[rf][0].as_mv, nmvc, allow_hp);
if (has_second_ref(mi))
vp9_encode_mv(cpi, bc, &mi->mv[1].as_mv,
- &mi->best_mv[1].as_mv, nmvc, allow_hp);
+ &mi->ref_mvs[sec_rf][0].as_mv, nmvc, allow_hp);
}
}
}
@@ -745,7 +746,6 @@ static void update_coef_probs(VP9_COMP* cpi, vp9_writer* w) {
const TX_MODE tx_mode = cpi->common.tx_mode;
const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode];
TX_SIZE tx_size;
- vp9_clear_system_state();
for (tx_size = TX_4X4; tx_size <= TX_32X32; ++tx_size)
build_tree_distribution(cpi, tx_size);
@@ -1295,8 +1295,6 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) {
active_section = 7;
#endif
- vp9_clear_system_state(); // __asm emms;
-
first_part_size = write_compressed_header(cpi, data);
data += first_part_size;
vp9_wb_write_literal(&saved_wb, first_part_size, 16);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 75ea64a2f..9d02c8f95 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -500,17 +500,8 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
if (is_inter_block(mbmi) &&
(mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) {
int_mv best_mv[2];
- const MV_REFERENCE_FRAME rf1 = mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME rf2 = mbmi->ref_frame[1];
- best_mv[0].as_int = ctx->best_ref_mv[0].as_int;
- best_mv[1].as_int = ctx->best_ref_mv[1].as_int;
- if (mbmi->mode == NEWMV) {
- best_mv[0].as_int = mbmi->ref_mvs[rf1][0].as_int;
- if (rf2 > 0)
- best_mv[1].as_int = mbmi->ref_mvs[rf2][0].as_int;
- }
- mbmi->best_mv[0].as_int = best_mv[0].as_int;
- mbmi->best_mv[1].as_int = best_mv[1].as_int;
+ for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
+ best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
vp9_update_mv_count(cpi, x, best_mv);
}
@@ -630,11 +621,11 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
}
}
-static void pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
- int mi_row, int mi_col,
- int *totalrate, int64_t *totaldist,
- BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
- int64_t best_rd) {
+static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
+ int mi_row, int mi_col,
+ int *totalrate, int64_t *totaldist,
+ BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
+ int64_t best_rd) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -1079,35 +1070,35 @@ static void pick_partition_type(VP9_COMP *cpi,
switch (partition) {
case PARTITION_NONE:
- pick_sb_modes(cpi, tile, mi_row, mi_col, rate, dist,
- bsize, get_block_context(x, bsize), INT64_MAX);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, rate, dist,
+ bsize, get_block_context(x, bsize), INT64_MAX);
break;
case PARTITION_HORZ:
*get_sb_index(x, subsize) = 0;
- pick_sb_modes(cpi, tile, mi_row, mi_col, &sub_rate[0], &sub_dist[0],
- subsize, get_block_context(x, subsize), INT64_MAX);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sub_rate[0], &sub_dist[0],
+ subsize, get_block_context(x, subsize), INT64_MAX);
if (bsize >= BLOCK_8X8 && mi_row + num_8x8_subsize < cm->mi_rows) {
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*get_sb_index(x, subsize) = 1;
- pick_sb_modes(cpi, tile, mi_row + num_8x8_subsize, mi_col,
- &sub_rate[1], &sub_dist[1], subsize,
- get_block_context(x, subsize), INT64_MAX);
+ rd_pick_sb_modes(cpi, tile, mi_row + num_8x8_subsize, mi_col,
+ &sub_rate[1], &sub_dist[1], subsize,
+ get_block_context(x, subsize), INT64_MAX);
}
*rate = sub_rate[0] + sub_rate[1];
*dist = sub_dist[0] + sub_dist[1];
break;
case PARTITION_VERT:
*get_sb_index(x, subsize) = 0;
- pick_sb_modes(cpi, tile, mi_row, mi_col, &sub_rate[0], &sub_dist[0],
- subsize, get_block_context(x, subsize), INT64_MAX);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sub_rate[0], &sub_dist[0],
+ subsize, get_block_context(x, subsize), INT64_MAX);
if (bsize >= BLOCK_8X8 && mi_col + num_8x8_subsize < cm->mi_cols) {
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*get_sb_index(x, subsize) = 1;
- pick_sb_modes(cpi, tile, mi_row, mi_col + num_8x8_subsize,
- &sub_rate[1], &sub_dist[1], subsize,
- get_block_context(x, subsize), INT64_MAX);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col + num_8x8_subsize,
+ &sub_rate[1], &sub_dist[1], subsize,
+ get_block_context(x, subsize), INT64_MAX);
}
*rate = sub_rate[0] + sub_rate[1];
*dist = sub_dist[1] + sub_dist[1];
@@ -1244,8 +1235,8 @@ static void rd_use_partition(VP9_COMP *cpi,
mi_row + (ms >> 1) < cm->mi_rows &&
mi_col + (ms >> 1) < cm->mi_cols) {
*(get_sb_partitioning(x, bsize)) = bsize;
- pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, &none_dist, bsize,
- get_block_context(x, bsize), INT64_MAX);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, &none_dist, bsize,
+ get_block_context(x, bsize), INT64_MAX);
pl = partition_plane_context(cpi->above_seg_context,
cpi->left_seg_context,
@@ -1260,13 +1251,15 @@ static void rd_use_partition(VP9_COMP *cpi,
switch (partition) {
case PARTITION_NONE:
- pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, &last_part_dist,
- bsize, get_block_context(x, bsize), INT64_MAX);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
+ &last_part_dist, bsize,
+ get_block_context(x, bsize), INT64_MAX);
break;
case PARTITION_HORZ:
*get_sb_index(x, subsize) = 0;
- pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, &last_part_dist,
- subsize, get_block_context(x, subsize), INT64_MAX);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
+ &last_part_dist, subsize,
+ get_block_context(x, subsize), INT64_MAX);
if (last_part_rate != INT_MAX &&
bsize >= BLOCK_8X8 && mi_row + (mh >> 1) < cm->mi_rows) {
int rt = 0;
@@ -1274,8 +1267,8 @@ static void rd_use_partition(VP9_COMP *cpi,
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*get_sb_index(x, subsize) = 1;
- pick_sb_modes(cpi, tile, mi_row + (ms >> 1), mi_col, &rt, &dt, subsize,
- get_block_context(x, subsize), INT64_MAX);
+ rd_pick_sb_modes(cpi, tile, mi_row + (ms >> 1), mi_col, &rt, &dt,
+ subsize, get_block_context(x, subsize), INT64_MAX);
if (rt == INT_MAX || dt == INT_MAX) {
last_part_rate = INT_MAX;
last_part_dist = INT_MAX;
@@ -1288,8 +1281,9 @@ static void rd_use_partition(VP9_COMP *cpi,
break;
case PARTITION_VERT:
*get_sb_index(x, subsize) = 0;
- pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, &last_part_dist,
- subsize, get_block_context(x, subsize), INT64_MAX);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
+ &last_part_dist, subsize,
+ get_block_context(x, subsize), INT64_MAX);
if (last_part_rate != INT_MAX &&
bsize >= BLOCK_8X8 && mi_col + (ms >> 1) < cm->mi_cols) {
int rt = 0;
@@ -1297,8 +1291,8 @@ static void rd_use_partition(VP9_COMP *cpi,
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*get_sb_index(x, subsize) = 1;
- pick_sb_modes(cpi, tile, mi_row, mi_col + (ms >> 1), &rt, &dt, subsize,
- get_block_context(x, subsize), INT64_MAX);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (ms >> 1), &rt, &dt,
+ subsize, get_block_context(x, subsize), INT64_MAX);
if (rt == INT_MAX || dt == INT_MAX) {
last_part_rate = INT_MAX;
last_part_dist = INT_MAX;
@@ -1372,9 +1366,9 @@ static void rd_use_partition(VP9_COMP *cpi,
save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
- pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &rt, &dt,
- split_subsize, get_block_context(x, split_subsize),
- INT64_MAX);
+ rd_pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &rt, &dt,
+ split_subsize, get_block_context(x, split_subsize),
+ INT64_MAX);
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
@@ -1738,8 +1732,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
// PARTITION_NONE
if (partition_none_allowed) {
- pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize,
- get_block_context(x, bsize), best_rd);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize,
+ get_block_context(x, bsize), best_rd);
if (this_rate != INT_MAX) {
if (bsize >= BLOCK_8X8) {
pl = partition_plane_context(cpi->above_seg_context,
@@ -1849,8 +1843,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
partition_none_allowed)
get_block_context(x, subsize)->pred_filter_type =
get_block_context(x, bsize)->mic.mbmi.interp_filter;
- pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
- get_block_context(x, subsize), best_rd);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
+ get_block_context(x, subsize), best_rd);
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) {
@@ -1864,9 +1858,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
partition_none_allowed)
get_block_context(x, subsize)->pred_filter_type =
get_block_context(x, bsize)->mic.mbmi.interp_filter;
- pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rate,
- &this_dist, subsize, get_block_context(x, subsize),
- best_rd - sum_rd);
+ rd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rate,
+ &this_dist, subsize, get_block_context(x, subsize),
+ best_rd - sum_rd);
if (this_rate == INT_MAX) {
sum_rd = INT64_MAX;
} else {
@@ -1902,8 +1896,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
partition_none_allowed)
get_block_context(x, subsize)->pred_filter_type =
get_block_context(x, bsize)->mic.mbmi.interp_filter;
- pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
- get_block_context(x, subsize), best_rd);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
+ get_block_context(x, subsize), best_rd);
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) {
update_state(cpi, get_block_context(x, subsize), subsize, 0);
@@ -1916,9 +1910,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
partition_none_allowed)
get_block_context(x, subsize)->pred_filter_type =
get_block_context(x, bsize)->mic.mbmi.interp_filter;
- pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rate,
- &this_dist, subsize, get_block_context(x, subsize),
- best_rd - sum_rd);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rate,
+ &this_dist, subsize, get_block_context(x, subsize),
+ best_rd - sum_rd);
if (this_rate == INT_MAX) {
sum_rd = INT64_MAX;
} else {
@@ -1989,8 +1983,8 @@ static void rd_pick_reference_frame(VP9_COMP *cpi, const TileInfo *const tile,
if ((mi_row + (ms >> 1) < cm->mi_rows) &&
(mi_col + (ms >> 1) < cm->mi_cols)) {
cpi->set_ref_frame_mask = 1;
- pick_sb_modes(cpi, tile, mi_row, mi_col, &r, &d, BLOCK_64X64,
- get_block_context(x, BLOCK_64X64), INT64_MAX);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &r, &d, BLOCK_64X64,
+ get_block_context(x, BLOCK_64X64), INT64_MAX);
pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
mi_row, mi_col, BLOCK_64X64);
r += x->partition_cost[pl][PARTITION_NONE];
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index af710a8f4..853094b29 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -224,13 +224,9 @@ void vp9_encode_mv(VP9_COMP* cpi, vp9_writer* w,
}
}
-void vp9_build_nmv_cost_table(int *mvjoint,
- int *mvcost[2],
+void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
const nmv_context* const mvctx,
- int usehp,
- int mvc_flag_v,
- int mvc_flag_h) {
- vp9_clear_system_state();
+ int usehp, int mvc_flag_v, int mvc_flag_h) {
vp9_cost_tokens(mvjoint, mvctx->joints, vp9_mv_joint_tree);
if (mvc_flag_v)
build_nmv_component_cost_table(mvcost[0], &mvctx->comps[0], usehp);
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 538599d58..56872682a 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -317,31 +317,23 @@ static const double weight_table[256] = {
1.000000, 1.000000, 1.000000, 1.000000
};
-static double simple_weight(YV12_BUFFER_CONFIG *source) {
+static double simple_weight(const YV12_BUFFER_CONFIG *buf) {
int i, j;
+ double sum = 0.0;
+ const int w = buf->y_crop_width;
+ const int h = buf->y_crop_height;
+ const uint8_t *row = buf->y_buffer;
+
+ for (i = 0; i < h; ++i) {
+ const uint8_t *pixel = row;
+ for (j = 0; j < w; ++j)
+ sum += weight_table[*pixel++];
+ row += buf->y_stride;
+ }
- uint8_t *src = source->y_buffer;
- double sum_weights = 0.0;
-
- // Loop through the Y plane examining levels and creating a weight for
- // the image.
- i = source->y_height;
- do {
- j = source->y_width;
- do {
- sum_weights += weight_table[ *src];
- src++;
- } while (--j);
- src -= source->y_width;
- src += source->y_stride;
- } while (--i);
-
- sum_weights /= (source->y_height * source->y_width);
-
- return sum_weights;
+ return MAX(0.1, sum / (w * h));
}
-
// This function returns the maximum target rate per frame.
static int frame_max_bits(VP9_COMP *cpi) {
int64_t max_bits =
@@ -394,42 +386,35 @@ static unsigned int zz_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
}
static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
- MV *ref_mv, MV *best_mv,
+ const MV *ref_mv, MV *best_mv,
YV12_BUFFER_CONFIG *recon_buffer,
int *best_motion_err, int recon_yoffset) {
MACROBLOCKD *const xd = &x->e_mbd;
- int num00;
-
MV tmp_mv = {0, 0};
- MV ref_mv_full;
-
- int tmp_err;
+ MV ref_mv_full = {ref_mv->row >> 3, ref_mv->col >> 3};
+ int num00, tmp_err, n, sr = 0;
int step_param = 3;
int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
- int n;
- vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[xd->mi_8x8[0]->mbmi.sb_type];
+ const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type;
+ vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize];
int new_mv_mode_penalty = 256;
-
- int sr = 0;
- int quart_frm = MIN(cpi->common.width, cpi->common.height);
+ const int quart_frm = MIN(cpi->common.width, cpi->common.height);
// refine the motion search range according to the frame dimension
// for first pass test
while ((quart_frm << sr) < MAX_FULL_PEL_VAL)
sr++;
- step_param += sr;
+ step_param += sr;
further_steps -= sr;
// override the default variance function to use MSE
- v_fn_ptr.vf = get_block_variance_fn(xd->mi_8x8[0]->mbmi.sb_type);
+ v_fn_ptr.vf = get_block_variance_fn(bsize);
// Set up pointers for this macro block recon buffer
xd->plane[0].pre[0].buf = recon_buffer->y_buffer + recon_yoffset;
// Initial step/diamond search centred on best mv
- ref_mv_full.col = ref_mv->col >> 3;
- ref_mv_full.row = ref_mv->row >> 3;
tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv,
step_param,
x->sadperbit16, &num00, &v_fn_ptr,
@@ -504,6 +489,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
int new_mv_count = 0;
int sum_in_vectors = 0;
uint32_t lastmv_as_int = 0;
+ struct twopass_rc *const twopass = &cpi->twopass;
int_mv zero_ref_mv;
@@ -792,20 +778,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
fps.intra_error = intra_error >> 8;
fps.coded_error = coded_error >> 8;
fps.sr_coded_error = sr_coded_error >> 8;
- fps.ssim_weighted_pred_err = fps.coded_error *
- MAX(0.1, simple_weight(cpi->Source));
- fps.pcnt_inter = 0.0;
- fps.pcnt_motion = 0.0;
- fps.MVr = 0.0;
- fps.mvr_abs = 0.0;
- fps.MVc = 0.0;
- fps.mvc_abs = 0.0;
- fps.MVrv = 0.0;
- fps.MVcv = 0.0;
- fps.mv_in_out_count = 0.0;
- fps.new_mv_count = 0.0;
+ fps.ssim_weighted_pred_err = fps.coded_error * simple_weight(cpi->Source);
fps.count = 1.0;
-
fps.pcnt_inter = (double)intercount / cm->MBs;
fps.pcnt_second_ref = (double)second_ref_count / cm->MBs;
fps.pcnt_neutral = (double)neutral_count / cm->MBs;
@@ -821,7 +795,17 @@ void vp9_first_pass(VP9_COMP *cpi) {
mvcount;
fps.mv_in_out_count = (double)sum_in_vectors / (mvcount * 2);
fps.new_mv_count = new_mv_count;
- fps.pcnt_motion = 1.0 * (double)mvcount / cpi->common.MBs;
+ fps.pcnt_motion = (double)mvcount / cpi->common.MBs;
+ } else {
+ fps.MVr = 0.0;
+ fps.mvr_abs = 0.0;
+ fps.MVc = 0.0;
+ fps.mvc_abs = 0.0;
+ fps.MVrv = 0.0;
+ fps.MVcv = 0.0;
+ fps.mv_in_out_count = 0.0;
+ fps.new_mv_count = 0.0;
+ fps.pcnt_motion = 0.0;
}
// TODO(paulwilkins): Handle the case when duration is set to 0, or
@@ -830,23 +814,22 @@ void vp9_first_pass(VP9_COMP *cpi) {
fps.duration = (double)(cpi->source->ts_end - cpi->source->ts_start);
// don't want to do output stats with a stack variable!
- cpi->twopass.this_frame_stats = fps;
- output_stats(cpi, cpi->output_pkt_list, &cpi->twopass.this_frame_stats);
- accumulate_stats(&cpi->twopass.total_stats, &fps);
+ twopass->this_frame_stats = fps;
+ output_stats(cpi, cpi->output_pkt_list, &twopass->this_frame_stats);
+ accumulate_stats(&twopass->total_stats, &fps);
}
// Copy the previous Last Frame back into gf and arf buffers if
// the prediction is good enough... but also don't allow it to lag too far
- if ((cpi->twopass.sr_update_lag > 3) ||
+ if ((twopass->sr_update_lag > 3) ||
((cm->current_video_frame > 0) &&
- (cpi->twopass.this_frame_stats.pcnt_inter > 0.20) &&
- ((cpi->twopass.this_frame_stats.intra_error /
- DOUBLE_DIVIDE_CHECK(cpi->twopass.this_frame_stats.coded_error)) >
- 2.0))) {
+ (twopass->this_frame_stats.pcnt_inter > 0.20) &&
+ ((twopass->this_frame_stats.intra_error /
+ DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) {
vp8_yv12_copy_frame(lst_yv12, gld_yv12);
- cpi->twopass.sr_update_lag = 1;
+ twopass->sr_update_lag = 1;
} else {
- cpi->twopass.sr_update_lag++;
+ twopass->sr_update_lag++;
}
// swap frame pointers so last frame refers to the frame we just compressed
swap_yv12(lst_yv12, new_yv12);
@@ -1034,37 +1017,38 @@ extern void vp9_new_framerate(VP9_COMP *cpi, double framerate);
void vp9_init_second_pass(VP9_COMP *cpi) {
FIRSTPASS_STATS this_frame;
FIRSTPASS_STATS *start_pos;
+ struct twopass_rc *const twopass = &cpi->twopass;
- zero_stats(&cpi->twopass.total_stats);
- zero_stats(&cpi->twopass.total_left_stats);
+ zero_stats(&twopass->total_stats);
+ zero_stats(&twopass->total_left_stats);
- if (!cpi->twopass.stats_in_end)
+ if (!twopass->stats_in_end)
return;
- cpi->twopass.total_stats = *cpi->twopass.stats_in_end;
- cpi->twopass.total_left_stats = cpi->twopass.total_stats;
+ twopass->total_stats = *twopass->stats_in_end;
+ twopass->total_left_stats = twopass->total_stats;
// each frame can have a different duration, as the frame rate in the source
// isn't guaranteed to be constant. The frame rate prior to the first frame
// encoded in the second pass is a guess. However the sum duration is not.
// It's calculated based on the actual durations of all frames from the first
// pass.
- vp9_new_framerate(cpi, 10000000.0 * cpi->twopass.total_stats.count /
- cpi->twopass.total_stats.duration);
+ vp9_new_framerate(cpi, 10000000.0 * twopass->total_stats.count /
+ twopass->total_stats.duration);
cpi->output_framerate = cpi->oxcf.framerate;
- cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration *
- cpi->oxcf.target_bandwidth / 10000000.0);
+ twopass->bits_left = (int64_t)(twopass->total_stats.duration *
+ cpi->oxcf.target_bandwidth / 10000000.0);
// Calculate a minimum intra value to be used in determining the IIratio
// scores used in the second pass. We have this minimum to make sure
// that clips that are static but "low complexity" in the intra domain
// are still boosted appropriately for KF/GF/ARF
- cpi->twopass.kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs;
- cpi->twopass.gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs;
+ twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs;
+ twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs;
// This variable monitors how far behind the second ref update is lagging
- cpi->twopass.sr_update_lag = 1;
+ twopass->sr_update_lag = 1;
// Scan the first pass file and calculate an average Intra / Inter error score
// ratio for the sequence.
@@ -1072,43 +1056,43 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
double sum_iiratio = 0.0;
double IIRatio;
- start_pos = cpi->twopass.stats_in; // Note the starting "file" position.
+ start_pos = twopass->stats_in; // Note the starting "file" position.
- while (input_stats(&cpi->twopass, &this_frame) != EOF) {
+ while (input_stats(twopass, &this_frame) != EOF) {
IIRatio = this_frame.intra_error
/ DOUBLE_DIVIDE_CHECK(this_frame.coded_error);
IIRatio = (IIRatio < 1.0) ? 1.0 : (IIRatio > 20.0) ? 20.0 : IIRatio;
sum_iiratio += IIRatio;
}
- cpi->twopass.avg_iiratio = sum_iiratio /
- DOUBLE_DIVIDE_CHECK((double)cpi->twopass.total_stats.count);
+ twopass->avg_iiratio = sum_iiratio /
+ DOUBLE_DIVIDE_CHECK((double)twopass->total_stats.count);
// Reset file position
- reset_fpf_position(&cpi->twopass, start_pos);
+ reset_fpf_position(twopass, start_pos);
}
// Scan the first pass file and calculate a modified total error based upon
// the bias/power function used to allocate bits.
{
- double av_error = cpi->twopass.total_stats.ssim_weighted_pred_err /
- DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats.count);
+ double av_error = twopass->total_stats.ssim_weighted_pred_err /
+ DOUBLE_DIVIDE_CHECK(twopass->total_stats.count);
- start_pos = cpi->twopass.stats_in; // Note starting "file" position
+ start_pos = twopass->stats_in; // Note starting "file" position
- cpi->twopass.modified_error_total = 0.0;
- cpi->twopass.modified_error_min =
+ twopass->modified_error_total = 0.0;
+ twopass->modified_error_min =
(av_error * cpi->oxcf.two_pass_vbrmin_section) / 100;
- cpi->twopass.modified_error_max =
+ twopass->modified_error_max =
(av_error * cpi->oxcf.two_pass_vbrmax_section) / 100;
- while (input_stats(&cpi->twopass, &this_frame) != EOF) {
- cpi->twopass.modified_error_total +=
+ while (input_stats(twopass, &this_frame) != EOF) {
+ twopass->modified_error_total +=
calculate_modified_err(cpi, &this_frame);
}
- cpi->twopass.modified_error_left = cpi->twopass.modified_error_total;
+ twopass->modified_error_left = twopass->modified_error_total;
- reset_fpf_position(&cpi->twopass, start_pos); // Reset file position
+ reset_fpf_position(twopass, start_pos);
}
}
@@ -1965,7 +1949,10 @@ void vp9_get_one_pass_params(VP9_COMP *cpi) {
cpi->rc.frames_to_key == 0 ||
(cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
cm->frame_type = KEY_FRAME;
+ cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 &&
+ cpi->rc.frames_to_key == 0;
cpi->rc.frames_to_key = cpi->key_frame_frequency;
+ cpi->rc.kf_boost = 300;
} else {
cm->frame_type = INTER_FRAME;
}
@@ -1982,7 +1969,10 @@ void vp9_get_one_pass_cbr_params(VP9_COMP *cpi) {
cpi->rc.frames_to_key == 0 ||
(cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
cm->frame_type = KEY_FRAME;
+ cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 &&
+ cpi->rc.frames_to_key == 0;
cpi->rc.frames_to_key = cpi->key_frame_frequency;
+ cpi->rc.kf_boost = 300;
} else {
cm->frame_type = INTER_FRAME;
}
@@ -2054,7 +2044,8 @@ void vp9_get_second_pass_params(VP9_COMP *cpi) {
this_frame_coded_error = this_frame.coded_error;
// keyframe and section processing !
- if (rc->frames_to_key == 0) {
+ if (rc->frames_to_key == 0 ||
+ (cpi->common.frame_flags & FRAMEFLAGS_KEY)) {
// Define next KF group and assign bits to it
this_frame_copy = this_frame;
find_next_key_frame(cpi, &this_frame_copy);
@@ -2225,12 +2216,13 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double recent_loop_decay[8] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
RATE_CONTROL *const rc = &cpi->rc;
+ struct twopass_rc *const twopass = &cpi->twopass;
vp9_zero(next_frame);
vp9_clear_system_state(); // __asm emms;
- start_position = cpi->twopass.stats_in;
+ start_position = twopass->stats_in;
cpi->common.frame_type = KEY_FRAME;
// is this a forced key frame by interval
@@ -2247,14 +2239,14 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Take a copy of the initial frame details
first_frame = *this_frame;
- cpi->twopass.kf_group_bits = 0; // Total bits available to kf group
- cpi->twopass.kf_group_error_left = 0; // Group modified error score.
+ twopass->kf_group_bits = 0; // Total bits available to kf group
+ twopass->kf_group_error_left = 0; // Group modified error score.
kf_mod_err = calculate_modified_err(cpi, this_frame);
// find the next keyframe
i = 0;
- while (cpi->twopass.stats_in < cpi->twopass.stats_in_end) {
+ while (twopass->stats_in < twopass->stats_in_end) {
// Accumulate kf group error
kf_group_err += calculate_modified_err(cpi, this_frame);
@@ -2266,11 +2258,11 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// load the next frame's stats
last_frame = *this_frame;
- input_stats(&cpi->twopass, this_frame);
+ input_stats(twopass, this_frame);
// Provided that we are not at the end of the file...
if (cpi->oxcf.auto_key &&
- lookup_next_frame_stats(&cpi->twopass, &next_frame) != EOF) {
+ lookup_next_frame_stats(twopass, &next_frame) != EOF) {
// Normal scene cut check
if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame))
break;
@@ -2320,7 +2312,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
tmp_frame = first_frame;
// Reset to the start of the group
- reset_fpf_position(&cpi->twopass, start_position);
+ reset_fpf_position(twopass, start_position);
kf_group_err = 0;
kf_group_intra_err = 0;
@@ -2334,17 +2326,17 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
kf_group_coded_err += tmp_frame.coded_error;
// Load the next frame's stats
- input_stats(&cpi->twopass, &tmp_frame);
+ input_stats(twopass, &tmp_frame);
}
rc->next_key_frame_forced = 1;
- } else if (cpi->twopass.stats_in == cpi->twopass.stats_in_end) {
+ } else if (twopass->stats_in == twopass->stats_in_end) {
rc->next_key_frame_forced = 1;
} else {
rc->next_key_frame_forced = 0;
}
// Special case for the last key frame of the file
- if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end) {
+ if (twopass->stats_in >= twopass->stats_in_end) {
// Accumulate kf group error
kf_group_err += calculate_modified_err(cpi, this_frame);
@@ -2356,8 +2348,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
// Calculate the number of bits that should be assigned to the kf group.
- if ((cpi->twopass.bits_left > 0) &&
- (cpi->twopass.modified_error_left > 0.0)) {
+ if (twopass->bits_left > 0 && twopass->modified_error_left > 0.0) {
// Max for a single normal frame (not key frame)
int max_bits = frame_max_bits(cpi);
@@ -2366,19 +2357,18 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Default allocation based on bits left and relative
// complexity of the section
- cpi->twopass.kf_group_bits = (int64_t)(cpi->twopass.bits_left *
- (kf_group_err /
- cpi->twopass.modified_error_left));
+ twopass->kf_group_bits = (int64_t)(twopass->bits_left *
+ (kf_group_err / twopass->modified_error_left));
// Clip based on maximum per frame rate defined by the user.
max_grp_bits = (int64_t)max_bits * (int64_t)rc->frames_to_key;
- if (cpi->twopass.kf_group_bits > max_grp_bits)
- cpi->twopass.kf_group_bits = max_grp_bits;
+ if (twopass->kf_group_bits > max_grp_bits)
+ twopass->kf_group_bits = max_grp_bits;
} else {
- cpi->twopass.kf_group_bits = 0;
+ twopass->kf_group_bits = 0;
}
// Reset the first pass file position
- reset_fpf_position(&cpi->twopass, start_position);
+ reset_fpf_position(twopass, start_position);
// Determine how big to make this keyframe based on how well the subsequent
// frames use inter blocks.
@@ -2390,7 +2380,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
for (i = 0; i < rc->frames_to_key; i++) {
double r;
- if (EOF == input_stats(&cpi->twopass, &next_frame))
+ if (EOF == input_stats(twopass, &next_frame))
break;
// Monitor for static sections.
@@ -2402,11 +2392,11 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// For the first few frames collect data to decide kf boost.
if (i <= (rc->max_gf_interval * 2)) {
- if (next_frame.intra_error > cpi->twopass.kf_intra_err_min)
+ if (next_frame.intra_error > twopass->kf_intra_err_min)
r = (IIKFACTOR2 * next_frame.intra_error /
DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
else
- r = (IIKFACTOR2 * cpi->twopass.kf_intra_err_min /
+ r = (IIKFACTOR2 * twopass->kf_intra_err_min /
DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
if (r > RMAX)
@@ -2428,21 +2418,21 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
FIRSTPASS_STATS sectionstats;
zero_stats(&sectionstats);
- reset_fpf_position(&cpi->twopass, start_position);
+ reset_fpf_position(twopass, start_position);
for (i = 0; i < rc->frames_to_key; i++) {
- input_stats(&cpi->twopass, &next_frame);
+ input_stats(twopass, &next_frame);
accumulate_stats(&sectionstats, &next_frame);
}
avg_stats(&sectionstats);
- cpi->twopass.section_intra_rating = (int) (sectionstats.intra_error /
+ twopass->section_intra_rating = (int) (sectionstats.intra_error /
DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));
}
// Reset the first pass file position
- reset_fpf_position(&cpi->twopass, start_position);
+ reset_fpf_position(twopass, start_position);
// Work out how many bits to allocate for the key frame itself
if (1) {
@@ -2459,7 +2449,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Make a note of baseline boost and the zero motion
// accumulator value for use elsewhere.
rc->kf_boost = kf_boost;
- cpi->twopass.kf_zeromotion_pct = (int)(zero_motion_accumulator * 100.0);
+ twopass->kf_zeromotion_pct = (int)(zero_motion_accumulator * 100.0);
// We do three calculations for kf size.
// The first is based on the error score for the whole kf group.
@@ -2474,11 +2464,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// cpi->rc.frames_to_key-1 because key frame itself is taken
// care of by kf_boost.
if (zero_motion_accumulator >= 0.99) {
- allocation_chunks =
- ((rc->frames_to_key - 1) * 10) + kf_boost;
+ allocation_chunks = ((rc->frames_to_key - 1) * 10) + kf_boost;
} else {
- allocation_chunks =
- ((rc->frames_to_key - 1) * 100) + kf_boost;
+ allocation_chunks = ((rc->frames_to_key - 1) * 100) + kf_boost;
}
// Prevent overflow
@@ -2488,58 +2476,54 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
allocation_chunks /= divisor;
}
- cpi->twopass.kf_group_bits = (cpi->twopass.kf_group_bits < 0) ? 0
- : cpi->twopass.kf_group_bits;
+ twopass->kf_group_bits = (twopass->kf_group_bits < 0) ? 0
+ : twopass->kf_group_bits;
// Calculate the number of bits to be spent on the key frame
- cpi->twopass.kf_bits = (int)((double)kf_boost *
- ((double)cpi->twopass.kf_group_bits / (double)allocation_chunks));
+ twopass->kf_bits = (int)((double)kf_boost *
+ ((double)twopass->kf_group_bits / allocation_chunks));
// If the key frame is actually easier than the average for the
// kf group (which does sometimes happen... eg a blank intro frame)
// Then use an alternate calculation based on the kf error score
// which should give a smaller key frame.
if (kf_mod_err < kf_group_err / rc->frames_to_key) {
- double alt_kf_grp_bits =
- ((double)cpi->twopass.bits_left *
+ double alt_kf_grp_bits = ((double)twopass->bits_left *
(kf_mod_err * (double)rc->frames_to_key) /
- DOUBLE_DIVIDE_CHECK(cpi->twopass.modified_error_left));
+ DOUBLE_DIVIDE_CHECK(twopass->modified_error_left));
alt_kf_bits = (int)((double)kf_boost *
(alt_kf_grp_bits / (double)allocation_chunks));
- if (cpi->twopass.kf_bits > alt_kf_bits) {
- cpi->twopass.kf_bits = alt_kf_bits;
- }
+ if (twopass->kf_bits > alt_kf_bits)
+ twopass->kf_bits = alt_kf_bits;
} else {
// Else if it is much harder than other frames in the group make sure
// it at least receives an allocation in keeping with its relative
// error score
- alt_kf_bits = (int)((double)cpi->twopass.bits_left *
- (kf_mod_err /
- DOUBLE_DIVIDE_CHECK(cpi->twopass.modified_error_left)));
+ alt_kf_bits = (int)((double)twopass->bits_left * (kf_mod_err /
+ DOUBLE_DIVIDE_CHECK(twopass->modified_error_left)));
- if (alt_kf_bits > cpi->twopass.kf_bits) {
- cpi->twopass.kf_bits = alt_kf_bits;
+ if (alt_kf_bits > twopass->kf_bits) {
+ twopass->kf_bits = alt_kf_bits;
}
}
- cpi->twopass.kf_group_bits -= cpi->twopass.kf_bits;
+ twopass->kf_group_bits -= twopass->kf_bits;
// Peer frame bit target for this frame
- rc->per_frame_bandwidth = cpi->twopass.kf_bits;
+ rc->per_frame_bandwidth = twopass->kf_bits;
// Convert to a per second bitrate
- cpi->target_bandwidth = (int)(cpi->twopass.kf_bits *
- cpi->output_framerate);
+ cpi->target_bandwidth = (int)(twopass->kf_bits * cpi->output_framerate);
}
// Note the total error score of the kf group minus the key frame itself
- cpi->twopass.kf_group_error_left = (int)(kf_group_err - kf_mod_err);
+ twopass->kf_group_error_left = (int)(kf_group_err - kf_mod_err);
// Adjust the count of total modified error left.
// The count of bits left is adjusted elsewhere based on real coded frame
// sizes.
- cpi->twopass.modified_error_left -= kf_group_err;
+ twopass->modified_error_left -= kf_group_err;
}
void vp9_twopass_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c
index 277bd7db1..ee73ff15a 100644
--- a/vp9/encoder/vp9_lookahead.c
+++ b/vp9/encoder/vp9_lookahead.c
@@ -73,7 +73,7 @@ struct lookahead_ctx * vp9_lookahead_init(unsigned int width,
for (i = 0; i < depth; i++)
if (vp9_alloc_frame_buffer(&ctx->buf[i].img,
width, height, subsampling_x, subsampling_y,
- VP9BORDERINPIXELS))
+ VP9_ENC_BORDER_IN_PIXELS))
goto bail;
}
return ctx;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index efb5ce16d..ad9cc00b1 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -24,10 +24,15 @@
// #define NEW_DIAMOND_SEARCH
void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) {
- const int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
- const int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
- const int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
- const int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
+ int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
+ int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
+ int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
+ int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
+
+ col_min = MAX(col_min, (MV_LOW >> 3) + 1);
+ row_min = MAX(row_min, (MV_LOW >> 3) + 1);
+ col_max = MIN(col_max, (MV_UPP >> 3) - 1);
+ row_max = MIN(row_max, (MV_UPP >> 3) - 1);
// Get intersection of UMV window and valid MV window to reduce # of checks
// in diamond search.
@@ -174,8 +179,10 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
error_per_bit + 4096) >> 13 : 0)
-#define SP(x) (((x) & 7) << 1) // convert motion vector component to offset
- // for svf calc
+// convert motion vector component to offset for svf calc
+static INLINE int sp(int x) {
+ return (x & 7) << 1;
+}
#define IFMVCV(r, c, s, e) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) \
@@ -183,12 +190,14 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
else \
e;
-/* pointer to predictor base of a motionvector */
-#define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset)))
+static INLINE uint8_t *pre(uint8_t *buf, int stride, int r, int c, int offset) {
+ return &buf[(r >> 3) * stride + (c >> 3) - offset];
+}
/* returns subpixel variance error function */
#define DIST(r, c) \
- vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, src_stride, &sse)
+ vfp->svf(pre(y, y_stride, r, c, offset), y_stride, sp(c), sp(r), z, \
+ src_stride, &sse)
/* checks if (r, c) has better score than previous best */
#define CHECK_BETTER(v, r, c) \
@@ -358,7 +367,7 @@ int vp9_find_best_sub_pixel_tree(MACROBLOCK *x,
#undef DIST
/* returns subpixel variance error function */
#define DIST(r, c) \
- vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r), \
+ vfp->svaf(pre(y, y_stride, r, c, offset), y_stride, sp(c), sp(r), \
z, src_stride, &sse, second_pred)
int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
@@ -1703,58 +1712,51 @@ int vp9_full_search_sadx8(MACROBLOCK *x, MV *ref_mv,
else
return INT_MAX;
}
-int vp9_refining_search_sad_c(MACROBLOCK *x,
+
+int vp9_refining_search_sad_c(const MACROBLOCK *x,
MV *ref_mv, int error_per_bit,
int search_range, vp9_variance_fn_ptr_t *fn_ptr,
int *mvjcost, int *mvcost[2],
const MV *center_mv) {
- const MACROBLOCKD* const xd = &x->e_mbd;
- MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
int i, j;
- int this_row_offset, this_col_offset;
- int what_stride = x->plane[0].src.stride;
- int in_what_stride = xd->plane[0].pre[0].stride;
- uint8_t *what = x->plane[0].src.buf;
- uint8_t *best_address = xd->plane[0].pre[0].buf +
- (ref_mv->row * xd->plane[0].pre[0].stride) +
- ref_mv->col;
- uint8_t *check_here;
+ const int what_stride = x->plane[0].src.stride;
+ const uint8_t *const what = x->plane[0].src.buf;
+ const int in_what_stride = xd->plane[0].pre[0].stride;
+ const uint8_t *const in_what = xd->plane[0].pre[0].buf;
+ const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride +
+ ref_mv->col];
unsigned int thissad;
+
+ const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
MV this_mv;
- unsigned int bestsad = INT_MAX;
- MV fcenter_mv;
- int *mvjsadcost = x->nmvjointsadcost;
+ const int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
- fcenter_mv.row = center_mv->row >> 3;
- fcenter_mv.col = center_mv->col >> 3;
-
- bestsad = fn_ptr->sdf(what, what_stride, best_address,
- in_what_stride, 0x7fffffff) +
- mvsad_err_cost(ref_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, error_per_bit);
+ unsigned int bestsad = fn_ptr->sdf(what, what_stride, best_address,
+ in_what_stride, 0x7fffffff) +
+ mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
for (i = 0; i < search_range; i++) {
int best_site = -1;
for (j = 0; j < 4; j++) {
- this_row_offset = ref_mv->row + neighbors[j].row;
- this_col_offset = ref_mv->col + neighbors[j].col;
-
- if ((this_col_offset > x->mv_col_min) &&
- (this_col_offset < x->mv_col_max) &&
- (this_row_offset > x->mv_row_min) &&
- (this_row_offset < x->mv_row_max)) {
- check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
- best_address;
+ this_mv.row = ref_mv->row + neighbors[j].row;
+ this_mv.col = ref_mv->col + neighbors[j].col;
+
+ if ((this_mv.col > x->mv_col_min) &&
+ (this_mv.col < x->mv_col_max) &&
+ (this_mv.row > x->mv_row_min) &&
+ (this_mv.row < x->mv_row_max)) {
+ const uint8_t *check_here = &in_what[this_mv.row * in_what_stride +
+ this_mv.col];
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
bestsad);
if (thissad < bestsad) {
- this_mv.row = this_row_offset;
- this_mv.col = this_col_offset;
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
mvjsadcost, mvsadcost, error_per_bit);
@@ -1771,8 +1773,7 @@ int vp9_refining_search_sad_c(MACROBLOCK *x,
} else {
ref_mv->row += neighbors[best_site].row;
ref_mv->col += neighbors[best_site].col;
- best_address += (neighbors[best_site].row) * in_what_stride +
- neighbors[best_site].col;
+ best_address = &in_what[ref_mv->row * in_what_stride + ref_mv->col];
}
}
@@ -1782,13 +1783,12 @@ int vp9_refining_search_sad_c(MACROBLOCK *x,
if (bestsad < INT_MAX)
return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
(unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv,
- mvjcost, mvcost, x->errorperbit);
+ mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
else
return INT_MAX;
}
-int vp9_refining_search_sadx4(MACROBLOCK *x,
+int vp9_refining_search_sadx4(const MACROBLOCK *x,
MV *ref_mv, int error_per_bit,
int search_range, vp9_variance_fn_ptr_t *fn_ptr,
int *mvjcost, int *mvcost[2],
@@ -1810,7 +1810,7 @@ int vp9_refining_search_sadx4(MACROBLOCK *x,
unsigned int bestsad = INT_MAX;
MV fcenter_mv;
- int *mvjsadcost = x->nmvjointsadcost;
+ const int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
fcenter_mv.row = center_mv->row >> 3;
@@ -1903,65 +1903,54 @@ int vp9_refining_search_sadx4(MACROBLOCK *x,
return INT_MAX;
}
-/* This function is called when we do joint motion search in comp_inter_inter
- * mode.
- */
-int vp9_refining_search_8p_c(MACROBLOCK *x,
+// This function is called when we do joint motion search in comp_inter_inter
+// mode.
+int vp9_refining_search_8p_c(const MACROBLOCK *x,
MV *ref_mv, int error_per_bit,
int search_range, vp9_variance_fn_ptr_t *fn_ptr,
int *mvjcost, int *mvcost[2], const MV *center_mv,
const uint8_t *second_pred, int w, int h) {
- const MACROBLOCKD* const xd = &x->e_mbd;
- MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
- {-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
+ {-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
int i, j;
- int this_row_offset, this_col_offset;
- int what_stride = x->plane[0].src.stride;
- int in_what_stride = xd->plane[0].pre[0].stride;
- uint8_t *what = x->plane[0].src.buf;
- uint8_t *best_address = xd->plane[0].pre[0].buf +
- (ref_mv->row * xd->plane[0].pre[0].stride) +
- ref_mv->col;
- uint8_t *check_here;
+ const uint8_t *what = x->plane[0].src.buf;
+ const int what_stride = x->plane[0].src.stride;
+ const uint8_t *in_what = xd->plane[0].pre[0].buf;
+ const int in_what_stride = xd->plane[0].pre[0].stride;
+ const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride +
+ ref_mv->col];
unsigned int thissad;
MV this_mv;
- unsigned int bestsad = INT_MAX;
- MV fcenter_mv;
+ const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
- int *mvjsadcost = x->nmvjointsadcost;
+ const int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
- fcenter_mv.row = center_mv->row >> 3;
- fcenter_mv.col = center_mv->col >> 3;
-
/* Get compound pred by averaging two pred blocks. */
- bestsad = fn_ptr->sdaf(what, what_stride, best_address, in_what_stride,
- second_pred, 0x7fffffff) +
- mvsad_err_cost(ref_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, error_per_bit);
+ unsigned int bestsad = fn_ptr->sdaf(what, what_stride,
+ best_address, in_what_stride,
+ second_pred, 0x7fffffff) +
+ mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
- for (i = 0; i < search_range; i++) {
+ for (i = 0; i < search_range; ++i) {
int best_site = -1;
for (j = 0; j < 8; j++) {
- this_row_offset = ref_mv->row + neighbors[j].row;
- this_col_offset = ref_mv->col + neighbors[j].col;
+ this_mv.row = ref_mv->row + neighbors[j].row;
+ this_mv.col = ref_mv->col + neighbors[j].col;
- if ((this_col_offset > x->mv_col_min) &&
- (this_col_offset < x->mv_col_max) &&
- (this_row_offset > x->mv_row_min) &&
- (this_row_offset < x->mv_row_max)) {
- check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
- best_address;
+ if ((this_mv.col > x->mv_col_min) &&
+ (this_mv.col < x->mv_col_max) &&
+ (this_mv.row > x->mv_row_min) &&
+ (this_mv.row < x->mv_row_max)) {
+ const uint8_t *check_here = &in_what[this_mv.row * in_what_stride +
+ this_mv.col];
- /* Get compound block and use it to calculate SAD. */
thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride,
second_pred, bestsad);
-
if (thissad < bestsad) {
- this_mv.row = this_row_offset;
- this_mv.col = this_col_offset;
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
mvjsadcost, mvsadcost, error_per_bit);
if (thissad < bestsad) {
@@ -1977,8 +1966,7 @@ int vp9_refining_search_8p_c(MACROBLOCK *x,
} else {
ref_mv->row += neighbors[best_site].row;
ref_mv->col += neighbors[best_site].col;
- best_address += (neighbors[best_site].row) * in_what_stride +
- neighbors[best_site].col;
+ best_address = &in_what[ref_mv->row * in_what_stride + ref_mv->col];
}
}
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index b3d89752d..74035842f 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -108,7 +108,7 @@ typedef int (*vp9_full_search_fn_t)(MACROBLOCK *x,
int *mvjcost, int *mvcost[2],
const MV *center_mv, int n);
-typedef int (*vp9_refining_search_fn_t)(MACROBLOCK *x,
+typedef int (*vp9_refining_search_fn_t)(const MACROBLOCK *x,
MV *ref_mv, int sad_per_bit,
int distance,
vp9_variance_fn_ptr_t *fn_ptr,
@@ -123,7 +123,7 @@ typedef int (*vp9_diamond_search_fn_t)(MACROBLOCK *x,
int *mvjcost, int *mvcost[2],
const MV *center_mv);
-int vp9_refining_search_8p_c(MACROBLOCK *x,
+int vp9_refining_search_8p_c(const MACROBLOCK *x,
MV *ref_mv, int error_per_bit,
int search_range, vp9_variance_fn_ptr_t *fn_ptr,
int *mvjcost, int *mvcost[2],
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 42d4196c5..27531d232 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -36,6 +36,7 @@
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/encoder/vp9_temporal_filter.h"
#include "vp9/encoder/vp9_vaq.h"
+#include "vp9/encoder/vp9_resize.h"
#include "vpx_ports/vpx_timer.h"
@@ -968,7 +969,7 @@ static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer,
cpi->oxcf.width, cpi->oxcf.height,
cm->subsampling_x, cm->subsampling_y,
- VP9BORDERINPIXELS, NULL, NULL, NULL))
+ VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate altref buffer");
}
@@ -983,14 +984,14 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) {
if (vp9_alloc_frame_buffer(&cpi->last_frame_uf,
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
- VP9BORDERINPIXELS))
+ VP9_ENC_BORDER_IN_PIXELS))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate last frame buffer");
if (vp9_alloc_frame_buffer(&cpi->scaled_source,
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
- VP9BORDERINPIXELS))
+ VP9_ENC_BORDER_IN_PIXELS))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate scaled source buffer");
@@ -1036,14 +1037,14 @@ static void update_frame_size(VP9_COMP *cpi) {
if (vp9_realloc_frame_buffer(&cpi->last_frame_uf,
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
- VP9BORDERINPIXELS, NULL, NULL, NULL))
+ VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to reallocate last frame buffer");
if (vp9_realloc_frame_buffer(&cpi->scaled_source,
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
- VP9BORDERINPIXELS, NULL, NULL, NULL))
+ VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to reallocate scaled source buffer");
@@ -2295,6 +2296,42 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
}
#endif
+static void scale_and_extend_frame_nonnormative(YV12_BUFFER_CONFIG *src_fb,
+ YV12_BUFFER_CONFIG *dst_fb) {
+ const int in_w = src_fb->y_crop_width;
+ const int in_h = src_fb->y_crop_height;
+ const int out_w = dst_fb->y_crop_width;
+ const int out_h = dst_fb->y_crop_height;
+ const int in_w_uv = src_fb->uv_crop_width;
+ const int in_h_uv = src_fb->uv_crop_height;
+ const int out_w_uv = dst_fb->uv_crop_width;
+ const int out_h_uv = dst_fb->uv_crop_height;
+ int i;
+
+ uint8_t *srcs[4] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer,
+ src_fb->alpha_buffer};
+ int src_strides[4] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride,
+ src_fb->alpha_stride};
+
+ uint8_t *dsts[4] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer,
+ dst_fb->alpha_buffer};
+ int dst_strides[4] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride,
+ dst_fb->alpha_stride};
+
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ if (i == 0 || i == 3) {
+ // Y and alpha planes
+ vp9_resize_plane(srcs[i], in_h, in_w, src_strides[i],
+ dsts[i], out_h, out_w, dst_strides[i]);
+ } else {
+ // Chroma planes
+ vp9_resize_plane(srcs[i], in_h_uv, in_w_uv, src_strides[i],
+ dsts[i], out_h_uv, out_w_uv, dst_strides[i]);
+ }
+ }
+ vp8_yv12_extend_frame_borders(dst_fb);
+}
+
static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,
YV12_BUFFER_CONFIG *dst_fb) {
const int in_w = src_fb->y_crop_width;
@@ -2316,7 +2353,7 @@ static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,
for (y = 0; y < out_h; y += 16) {
for (x = 0; x < out_w; x += 16) {
for (i = 0; i < MAX_MB_PLANE; ++i) {
- const int factor = i == 0 ? 1 : 2;
+ const int factor = (i == 0 || i == 3 ? 1 : 2);
const int x_q4 = x * (16 / factor) * in_w / out_w;
const int y_q4 = y * (16 / factor) * in_h / out_h;
const int src_stride = src_strides[i];
@@ -2552,7 +2589,7 @@ static void scale_references(VP9_COMP *cpi) {
vp9_realloc_frame_buffer(&cm->yv12_fb[new_fb],
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
- VP9BORDERINPIXELS, NULL, NULL, NULL);
+ VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
scale_and_extend_frame(ref, &cm->yv12_fb[new_fb]);
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
} else {
@@ -2924,7 +2961,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
/* Scale the source buffer, if required. */
if (cm->mi_cols * 8 != cpi->un_scaled_source->y_width ||
cm->mi_rows * 8 != cpi->un_scaled_source->y_height) {
- scale_and_extend_frame(cpi->un_scaled_source, &cpi->scaled_source);
+ scale_and_extend_frame_nonnormative(cpi->un_scaled_source,
+ &cpi->scaled_source);
cpi->Source = &cpi->scaled_source;
} else {
cpi->Source = cpi->un_scaled_source;
@@ -3279,12 +3317,12 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size,
vp9_twopass_postencode_update(cpi, *size);
}
-static void check_initial_width(VP9_COMP *cpi, YV12_BUFFER_CONFIG *sd) {
+static void check_initial_width(VP9_COMP *cpi, int subsampling_x,
+ int subsampling_y) {
VP9_COMMON *const cm = &cpi->common;
if (!cpi->initial_width) {
- // TODO(agrange) Subsampling defaults to assuming sampled chroma.
- cm->subsampling_x = sd != NULL ? (sd->uv_width < sd->y_width) : 1;
- cm->subsampling_y = sd != NULL ? (sd->uv_height < sd->y_height) : 1;
+ cm->subsampling_x = subsampling_x;
+ cm->subsampling_y = subsampling_y;
alloc_raw_frame_buffers(cpi);
cpi->initial_width = cm->width;
cpi->initial_height = cm->height;
@@ -3298,8 +3336,10 @@ int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags,
VP9_COMP *cpi = (VP9_COMP *) ptr;
struct vpx_usec_timer timer;
int res = 0;
+ const int subsampling_x = sd->uv_width < sd->y_width;
+ const int subsampling_y = sd->uv_height < sd->y_height;
- check_initial_width(cpi, sd);
+ check_initial_width(cpi, subsampling_x, subsampling_y);
vpx_usec_timer_start(&timer);
if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time, frame_flags,
cpi->active_map_enabled ? cpi->active_map : NULL))
@@ -3377,7 +3417,6 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
struct vpx_usec_timer cmptimer;
YV12_BUFFER_CONFIG *force_src_buffer = NULL;
MV_REFERENCE_FRAME ref_frame;
- // FILE *fp_out = fopen("enc_frame_type.txt", "a");
if (!cpi)
return -1;
@@ -3499,8 +3538,6 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
vp9_end_first_pass(cpi); /* get last stats packet */
cpi->twopass.first_pass_done = 1;
}
-
- // fclose(fp_out);
return -1;
}
@@ -3543,7 +3580,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
vp9_realloc_frame_buffer(get_frame_new_buffer(cm),
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
- VP9BORDERINPIXELS, NULL, NULL, NULL);
+ VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
@@ -3669,7 +3706,6 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
}
#endif
- // fclose(fp_out);
return 0;
}
@@ -3689,7 +3725,8 @@ int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest,
*dest = *cpi->common.frame_to_show;
dest->y_width = cpi->common.width;
dest->y_height = cpi->common.height;
- dest->uv_height = cpi->common.height / 2;
+ dest->uv_width = cpi->common.width >> cpi->common.subsampling_x;
+ dest->uv_height = cpi->common.height >> cpi->common.subsampling_y;
ret = 0;
} else {
ret = -1;
@@ -3797,7 +3834,7 @@ int vp9_set_size_literal(VP9_PTR comp, unsigned int width,
VP9_COMP *cpi = (VP9_COMP *)comp;
VP9_COMMON *cm = &cpi->common;
- check_initial_width(cpi, NULL);
+ check_initial_width(cpi, 1, 1);
if (width) {
cm->width = width;
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
index 7a5282dda..a4ceabdf1 100644
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -20,77 +20,43 @@
#include "vp9/common/vp9_loopfilter.h"
#include "./vpx_scale_rtcd.h"
-void vp9_yv12_copy_partial_frame_c(YV12_BUFFER_CONFIG *src_ybc,
- YV12_BUFFER_CONFIG *dst_ybc, int fraction) {
- const int height = src_ybc->y_height;
- const int stride = src_ybc->y_stride;
- const int offset = stride * ((height >> 5) * 16 - 8);
- const int lines_to_copy = MAX(height >> (fraction + 4), 1) << 4;
-
- assert(src_ybc->y_stride == dst_ybc->y_stride);
- vpx_memcpy(dst_ybc->y_buffer + offset, src_ybc->y_buffer + offset,
- stride * (lines_to_copy + 16));
-}
-
-// Enforce a minimum filter level based upon baseline Q
static int get_min_filter_level(VP9_COMP *cpi, int base_qindex) {
- int min_filter_level;
- min_filter_level = 0;
-
- return min_filter_level;
+ return 0;
}
-// Enforce a maximum filter level based upon baseline Q
static int get_max_filter_level(VP9_COMP *cpi, int base_qindex) {
- int max_filter_level = MAX_LOOP_FILTER;
- (void)base_qindex;
-
- if (cpi->twopass.section_intra_rating > 8)
- max_filter_level = MAX_LOOP_FILTER * 3 / 4;
-
- return max_filter_level;
+ return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
+ : MAX_LOOP_FILTER;
}
-
// Stub function for now Alt LF not used
void vp9_set_alt_lf_level(VP9_COMP *cpi, int filt_val) {
}
void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial) {
+ MACROBLOCKD *const xd = &cpi->mb.e_mbd;
VP9_COMMON *const cm = &cpi->common;
struct loopfilter *const lf = &cm->lf;
-
- int best_err = 0;
- int filt_err = 0;
const int min_filter_level = get_min_filter_level(cpi, cm->base_qindex);
const int max_filter_level = get_max_filter_level(cpi, cm->base_qindex);
-
- int filter_step;
- int filt_high = 0;
- // Start search at previous frame filter level
- int filt_mid = lf->filter_level;
- int filt_low = 0;
+ int best_err = 0;
+ int filt_err = 0;
int filt_best;
int filt_direction = 0;
-
- int Bias = 0; // Bias against raising loop filter in favor of lowering it.
-
- // Make a copy of the unfiltered / processed recon buffer
- vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
+ // Start the search at the previous frame filter level unless it is now out of
+ // range.
+ int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level);
+ int filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0
: cpi->oxcf.sharpness;
- // Start the search at the previous frame filter level unless it is now out of
- // range.
- filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level);
-
- // Define the initial step size
- filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
+ // Make a copy of the unfiltered / processed recon buffer
+ vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
// Get baseline error score
vp9_set_alt_lf_level(cpi, filt_mid);
- vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_mid, 1, partial);
+ vp9_loop_filter_frame(cm, xd, filt_mid, 1, partial);
best_err = vp9_calc_ss_err(sd, cm->frame_to_show);
filt_best = filt_mid;
@@ -99,35 +65,32 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial) {
vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
while (filter_step > 0) {
- Bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;
+ const int filt_high = MIN(filt_mid + filter_step, max_filter_level);
+ const int filt_low = MAX(filt_mid - filter_step, min_filter_level);
+
+ // Bias against raising loop filter in favor of lowering it.
+ int bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;
if (cpi->twopass.section_intra_rating < 20)
- Bias = Bias * cpi->twopass.section_intra_rating / 20;
+ bias = bias * cpi->twopass.section_intra_rating / 20;
// yx, bias less for large block size
- if (cpi->common.tx_mode != ONLY_4X4)
- Bias >>= 1;
-
- filt_high = ((filt_mid + filter_step) > max_filter_level)
- ? max_filter_level
- : (filt_mid + filter_step);
- filt_low = ((filt_mid - filter_step) < min_filter_level)
- ? min_filter_level
- : (filt_mid - filter_step);
+ if (cm->tx_mode != ONLY_4X4)
+ bias >>= 1;
- if ((filt_direction <= 0) && (filt_low != filt_mid)) {
+ if (filt_direction <= 0 && filt_low != filt_mid) {
// Get Low filter error score
vp9_set_alt_lf_level(cpi, filt_low);
- vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_low, 1, partial);
+ vp9_loop_filter_frame(cm, xd, filt_low, 1, partial);
filt_err = vp9_calc_ss_err(sd, cm->frame_to_show);
- // Re-instate the unfiltered frame
+ // Re-instate the unfiltered frame
vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
// If value is close to the best so far then bias towards a lower loop
// filter value.
- if ((filt_err - Bias) < best_err) {
+ if ((filt_err - bias) < best_err) {
// Was it actually better than the previous best?
if (filt_err < best_err)
best_err = filt_err;
@@ -137,9 +100,9 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial) {
}
// Now look at filt_high
- if ((filt_direction >= 0) && (filt_high != filt_mid)) {
+ if (filt_direction >= 0 && filt_high != filt_mid) {
vp9_set_alt_lf_level(cpi, filt_high);
- vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_high, 1, partial);
+ vp9_loop_filter_frame(cm, xd, filt_high, 1, partial);
filt_err = vp9_calc_ss_err(sd, cm->frame_to_show);
@@ -147,7 +110,7 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial) {
vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
// Was it better than the previous best?
- if (filt_err < (best_err - Bias)) {
+ if (filt_err < (best_err - bias)) {
best_err = filt_err;
filt_best = filt_high;
}
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 17d1f5984..f317f2a0d 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -174,7 +174,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
- vp9_setup_buffer_inter(cpi, x, tile, get_ref_frame_idx(cpi, ref_frame),
+ vp9_setup_buffer_inter(cpi, x, tile,
ref_frame, block_size, mi_row, mi_col,
frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
}
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 72ab00f98..3ebf98c0f 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -211,19 +211,16 @@ static int estimate_bits_at_q(int frame_kind, int q, int mbs,
static void calc_iframe_target_size(VP9_COMP *cpi) {
- // boost defaults to half second
+ const VP9_CONFIG *oxcf = &cpi->oxcf;
+ RATE_CONTROL *const rc = &cpi->rc;
int target;
- // Clear down mmx registers to allow floating point in what follows
vp9_clear_system_state(); // __asm emms;
- // New Two pass RC
- target = cpi->rc.per_frame_bandwidth;
-
// For 1-pass.
if (cpi->pass == 0) {
if (cpi->common.current_video_frame == 0) {
- target = cpi->oxcf.starting_buffer_level / 2;
+ target = oxcf->starting_buffer_level / 2;
} else {
// TODO(marpan): Add in adjustment based on Q.
// If this keyframe was forced, use a more recent Q estimate.
@@ -235,47 +232,49 @@ static void calc_iframe_target_size(VP9_COMP *cpi) {
// Adjustment up based on q: need to fix.
// kf_boost = kf_boost * kfboost_qadjust(Q) / 100;
// Frame separation adjustment (down).
- if (cpi->rc.frames_since_key < cpi->output_framerate / 2) {
- kf_boost = (int)(kf_boost * cpi->rc.frames_since_key /
- (cpi->output_framerate / 2));
+ if (rc->frames_since_key < cpi->output_framerate / 2) {
+ kf_boost = (int)(kf_boost * rc->frames_since_key /
+ (cpi->output_framerate / 2));
}
kf_boost = (kf_boost < 16) ? 16 : kf_boost;
- target = ((16 + kf_boost) * cpi->rc.per_frame_bandwidth) >> 4;
+ target = ((16 + kf_boost) * rc->per_frame_bandwidth) >> 4;
}
- cpi->rc.active_worst_quality = cpi->rc.worst_quality;
+ rc->active_worst_quality = rc->worst_quality;
+ } else {
+ target = rc->per_frame_bandwidth;
}
- if (cpi->oxcf.rc_max_intra_bitrate_pct) {
- int max_rate = cpi->rc.per_frame_bandwidth
- * cpi->oxcf.rc_max_intra_bitrate_pct / 100;
-
- if (target > max_rate)
- target = max_rate;
+ if (oxcf->rc_max_intra_bitrate_pct) {
+ const int max_rate = rc->per_frame_bandwidth *
+ oxcf->rc_max_intra_bitrate_pct / 100;
+ target = MIN(target, max_rate);
}
- cpi->rc.this_frame_target = target;
+ rc->this_frame_target = target;
}
// Update the buffer level: leaky bucket model.
void vp9_update_buffer_level(VP9_COMP *const cpi, int encoded_frame_size) {
- VP9_COMMON *const cm = &cpi->common;
+ const VP9_COMMON *const cm = &cpi->common;
+ const VP9_CONFIG *oxcf = &cpi->oxcf;
RATE_CONTROL *const rc = &cpi->rc;
+
// Non-viewable frames are a special case and are treated as pure overhead.
if (!cm->show_frame) {
rc->bits_off_target -= encoded_frame_size;
} else {
rc->bits_off_target += rc->av_per_frame_bandwidth - encoded_frame_size;
}
+
// Clip the buffer level to the maximum specified buffer size.
- if (rc->bits_off_target > cpi->oxcf.maximum_buffer_size) {
- rc->bits_off_target = cpi->oxcf.maximum_buffer_size;
- }
- rc->buffer_level = rc->bits_off_target;
+ rc->buffer_level = MIN(rc->bits_off_target, oxcf->maximum_buffer_size);
}
int vp9_drop_frame(VP9_COMP *const cpi) {
+ const VP9_CONFIG *oxcf = &cpi->oxcf;
RATE_CONTROL *const rc = &cpi->rc;
- if (!cpi->oxcf.drop_frames_water_mark) {
+
+ if (!oxcf->drop_frames_water_mark) {
return 0;
} else {
if (rc->buffer_level < 0) {
@@ -284,8 +283,8 @@ int vp9_drop_frame(VP9_COMP *const cpi) {
} else {
// If buffer is below drop_mark, for now just drop every other frame
// (starting with the next frame) until it increases back over drop_mark.
- int drop_mark = (int)(cpi->oxcf.drop_frames_water_mark *
- cpi->oxcf.optimal_buffer_level / 100);
+ int drop_mark = (int)(oxcf->drop_frames_water_mark *
+ oxcf->optimal_buffer_level / 100);
if ((rc->buffer_level > drop_mark) &&
(rc->decimation_factor > 0)) {
--rc->decimation_factor;
@@ -310,14 +309,14 @@ int vp9_drop_frame(VP9_COMP *const cpi) {
}
// Adjust active_worst_quality level based on buffer level.
-static int adjust_active_worst_quality_from_buffer_level(const VP9_COMP *cpi) {
+static int adjust_active_worst_quality_from_buffer_level(const VP9_CONFIG *oxcf,
+ const RATE_CONTROL *rc) {
// Adjust active_worst_quality: If buffer is above the optimal/target level,
// bring active_worst_quality down depending on fullness over buffer.
// If buffer is below the optimal level, let the active_worst_quality go from
// ambient Q (at buffer = optimal level) to worst_quality level
// (at buffer = critical level).
- const RATE_CONTROL *const rc = &cpi->rc;
- const VP9_CONFIG *const oxcf = &cpi->oxcf;
+
int active_worst_quality = rc->active_worst_quality;
// Maximum limit for down adjustment, ~20%.
int max_adjustment_down = active_worst_quality / 5;
@@ -354,31 +353,23 @@ static int adjust_active_worst_quality_from_buffer_level(const VP9_COMP *cpi) {
}
// Adjust target frame size with respect to the buffering constraints:
-static int target_size_from_buffer_level(const VP9_COMP *cpi) {
- const RATE_CONTROL *const rc = &cpi->rc;
- const VP9_CONFIG *const oxcf = &cpi->oxcf;
- int this_frame_target = cpi->rc.this_frame_target;
- int percent_low = 0;
- int percent_high = 0;
- int one_percent_bits = (int)(1 + oxcf->optimal_buffer_level / 100);
- if (rc->buffer_level < oxcf->optimal_buffer_level) {
- percent_low = (int)((oxcf->optimal_buffer_level - rc->buffer_level) /
- one_percent_bits);
- if (percent_low > oxcf->under_shoot_pct)
- percent_low = oxcf->under_shoot_pct;
+static int target_size_from_buffer_level(const VP9_CONFIG *oxcf,
+ const RATE_CONTROL *rc) {
+ int target = rc->this_frame_target;
+ const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level;
+ const int one_pct_bits = 1 + oxcf->optimal_buffer_level / 100;
+ if (diff > 0) {
// Lower the target bandwidth for this frame.
- this_frame_target -= (this_frame_target * percent_low) / 200;
- } else if (rc->buffer_level > oxcf->optimal_buffer_level) {
- percent_high = (int)((rc->buffer_level - oxcf->optimal_buffer_level) /
- one_percent_bits);
- if (percent_high > oxcf->over_shoot_pct)
- percent_high = oxcf->over_shoot_pct;
-
+ const int pct_low = MIN(diff / one_pct_bits, oxcf->under_shoot_pct);
+ target -= (target * pct_low) / 200;
+ } else if (diff < 0) {
// Increase the target bandwidth for this frame.
- this_frame_target += (this_frame_target * percent_high) / 200;
+ const int pct_high = MIN(-diff / one_pct_bits, oxcf->over_shoot_pct);
+ target += (target * pct_high) / 200;
}
- return this_frame_target;
+
+ return target;
}
static void calc_pframe_target_size(VP9_COMP *const cpi) {
@@ -400,10 +391,10 @@ static void calc_pframe_target_size(VP9_COMP *const cpi) {
// For now, use: cpi->rc.av_per_frame_bandwidth / 16:
min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4,
FRAME_OVERHEAD_BITS);
- rc->this_frame_target = target_size_from_buffer_level(cpi);
+ rc->this_frame_target = target_size_from_buffer_level(oxcf, rc);
// Adjust qp-max based on buffer level.
rc->active_worst_quality =
- adjust_active_worst_quality_from_buffer_level(cpi);
+ adjust_active_worst_quality_from_buffer_level(oxcf, rc);
}
}
@@ -602,7 +593,7 @@ int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
(last_boosted_q * 0.75));
active_best_quality = MAX(qindex + delta_qindex, rc->best_quality);
} else if (!(cpi->pass == 0 && cm->current_video_frame == 0)) {
- // not first frame of one pass
+ // not first frame of one pass and kf_boost is set
double q_adj_factor = 1.0;
double q_val;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index fa6b362d4..242aa8710 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -233,8 +233,8 @@ static void set_block_thresholds(VP9_COMP *cpi) {
const int q = compute_rd_thresh_factor(qindex);
for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
- // Threshold here seem unecessarily harsh but fine given actual
- // range of values used for cpi->sf.thresh_mult[]
+ // Threshold here seems unnecessarily harsh but fine given actual
+ // range of values used for cpi->sf.thresh_mult[].
const int t = q * rd_thresh_block_size_factor[bsize];
const int thresh_max = INT_MAX / t;
@@ -419,18 +419,12 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
struct macroblock_plane *const p = &x->plane[i];
struct macroblockd_plane *const pd = &xd->plane[i];
const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
- int rate;
- int64_t dist;
(void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
pd->dst.buf, pd->dst.stride, &sse);
if (i == 0)
x->pred_sse[ref] = sse;
- // sse works better than var, since there is no dc prediction used
- model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
- pd->dequant[1] >> 3, &rate, &dist);
- rate_sum += rate;
- dist_sum += (int)dist;
+ dist_sum += (int)sse;
}
*out_rate_sum = rate_sum;
@@ -2276,41 +2270,38 @@ static void setup_pred_block(const MACROBLOCKD *xd,
void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
const TileInfo *const tile,
- int idx, MV_REFERENCE_FRAME frame_type,
+ MV_REFERENCE_FRAME ref_frame,
BLOCK_SIZE block_size,
int mi_row, int mi_col,
int_mv frame_nearest_mv[MAX_REF_FRAMES],
int_mv frame_near_mv[MAX_REF_FRAMES],
struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
- VP9_COMMON *cm = &cpi->common;
- YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]];
+ const VP9_COMMON *cm = &cpi->common;
+ const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
- const struct scale_factors *const sf = &cm->frame_refs[frame_type - 1].sf;
-
+ MODE_INFO *const mi = xd->mi_8x8[0];
+ int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame];
+ const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
// TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
// use the UV scaling factors.
- setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col, sf, sf);
+ setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
// Gets an initial list of candidate vectors from neighbours and orders them
- vp9_find_mv_refs(cm, xd, tile, xd->mi_8x8[0],
- xd->last_mi,
- frame_type,
- mbmi->ref_mvs[frame_type], mi_row, mi_col);
+ vp9_find_mv_refs(cm, xd, tile, mi, xd->last_mi, ref_frame, candidates,
+ mi_row, mi_col);
// Candidate refinement carried out at encoder and decoder
- vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv,
- mbmi->ref_mvs[frame_type],
- &frame_nearest_mv[frame_type],
- &frame_near_mv[frame_type]);
+ vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
+ &frame_nearest_mv[ref_frame],
+ &frame_near_mv[ref_frame]);
// Further refinement that is encode side only to test the top few candidates
// in full and choose the best as the centre point for subsequent searches.
// The current implementation doesn't support scaling.
if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
- mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride,
- frame_type, block_size);
+ mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
+ ref_frame, block_size);
}
YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) {
@@ -3173,7 +3164,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
- vp9_setup_buffer_inter(cpi, x, tile, get_ref_frame_idx(cpi, ref_frame),
+ vp9_setup_buffer_inter(cpi, x, tile,
ref_frame, block_size, mi_row, mi_col,
frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
}
@@ -3798,7 +3789,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
- vp9_setup_buffer_inter(cpi, x, tile, get_ref_frame_idx(cpi, ref_frame),
+ vp9_setup_buffer_inter(cpi, x, tile,
ref_frame, block_size, mi_row, mi_col,
frame_mv[NEARESTMV], frame_mv[NEARMV],
yv12_mb);
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index 696cf6b11..9ac1f5404 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -39,7 +39,7 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex);
void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
const TileInfo *const tile,
- int idx, MV_REFERENCE_FRAME frame_type,
+ MV_REFERENCE_FRAME ref_frame,
BLOCK_SIZE block_size,
int mi_row, int mi_col,
int_mv frame_nearest_mv[MAX_REF_FRAMES],
diff --git a/vp9/encoder/vp9_resize.c b/vp9/encoder/vp9_resize.c
new file mode 100644
index 000000000..f15abc07d
--- /dev/null
+++ b/vp9/encoder/vp9_resize.c
@@ -0,0 +1,418 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "vp9/common/vp9_common.h"
+#include "vp9/encoder/vp9_resize.h"
+#include "vpx/vpx_integer.h"
+
+// Filter coefficients are fixed point with FILTER_BITS fractional bits: the
+// kernels in this file sum to (1 << FILTER_BITS), and filtered sums are
+// shifted right by FILTER_BITS to get back to pixel range.
+#define FILTER_BITS 7
+
+// Number of taps in each interpolation kernel (entries per interp_kernel).
+#define INTERP_TAPS 8
+// log2 of the number of sub-pixel phases in the interpolation filter table.
+#define SUBPEL_BITS 5
+// Mask that extracts a sub-pixel phase index (0 .. (1 << SUBPEL_BITS) - 1).
+#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)
+// Number of fractional bits in the fixed-point source position that
+// interpolate() steps through.
+#define INTERP_PRECISION_BITS 32
+
+// Divides value by 2^n with rounding: half of 2^n is added before the right
+// shift. Requires n >= 1.
+#define ROUND_POWER_OF_TWO(value, n) \
+ (((value) + (1 << ((n) - 1))) >> (n))
+
+// One interpolation kernel: INTERP_TAPS signed 16-bit coefficients.
+typedef int16_t interp_kernel[INTERP_TAPS];
+
+// Filters for interpolation - note this also filters integer pels.
+// There is one kernel per sub-pixel phase; interpolate() selects the row from
+// the top SUBPEL_BITS fractional bits of the source position. Row 0 (the
+// integer-pel phase) is not an identity kernel, which is why integer positions
+// are filtered as well.
+const interp_kernel vp9_filteredinterp_filters[(1 << SUBPEL_BITS)] = {
+ {-1, -8, 33, 80, 33, -8, -1, 0},
+ {-1, -8, 30, 80, 35, -8, -1, 1},
+ {-1, -8, 28, 80, 37, -7, -2, 1},
+ {0, -8, 26, 79, 39, -7, -2, 1},
+ {0, -8, 24, 79, 41, -7, -2, 1},
+ {0, -8, 22, 78, 43, -6, -2, 1},
+ {0, -8, 20, 78, 45, -5, -3, 1},
+ {0, -8, 18, 77, 48, -5, -3, 1},
+ {0, -8, 16, 76, 50, -4, -3, 1},
+ {0, -8, 15, 75, 52, -3, -4, 1},
+ {0, -7, 13, 74, 54, -3, -4, 1},
+ {0, -7, 11, 73, 56, -2, -4, 1},
+ {0, -7, 10, 71, 58, -1, -4, 1},
+ {1, -7, 8, 70, 60, 0, -5, 1},
+ {1, -6, 6, 68, 62, 1, -5, 1},
+ {1, -6, 5, 67, 63, 2, -5, 1},
+ {1, -6, 4, 65, 65, 4, -6, 1},
+ {1, -5, 2, 63, 67, 5, -6, 1},
+ {1, -5, 1, 62, 68, 6, -6, 1},
+ {1, -5, 0, 60, 70, 8, -7, 1},
+ {1, -4, -1, 58, 71, 10, -7, 0},
+ {1, -4, -2, 56, 73, 11, -7, 0},
+ {1, -4, -3, 54, 74, 13, -7, 0},
+ {1, -4, -3, 52, 75, 15, -8, 0},
+ {1, -3, -4, 50, 76, 16, -8, 0},
+ {1, -3, -5, 48, 77, 18, -8, 0},
+ {1, -3, -5, 45, 78, 20, -8, 0},
+ {1, -2, -6, 43, 78, 22, -8, 0},
+ {1, -2, -7, 41, 79, 24, -8, 0},
+ {1, -2, -7, 39, 79, 26, -8, 0},
+ {1, -2, -7, 37, 80, 28, -8, -1},
+ {1, -1, -8, 35, 80, 30, -8, -1},
+};
+
+// Filters for factor of 2 downsampling.
+// Only one half of each symmetric kernel is stored.
+// Even-length kernel: entry j weights the mirrored sample pair at distance j
+// on either side of the output position (full length = 2 * entries).
+static const int16_t vp9_down2_symeven_half_filter[] = {56, 12, -3, -1};
+// Odd-length kernel: entry 0 is the centre tap and entry j (j >= 1) weights
+// the mirrored pair at distance j (full length = 2 * entries - 1).
+static const int16_t vp9_down2_symodd_half_filter[] = {64, 35, 0, -3};
+
+// Resamples the `inlength` samples of `input` into `outlength` samples of
+// `output` using the INTERP_TAPS-tap kernels in vp9_filteredinterp_filters.
+// The source position of each output sample is tracked in fixed point with
+// INTERP_PRECISION_BITS fractional bits. Tap positions that fall outside
+// [0, inlength) read the nearest edge sample instead.
+static void interpolate(const uint8_t *const input, int inlength,
+ uint8_t *output, int outlength) {
+ // Source-position step per output sample: inlength / outlength, rounded, in
+ // fixed point with 32 fractional bits.
+ const int64_t delta = (((uint64_t)inlength << 32) + outlength / 2) /
+ outlength;
+ // Source position of output sample 0: (inlength - outlength) /
+ // (2 * outlength) in the same fixed point (the shift by 31 supplies the
+ // factor of one half). Negative when outlength > inlength.
+ const int64_t offset = inlength > outlength ?
+ (((int64_t)(inlength - outlength) << 31) + outlength / 2) / outlength :
+ -(((int64_t)(outlength - inlength) << 31) + outlength / 2) / outlength;
+ uint8_t *optr = output;
+ int x, x1, x2, sum, k, int_pel, sub_pel;
+ int64_t y;
+
+ // x1 = first output index whose integer source position is at least
+ // INTERP_TAPS / 2 - 1, i.e. whose leftmost tap index is >= 0.
+ x = 0;
+ y = offset;
+ while ((y >> INTERP_PRECISION_BITS) < (INTERP_TAPS / 2 - 1)) {
+ x++;
+ y += delta;
+ }
+ x1 = x;
+ // x2 = last output index whose rightmost tap index
+ // (integer position + INTERP_TAPS / 2) is still below inlength.
+ x = outlength - 1;
+ y = delta * x + offset;
+ while ((y >> INTERP_PRECISION_BITS) +
+ (int64_t)(INTERP_TAPS / 2) >= inlength) {
+ x--;
+ y -= delta;
+ }
+ x2 = x;
+ if (x1 > x2) {
+ // No output sample has all taps in range: clamp the tap index on both
+ // sides for every sample.
+ for (x = 0, y = offset; x < outlength; ++x, y += delta) {
+ const int16_t *filter;
+ // Integer part of the source position, and the top SUBPEL_BITS of its
+ // fraction, which select the kernel.
+ int_pel = y >> INTERP_PRECISION_BITS;
+ sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
+ filter = vp9_filteredinterp_filters[sub_pel];
+ sum = 0;
+ for (k = 0; k < INTERP_TAPS; ++k) {
+ const int pk = int_pel - INTERP_TAPS / 2 + 1 + k;
+ sum += filter[k] * input[(pk < 0 ? 0 :
+ (pk >= inlength ? inlength - 1 : pk))];
+ }
+ *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
+ }
+ } else {
+ // Initial part.
+ // Outputs before x1: only the lower bound of the tap index is clamped.
+ for (x = 0, y = offset; x < x1; ++x, y += delta) {
+ const int16_t *filter;
+ int_pel = y >> INTERP_PRECISION_BITS;
+ sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
+ filter = vp9_filteredinterp_filters[sub_pel];
+ sum = 0;
+ for (k = 0; k < INTERP_TAPS; ++k)
+ sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ?
+ 0 :
+ int_pel - INTERP_TAPS / 2 + 1 + k)];
+ *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
+ }
+ // Middle part.
+ // Outputs x1..x2: every tap is in range, so no clamping is applied.
+ for (; x <= x2; ++x, y += delta) {
+ const int16_t *filter;
+ int_pel = y >> INTERP_PRECISION_BITS;
+ sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
+ filter = vp9_filteredinterp_filters[sub_pel];
+ sum = 0;
+ for (k = 0; k < INTERP_TAPS; ++k)
+ sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k];
+ *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
+ }
+ // End part.
+ // Outputs after x2: only the upper bound of the tap index is clamped.
+ for (; x < outlength; ++x, y += delta) {
+ const int16_t *filter;
+ int_pel = y >> INTERP_PRECISION_BITS;
+ sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
+ filter = vp9_filteredinterp_filters[sub_pel];
+ sum = 0;
+ for (k = 0; k < INTERP_TAPS; ++k)
+ sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >=
+ inlength ? inlength - 1 :
+ int_pel - INTERP_TAPS / 2 + 1 + k)];
+ *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
+ }
+ }
+}
+
+// Downsamples `input` (`length` samples) by a factor of 2 with the symmetric
+// even-length filter, writing one output sample for every second input index
+// (i = 0, 2, 4, ... while i < length). Each output is the rounded,
+// FILTER_BITS-normalised sum over j of
+// (input[i - j] + input[i + 1 + j]) * filter[j], clipped to pixel range.
+// Indices outside [0, length) read the nearest edge sample.
+static void down2_symeven(const uint8_t *const input, int length,
+ uint8_t *output) {
+ // Actual filter len = 2 * filter_len_half.
+ static const int16_t *filter = vp9_down2_symeven_half_filter;
+ // Entry count of the half filter: byte size divided by the 2-byte element
+ // size of int16_t.
+ const int filter_len_half = sizeof(vp9_down2_symeven_half_filter) / 2;
+ int i, j;
+ uint8_t *optr = output;
+ // [l1, l2) is the range of i handled by the unclamped middle loop. Both
+ // bounds are rounded up to even so they land on the stride-2 values i takes.
+ int l1 = filter_len_half;
+ int l2 = (length - filter_len_half);
+ l1 += (l1 & 1);
+ l2 += (l2 & 1);
+ if (l1 > l2) {
+ // Short input length.
+ // No unclamped middle region exists, so clamp both sides for every output.
+ for (i = 0; i < length; i += 2) {
+ // Start from half of (1 << FILTER_BITS) so the final shift rounds.
+ int sum = (1 << (FILTER_BITS - 1));
+ for (j = 0; j < filter_len_half; ++j) {
+ sum += (input[(i - j < 0 ? 0 : i - j)] +
+ input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
+ filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel(sum);
+ }
+ } else {
+ // Initial part.
+ // Only the left index is clamped here.
+ for (i = 0; i < l1; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1));
+ for (j = 0; j < filter_len_half; ++j) {
+ sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + 1 + j]) * filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel(sum);
+ }
+ // Middle part.
+ // No clamping is applied to either index.
+ for (; i < l2; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1));
+ for (j = 0; j < filter_len_half; ++j) {
+ sum += (input[i - j] + input[i + 1 + j]) * filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel(sum);
+ }
+ // End part.
+ // Only the right index is clamped here.
+ for (; i < length; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1));
+ for (j = 0; j < filter_len_half; ++j) {
+ sum += (input[i - j] +
+ input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
+ filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel(sum);
+ }
+ }
+}
+
+// Downsamples `input` (`length` samples) by a factor of 2 with the symmetric
+// odd-length filter, writing one output sample for every second input index
+// (i = 0, 2, 4, ... while i < length). Each output is the rounded,
+// FILTER_BITS-normalised value of input[i] * filter[0] plus the sum over
+// j >= 1 of (input[i - j] + input[i + j]) * filter[j], clipped to pixel range.
+// Indices outside [0, length) read the nearest edge sample.
+static void down2_symodd(const uint8_t *const input, int length,
+ uint8_t *output) {
+ // Actual filter len = 2 * filter_len_half - 1.
+ static const int16_t *filter = vp9_down2_symodd_half_filter;
+ // Entry count of the half filter: byte size divided by the 2-byte element
+ // size of int16_t.
+ const int filter_len_half = sizeof(vp9_down2_symodd_half_filter) / 2;
+ int i, j;
+ uint8_t *optr = output;
+ // [l1, l2) is the range of i handled by the unclamped middle loop. Both
+ // bounds are rounded up to even so they land on the stride-2 values i takes.
+ int l1 = filter_len_half - 1;
+ int l2 = (length - filter_len_half + 1);
+ l1 += (l1 & 1);
+ l2 += (l2 & 1);
+ if (l1 > l2) {
+ // Short input length.
+ // No unclamped middle region exists, so clamp both sides for every output.
+ for (i = 0; i < length; i += 2) {
+ // Rounding term plus the centre tap.
+ int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
+ for (j = 1; j < filter_len_half; ++j) {
+ sum += (input[(i - j < 0 ? 0 : i - j)] +
+ input[(i + j >= length ? length - 1 : i + j)]) *
+ filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel(sum);
+ }
+ } else {
+ // Initial part.
+ // Only the left index is clamped here.
+ for (i = 0; i < l1; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
+ for (j = 1; j < filter_len_half; ++j) {
+ sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + j]) * filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel(sum);
+ }
+ // Middle part.
+ // No clamping is applied to either index.
+ for (; i < l2; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
+ for (j = 1; j < filter_len_half; ++j) {
+ sum += (input[i - j] + input[i + j]) * filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel(sum);
+ }
+ // End part.
+ // Only the right index is clamped here.
+ for (; i < length; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
+ for (j = 1; j < filter_len_half; ++j) {
+ sum += (input[i - j] + input[(i + j >= length ? length - 1 : i + j)]) *
+ filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel(sum);
+ }
+ }
+}
+
+// Returns the length left after halving `length` `steps` times, rounding up
+// at every halving.
+static int get_down2_length(int length, int steps) {
+  int remaining = steps;
+  while (remaining-- > 0) {
+    length = (length + 1) >> 1;
+  }
+  return length;
+}
+
+// Returns how many times `in_length` can be halved (rounding up each time)
+// while the halved length is still at least `out_length`.
+int get_down2_steps(int in_length, int out_length) {
+  int steps = 0;
+  int proj_in_length;
+  while ((proj_in_length = get_down2_length(in_length, 1)) >= out_length) {
+    // Halving makes no further progress once the length is 1 (or 0). Without
+    // this check the loop never terminates when out_length <= 1, e.g. for
+    // in_length = 4, out_length = 1.
+    if (proj_in_length == in_length)
+      break;
+    ++steps;
+    in_length = proj_in_length;
+  }
+  return steps;
+}
+
+// Resizes the 1-D signal `input` (`length` samples) into `output` (`olength`
+// samples).
+//
+// Equal lengths are a plain copy. Otherwise the signal is first halved
+// get_down2_steps(length, olength) times with the down2 filters (the odd- or
+// even-length variant is chosen per stage from the current length), and
+// interpolate() covers whatever ratio remains.
+//
+// `buf` is scratch space for the halving stages and must have room for at
+// least `length` samples. When it is NULL a temporary buffer of that size is
+// allocated and released here; if that allocation fails the function returns
+// without writing `output`.
+static void resize_multistep(const uint8_t *const input,
+                             int length,
+                             uint8_t *output,
+                             int olength,
+                             uint8_t *buf) {
+  int steps;
+  if (length == olength) {
+    memcpy(output, input, sizeof(uint8_t) * length);
+    return;
+  }
+  steps = get_down2_steps(length, olength);
+
+  if (steps > 0) {
+    int s;
+    uint8_t *out = NULL;
+    uint8_t *tmpbuf = NULL;
+    uint8_t *otmp, *otmp2;
+    int filteredlength = length;
+    // Use the caller's scratch buffer when one is supplied. Testing the
+    // freshly NULL-initialised `tmpbuf` here instead would ignore `buf` and
+    // allocate on every call.
+    if (buf == NULL) {
+      tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) * length);
+      if (tmpbuf == NULL)
+        return;
+      otmp = tmpbuf;
+    } else {
+      otmp = buf;
+    }
+    // Stages alternate between two disjoint regions of the scratch space so
+    // that a stage never reads and writes the same region.
+    otmp2 = otmp + get_down2_length(length, 1);
+    for (s = 0; s < steps; ++s) {
+      const int proj_filteredlength = get_down2_length(filteredlength, 1);
+      const uint8_t *const in = (s == 0 ? input : out);
+      // Write the last stage straight into `output` when it already has the
+      // requested length, so no interpolation pass is needed.
+      if (s == steps - 1 && proj_filteredlength == olength)
+        out = output;
+      else
+        out = (s & 1 ? otmp2 : otmp);
+      if (filteredlength & 1)
+        down2_symodd(in, filteredlength, out);
+      else
+        down2_symeven(in, filteredlength, out);
+      filteredlength = proj_filteredlength;
+    }
+    if (filteredlength != olength) {
+      interpolate(out, filteredlength, output, olength);
+    }
+    // free(NULL) is a no-op, so no guard is needed when `buf` was used.
+    free(tmpbuf);
+  } else {
+    interpolate(input, length, output, olength);
+  }
+}
+
+// Gathers `len` samples spaced `stride` apart, starting at `img`, into the
+// contiguous array `arr`.
+static void fill_col_to_arr(uint8_t *img, int stride, int len, uint8_t *arr) {
+  const uint8_t *src = img;
+  uint8_t *dst = arr;
+  int remaining = len;
+  while (remaining-- > 0) {
+    *dst++ = *src;
+    src += stride;
+  }
+}
+
+// Scatters the `len` contiguous samples of `arr` into positions spaced
+// `stride` apart, starting at `img`.
+static void fill_arr_to_col(uint8_t *img, int stride, int len, uint8_t *arr) {
+  uint8_t *dst = img;
+  const uint8_t *src = arr;
+  int remaining = len;
+  while (remaining-- > 0) {
+    *dst = *src++;
+    dst += stride;
+  }
+}
+
+// Resizes one 8-bit plane from `width` x `height` (row pitch `in_stride`) to
+// `width2` x `height2` (row pitch `out_stride`) in two separable passes:
+// every row is resized to `width2` into an intermediate buffer, then every
+// column of that buffer is resized to `height2` and written to `output`.
+// If any working buffer cannot be allocated the function returns without
+// writing `output`.
+void vp9_resize_plane(const uint8_t *const input,
+                      int height,
+                      int width,
+                      int in_stride,
+                      uint8_t *output,
+                      int height2,
+                      int width2,
+                      int out_stride) {
+  int i;
+  // Row-resized image: `height` rows of `width2` samples.
+  uint8_t *intbuf = (uint8_t *)malloc(sizeof(uint8_t) * width2 * height);
+  // Scratch for resize_multistep(), large enough for a full row or column.
+  uint8_t *tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) *
+                                      (width < height ? height : width));
+  // One input column followed by one output column.
+  uint8_t *arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * (height + height2));
+  // Do not touch any buffer unless all three allocations succeeded.
+  if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL)
+    goto cleanup;
+  for (i = 0; i < height; ++i)
+    resize_multistep(input + in_stride * i, width,
+                     intbuf + width2 * i, width2, tmpbuf);
+  for (i = 0; i < width2; ++i) {
+    fill_col_to_arr(intbuf + i, width2, height, arrbuf);
+    resize_multistep(arrbuf, height, arrbuf + height, height2, tmpbuf);
+    fill_arr_to_col(output + i, out_stride, height2, arrbuf + height);
+  }
+
+cleanup:
+  // free(NULL) is a no-op, so this is safe after a partial allocation.
+  free(intbuf);
+  free(tmpbuf);
+  free(arrbuf);
+}
+
+// Resizes a three-plane frame whose chroma planes are half the luma size in
+// both dimensions. Y goes from width x height to owidth x oheight; U and V
+// use the halved (truncating) dimensions.
+void vp9_resize_frame420(const uint8_t *const y,
+                         int y_stride,
+                         const uint8_t *const u, const uint8_t *const v,
+                         int uv_stride,
+                         int height, int width,
+                         uint8_t *oy, int oy_stride,
+                         uint8_t *ou, uint8_t *ov, int ouv_stride,
+                         int oheight, int owidth) {
+  const int uv_height = height / 2;
+  const int uv_width = width / 2;
+  const int ouv_height = oheight / 2;
+  const int ouv_width = owidth / 2;
+
+  vp9_resize_plane(y, height, width, y_stride,
+                   oy, oheight, owidth, oy_stride);
+  vp9_resize_plane(u, uv_height, uv_width, uv_stride,
+                   ou, ouv_height, ouv_width, ouv_stride);
+  vp9_resize_plane(v, uv_height, uv_width, uv_stride,
+                   ov, ouv_height, ouv_width, ouv_stride);
+}
+
+// Resizes a three-plane frame whose chroma planes have full luma height and
+// half (truncating) luma width. Y goes from width x height to
+// owidth x oheight.
+void vp9_resize_frame422(const uint8_t *const y, int y_stride,
+                         const uint8_t *const u, const uint8_t *const v,
+                         int uv_stride,
+                         int height, int width,
+                         uint8_t *oy, int oy_stride,
+                         uint8_t *ou, uint8_t *ov, int ouv_stride,
+                         int oheight, int owidth) {
+  const int uv_width = width / 2;
+  const int ouv_width = owidth / 2;
+
+  vp9_resize_plane(y, height, width, y_stride,
+                   oy, oheight, owidth, oy_stride);
+  vp9_resize_plane(u, height, uv_width, uv_stride,
+                   ou, oheight, ouv_width, ouv_stride);
+  vp9_resize_plane(v, height, uv_width, uv_stride,
+                   ov, oheight, ouv_width, ouv_stride);
+}
+
+// Resizes a three-plane frame whose chroma planes have the same dimensions as
+// luma: Y, U and V are each resized from width x height to owidth x oheight,
+// in that order.
+void vp9_resize_frame444(const uint8_t *const y, int y_stride,
+                         const uint8_t *const u, const uint8_t *const v,
+                         int uv_stride,
+                         int height, int width,
+                         uint8_t *oy, int oy_stride,
+                         uint8_t *ou, uint8_t *ov, int ouv_stride,
+                         int oheight, int owidth) {
+  // Luma plane.
+  vp9_resize_plane(y, height, width, y_stride,
+                   oy, oheight, owidth, oy_stride);
+
+  // Chroma planes share one input stride and one output stride.
+  vp9_resize_plane(u, height, width, uv_stride,
+                   ou, oheight, owidth, ouv_stride);
+  vp9_resize_plane(v, height, width, uv_stride,
+                   ov, oheight, owidth, ouv_stride);
+}
diff --git a/vp9/encoder/vp9_resize.h b/vp9/encoder/vp9_resize.h
new file mode 100644
index 000000000..c67595a3f
--- /dev/null
+++ b/vp9/encoder/vp9_resize.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_ENCODER_VP9_RESIZE_H_
+#define VP9_ENCODER_VP9_RESIZE_H_
+
+#include <stdio.h>
+
+// Every prototype below uses uint8_t; include its definition here so the
+// header does not depend on what the including file happened to pull in.
+#include "vpx/vpx_integer.h"
+
+// Resizes one 8-bit plane of `width` x `height` samples (row pitch
+// `in_stride`) into `output` as `width2` x `height2` samples (row pitch
+// `out_stride`).
+void vp9_resize_plane(const uint8_t *const input,
+                      int height,
+                      int width,
+                      int in_stride,
+                      uint8_t *output,
+                      int height2,
+                      int width2,
+                      int out_stride);
+
+// Resizes a Y/U/V frame from `width` x `height` to `owidth` x `oheight`.
+// The chroma planes are resized with both dimensions halved.
+void vp9_resize_frame420(const uint8_t *const y,
+                         int y_stride,
+                         const uint8_t *const u,
+                         const uint8_t *const v,
+                         int uv_stride,
+                         int height,
+                         int width,
+                         uint8_t *oy,
+                         int oy_stride,
+                         uint8_t *ou,
+                         uint8_t *ov,
+                         int ouv_stride,
+                         int oheight,
+                         int owidth);
+
+// Resizes a Y/U/V frame from `width` x `height` to `owidth` x `oheight`.
+// The chroma planes are resized with the width halved and the height
+// unchanged.
+void vp9_resize_frame422(const uint8_t *const y,
+                         int y_stride,
+                         const uint8_t *const u,
+                         const uint8_t *const v,
+                         int uv_stride,
+                         int height,
+                         int width,
+                         uint8_t *oy,
+                         int oy_stride,
+                         uint8_t *ou,
+                         uint8_t *ov,
+                         int ouv_stride,
+                         int oheight,
+                         int owidth);
+
+// Resizes a Y/U/V frame from `width` x `height` to `owidth` x `oheight`.
+// The chroma planes are resized with the same dimensions as luma.
+void vp9_resize_frame444(const uint8_t *const y,
+                         int y_stride,
+                         const uint8_t *const u,
+                         const uint8_t *const v,
+                         int uv_stride,
+                         int height,
+                         int width,
+                         uint8_t *oy,
+                         int oy_stride,
+                         uint8_t *ou,
+                         uint8_t *ov,
+                         int ouv_stride,
+                         int oheight,
+                         int owidth);
+
+#endif  // VP9_ENCODER_VP9_RESIZE_H_
diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad_c.c
index 55d595baf..58c5df47e 100644
--- a/vp9/encoder/vp9_sad_c.c
+++ b/vp9/encoder/vp9_sad_c.c
@@ -8,31 +8,44 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
#include <stdlib.h>
+
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
-#include "vp9/encoder/vp9_sadmxn.h"
-#include "vp9/encoder/vp9_variance.h"
+
#include "vpx/vpx_integer.h"
+#include "vp9/encoder/vp9_variance.h"
+
+// Returns the sum of absolute differences between two `width` x `height`
+// blocks of 8-bit samples. `a_stride` and `b_stride` are the row pitches of
+// `a` and `b`.
+static INLINE unsigned int sad(const uint8_t *a, int a_stride,
+                               const uint8_t *b, int b_stride,
+                               int width, int height) {
+  unsigned int total = 0;
+  int rows_left = height;
+
+  while (rows_left-- > 0) {
+    int col;
+    for (col = 0; col < width; ++col) {
+      total += abs(a[col] - b[col]);
+    }
+
+    // Step both blocks to their next row.
+    a += a_stride;
+    b += b_stride;
+  }
+
+  return total;
+}
#define sad_mxn_func(m, n) \
-unsigned int vp9_sad##m##x##n##_c(const uint8_t *src_ptr, \
- int src_stride, \
- const uint8_t *ref_ptr, \
- int ref_stride, \
+unsigned int vp9_sad##m##x##n##_c(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
unsigned int max_sad) { \
- return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, m, n); \
+ return sad(src_ptr, src_stride, ref_ptr, ref_stride, m, n); \
} \
-unsigned int vp9_sad##m##x##n##_avg_c(const uint8_t *src_ptr, \
- int src_stride, \
- const uint8_t *ref_ptr, \
- int ref_stride, \
+unsigned int vp9_sad##m##x##n##_avg_c(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
const uint8_t *second_pred, \
unsigned int max_sad) { \
uint8_t comp_pred[m * n]; \
comp_avg_pred(comp_pred, second_pred, m, n, ref_ptr, ref_stride); \
- return sad_mx_n_c(src_ptr, src_stride, comp_pred, m, m, n); \
+ return sad(src_ptr, src_stride, comp_pred, m, m, n); \
}
sad_mxn_func(64, 64)
@@ -49,567 +62,263 @@ sad_mxn_func(8, 4)
sad_mxn_func(4, 8)
sad_mxn_func(4, 4)
-void vp9_sad64x32x4d_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t* const ref_ptr[],
- int ref_stride,
+void vp9_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t* const ref_ptr[], int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad64x32(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad64x32(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad64x32(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad64x32(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 4; ++i)
+ sad_array[i] = vp9_sad64x32(src_ptr, src_stride, ref_ptr[i], ref_stride,
+ 0x7fffffff);
}
-void vp9_sad32x64x4d_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t* const ref_ptr[],
- int ref_stride,
+void vp9_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t* const ref_ptr[], int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad32x64(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad32x64(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad32x64(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad32x64(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 4; ++i)
+ sad_array[i] = vp9_sad32x64(src_ptr, src_stride, ref_ptr[i], ref_stride,
+ 0x7fffffff);
}
-void vp9_sad32x16x4d_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t* const ref_ptr[],
- int ref_stride,
+void vp9_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t* const ref_ptr[], int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad32x16(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad32x16(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad32x16(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad32x16(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 4; ++i)
+ sad_array[i] = vp9_sad32x16(src_ptr, src_stride, ref_ptr[i], ref_stride,
+ 0x7fffffff);
}
-void vp9_sad16x32x4d_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t* const ref_ptr[],
- int ref_stride,
+void vp9_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t* const ref_ptr[], int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad16x32(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad16x32(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad16x32(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad16x32(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 4; ++i)
+ sad_array[i] = vp9_sad16x32(src_ptr, src_stride, ref_ptr[i], ref_stride,
+ 0x7fffffff);
}
-void vp9_sad64x64x3_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
+void vp9_sad64x64x3_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad64x64(src_ptr, src_stride, ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = vp9_sad64x64(src_ptr, src_stride, ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = vp9_sad64x64(src_ptr, src_stride, ref_ptr + 2, ref_stride,
- 0x7fffffff);
+ int i;
+ for (i = 0; i < 3; ++i)
+ sad_array[i] = vp9_sad64x64(src_ptr, src_stride, ref_ptr + i, ref_stride,
+ 0x7fffffff);
}
-void vp9_sad32x32x3_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
+void vp9_sad32x32x3_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad32x32(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad32x32(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad32x32(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 3; ++i)
+ sad_array[i] = vp9_sad32x32(src_ptr, src_stride, ref_ptr + i, ref_stride,
+ 0x7fffffff);
}
-void vp9_sad64x64x8_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
+void vp9_sad64x64x8_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad64x64(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = vp9_sad64x64(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = vp9_sad64x64(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = vp9_sad64x64(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = vp9_sad64x64(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = vp9_sad64x64(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = vp9_sad64x64(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = vp9_sad64x64(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ int i;
+ for (i = 0; i < 8; ++i)
+ sad_array[i] = vp9_sad64x64(src_ptr, src_stride, ref_ptr + i, ref_stride,
+ 0x7fffffff);
}
-void vp9_sad32x32x8_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
+void vp9_sad32x32x8_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad32x32(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = vp9_sad32x32(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = vp9_sad32x32(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = vp9_sad32x32(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = vp9_sad32x32(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = vp9_sad32x32(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = vp9_sad32x32(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = vp9_sad32x32(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ int i;
+ for (i = 0; i < 8; ++i)
+ sad_array[i] = vp9_sad32x32(src_ptr, src_stride, ref_ptr + i, ref_stride,
+ 0x7fffffff);
}
-void vp9_sad16x16x3_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
+void vp9_sad16x16x3_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad16x16(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad16x16(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad16x16(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 3; ++i)
+ sad_array[i] = vp9_sad16x16(src_ptr, src_stride, ref_ptr + i, ref_stride,
+ 0x7fffffff);
}
-void vp9_sad16x16x8_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
+void vp9_sad16x16x8_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
uint32_t *sad_array) {
- sad_array[0] = vp9_sad16x16(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = vp9_sad16x16(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = vp9_sad16x16(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = vp9_sad16x16(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = vp9_sad16x16(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = vp9_sad16x16(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = vp9_sad16x16(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = vp9_sad16x16(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ int i;
+ for (i = 0; i < 8; ++i)
+ sad_array[i] = vp9_sad16x16(src_ptr, src_stride, ref_ptr + i, ref_stride,
+ 0x7fffffff);
}
-void vp9_sad16x8x3_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
+void vp9_sad16x8x3_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad16x8(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad16x8(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad16x8(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 3; ++i)
+ sad_array[i] = vp9_sad16x8(src_ptr, src_stride, ref_ptr + i, ref_stride,
+ 0x7fffffff);
}
-void vp9_sad16x8x8_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
+void vp9_sad16x8x8_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
uint32_t *sad_array) {
- sad_array[0] = vp9_sad16x8(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = vp9_sad16x8(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = vp9_sad16x8(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = vp9_sad16x8(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = vp9_sad16x8(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = vp9_sad16x8(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = vp9_sad16x8(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = vp9_sad16x8(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ int i;
+ for (i = 0; i < 8; ++i)
+ sad_array[i] = vp9_sad16x8(src_ptr, src_stride, ref_ptr + i, ref_stride,
+ 0x7fffffff);
}
-void vp9_sad8x8x3_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
+void vp9_sad8x8x3_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad8x8(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad8x8(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad8x8(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 3; ++i)
+ sad_array[i] = vp9_sad8x8(src_ptr, src_stride, ref_ptr + i, ref_stride,
+ 0x7fffffff);
}
-void vp9_sad8x8x8_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
+void vp9_sad8x8x8_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
uint32_t *sad_array) {
- sad_array[0] = vp9_sad8x8(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = vp9_sad8x8(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = vp9_sad8x8(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = vp9_sad8x8(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = vp9_sad8x8(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = vp9_sad8x8(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = vp9_sad8x8(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = vp9_sad8x8(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ int i;
+ for (i = 0; i < 8; ++i)
+ sad_array[i] = vp9_sad8x8(src_ptr, src_stride, ref_ptr + i, ref_stride,
+ 0x7fffffff);
}
-void vp9_sad8x16x3_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
+void vp9_sad8x16x3_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad8x16(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad8x16(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad8x16(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 3; ++i)
+ sad_array[i] = vp9_sad8x16(src_ptr, src_stride, ref_ptr + i, ref_stride,
+ 0x7fffffff);
}
-void vp9_sad8x16x8_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
+void vp9_sad8x16x8_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
uint32_t *sad_array) {
- sad_array[0] = vp9_sad8x16(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = vp9_sad8x16(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = vp9_sad8x16(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = vp9_sad8x16(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = vp9_sad8x16(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = vp9_sad8x16(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = vp9_sad8x16(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = vp9_sad8x16(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ int i;
+ for (i = 0; i < 8; ++i)
+ sad_array[i] = vp9_sad8x16(src_ptr, src_stride, ref_ptr + i, ref_stride,
+ 0x7fffffff);
}
-void vp9_sad4x4x3_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
+void vp9_sad4x4x3_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad4x4(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad4x4(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad4x4(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 3; ++i)
+ sad_array[i] = vp9_sad4x4(src_ptr, src_stride, ref_ptr + i, ref_stride,
+ 0x7fffffff);
}
-void vp9_sad4x4x8_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
+void vp9_sad4x4x8_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
uint32_t *sad_array) {
- sad_array[0] = vp9_sad4x4(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = vp9_sad4x4(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = vp9_sad4x4(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = vp9_sad4x4(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = vp9_sad4x4(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = vp9_sad4x4(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = vp9_sad4x4(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = vp9_sad4x4(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ int i;
+ for (i = 0; i < 8; ++i)
+ sad_array[i] = vp9_sad4x4(src_ptr, src_stride, ref_ptr + i, ref_stride,
+ 0x7fffffff);
}
-void vp9_sad64x64x4d_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t* const ref_ptr[],
- int ref_stride,
+void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t* const ref_ptr[], int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad64x64(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad64x64(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad64x64(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad64x64(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 4; ++i)
+ sad_array[i] = vp9_sad64x64(src_ptr, src_stride, ref_ptr[i], ref_stride,
+ 0x7fffffff);
}
-void vp9_sad32x32x4d_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t* const ref_ptr[],
- int ref_stride,
+void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t* const ref_ptr[], int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad32x32(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad32x32(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad32x32(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad32x32(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 4; ++i)
+ sad_array[i] = vp9_sad32x32(src_ptr, src_stride, ref_ptr[i], ref_stride,
+ 0x7fffffff);
}
-void vp9_sad16x16x4d_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t* const ref_ptr[],
- int ref_stride,
+void vp9_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t* const ref_ptr[], int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad16x16(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad16x16(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad16x16(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad16x16(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 4; ++i)
+ sad_array[i] = vp9_sad16x16(src_ptr, src_stride, ref_ptr[i], ref_stride,
+ 0x7fffffff);
}
-void vp9_sad16x8x4d_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t* const ref_ptr[],
- int ref_stride,
+void vp9_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t* const ref_ptr[], int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad16x8(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad16x8(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad16x8(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad16x8(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 4; ++i)
+ sad_array[i] = vp9_sad16x8(src_ptr, src_stride, ref_ptr[i], ref_stride,
+ 0x7fffffff);
}
-void vp9_sad8x8x4d_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t* const ref_ptr[],
- int ref_stride,
+void vp9_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t* const ref_ptr[], int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad8x8(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad8x8(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad8x8(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad8x8(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 4; ++i)
+ sad_array[i] = vp9_sad8x8(src_ptr, src_stride, ref_ptr[i], ref_stride,
+ 0x7fffffff);
}
-void vp9_sad8x16x4d_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t* const ref_ptr[],
- int ref_stride,
+void vp9_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t* const ref_ptr[], int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad8x16(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad8x16(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad8x16(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad8x16(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 4; ++i)
+ sad_array[i] = vp9_sad8x16(src_ptr, src_stride, ref_ptr[i], ref_stride,
+ 0x7fffffff);
}
-void vp9_sad8x4x4d_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t* const ref_ptr[],
- int ref_stride,
+void vp9_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t* const ref_ptr[], int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad8x4(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad8x4(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad8x4(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad8x4(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 4; ++i)
+ sad_array[i] = vp9_sad8x4(src_ptr, src_stride, ref_ptr[i], ref_stride,
+ 0x7fffffff);
}
-void vp9_sad8x4x8_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
- uint32_t *sad_array) {
- sad_array[0] = vp9_sad8x4(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = vp9_sad8x4(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = vp9_sad8x4(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = vp9_sad8x4(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = vp9_sad8x4(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = vp9_sad8x4(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = vp9_sad8x4(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = vp9_sad8x4(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+void vp9_sad8x4x8_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ uint32_t *sad_array) {
+ int i;
+ for (i = 0; i < 8; ++i)
+ sad_array[i] = vp9_sad8x4(src_ptr, src_stride, ref_ptr + i, ref_stride,
+ 0x7fffffff);
}
-void vp9_sad4x8x4d_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t* const ref_ptr[],
- int ref_stride,
+void vp9_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t* const ref_ptr[], int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad4x8(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad4x8(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad4x8(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad4x8(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 4; ++i)
+ sad_array[i] = vp9_sad4x8(src_ptr, src_stride, ref_ptr[i], ref_stride,
+ 0x7fffffff);
}
-void vp9_sad4x8x8_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
- uint32_t *sad_array) {
- sad_array[0] = vp9_sad4x8(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = vp9_sad4x8(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = vp9_sad4x8(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = vp9_sad4x8(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = vp9_sad4x8(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = vp9_sad4x8(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = vp9_sad4x8(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = vp9_sad4x8(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+void vp9_sad4x8x8_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ uint32_t *sad_array) {
+ int i;
+ for (i = 0; i < 8; ++i)
+ sad_array[i] = vp9_sad4x8(src_ptr, src_stride, ref_ptr + i, ref_stride,
+ 0x7fffffff);
}
-void vp9_sad4x4x4d_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t* const ref_ptr[],
- int ref_stride,
+void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t* const ref_ptr[], int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad4x4(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad4x4(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad4x4(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad4x4(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ int i;
+ for (i = 0; i < 4; ++i)
+ sad_array[i] = vp9_sad4x4(src_ptr, src_stride, ref_ptr[i], ref_stride,
+ 0x7fffffff);
}
diff --git a/vp9/encoder/vp9_sadmxn.h b/vp9/encoder/vp9_sadmxn.h
deleted file mode 100644
index 1bae4dd67..000000000
--- a/vp9/encoder/vp9_sadmxn.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP9_ENCODER_VP9_SADMXN_H_
-#define VP9_ENCODER_VP9_SADMXN_H_
-
-#include "./vpx_config.h"
-#include "vpx/vpx_integer.h"
-
-static INLINE unsigned int sad_mx_n_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
- int m,
- int n) {
- int r, c;
- unsigned int sad = 0;
-
- for (r = 0; r < n; r++) {
- for (c = 0; c < m; c++) {
- sad += abs(src_ptr[c] - ref_ptr[c]);
- }
-
- src_ptr += src_stride;
- ref_ptr += ref_stride;
- }
-
- return sad;
-}
-
-#endif // VP9_ENCODER_VP9_SADMXN_H_
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 876219268..c2eea0aaa 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -227,7 +227,7 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
for (mb_row = 0; mb_row < mb_rows; mb_row++) {
#if ALT_REF_MC_ENABLED
// Source frames are extended to 16 pixels. This is different than
- // L/A/G reference frames that have a border of 32 (VP9BORDERINPIXELS)
+ // L/A/G reference frames that have a border of 32 (VP9_ENC_BORDER_IN_PIXELS)
// A 6/8 tap filter is used for motion search. This requires 2 pixels
// before and 3 pixels after. So the largest Y mv on a border would
// then be 16 - VP9_INTERP_EXTEND. The UV blocks are half the size of the
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 478b45ac0..b1c029cba 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -74,9 +74,6 @@ VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c
VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_8t_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
-ifeq ($(ARCH_X86_64),yes)
-VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_intrin_ssse3.c
-endif
ifeq ($(CONFIG_VP9_POSTPROC),yes)
VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 5c14b2e40..897ecd702 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -513,10 +513,8 @@ static vpx_codec_err_t vp9e_common_init(vpx_codec_ctx_t *ctx) {
priv->vp8_cfg = extracfg_map[i].cfg;
priv->vp8_cfg.pkt_list = &priv->pkt_list.head;
- // TODO(agrange) Check the limits set on this buffer, or the check that is
- // applied in vp9e_encode.
+ // Maximum buffer size approximated based on having multiple ARF.
priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 8;
-// priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 2;
if (priv->cx_data_sz < 4096) priv->cx_data_sz = 4096;
@@ -692,7 +690,7 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx,
}
}
- /* Initialize the encoder instance on the first frame*/
+ /* Initialize the encoder instance on the first frame. */
if (!res && ctx->cpi) {
unsigned int lib_flags;
YV12_BUFFER_CONFIG sd;
@@ -704,9 +702,6 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx,
if (ctx->base.init_flags & VPX_CODEC_USE_PSNR)
((VP9_COMP *)ctx->cpi)->b_calculate_psnr = 1;
- // if (ctx->base.init_flags & VPX_CODEC_USE_OUTPUT_PARTITION)
- // ((VP9_COMP *)ctx->cpi)->output_partition = 1;
-
/* Convert API flags to internal codec lib flags */
lib_flags = (flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
diff --git a/vp9/vp9_iface_common.h b/vp9/vp9_iface_common.h
index ed0122c1b..58256b22b 100644
--- a/vp9/vp9_iface_common.h
+++ b/vp9/vp9_iface_common.h
@@ -29,7 +29,7 @@ static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12,
img->fmt = VPX_IMG_FMT_I420;
}
img->w = yv12->y_stride;
- img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9BORDERINPIXELS, 3);
+ img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3);
img->d_w = yv12->y_crop_width;
img->d_h = yv12->y_crop_height;
img->x_chroma_shift = yv12->uv_width < yv12->y_width;
@@ -75,7 +75,7 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
#if CONFIG_ALPHA
- // For development purposes, force alpha to hold the same data a Y for now.
+ // For development purposes, force alpha to hold the same data as Y for now.
yv12->alpha_buffer = yv12->y_buffer;
yv12->alpha_width = yv12->y_width;
yv12->alpha_height = yv12->y_height;
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 9ea0f549f..63003b9c2 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -44,7 +44,6 @@ VP9_CX_SRCS-yes += encoder/vp9_quantize.h
VP9_CX_SRCS-yes += encoder/vp9_ratectrl.h
VP9_CX_SRCS-yes += encoder/vp9_rdopt.h
VP9_CX_SRCS-yes += encoder/vp9_pickmode.h
-VP9_CX_SRCS-yes += encoder/vp9_sadmxn.h
VP9_CX_SRCS-yes += encoder/vp9_tokenize.h
VP9_CX_SRCS-yes += encoder/vp9_treewriter.h
VP9_CX_SRCS-yes += encoder/vp9_variance.h
@@ -62,6 +61,8 @@ VP9_CX_SRCS-yes += encoder/vp9_segmentation.c
VP9_CX_SRCS-yes += encoder/vp9_segmentation.h
VP9_CX_SRCS-yes += encoder/vp9_subexp.c
VP9_CX_SRCS-yes += encoder/vp9_subexp.h
+VP9_CX_SRCS-yes += encoder/vp9_resize.c
+VP9_CX_SRCS-yes += encoder/vp9_resize.h
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c
VP9_CX_SRCS-yes += encoder/vp9_tokenize.c
VP9_CX_SRCS-yes += encoder/vp9_treewriter.c
diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h
index bf5fc0779..610e7d280 100644
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -18,10 +18,11 @@ extern "C" {
#include "vpx/vpx_external_frame_buffer.h"
#include "vpx/vpx_integer.h"
-#define VP8BORDERINPIXELS 32
-#define VP9INNERBORDERINPIXELS 96
-#define VP9BORDERINPIXELS 160
-#define VP9_INTERP_EXTEND 4
+#define VP8BORDERINPIXELS 32
+#define VP9INNERBORDERINPIXELS 96
+#define VP9_INTERP_EXTEND 4
+#define VP9_ENC_BORDER_IN_PIXELS 160
+#define VP9_DEC_BORDER_IN_PIXELS 32
typedef struct yv12_buffer_config {
int y_width;
diff --git a/vpxdec.c b/vpxdec.c
index 420497914..fc344a162 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -33,6 +33,7 @@
#include "./tools_common.h"
#include "./webmdec.h"
+#include "./y4menc.h"
static const char *exec_name;
@@ -131,6 +132,21 @@ static const arg_def_t *vp8_pp_args[] = {
};
#endif
+static int vpx_image_scale(vpx_image_t *src, vpx_image_t *dst,
+ FilterMode mode) {
+ assert(src->fmt == VPX_IMG_FMT_I420);
+ assert(dst->fmt == VPX_IMG_FMT_I420);
+ return I420Scale(src->planes[VPX_PLANE_Y], src->stride[VPX_PLANE_Y],
+ src->planes[VPX_PLANE_U], src->stride[VPX_PLANE_U],
+ src->planes[VPX_PLANE_V], src->stride[VPX_PLANE_V],
+ src->d_w, src->d_h,
+ dst->planes[VPX_PLANE_Y], dst->stride[VPX_PLANE_Y],
+ dst->planes[VPX_PLANE_U], dst->stride[VPX_PLANE_U],
+ dst->planes[VPX_PLANE_V], dst->stride[VPX_PLANE_V],
+ dst->d_w, dst->d_h,
+ mode);
+}
+
void usage_exit() {
int i;
@@ -229,47 +245,51 @@ static int read_frame(struct VpxDecInputContext *input, uint8_t **buf,
}
}
-void *out_open(const char *out_fn, int do_md5) {
- void *out = NULL;
+static int get_image_plane_width(int plane, const vpx_image_t *img) {
+ return (plane > 0 && img->x_chroma_shift > 0) ?
+ (img->d_w + 1) >> img->x_chroma_shift :
+ img->d_w;
+}
- if (do_md5) {
- MD5Context *md5_ctx = out = malloc(sizeof(MD5Context));
- (void)out_fn;
- MD5Init(md5_ctx);
- } else {
- FILE *outfile = out = strcmp("-", out_fn) ? fopen(out_fn, "wb")
- : set_binary_mode(stdout);
+static int get_image_plane_height(int plane, const vpx_image_t *img) {
+ return (plane > 0 && img->y_chroma_shift > 0) ?
+ (img->d_h + 1) >> img->y_chroma_shift :
+ img->d_h;
+}
- if (!outfile) {
- fatal("Failed to output file");
- }
- }
+static void update_image_md5(const vpx_image_t *img, const int planes[3],
+ MD5Context *md5) {
+ int i, y;
- return out;
-}
+ for (i = 0; i < 3; ++i) {
+ const int plane = planes[i];
+ const unsigned char *buf = img->planes[plane];
+ const int stride = img->stride[plane];
+ const int w = get_image_plane_width(plane, img);
+ const int h = get_image_plane_height(plane, img);
-void out_put(void *out, const uint8_t *buf, unsigned int len, int do_md5) {
- if (do_md5) {
- MD5Update(out, buf, len);
- } else {
- (void) fwrite(buf, 1, len, out);
+ for (y = 0; y < h; ++y) {
+ MD5Update(md5, buf, w);
+ buf += stride;
+ }
}
}
-void out_close(void *out, const char *out_fn, int do_md5) {
- if (do_md5) {
- uint8_t md5[16];
- int i;
+static void write_image_file(const vpx_image_t *img, const int planes[3],
+ FILE *file) {
+ int i, y;
- MD5Final(md5, out);
- free(out);
+ for (i = 0; i < 3; ++i) {
+ const int plane = planes[i];
+ const unsigned char *buf = img->planes[plane];
+ const int stride = img->stride[plane];
+ const int w = get_image_plane_width(plane, img);
+ const int h = get_image_plane_height(plane, img);
- for (i = 0; i < 16; i++)
- printf("%02x", md5[i]);
-
- printf(" %s\n", out_fn);
- } else {
- fclose(out);
+ for (y = 0; y < h; ++y) {
+ fwrite(buf, 1, w, file);
+ buf += stride;
+ }
}
}
@@ -413,6 +433,39 @@ void generate_filename(const char *pattern, char *out, size_t q_len,
} while (*p);
}
+static int is_single_file(const char *outfile_pattern) {
+ const char *p = outfile_pattern;
+
+ do {
+ p = strchr(p, '%');
+ if (p && p[1] >= '1' && p[1] <= '9')
+ return 0; // pattern contains sequence number, so it's not unique
+ if (p)
+ p++;
+ } while (p);
+
+ return 1;
+}
+
+static void print_md5(unsigned char digest[16], const char *filename) {
+ int i;
+
+ for (i = 0; i < 16; ++i)
+ printf("%02x", digest[i]);
+ printf(" %s\n", filename);
+}
+
+static FILE *open_outfile(const char *name) {
+ if (strcmp("-", name) == 0) {
+ set_binary_mode(stdout);
+ return stdout;
+ } else {
+ FILE *file = fopen(name, "wb");
+ if (!file)
+ fatal("Failed to output file %s", name);
+ return file;
+ }
+}
int main_loop(int argc, const char **argv_) {
vpx_codec_ctx_t decoder;
@@ -430,11 +483,9 @@ int main_loop(int argc, const char **argv_) {
unsigned long dx_time = 0;
struct arg arg;
char **argv, **argi, **argj;
- const char *outfile_pattern = 0;
- char outfile[PATH_MAX];
+
int single_file;
int use_y4m = 1;
- void *out = NULL;
vpx_codec_dec_cfg_t cfg = {0};
#if CONFIG_VP8_DECODER
vp8_postproc_cfg_t vp8_pp_cfg = {0};
@@ -451,8 +502,13 @@ int main_loop(int argc, const char **argv_) {
int num_external_frame_buffers = 0;
int fb_lru_cache = 0;
vpx_codec_frame_buffer_t *frame_buffers = NULL;
- int display_width = 0;
- int display_height = 0;
+
+ const char *outfile_pattern = NULL;
+ char outfile_name[PATH_MAX] = {0};
+ FILE *outfile = NULL;
+
+ MD5Context md5_ctx;
+ unsigned char md5_digest[16];
struct VpxDecInputContext input = {0};
struct VpxInputContext vpx_input_ctx = {0};
@@ -588,8 +644,7 @@ int main_loop(int argc, const char **argv_) {
infile = strcmp(fn, "-") ? fopen(fn, "rb") : set_binary_mode(stdin);
if (!infile) {
- fprintf(stderr, "Failed to open file '%s'",
- strcmp(fn, "-") ? fn : "stdin");
+ fprintf(stderr, "Failed to open file '%s'", strcmp(fn, "-") ? fn : "stdin");
return EXIT_FAILURE;
}
#if CONFIG_OS_SUPPORT
@@ -613,58 +668,32 @@ int main_loop(int argc, const char **argv_) {
return EXIT_FAILURE;
}
- /* If the output file is not set or doesn't have a sequence number in
- * it, then we only open it once.
- */
outfile_pattern = outfile_pattern ? outfile_pattern : "-";
- single_file = 1;
- {
- const char *p = outfile_pattern;
- do {
- p = strchr(p, '%');
- if (p && p[1] >= '1' && p[1] <= '9') {
- /* pattern contains sequence number, so it's not unique. */
- single_file = 0;
- break;
- }
- if (p)
- p++;
- } while (p);
- }
+ single_file = is_single_file(outfile_pattern);
- if (single_file && !noblit) {
- generate_filename(outfile_pattern, outfile, sizeof(outfile) - 1,
+ if (!noblit && single_file) {
+ generate_filename(outfile_pattern, outfile_name, PATH_MAX,
vpx_input_ctx.width, vpx_input_ctx.height, 0);
- out = out_open(outfile, do_md5);
+ if (do_md5)
+ MD5Init(&md5_ctx);
+ else
+ outfile = open_outfile(outfile_name);
}
if (use_y4m && !noblit) {
- char buffer[128];
-
if (!single_file) {
fprintf(stderr, "YUV4MPEG2 not supported with output patterns,"
" try --i420 or --yv12.\n");
return EXIT_FAILURE;
}
- if (vpx_input_ctx.file_type == FILE_TYPE_WEBM)
+ if (vpx_input_ctx.file_type == FILE_TYPE_WEBM) {
if (webm_guess_framerate(input.webm_ctx, input.vpx_input_ctx)) {
fprintf(stderr, "Failed to guess framerate -- error parsing "
"webm file?\n");
return EXIT_FAILURE;
}
-
-
- /*Note: We can't output an aspect ratio here because IVF doesn't
- store one, and neither does VP8.
- That will have to wait until these tools support WebM natively.*/
- snprintf(buffer, sizeof(buffer), "YUV4MPEG2 W%u H%u F%u:%u I%c ",
- vpx_input_ctx.width, vpx_input_ctx.height,
- vpx_input_ctx.framerate.numerator,
- vpx_input_ctx.framerate.denominator,
- 'p');
- out_put(out, (unsigned char *)buffer,
- (unsigned int)strlen(buffer), do_md5);
+ }
}
/* Try to determine the codec from the fourcc. */
@@ -811,25 +840,20 @@ int main_loop(int argc, const char **argv_) {
show_progress(frame_in, frame_out, dx_time);
if (!noblit) {
- if (frame_out == 1 && img && use_y4m) {
- /* Write out the color format to terminate the header line */
- const char *color =
- img->fmt == VPX_IMG_FMT_444A ? "C444alpha\n" :
- img->fmt == VPX_IMG_FMT_I444 ? "C444\n" :
- img->fmt == VPX_IMG_FMT_I422 ? "C422\n" :
- "C420jpeg\n";
-
- out_put(out, (const unsigned char*)color, strlen(color), do_md5);
- }
+ if (frame_out == 1 && img && single_file && !do_md5 && use_y4m)
+ y4m_write_file_header(outfile,
+ vpx_input_ctx.width, vpx_input_ctx.height,
+ &vpx_input_ctx.framerate, img->fmt);
if (img && do_scale) {
if (frame_out == 1) {
// If the output frames are to be scaled to a fixed display size then
// use the width and height specified in the container. If either of
// these is set to 0, use the display size set in the first frame
- // header.
- display_width = vpx_input_ctx.width;
- display_height = vpx_input_ctx.height;
+ // header. If that is unavailable, use the raw decoded size of the
+ // first decoded frame.
+ int display_width = vpx_input_ctx.width;
+ int display_height = vpx_input_ctx.height;
if (!display_width || !display_height) {
int display_size[2];
if (vpx_codec_control(&decoder, VP9D_GET_DISPLAY_SIZE,
@@ -846,67 +870,40 @@ int main_loop(int argc, const char **argv_) {
display_height, 16);
}
- if (img->d_w != display_width || img->d_h != display_height) {
- assert(img->fmt == VPX_IMG_FMT_I420);
- I420Scale(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y],
- img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U],
- img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V],
- img->d_w, img->d_h,
- scaled_img->planes[VPX_PLANE_Y],
- scaled_img->stride[VPX_PLANE_Y],
- scaled_img->planes[VPX_PLANE_U],
- scaled_img->stride[VPX_PLANE_U],
- scaled_img->planes[VPX_PLANE_V],
- scaled_img->stride[VPX_PLANE_V],
- display_width, display_height,
- kFilterBox);
+ if (img->d_w != scaled_img->d_w || img->d_h != scaled_img->d_h) {
+ vpx_image_scale(img, scaled_img, kFilterBox);
img = scaled_img;
}
}
+
if (img) {
- unsigned int y;
- char out_fn[PATH_MAX];
- uint8_t *buf;
- unsigned int c_w =
- img->x_chroma_shift ? (1 + img->d_w) >> img->x_chroma_shift
- : img->d_w;
- unsigned int c_h =
- img->y_chroma_shift ? (1 + img->d_h) >> img->y_chroma_shift
- : img->d_h;
+ const int PLANES_YUV[] = {VPX_PLANE_Y, VPX_PLANE_U, VPX_PLANE_V};
+ const int PLANES_YVU[] = {VPX_PLANE_Y, VPX_PLANE_V, VPX_PLANE_U};
- if (!single_file) {
- size_t len = sizeof(out_fn) - 1;
+ const int *planes = flipuv ? PLANES_YVU : PLANES_YUV;
- out_fn[len] = '\0';
- generate_filename(outfile_pattern, out_fn, len - 1,
+ if (!single_file) {
+ generate_filename(outfile_pattern, outfile_name, PATH_MAX,
img->d_w, img->d_h, frame_in);
- out = out_open(out_fn, do_md5);
- } else if (use_y4m)
- out_put(out, (unsigned char *)"FRAME\n", 6, do_md5);
-
- buf = img->planes[VPX_PLANE_Y];
-
- for (y = 0; y < img->d_h; y++) {
- out_put(out, buf, img->d_w, do_md5);
- buf += img->stride[VPX_PLANE_Y];
- }
-
- buf = img->planes[flipuv ? VPX_PLANE_V : VPX_PLANE_U];
-
- for (y = 0; y < c_h; y++) {
- out_put(out, buf, c_w, do_md5);
- buf += img->stride[VPX_PLANE_U];
- }
-
- buf = img->planes[flipuv ? VPX_PLANE_U : VPX_PLANE_V];
-
- for (y = 0; y < c_h; y++) {
- out_put(out, buf, c_w, do_md5);
- buf += img->stride[VPX_PLANE_V];
+ if (do_md5) {
+ MD5Init(&md5_ctx);
+ update_image_md5(img, planes, &md5_ctx);
+ MD5Final(md5_digest, &md5_ctx);
+ print_md5(md5_digest, outfile_name);
+ } else {
+ outfile = open_outfile(outfile_name);
+ write_image_file(img, planes, outfile);
+ fclose(outfile);
+ }
+ } else {
+ if (do_md5) {
+ update_image_md5(img, planes, &md5_ctx);
+ } else {
+ if (use_y4m)
+ y4m_write_frame_header(outfile);
+ write_image_file(img, planes, outfile);
+ }
}
-
- if (!single_file)
- out_close(out, out_fn, do_md5);
}
}
@@ -930,8 +927,14 @@ fail:
return EXIT_FAILURE;
}
- if (single_file && !noblit)
- out_close(out, outfile, do_md5);
+ if (!noblit && single_file) {
+ if (do_md5) {
+ MD5Final(md5_digest, &md5_ctx);
+ print_md5(md5_digest, outfile_name);
+ } else {
+ fclose(outfile);
+ }
+ }
if (input.vpx_input_ctx->file_type == FILE_TYPE_WEBM)
webm_free(input.webm_ctx);
diff --git a/vpxenc.c b/vpxenc.c
index 396e43dc9..f19300acf 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -833,8 +833,8 @@ static int compare_img(vpx_image_t *img1, vpx_image_t *img2)
unsigned int i;
match &= (img1->fmt == img2->fmt);
- match &= (img1->w == img2->w);
- match &= (img1->h == img2->h);
+ match &= (img1->d_w == img2->d_w);
+ match &= (img1->d_h == img2->d_h);
for (i = 0; i < img1->d_h; i++)
match &= (memcmp(img1->planes[VPX_PLANE_Y]+i*img1->stride[VPX_PLANE_Y],
diff --git a/y4menc.c b/y4menc.c
new file mode 100644
index 000000000..8321b432e
--- /dev/null
+++ b/y4menc.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./y4menc.h"
+
+void y4m_write_file_header(FILE *file, int width, int height,
+ const struct VpxRational *framerate,
+ vpx_img_fmt_t fmt) {
+ const char *color = fmt == VPX_IMG_FMT_444A ? "C444alpha\n" :
+ fmt == VPX_IMG_FMT_I444 ? "C444\n" :
+ fmt == VPX_IMG_FMT_I422 ? "C422\n" :
+ "C420jpeg\n";
+ // Emit the one-line YUV4MPEG2 stream header; |color| supplies its newline.
+ // Note: We can't output an aspect ratio here because IVF doesn't
+ // store one, and neither does VP8.
+ // That will have to wait until these tools support WebM natively.
+ fprintf(file, "YUV4MPEG2 W%u H%u F%u:%u I%c %s", width, height,
+ framerate->numerator, framerate->denominator, 'p', color);
+}
+
+void y4m_write_frame_header(FILE *file) {
+ fprintf(file, "FRAME\n"); // Per-frame marker; callers write the planes next.
+}
diff --git a/y4menc.h b/y4menc.h
new file mode 100644
index 000000000..e5f7978a7
--- /dev/null
+++ b/y4menc.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef Y4MENC_H_
+#define Y4MENC_H_
+
+#include <stdio.h>
+
+#include "./tools_common.h"
+
+#include "vpx/vpx_decoder.h"
+// Writes the YUV4MPEG2 stream header line (size, frame rate, colorspace tag).
+void y4m_write_file_header(FILE *file, int width, int height,
+ const struct VpxRational *framerate,
+ vpx_img_fmt_t fmt);
+// Writes the "FRAME\n" marker that precedes each frame's data.
+void y4m_write_frame_header(FILE *file);
+
+
+#endif // Y4MENC_H_