diff options
40 files changed, 2822 insertions, 1198 deletions
diff --git a/build/make/configure.sh b/build/make/configure.sh index 83f480a42..b43a4ec18 100755 --- a/build/make/configure.sh +++ b/build/make/configure.sh @@ -1091,6 +1091,15 @@ EOF # Skip the check by setting AS arbitrarily AS=msvs msvs_arch_dir=x86-msvs + vc_version=${tgt_cc##vs} + case $vc_version in + 7|8|9) + echo "${tgt_cc} does not support avx/avx2, disabling....." + RTCD_OPTIONS="${RTCD_OPTIONS}--disable-avx --disable-avx2 " + soft_disable avx + soft_disable avx2 + ;; + esac ;; esac diff --git a/examples.mk b/examples.mk index 88327fed6..2cee298c2 100644 --- a/examples.mk +++ b/examples.mk @@ -37,6 +37,7 @@ vpxdec.DESCRIPTION = Full featured decoder UTILS-$(CONFIG_ENCODERS) += vpxenc.c vpxenc.SRCS += args.c args.h y4minput.c y4minput.h vpxenc.SRCS += tools_common.c tools_common.h +vpxenc.SRCS += webmenc.c webmenc.h vpxenc.SRCS += vpx_ports/mem_ops.h vpxenc.SRCS += vpx_ports/mem_ops_aligned.h vpxenc.SRCS += vpx_ports/vpx_timer.h @@ -50,6 +51,7 @@ UTILS-$(CONFIG_VP8_ENCODER) += vp8_scalable_patterns.c vp8_scalable_patterns.GUID = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C vp8_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder UTILS-$(CONFIG_VP9_ENCODER) += vp9_spatial_scalable_encoder.c +vp9_spatial_scalable_encoder.SRCS += args.c args.h vp9_spatial_scalable_encoder.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder @@ -122,6 +122,7 @@ ifeq ($(CONFIG_VP9_ENCODER),yes) CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_CX_EXPORTS)) CODEC_SRCS-yes += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h + INSTALL-LIBS-yes += include/vpx/svc_context.h INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/% CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h CODEC_DOC_SECTIONS += vp9 vp9_encoder diff --git a/test/svc_test.cc b/test/svc_test.cc new file mode 100644 index 000000000..5941caed8 --- /dev/null +++ b/test/svc_test.cc @@ -0,0 +1,310 @@ +/* + * 
Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <string> +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/decode_test_driver.h" +#include "test/i420_video_source.h" +#include "vpx/svc_context.h" +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" + +namespace { + +using libvpx_test::CodecFactory; +using libvpx_test::Decoder; +using libvpx_test::VP9CodecFactory; + +class SvcTest : public ::testing::Test { + protected: + static const uint32_t kWidth = 352; + static const uint32_t kHeight = 288; + + SvcTest() + : codec_iface_(0), + test_file_name_("hantro_collage_w352h288.yuv"), + decoder_(0) {} + + virtual ~SvcTest() {} + + virtual void SetUp() { + memset(&svc_, 0, sizeof(svc_)); + svc_.first_frame_full_size = 1; + svc_.encoding_mode = INTER_LAYER_PREDICTION_IP; + svc_.log_level = SVC_LOG_DEBUG; + svc_.log_print = 0; + + codec_iface_ = vpx_codec_vp9_cx(); + const vpx_codec_err_t res = + vpx_codec_enc_config_default(codec_iface_, &codec_enc_, 0); + EXPECT_EQ(VPX_CODEC_OK, res); + + codec_enc_.g_w = kWidth; + codec_enc_.g_h = kHeight; + codec_enc_.g_timebase.num = 1; + codec_enc_.g_timebase.den = 60; + codec_enc_.kf_min_dist = 100; + codec_enc_.kf_max_dist = 100; + + vpx_codec_dec_cfg_t dec_cfg = {0}; + VP9CodecFactory codec_factory; + decoder_ = codec_factory.CreateDecoder(dec_cfg, 0); + } + + virtual void TearDown() { + vpx_svc_release(&svc_); + } + + SvcContext svc_; + vpx_codec_ctx_t codec_; + struct vpx_codec_enc_cfg codec_enc_; + vpx_codec_iface_t *codec_iface_; + std::string test_file_name_; + + Decoder *decoder_; +}; + +TEST_F(SvcTest, 
SvcInit) { + svc_.spatial_layers = 0; // use default layers + vpx_codec_err_t res = vpx_svc_init(&svc_, &codec_, codec_iface_, &codec_enc_); + EXPECT_EQ(VPX_CODEC_OK, res); + EXPECT_EQ(VPX_SS_DEFAULT_LAYERS, svc_.spatial_layers); + + res = vpx_svc_init(NULL, &codec_, codec_iface_, &codec_enc_); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); + + res = vpx_svc_init(&svc_, NULL, codec_iface_, &codec_enc_); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); + + res = vpx_svc_init(&svc_, &codec_, NULL, &codec_enc_); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); + + res = vpx_svc_init(&svc_, &codec_, codec_iface_, NULL); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); + + svc_.spatial_layers = 6; // too many layers + res = vpx_svc_init(&svc_, &codec_, codec_iface_, &codec_enc_); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); + + svc_.spatial_layers = 2; + vpx_svc_set_scale_factors(&svc_, "4/16,16*16"); // invalid scale values + res = vpx_svc_init(&svc_, &codec_, codec_iface_, &codec_enc_); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); + + vpx_svc_set_scale_factors(&svc_, "4/16,16/16"); // valid scale values + res = vpx_svc_init(&svc_, &codec_, codec_iface_, &codec_enc_); + EXPECT_EQ(VPX_CODEC_OK, res); +} + +TEST_F(SvcTest, SetOptions) { + vpx_codec_err_t res = vpx_svc_set_options(NULL, "layers=3"); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); + + vpx_svc_set_options(&svc_, NULL); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); + + vpx_svc_set_options(&svc_, "layers=3"); + res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + EXPECT_EQ(VPX_CODEC_OK, res); + EXPECT_EQ(3, svc_.spatial_layers); + + vpx_svc_set_options(&svc_, "not-an-option=1"); + res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); + + vpx_svc_set_options(&svc_, "encoding-mode=alt-ip"); + res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + EXPECT_EQ(VPX_CODEC_OK, res); + EXPECT_EQ(ALT_INTER_LAYER_PREDICTION_IP, svc_.encoding_mode); + + 
vpx_svc_set_options(&svc_, "layers=2 encoding-mode=ip"); + res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + EXPECT_EQ(VPX_CODEC_OK, res); + EXPECT_EQ(2, svc_.spatial_layers); + EXPECT_EQ(INTER_LAYER_PREDICTION_IP, svc_.encoding_mode); + + vpx_svc_set_options(&svc_, "scale-factors=not-scale-factors"); + res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); + + vpx_svc_set_options(&svc_, "scale-factors=1/3,2/3"); + res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + EXPECT_EQ(VPX_CODEC_OK, res); + + vpx_svc_set_options(&svc_, "quantizers=not-quantizers"); + res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); + + vpx_svc_set_options(&svc_, "quantizers=40,45"); + res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + EXPECT_EQ(VPX_CODEC_OK, res); +} + +TEST_F(SvcTest, SetQuantizers) { + vpx_codec_err_t res = vpx_svc_set_quantizers(NULL, "40,30"); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); + + res = vpx_svc_set_quantizers(&svc_, NULL); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); + + svc_.first_frame_full_size = 0; + svc_.spatial_layers = 2; + res = vpx_svc_set_quantizers(&svc_, "40,30"); + EXPECT_EQ(VPX_CODEC_OK, res); + res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + EXPECT_EQ(VPX_CODEC_OK, res); + + res = vpx_svc_set_quantizers(&svc_, "40"); + EXPECT_EQ(VPX_CODEC_OK, res); + res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); +} + +TEST_F(SvcTest, SetScaleFactors) { + vpx_codec_err_t res = vpx_svc_set_scale_factors(NULL, "4/16,16/16"); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); + + res = vpx_svc_set_scale_factors(&svc_, NULL); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); + + svc_.first_frame_full_size = 0; + svc_.spatial_layers = 2; + res = vpx_svc_set_scale_factors(&svc_, "4/16,16/16"); + EXPECT_EQ(VPX_CODEC_OK, 
// Encodes three consecutive frames with a two-layer SVC configuration and
// checks that each encoded frame is reported with the expected keyframe flag
// and decodes without error.
TEST_F(SvcTest, EncodeThreeFrames) {
  svc_.first_frame_full_size = 1;
  svc_.spatial_layers = 2;
  vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
  vpx_svc_set_quantizers(&svc_, "40,30");

  vpx_codec_err_t res =
      vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
  ASSERT_EQ(VPX_CODEC_OK, res);

  libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
                                     codec_enc_.g_timebase.den,
                                     codec_enc_.g_timebase.num, 0, 30);
  // FRAME 1
  video.Begin();
  // this frame is full size, with only one layer
  res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                       video.duration(), VPX_DL_REALTIME);
  ASSERT_EQ(VPX_CODEC_OK, res);
  EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));

  vpx_codec_err_t res_dec = decoder_->DecodeFrame(
      static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
      vpx_svc_get_frame_size(&svc_));
  ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();

  // FRAME 2
  video.Next();
  // this is an I-frame
  res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                       video.duration(), VPX_DL_REALTIME);
  ASSERT_EQ(VPX_CODEC_OK, res);
  EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));

  res_dec = decoder_->DecodeFrame(
      static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
      vpx_svc_get_frame_size(&svc_));
  ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();

  // FRAME 3
  video.Next();
  // this is a P-frame
  res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                       video.duration(), VPX_DL_REALTIME);
  ASSERT_EQ(VPX_CODEC_OK, res);
  EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));

  res_dec = decoder_->DecodeFrame(
      static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
      vpx_svc_get_frame_size(&svc_));
  ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
}
vpx_svc_get_layer_resolution(&svc_, 0, &layer_width, NULL); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); + + res = vpx_svc_get_layer_resolution(&svc_, 0, &layer_width, &layer_height); + EXPECT_EQ(VPX_CODEC_OK, res); + EXPECT_EQ(kWidth * 4 / 16, layer_width); + EXPECT_EQ(kHeight * 4 / 16, layer_height); + + res = vpx_svc_get_layer_resolution(&svc_, 1, &layer_width, &layer_height); + EXPECT_EQ(VPX_CODEC_OK, res); + EXPECT_EQ(kWidth * 8 / 16, layer_width); + EXPECT_EQ(kHeight * 8 / 16, layer_height); +} + +} // namespace diff --git a/test/test.mk b/test/test.mk index e07dc7724..f7a5d15a3 100644 --- a/test/test.mk +++ b/test/test.mk @@ -96,6 +96,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += svc_test.cc endif # VP9 diff --git a/test/video_source.h b/test/video_source.h index 26d53282f..3d01d39b2 100644 --- a/test/video_source.h +++ b/test/video_source.h @@ -18,16 +18,35 @@ namespace libvpx_test { -static FILE *OpenTestDataFile(const std::string& file_name) { - std::string path_to_source = file_name; - const char *kDataPath = getenv("LIBVPX_TEST_DATA_PATH"); - - if (kDataPath) { - path_to_source = kDataPath; - path_to_source += "/"; - path_to_source += file_name; +// Helper macros to ensure LIBVPX_TEST_DATA_PATH is a quoted string. 
// Two-level stringification so that LIBVPX_TEST_DATA_PATH is macro-expanded
// before being turned into a string literal. The symbol must therefore be
// defined WITHOUT surrounding quotes. Both helpers are undefined again right
// after GetDataPath().
#define TO_STRING(S) #S
#define STRINGIFY(S) TO_STRING(S)

// Returns the directory holding the test data files. Lookup order:
//   1. the LIBVPX_TEST_DATA_PATH environment variable, if set;
//   2. the LIBVPX_TEST_DATA_PATH preprocessor symbol, if defined (for
//      environments where environment variables cannot be set);
//   3. the current directory, ".".
static std::string GetDataPath() {
  if (const char *const env_path = getenv("LIBVPX_TEST_DATA_PATH"))
    return env_path;
#ifdef LIBVPX_TEST_DATA_PATH
  return STRINGIFY(LIBVPX_TEST_DATA_PATH);
#else
  return ".";
#endif
}

// The stringification helpers are not needed past this point.
#undef TO_STRING
#undef STRINGIFY

// Opens the named test data file, located under GetDataPath(), for binary
// reading. Returns whatever fopen() returns (NULL on failure).
static FILE *OpenTestDataFile(const std::string& file_name) {
  std::string full_path = GetDataPath();
  full_path += "/";
  full_path += file_name;
  return fopen(full_path.c_str(), "rb");
}
/* Writes one diagnostic line to stderr: an optional "label: " prefix, the
 * printf-style message described by fmt/ap, and a trailing newline.
 * The caller owns ap (va_start/va_end). */
static void log_message(const char *label, const char *fmt, va_list ap) {
  if (label)
    fprintf(stderr, "%s: ", label);
  vfprintf(stderr, fmt, ap);
  fprintf(stderr, "\n");
}

/* Prints the message without a prefix, then hands control to the tool's
 * usage_exit(). */
void die(const char *fmt, ...) {
  va_list args;
  va_start(args, fmt);
  log_message(NULL, fmt, args);
  va_end(args);
  usage_exit();
}

/* Prints the message prefixed with "Fatal: " and terminates the process
 * with EXIT_FAILURE. */
void fatal(const char *fmt, ...) {
  va_list args;
  va_start(args, fmt);
  log_message("Fatal", fmt, args);
  va_end(args);
  exit(EXIT_FAILURE);
}

/* Prints the message prefixed with "Warning: " and returns. */
void warn(const char *fmt, ...) {
  va_list args;
  va_start(args, fmt);
  log_message("Warning", fmt, args);
  va_end(args);
}
*/ -#ifndef TOOLS_COMMON_H -#define TOOLS_COMMON_H +#ifndef TOOLS_COMMON_H_ +#define TOOLS_COMMON_H_ + +#include <stdio.h> + +#define VP8_FOURCC (0x30385056) +#define VP9_FOURCC (0x30395056) +#define VP8_FOURCC_MASK (0x00385056) +#define VP9_FOURCC_MASK (0x00395056) /* Sets a stdio stream into binary mode */ FILE *set_binary_mode(FILE *stream); -#endif +void die(const char *fmt, ...); +void fatal(const char *fmt, ...); +void warn(const char *fmt, ...); + +/* The tool including this file must define usage_exit() */ +void usage_exit(); + +#endif // TOOLS_COMMON_H_ diff --git a/vp9/common/arm/neon/vp9_short_idct32x32_1_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct32x32_1_add_neon.asm new file mode 100644 index 000000000..1f03ff60e --- /dev/null +++ b/vp9/common/arm/neon/vp9_short_idct32x32_1_add_neon.asm @@ -0,0 +1,144 @@ +; +; Copyright (c) 2013 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license and patent +; grant that can be found in the LICENSE file in the root of the source +; tree. All contributing project authors may be found in the AUTHORS +; file in the root of the source tree. +; + + EXPORT |vp9_idct32x32_1_add_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + + ;TODO(hkuang): put the following macros in a seperate + ;file so other idct function could also use them. 
+ MACRO + LD_16x8 $src, $stride + vld1.8 {q8}, [$src], $stride + vld1.8 {q9}, [$src], $stride + vld1.8 {q10}, [$src], $stride + vld1.8 {q11}, [$src], $stride + vld1.8 {q12}, [$src], $stride + vld1.8 {q13}, [$src], $stride + vld1.8 {q14}, [$src], $stride + vld1.8 {q15}, [$src], $stride + MEND + + MACRO + ADD_DIFF_16x8 $diff + vqadd.u8 q8, q8, $diff + vqadd.u8 q9, q9, $diff + vqadd.u8 q10, q10, $diff + vqadd.u8 q11, q11, $diff + vqadd.u8 q12, q12, $diff + vqadd.u8 q13, q13, $diff + vqadd.u8 q14, q14, $diff + vqadd.u8 q15, q15, $diff + MEND + + MACRO + SUB_DIFF_16x8 $diff + vqsub.u8 q8, q8, $diff + vqsub.u8 q9, q9, $diff + vqsub.u8 q10, q10, $diff + vqsub.u8 q11, q11, $diff + vqsub.u8 q12, q12, $diff + vqsub.u8 q13, q13, $diff + vqsub.u8 q14, q14, $diff + vqsub.u8 q15, q15, $diff + MEND + + MACRO + ST_16x8 $dst, $stride + vst1.8 {q8}, [$dst], $stride + vst1.8 {q9}, [$dst], $stride + vst1.8 {q10},[$dst], $stride + vst1.8 {q11},[$dst], $stride + vst1.8 {q12},[$dst], $stride + vst1.8 {q13},[$dst], $stride + vst1.8 {q14},[$dst], $stride + vst1.8 {q15},[$dst], $stride + MEND + +;void vp9_idct32x32_1_add_neon(int16_t *input, uint8_t *dest, +; int dest_stride) +; +; r0 int16_t input +; r1 uint8_t *dest +; r2 int dest_stride + +|vp9_idct32x32_1_add_neon| PROC + push {lr} + pld [r1] + add r3, r1, #16 ; r3 dest + 16 for second loop + ldrsh r0, [r0] + + ; generate cospi_16_64 = 11585 + mov r12, #0x2d00 + add r12, #0x41 + + ; out = dct_const_round_shift(input[0] * cospi_16_64) + mul r0, r0, r12 ; input[0] * cospi_16_64 + add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1)) + asr r0, r0, #14 ; >> DCT_CONST_BITS + + ; out = dct_const_round_shift(out * cospi_16_64) + mul r0, r0, r12 ; out * cospi_16_64 + mov r12, r1 ; save dest + add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1)) + asr r0, r0, #14 ; >> DCT_CONST_BITS + + ; a1 = ROUND_POWER_OF_TWO(out, 6) + add r0, r0, #32 ; + (1 <<((6) - 1)) + asrs r0, r0, #6 ; >> 6 + bge diff_positive_32_32 + +diff_negative_32_32 + neg r0, r0 + 
usat r0, #8, r0 + vdup.u8 q0, r0 + mov r0, #4 + +diff_negative_32_32_loop + sub r0, #1 + LD_16x8 r1, r2 + SUB_DIFF_16x8 q0 + ST_16x8 r12, r2 + + LD_16x8 r1, r2 + SUB_DIFF_16x8 q0 + ST_16x8 r12, r2 + cmp r0, #2 + moveq r1, r3 + moveq r12, r3 + cmp r0, #0 + bne diff_negative_32_32_loop + pop {r3,pc} + +diff_positive_32_32 + usat r0, #8, r0 + vdup.u8 q0, r0 + mov r0, #4 + +diff_positive_32_32_loop + sub r0, #1 + LD_16x8 r1, r2 + ADD_DIFF_16x8 q0 + ST_16x8 r12, r2 + + LD_16x8 r1, r2 + ADD_DIFF_16x8 q0 + ST_16x8 r12, r2 + cmp r0, #2 + moveq r1, r3 + moveq r12, r3 + cmp r0, #0 + bne diff_positive_32_32_loop + pop {pc} + + ENDP ; |vp9_idct32x32_1_add_neon| + END diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h index acb4724e5..452dd6b89 100644 --- a/vp9/common/vp9_onyx.h +++ b/vp9/common/vp9_onyx.h @@ -221,8 +221,6 @@ extern "C" int vp9_set_size_literal(VP9_PTR comp, unsigned int width, unsigned int height); - int vp9_switch_layer(VP9_PTR comp, int layer); - void vp9_set_svc(VP9_PTR comp, int use_svc); int vp9_get_quantizer(VP9_PTR c); diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c index 57ca5c5da..6018e1775 100644 --- a/vp9/common/vp9_pred_common.c +++ b/vp9/common/vp9_pred_common.c @@ -403,8 +403,8 @@ void vp9_set_pred_flag_seg_id(MACROBLOCKD *xd, uint8_t pred_flag) { int vp9_get_segment_id(VP9_COMMON *cm, const uint8_t *segment_ids, BLOCK_SIZE bsize, int mi_row, int mi_col) { const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int bw = 1 << mi_width_log2(bsize); - const int bh = 1 << mi_height_log2(bsize); + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; const int xmis = MIN(cm->mi_cols - mi_col, bw); const int ymis = MIN(cm->mi_rows - mi_row, bh); int x, y, segment_id = INT_MAX; diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 3f3268f2d..6313f3337 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -158,7 +158,7 @@ 
// Reconstructs one 4-pixel row and writes it back to the destination:
// loads 4 bytes from `dest`, widens them to 16-bit lanes by interleaving
// with `zero`, adds the residual `in_x`, packs the sums back to bytes with
// unsigned saturation, stores the low 4 bytes to `dest`, and then advances
// `dest` by `stride`.
//
// Relies on two names from the expansion site: `zero` (an all-zero __m128i)
// and `stride`. Because the macro modifies `dest`, the argument must be a
// plain modifiable pointer variable, not an expression.
#define RECON_AND_STORE4X4(dest, in_x) \
{                                                     \
  __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); \
  d0 = _mm_unpacklo_epi8(d0, zero); \
  d0 = _mm_add_epi16(in_x, d0); \
  d0 = _mm_packus_epi16(d0, d0); \
  *(int *)dest = _mm_cvtsi128_si32(d0); \
  dest += stride; \
}
__m128i *)(input + 8)); - input3 = _mm_loadl_epi64((const __m128i *)(input + 12)); + input0 = _mm_load_si128((const __m128i *)input); + input2 = _mm_load_si128((const __m128i *)(input + 8)); // Construct i3, i1, i3, i1, i2, i0, i2, i0 input0 = _mm_shufflelo_epi16(input0, 0xd8); - input1 = _mm_shufflelo_epi16(input1, 0xd8); + input0 = _mm_shufflehi_epi16(input0, 0xd8); input2 = _mm_shufflelo_epi16(input2, 0xd8); - input3 = _mm_shufflelo_epi16(input3, 0xd8); + input2 = _mm_shufflehi_epi16(input2, 0xd8); + input1 = _mm_unpackhi_epi32(input0, input0); input0 = _mm_unpacklo_epi32(input0, input0); - input1 = _mm_unpacklo_epi32(input1, input1); + input3 = _mm_unpackhi_epi32(input2, input2); input2 = _mm_unpacklo_epi32(input2, input2); - input3 = _mm_unpacklo_epi32(input3, input3); // Stage 1 input0 = _mm_madd_epi16(input0, cst); @@ -59,16 +67,14 @@ void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) { input3 = _mm_srai_epi32(input3, DCT_CONST_BITS); // Stage 2 - input0 = _mm_packs_epi32(input0, zero); - input1 = _mm_packs_epi32(input1, zero); - input2 = _mm_packs_epi32(input2, zero); - input3 = _mm_packs_epi32(input3, zero); + input0 = _mm_packs_epi32(input0, input1); + input1 = _mm_packs_epi32(input2, input3); // Transpose - input1 = _mm_unpacklo_epi16(input0, input1); - input3 = _mm_unpacklo_epi16(input2, input3); - input0 = _mm_unpacklo_epi32(input1, input3); - input1 = _mm_unpackhi_epi32(input1, input3); + input2 = _mm_unpacklo_epi16(input0, input1); + input3 = _mm_unpackhi_epi16(input0, input1); + input0 = _mm_unpacklo_epi32(input2, input3); + input1 = _mm_unpackhi_epi32(input2, input3); // Switch column2, column 3, and then, we got: // input2: column1, column 0; input3: column2, column 3. 
@@ -78,14 +84,9 @@ void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) { // Columns // Construct i3, i1, i3, i1, i2, i0, i2, i0 - input0 = _mm_shufflelo_epi16(input2, 0xd8); - input1 = _mm_shufflehi_epi16(input2, 0xd8); - input2 = _mm_shufflehi_epi16(input3, 0xd8); - input3 = _mm_shufflelo_epi16(input3, 0xd8); - - input0 = _mm_unpacklo_epi32(input0, input0); - input1 = _mm_unpackhi_epi32(input1, input1); - input2 = _mm_unpackhi_epi32(input2, input2); + input0 = _mm_unpacklo_epi32(input2, input2); + input1 = _mm_unpackhi_epi32(input2, input2); + input2 = _mm_unpackhi_epi32(input3, input3); input3 = _mm_unpacklo_epi32(input3, input3); // Stage 1 @@ -105,16 +106,14 @@ void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) { input3 = _mm_srai_epi32(input3, DCT_CONST_BITS); // Stage 2 - input0 = _mm_packs_epi32(input0, zero); - input1 = _mm_packs_epi32(input1, zero); - input2 = _mm_packs_epi32(input2, zero); - input3 = _mm_packs_epi32(input3, zero); + input0 = _mm_packs_epi32(input0, input2); + input1 = _mm_packs_epi32(input1, input3); // Transpose - input1 = _mm_unpacklo_epi16(input0, input1); - input3 = _mm_unpacklo_epi16(input2, input3); - input0 = _mm_unpacklo_epi32(input1, input3); - input1 = _mm_unpackhi_epi32(input1, input3); + input2 = _mm_unpacklo_epi16(input0, input1); + input3 = _mm_unpackhi_epi16(input0, input1); + input0 = _mm_unpacklo_epi32(input2, input3); + input1 = _mm_unpackhi_epi32(input2, input3); // Switch column2, column 3, and then, we got: // input2: column1, column 0; input3: column2, column 3. 
@@ -129,23 +128,31 @@ void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) { input2 = _mm_srai_epi16(input2, 4); input3 = _mm_srai_epi16(input3, 4); -#define RECON_AND_STORE4X4(dest, in_x) \ - { \ - __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); \ - d0 = _mm_unpacklo_epi8(d0, zero); \ - d0 = _mm_add_epi16(in_x, d0); \ - d0 = _mm_packus_epi16(d0, d0); \ - *(int *)dest = _mm_cvtsi128_si32(d0); \ - dest += stride; \ + // Reconstruction and Store + { + __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); + __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2)); + d0 = _mm_unpacklo_epi32(d0, + _mm_cvtsi32_si128(*(const int *) (dest + stride))); + d2 = _mm_unpacklo_epi32(_mm_cvtsi32_si128( + *(const int *) (dest + stride * 3)), d2); + d0 = _mm_unpacklo_epi8(d0, zero); + d2 = _mm_unpacklo_epi8(d2, zero); + d0 = _mm_add_epi16(d0, input2); + d2 = _mm_add_epi16(d2, input3); + d0 = _mm_packus_epi16(d0, d2); + // store input0 + *(int *)dest = _mm_cvtsi128_si32(d0); + // store input1 + d0 = _mm_srli_si128(d0, 4); + *(int *)(dest + stride) = _mm_cvtsi128_si32(d0); + // store input2 + d0 = _mm_srli_si128(d0, 4); + *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0); + // store input3 + d0 = _mm_srli_si128(d0, 4); + *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0); } - - input0 = _mm_srli_si128(input2, 8); - input1 = _mm_srli_si128(input3, 8); - - RECON_AND_STORE4X4(dest, input2); - RECON_AND_STORE4X4(dest, input0); - RECON_AND_STORE4X4(dest, input1); - RECON_AND_STORE4X4(dest, input3); } void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) { diff --git a/vp9/decoder/vp9_dboolhuff.h b/vp9/decoder/vp9_dboolhuff.h index c86451649..fd8e74ca4 100644 --- a/vp9/decoder/vp9_dboolhuff.h +++ b/vp9/decoder/vp9_dboolhuff.h @@ -44,7 +44,7 @@ static int vp9_read(vp9_reader *br, int probability) { VP9_BD_VALUE bigsplit; int count; unsigned int range; - unsigned int split = 1 + (((br->range - 1) * probability) >> 8); + 
unsigned int split = ((br->range * probability) + (256 - probability)) >> 8; if (br->count < 0) vp9_reader_fill(br); diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 1ca578621..abdcf955c 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -91,8 +91,8 @@ static TX_SIZE read_tx_size(VP9_COMMON *const cm, MACROBLOCKD *const xd, static void set_segment_id(VP9_COMMON *cm, BLOCK_SIZE bsize, int mi_row, int mi_col, int segment_id) { const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int bw = 1 << mi_width_log2(bsize); - const int bh = 1 << mi_height_log2(bsize); + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; const int xmis = MIN(cm->mi_cols - mi_col, bw); const int ymis = MIN(cm->mi_rows - mi_row, bh); int x, y; @@ -260,6 +260,16 @@ static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref, mv->col = ref->col + diff.col; } +static COMPPREDMODE_TYPE read_reference_mode(VP9_COMMON *cm, + const MACROBLOCKD *xd, + vp9_reader *r) { + const int ctx = vp9_get_pred_context_comp_inter_inter(cm, xd); + const int mode = vp9_read(r, cm->fc.comp_inter_prob[ctx]); + if (!cm->frame_parallel_decoding_mode) + ++cm->counts.comp_inter[ctx][mode]; + return mode; // SINGLE_PREDICTION_ONLY or COMP_PREDICTION_ONLY +} + // Read the referncence frame static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, vp9_reader *r, @@ -271,27 +281,20 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, ref_frame[0] = vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME); ref_frame[1] = NONE; } else { - const int comp_ctx = vp9_get_pred_context_comp_inter_inter(cm, xd); - int is_comp; - - if (cm->comp_pred_mode == HYBRID_PREDICTION) { - is_comp = vp9_read(r, fc->comp_inter_prob[comp_ctx]); - if (!cm->frame_parallel_decoding_mode) - ++counts->comp_inter[comp_ctx][is_comp]; - } else { - is_comp = cm->comp_pred_mode == COMP_PREDICTION_ONLY; - } + 
const COMPPREDMODE_TYPE mode = (cm->comp_pred_mode == HYBRID_PREDICTION) + ? read_reference_mode(cm, xd, r) + : cm->comp_pred_mode; // FIXME(rbultje) I'm pretty sure this breaks segmentation ref frame coding - if (is_comp) { - const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; - const int ref_ctx = vp9_get_pred_context_comp_ref_p(cm, xd); - const int b = vp9_read(r, fc->comp_ref_prob[ref_ctx]); + if (mode == COMP_PREDICTION_ONLY) { + const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; + const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd); + const int bit = vp9_read(r, fc->comp_ref_prob[ctx]); if (!cm->frame_parallel_decoding_mode) - ++counts->comp_ref[ref_ctx][b]; - ref_frame[fix_ref_idx] = cm->comp_fixed_ref; - ref_frame[!fix_ref_idx] = cm->comp_var_ref[b]; - } else { + ++counts->comp_ref[ctx][bit]; + ref_frame[idx] = cm->comp_fixed_ref; + ref_frame[!idx] = cm->comp_var_ref[bit]; + } else if (mode == SINGLE_PREDICTION_ONLY) { const int ctx0 = vp9_get_pred_context_single_ref_p1(xd); const int bit0 = vp9_read(r, fc->single_ref_prob[ctx0][0]); if (!cm->frame_parallel_decoding_mode) @@ -299,14 +302,16 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, if (bit0) { const int ctx1 = vp9_get_pred_context_single_ref_p2(xd); const int bit1 = vp9_read(r, fc->single_ref_prob[ctx1][1]); - ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME; if (!cm->frame_parallel_decoding_mode) ++counts->single_ref[ctx1][1][bit1]; + ref_frame[0] = bit1 ? 
ALTREF_FRAME : GOLDEN_FRAME; } else { ref_frame[0] = LAST_FRAME; } ref_frame[1] = NONE; + } else { + assert(!"Invalid prediction mode."); } } } @@ -550,8 +555,8 @@ void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, vp9_reader *r) { MODE_INFO *const mi = xd->mi_8x8[0]; const BLOCK_SIZE bsize = mi->mbmi.sb_type; - const int bw = 1 << mi_width_log2(bsize); - const int bh = 1 << mi_height_log2(bsize); + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; const int y_mis = MIN(bh, cm->mi_rows - mi_row); const int x_mis = MIN(bw, cm->mi_cols - mi_col); int x, y, z; diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 1fd9e979a..7e3bbaa16 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -153,47 +153,38 @@ static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) { vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]); } -static void update_mv(vp9_reader *r, vp9_prob *p) { - if (vp9_read(r, NMV_UPDATE_PROB)) - *p = (vp9_read_literal(r, 7) << 1) | 1; +static void update_mv_probs(vp9_prob *p, int n, vp9_reader *r) { + int i; + for (i = 0; i < n; ++i) + if (vp9_read(r, NMV_UPDATE_PROB)) + p[i] = (vp9_read_literal(r, 7) << 1) | 1; } -static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int allow_hp) { - int i, j, k; +static void read_mv_probs(nmv_context *ctx, int allow_hp, vp9_reader *r) { + int i, j; - for (j = 0; j < MV_JOINTS - 1; ++j) - update_mv(r, &mvc->joints[j]); + update_mv_probs(ctx->joints, MV_JOINTS - 1, r); for (i = 0; i < 2; ++i) { - nmv_component *const comp = &mvc->comps[i]; - - update_mv(r, &comp->sign); - - for (j = 0; j < MV_CLASSES - 1; ++j) - update_mv(r, &comp->classes[j]); - - for (j = 0; j < CLASS0_SIZE - 1; ++j) - update_mv(r, &comp->class0[j]); - - for (j = 0; j < MV_OFFSET_BITS; ++j) - update_mv(r, &comp->bits[j]); + nmv_component *const comp_ctx = &ctx->comps[i]; + update_mv_probs(&comp_ctx->sign, 1, r); + 
update_mv_probs(comp_ctx->classes, MV_CLASSES - 1, r); + update_mv_probs(comp_ctx->class0, CLASS0_SIZE - 1, r); + update_mv_probs(comp_ctx->bits, MV_OFFSET_BITS, r); } for (i = 0; i < 2; ++i) { - nmv_component *const comp = &mvc->comps[i]; - + nmv_component *const comp_ctx = &ctx->comps[i]; for (j = 0; j < CLASS0_SIZE; ++j) - for (k = 0; k < 3; ++k) - update_mv(r, &comp->class0_fp[j][k]); - - for (j = 0; j < 3; ++j) - update_mv(r, &comp->fp[j]); + update_mv_probs(comp_ctx->class0_fp[j], 3, r); + update_mv_probs(comp_ctx->fp, 3, r); } if (allow_hp) { for (i = 0; i < 2; ++i) { - update_mv(r, &mvc->comps[i].class0_hp); - update_mv(r, &mvc->comps[i].hp); + nmv_component *const comp_ctx = &ctx->comps[i]; + update_mv_probs(&comp_ctx->class0_hp, 1, r); + update_mv_probs(&comp_ctx->hp, 1, r); } } } @@ -209,20 +200,22 @@ static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) { // Allocate storage for each tile column. // TODO(jzern): when max_threads <= 1 the same storage could be used for each // tile. 
-static void alloc_tile_storage(VP9D_COMP *pbi, int tile_cols) { +static void alloc_tile_storage(VP9D_COMP *pbi, int tile_rows, int tile_cols) { VP9_COMMON *const cm = &pbi->common; const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); - int i, tile_col; + int i, tile_row, tile_col; CHECK_MEM_ERROR(cm, pbi->mi_streams, - vpx_realloc(pbi->mi_streams, tile_cols * + vpx_realloc(pbi->mi_streams, tile_rows * tile_cols * sizeof(*pbi->mi_streams))); - for (tile_col = 0; tile_col < tile_cols; ++tile_col) { - TileInfo tile; - - vp9_tile_init(&tile, cm, 0, tile_col); - pbi->mi_streams[tile_col] = - &cm->mi[cm->mi_rows * tile.mi_col_start]; + for (tile_row = 0; tile_row < tile_rows; ++tile_row) { + for (tile_col = 0; tile_col < tile_cols; ++tile_col) { + TileInfo tile; + vp9_tile_init(&tile, cm, tile_row, tile_col); + pbi->mi_streams[tile_row * tile_cols + tile_col] = + &cm->mi[tile.mi_row_start * cm->mode_info_stride + + tile.mi_col_start]; + } } // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm @@ -248,7 +241,7 @@ static void alloc_tile_storage(VP9D_COMP *pbi, int tile_cols) { static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { struct macroblockd_plane *const pd = &xd->plane[plane]; - int16_t* const qcoeff = BLOCK_OFFSET(pd->qcoeff, block); + int16_t* const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); const int stride = pd->dst.stride; const int eob = pd->eobs[block]; if (eob > 0) { @@ -261,35 +254,35 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, case TX_4X4: tx_type = get_tx_type_4x4(pd->plane_type, xd, raster_block); if (tx_type == DCT_DCT) - xd->itxm_add(qcoeff, dst, stride, eob); + xd->itxm_add(dqcoeff, dst, stride, eob); else - vp9_iht4x4_16_add(qcoeff, dst, stride, tx_type); + vp9_iht4x4_16_add(dqcoeff, dst, stride, tx_type); break; case TX_8X8: tx_type = get_tx_type_8x8(pd->plane_type, xd); - vp9_iht8x8_add(tx_type, qcoeff, dst, stride, eob); 
+ vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); break; case TX_16X16: tx_type = get_tx_type_16x16(pd->plane_type, xd); - vp9_iht16x16_add(tx_type, qcoeff, dst, stride, eob); + vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); break; case TX_32X32: tx_type = DCT_DCT; - vp9_idct32x32_add(qcoeff, dst, stride, eob); + vp9_idct32x32_add(dqcoeff, dst, stride, eob); break; default: assert(!"Invalid transform size"); } if (eob == 1) { - vpx_memset(qcoeff, 0, 2 * sizeof(qcoeff[0])); + vpx_memset(dqcoeff, 0, 2 * sizeof(dqcoeff[0])); } else { if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) - vpx_memset(qcoeff, 0, 4 * (4 << tx_size) * sizeof(qcoeff[0])); + vpx_memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); else if (tx_size == TX_32X32 && eob <= 34) - vpx_memset(qcoeff, 0, 256 * sizeof(qcoeff[0])); + vpx_memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); else - vpx_memset(qcoeff, 0, (16 << (tx_size << 1)) * sizeof(qcoeff[0])); + vpx_memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); } } } @@ -360,16 +353,15 @@ static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, const int bh = num_8x8_blocks_high_lookup[bsize]; const int bw = num_8x8_blocks_wide_lookup[bsize]; const int offset = mi_row * cm->mode_info_stride + mi_col; - - xd->mode_info_stride = cm->mode_info_stride; + const int tile_offset = tile->mi_row_start * cm->mode_info_stride + + tile->mi_col_start; xd->mi_8x8 = cm->mi_grid_visible + offset; xd->prev_mi_8x8 = cm->prev_mi_grid_visible + offset; // we are using the mode info context stream here - xd->mi_8x8[0] = xd->mi_stream; + xd->mi_8x8[0] = xd->mi_stream + offset - tile_offset; xd->mi_8x8[0]->mbmi.sb_type = bsize; - ++xd->mi_stream; // Special case: if prev_mi is NULL, the previous mode info context // cannot be used. 
@@ -768,9 +760,10 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi, } static void setup_tile_context(VP9D_COMP *const pbi, MACROBLOCKD *const xd, - int tile_col) { + int tile_row, int tile_col) { int i; - xd->mi_stream = pbi->mi_streams[tile_col]; + const int tile_cols = 1 << pbi->common.log2_tile_cols; + xd->mi_stream = pbi->mi_streams[tile_row * tile_cols + tile_col]; for (i = 0; i < MAX_MB_PLANE; ++i) { xd->above_context[i] = pbi->above_context[i]; @@ -874,77 +867,68 @@ static size_t get_tile(const uint8_t *const data_end, return size; } -static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { - vp9_reader residual_bc; +typedef struct TileBuffer { + const uint8_t *data; + size_t size; +} TileBuffer; +static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; - - const uint8_t *const data_end = pbi->source + pbi->source_sz; - const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; + TileBuffer tile_buffers[4][1 << 6]; int tile_row, tile_col; + const uint8_t *const data_end = pbi->source + pbi->source_sz; + const uint8_t *end = NULL; + vp9_reader r; + + assert(tile_rows <= 4); + assert(tile_cols <= (1 << 6)); // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. 
vpx_memset(pbi->above_context[0], 0, - sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * - 2 * aligned_mi_cols); + sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * 2 * aligned_cols); vpx_memset(pbi->above_seg_context, 0, - sizeof(*pbi->above_seg_context) * aligned_mi_cols); - - if (pbi->oxcf.inv_tile_order) { - const uint8_t *data_ptr2[4][1 << 6]; - vp9_reader bc_bak = {0}; - - // pre-initialize the offsets, we're going to decode in inverse order - data_ptr2[0][0] = data; - for (tile_row = 0; tile_row < tile_rows; tile_row++) { - for (tile_col = 0; tile_col < tile_cols; tile_col++) { - const int last_tile = - tile_row == tile_rows - 1 && tile_col == tile_cols - 1; - const size_t size = get_tile(data_end, last_tile, &cm->error, &data); - data_ptr2[tile_row][tile_col] = data; - data += size; - } + sizeof(*pbi->above_seg_context) * aligned_cols); + + // Load tile data into tile_buffers + for (tile_row = 0; tile_row < tile_rows; ++tile_row) { + for (tile_col = 0; tile_col < tile_cols; ++tile_col) { + const int last_tile = tile_row == tile_rows - 1 && + tile_col == tile_cols - 1; + const size_t size = get_tile(data_end, last_tile, &cm->error, &data); + TileBuffer *const buf = &tile_buffers[tile_row][tile_col]; + buf->data = data; + buf->size = size; + data += size; } + } - for (tile_row = 0; tile_row < tile_rows; tile_row++) { - for (tile_col = tile_cols - 1; tile_col >= 0; tile_col--) { - TileInfo tile; - - vp9_tile_init(&tile, cm, tile_row, tile_col); - setup_token_decoder(data_ptr2[tile_row][tile_col], data_end, - data_end - data_ptr2[tile_row][tile_col], - &cm->error, &residual_bc); - setup_tile_context(pbi, xd, tile_col); - decode_tile(pbi, &tile, &residual_bc); - if (tile_row == tile_rows - 1 && tile_col == tile_cols - 1) - bc_bak = residual_bc; - } - } - residual_bc = bc_bak; - } else { - for (tile_row = 0; tile_row < tile_rows; tile_row++) { - for (tile_col = 0; tile_col < tile_cols; tile_col++) { - const int last_tile = - tile_row == tile_rows - 1 && tile_col == 
tile_cols - 1; - const size_t size = get_tile(data_end, last_tile, &cm->error, &data); - TileInfo tile; - - vp9_tile_init(&tile, cm, tile_row, tile_col); - - setup_token_decoder(data, data_end, size, &cm->error, &residual_bc); - setup_tile_context(pbi, xd, tile_col); - decode_tile(pbi, &tile, &residual_bc); - data += size; - } + // Decode tiles using data from tile_buffers + for (tile_row = 0; tile_row < tile_rows; ++tile_row) { + for (tile_col = 0; tile_col < tile_cols; ++tile_col) { + const int col = pbi->oxcf.inv_tile_order ? tile_cols - tile_col - 1 + : tile_col; + const int last_tile = tile_row == tile_rows - 1 && + col == tile_cols - 1; + const TileBuffer *const buf = &tile_buffers[tile_row][col]; + TileInfo tile; + + vp9_tile_init(&tile, cm, tile_row, col); + setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &r); + setup_tile_context(pbi, xd, tile_row, col); + decode_tile(pbi, &tile, &r); + + if (last_tile) + end = vp9_reader_find_end(&r); } } - return vp9_reader_find_end(&residual_bc); + return end; } static int tile_worker_hook(void *arg1, void *arg2) { @@ -1023,7 +1007,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { setup_token_decoder(data, data_end, size, &cm->error, &tile_data->bit_reader); - setup_tile_context(pbi, &tile_data->xd, tile_col); + setup_tile_context(pbi, &tile_data->xd, 0, tile_col); worker->had_error = 0; if (i == num_workers - 1 || tile_col == tile_cols - 1) { @@ -1227,7 +1211,7 @@ static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data, for (i = 0; i < PARTITION_TYPES - 1; ++i) vp9_diff_update_prob(&r, &fc->partition_prob[j][i]); - read_mv_probs(&r, nmvc, cm->allow_high_precision_mv); + read_mv_probs(nmvc, cm->allow_high_precision_mv, &r); } return vp9_reader_has_error(&r); @@ -1323,7 +1307,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { } } - alloc_tile_storage(pbi, tile_cols); + alloc_tile_storage(pbi, tile_rows, tile_cols); xd->mi_8x8 = 
cm->mi_grid_visible; xd->mode_info_stride = cm->mode_info_stride; @@ -1335,7 +1319,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { cm->fc = cm->frame_contexts[cm->frame_context_idx]; vp9_zero(cm->counts); for (i = 0; i < MAX_MB_PLANE; ++i) - vp9_zero(xd->plane[i].qcoeff); + vp9_zero(xd->plane[i].dqcoeff); xd->corrupted = 0; new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 70d0d74ef..b8d670b96 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -61,20 +61,22 @@ static const vp9_prob cat6_prob[15] = { 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; +static const int token_to_counttoken[MAX_ENTROPY_TOKENS] = { + ZERO_TOKEN, ONE_TOKEN, TWO_TOKEN, TWO_TOKEN, + TWO_TOKEN, TWO_TOKEN, TWO_TOKEN, TWO_TOKEN, + TWO_TOKEN, TWO_TOKEN, TWO_TOKEN, DCT_EOB_MODEL_TOKEN +}; + #define INCREMENT_COUNT(token) \ do { \ - if (!cm->frame_parallel_decoding_mode) { \ - ++coef_counts[type][ref][band][pt] \ - [token >= TWO_TOKEN ? \ - (token == DCT_EOB_TOKEN ? 
\ - DCT_EOB_MODEL_TOKEN : TWO_TOKEN) : \ - token]; \ - } \ - } while (0) + if (!cm->frame_parallel_decoding_mode) { \ + ++coef_counts[band][pt][token_to_counttoken[token]]; \ + } \ + } while (0); #define WRITE_COEF_CONTINUE(val, token) \ { \ - qcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(r, val) * \ + dqcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(r, val) * \ dq[c > 0] / (1 + (tx_size == TX_32X32)); \ INCREMENT_COUNT(token); \ token_cache[scan[c]] = vp9_pt_energy_class[token]; \ @@ -82,15 +84,14 @@ static const vp9_prob cat6_prob[15] = { continue; \ } -#define ADJUST_COEF(prob, bits_count) \ - do { \ - if (vp9_read(r, prob)) \ - val += 1 << bits_count; \ +#define ADJUST_COEF(prob, bits_count) \ + do { \ + val += (vp9_read(r, prob) << bits_count); \ } while (0); static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, vp9_reader *r, int block_idx, - PLANE_TYPE type, int seg_eob, int16_t *qcoeff_ptr, + PLANE_TYPE type, int seg_eob, int16_t *dqcoeff_ptr, TX_SIZE tx_size, const int16_t *dq, int pt, uint8_t *token_cache) { const FRAME_CONTEXT *const fc = &cm->fc; @@ -102,7 +103,10 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, vp9_prob coef_probs_full[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; uint8_t load_map[COEF_BANDS][PREV_COEF_CONTEXTS] = { { 0 } }; const vp9_prob *prob; - vp9_coeff_count_model *coef_counts = counts->coef[tx_size]; + unsigned int (*coef_counts)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES + 1] = + counts->coef[tx_size][type][ref]; + unsigned int (*eob_branch_count)[PREV_COEF_CONTEXTS] = + counts->eob_branch[tx_size][type][ref]; const int16_t *scan, *nb; const uint8_t *const band_translate = get_band_translate(tx_size); get_scan(xd, tx_size, type, block_idx, &scan, &nb); @@ -117,7 +121,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, band = get_coef_band(band_translate, c); prob = coef_probs[band][pt]; if (!cm->frame_parallel_decoding_mode) - ++counts->eob_branch[tx_size][type][ref][band][pt]; + 
++eob_branch_count[band][pt]; if (!vp9_read(r, prob[EOB_CONTEXT_NODE])) break; @@ -205,7 +209,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, if (c < seg_eob) { if (!cm->frame_parallel_decoding_mode) - ++coef_counts[type][ref][band][pt][DCT_EOB_MODEL_TOKEN]; + ++coef_counts[band][pt][DCT_EOB_MODEL_TOKEN]; } return c; @@ -224,7 +228,7 @@ int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd, pd->left_context + loff); eob = decode_coefs(cm, xd, r, block, - pd->plane_type, seg_eob, BLOCK_OFFSET(pd->qcoeff, block), + pd->plane_type, seg_eob, BLOCK_OFFSET(pd->dqcoeff, block), tx_size, pd->dequant, pt, token_cache); set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, aoff, loff); diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 87bd36c2b..07a67a585 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -1217,7 +1217,7 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { for (tile_col = 0; tile_col < tile_cols; tile_col++) { TileInfo tile; - vp9_tile_init(&tile, cm, 0, tile_col); + vp9_tile_init(&tile, cm, tile_row, tile_col); tok_end = tok[tile_row][tile_col] + cpi->tok_count[tile_row][tile_col]; if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 583c6c8d0..8033a4d15 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -136,7 +136,7 @@ struct macroblock { // note that token_costs is the cost when eob node is skipped vp9_coeff_cost token_costs[TX_SIZES]; - uint8_t token_cache[1024]; + DECLARE_ALIGNED(16, uint8_t, token_cache[1024]); int optimize; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 86332bcf9..702fc70bb 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1490,7 +1490,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, return; } } - assert(mi_height_log2(bsize) == mi_width_log2(bsize)); + 
assert(num_8x8_blocks_wide_lookup[bsize] == + num_8x8_blocks_high_lookup[bsize]); if (bsize == BLOCK_16X16) { set_offsets(cpi, tile, mi_row, mi_col, bsize); @@ -1764,7 +1765,7 @@ static void rd_pick_reference_frame(VP9_COMP *cpi, const TileInfo *const tile, } static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile, - int mi_row, TOKENEXTRA **tp, int *totalrate) { + int mi_row, TOKENEXTRA **tp) { VP9_COMMON * const cm = &cpi->common; int mi_col; @@ -1909,7 +1910,6 @@ static void encode_frame_internal(VP9_COMP *cpi) { MACROBLOCK * const x = &cpi->mb; VP9_COMMON * const cm = &cpi->common; MACROBLOCKD * const xd = &x->e_mbd; - int totalrate; // fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n", // cpi->common.current_video_frame, cpi->common.show_frame, @@ -1925,8 +1925,6 @@ static void encode_frame_internal(VP9_COMP *cpi) { } #endif - totalrate = 0; - vp9_zero(cm->counts.switchable_interp); vp9_zero(cpi->tx_stepdown_count); @@ -1988,7 +1986,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { vp9_tile_init(&tile, cm, tile_row, tile_col); for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end; mi_row += 8) - encode_sb_row(cpi, &tile, mi_row, &tp, &totalrate); + encode_sb_row(cpi, &tile, mi_row, &tp); cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old); assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols)); @@ -2014,10 +2012,6 @@ static void encode_frame_internal(VP9_COMP *cpi) { cpi->sf.skip_encode_frame = 0; } - // 256 rate units to the bit, - // projected_frame_size in units of BYTES - cpi->projected_frame_size = totalrate >> 8; - #if 0 // Keep record of the total distortion this time around for future use cpi->last_frame_distortion = cpi->frame_distortion; diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index e52e8ec1e..75ed8eab7 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -430,11 +430,11 @@ static void encode_block(int plane, int block, BLOCK_SIZE 
plane_bsize, // TODO(jingning): per transformed block zero forcing only enabled for // luma component. will integrate chroma components as well. if (x->zcoeff_blk[tx_size][block] && plane == 0) { - int x, y; + int i, j; pd->eobs[block] = 0; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); - ctx->ta[plane][x] = 0; - ctx->tl[plane][y] = 0; + txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); + ctx->ta[plane][i] = 0; + ctx->tl[plane][j] = 0; return; } diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index c3dbc861d..6a3555d68 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -584,9 +584,9 @@ void vp9_first_pass(VP9_COMP *cpi) { xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME; set_mi_row_col(xd, &tile, mb_row << 1, - 1 << mi_height_log2(xd->mi_8x8[0]->mbmi.sb_type), + num_8x8_blocks_high_lookup[xd->mi_8x8[0]->mbmi.sb_type], mb_col << 1, - 1 << mi_width_log2(xd->mi_8x8[0]->mbmi.sb_type), + num_8x8_blocks_wide_lookup[xd->mi_8x8[0]->mbmi.sb_type], cm->mi_rows, cm->mi_cols); if (cpi->sf.variance_adaptive_quantization) { diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 1d3170a55..f922f900a 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -4242,37 +4242,9 @@ int vp9_set_size_literal(VP9_PTR comp, unsigned int width, return 0; } -int vp9_switch_layer(VP9_PTR comp, int layer) { - VP9_COMP *cpi = (VP9_COMP *)comp; - - if (cpi->use_svc) { - cpi->current_layer = layer; - - // Use buffer i for layer i LST - cpi->lst_fb_idx = layer; - - // Use buffer i-1 for layer i Alt (Inter-layer prediction) - if (layer != 0) cpi->alt_fb_idx = layer - 1; - - // Use the rest for Golden - if (layer < 2 * cpi->number_spatial_layers - NUM_REF_FRAMES) - cpi->gld_fb_idx = cpi->lst_fb_idx; - else - cpi->gld_fb_idx = 2 * cpi->number_spatial_layers - 1 - layer; - - printf("Switching to layer %d:\n", layer); - printf("Using references: LST/GLD/ALT [%d|%d|%d]\n", cpi->lst_fb_idx, - 
cpi->gld_fb_idx, cpi->alt_fb_idx); - } else { - printf("Switching layer not supported. Enable SVC first \n"); - } - return 0; -} - void vp9_set_svc(VP9_PTR comp, int use_svc) { VP9_COMP *cpi = (VP9_COMP *)comp; cpi->use_svc = use_svc; - if (cpi->use_svc) printf("Enabled SVC encoder \n"); return; } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 993919e5b..e49789e0c 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -609,7 +609,7 @@ static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, // TODO(jingning): temporarily enabled only for luma component rd = MIN(rd1, rd2); - if (plane == 0) + if (!xd->lossless && plane == 0) x->zcoeff_blk[tx_size][block] = rd1 > rd2 || !xd->plane[plane].eobs[block]; args->this_rate += args->rate; diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 579f7a6e9..7d4676e97 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -81,6 +81,7 @@ struct tokenize_b_args { MACROBLOCKD *xd; TOKENEXTRA **tp; TX_SIZE tx_size; + uint8_t *token_cache; }; static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize, @@ -99,6 +100,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, VP9_COMP *cpi = args->cpi; MACROBLOCKD *xd = args->xd; TOKENEXTRA **tp = args->tp; + uint8_t *token_cache = args->token_cache; struct macroblockd_plane *pd = &xd->plane[plane]; MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; int pt; /* near block/prev token context index */ @@ -113,7 +115,6 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, vp9_coeff_count *const counts = cpi->coef_counts[tx_size]; vp9_coeff_probs_model *const coef_probs = cpi->common.fc.coef_probs[tx_size]; const int ref = is_inter_block(mbmi); - uint8_t token_cache[1024]; const uint8_t *const band_translate = get_band_translate(tx_size); const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size); int aoff, loff; @@ -197,7 +198,7 @@ void 
vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, const int mb_skip_context = vp9_get_pred_context_mbskip(xd); const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP); - struct tokenize_b_args arg = {cpi, xd, t, mbmi->tx_size}; + struct tokenize_b_args arg = {cpi, xd, t, mbmi->tx_size, cpi->mb.token_cache}; mbmi->skip_coeff = vp9_sb_is_skippable(xd, bsize); if (mbmi->skip_coeff) { diff --git a/vp9/encoder/vp9_vaq.c b/vp9/encoder/vp9_vaq.c index 3179ae301..1f9cb8709 100644 --- a/vp9/encoder/vp9_vaq.c +++ b/vp9/encoder/vp9_vaq.c @@ -118,8 +118,8 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x, ((-xd->mb_to_bottom_edge) >> 3) : 0; if (right_overflow || bottom_overflow) { - int bw = (1 << (mi_width_log2(bs) + 3)) - right_overflow; - int bh = (1 << (mi_height_log2(bs) + 3)) - bottom_overflow; + const int bw = 8 * num_8x8_blocks_wide_lookup[bs] - right_overflow; + const int bh = 8 * num_8x8_blocks_high_lookup[bs] - bottom_overflow; int avg; variance(x->plane[0].src.buf, x->plane[0].src.stride, vp9_64_zeros, 0, bw, bh, &sse, &avg); diff --git a/vp9/encoder/x86/vp9_subpel_variance.asm b/vp9/encoder/x86/vp9_subpel_variance.asm index 533456b77..1a9e4e8b6 100644 --- a/vp9/encoder/x86/vp9_subpel_variance.asm +++ b/vp9/encoder/x86/vp9_subpel_variance.asm @@ -118,6 +118,14 @@ SECTION .text RET %endmacro +%macro INC_SRC_BY_SRC_STRIDE 0 +%if ARCH_X86=1 && CONFIG_PIC=1 + add srcq, src_stridemp +%else + add srcq, src_strideq +%endif +%endmacro + %macro SUBPEL_VARIANCE 1-2 0 ; W %if cpuflag(ssse3) %define bilin_filter_m bilin_filter_m_ssse3 @@ -129,41 +137,85 @@ SECTION .text ; FIXME(rbultje) only bilinear filters use >8 registers, and ssse3 only uses ; 11, not 13, if the registers are ordered correctly. 
May make a minor speed ; difference on Win64 -%ifdef PIC -%if %2 == 1 ; avg -cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \ - x_offset, y_offset, \ - dst, dst_stride, \ - sec, sec_stride, height, sse -%define sec_str sec_strideq -%else -cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, x_offset, y_offset, \ - dst, dst_stride, height, sse -%endif -%define h heightd -%define bilin_filter sseq -%else -%if %2 == 1 ; avg -cglobal sub_pixel_avg_variance%1xh, 7 + 2 * ARCH_X86_64, \ - 7 + 2 * ARCH_X86_64, 13, src, src_stride, \ - x_offset, y_offset, \ - dst, dst_stride, \ - sec, sec_stride, \ - height, sse -%if ARCH_X86_64 -%define h heightd -%define sec_str sec_strideq -%else -%define h dword heightm -%define sec_str sec_stridemp -%endif + +%ifdef PIC ; 64bit PIC + %if %2 == 1 ; avg + cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \ + x_offset, y_offset, \ + dst, dst_stride, \ + sec, sec_stride, height, sse + %define sec_str sec_strideq + %else + cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, x_offset, \ + y_offset, dst, dst_stride, height, sse + %endif + %define h heightd + %define bilin_filter sseq %else -cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \ - dst, dst_stride, height, sse -%define h heightd -%endif -%define bilin_filter bilin_filter_m + %if ARCH_X86=1 && CONFIG_PIC=1 + %if %2 == 1 ; avg + cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \ + x_offset, y_offset, \ + dst, dst_stride, \ + sec, sec_stride, \ + height, sse, g_bilin_filter, g_pw_8 + %define h dword heightm + %define sec_str sec_stridemp + + ;Store bilin_filter and pw_8 location in stack + GET_GOT eax + add esp, 4 ; restore esp + + lea ecx, [GLOBAL(bilin_filter_m)] + mov g_bilin_filterm, ecx + + lea ecx, [GLOBAL(pw_8)] + mov g_pw_8m, ecx + + LOAD_IF_USED 0, 1 ; load eax, ecx back + %else + cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, \ + y_offset, dst, dst_stride, height, 
sse, \ + g_bilin_filter, g_pw_8 + %define h heightd + + ;Store bilin_filter and pw_8 location in stack + GET_GOT eax + add esp, 4 ; restore esp + + lea ecx, [GLOBAL(bilin_filter_m)] + mov g_bilin_filterm, ecx + + lea ecx, [GLOBAL(pw_8)] + mov g_pw_8m, ecx + + LOAD_IF_USED 0, 1 ; load eax, ecx back + %endif + %else + %if %2 == 1 ; avg + cglobal sub_pixel_avg_variance%1xh, 7 + 2 * ARCH_X86_64, \ + 7 + 2 * ARCH_X86_64, 13, src, src_stride, \ + x_offset, y_offset, \ + dst, dst_stride, \ + sec, sec_stride, \ + height, sse + %if ARCH_X86_64 + %define h heightd + %define sec_str sec_strideq + %else + %define h dword heightm + %define sec_str sec_stridemp + %endif + %else + cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, \ + y_offset, dst, dst_stride, height, sse + %define h heightd + %endif + + %define bilin_filter bilin_filter_m + %endif %endif + ASSERT %1 <= 16 ; m6 overflows if w > 16 pxor m6, m6 ; sum pxor m7, m7 ; sse @@ -329,11 +381,22 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \ %define filter_y_b m9 %define filter_rnd m10 %else ; x86-32 or mmx +%if ARCH_X86=1 && CONFIG_PIC=1 +; x_offset == 0, reuse x_offset reg +%define tempq x_offsetq + add y_offsetq, g_bilin_filterm +%define filter_y_a [y_offsetq] +%define filter_y_b [y_offsetq+16] + mov tempq, g_pw_8m +%define filter_rnd [tempq] +%else add y_offsetq, bilin_filter %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] %define filter_rnd [pw_8] %endif +%endif + .x_zero_y_other_loop: %if %1 == 16 movu m0, [srcq] @@ -615,12 +678,23 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \ %define filter_y_a m8 %define filter_y_b m9 %define filter_rnd m10 +%else ;x86_32 +%if ARCH_X86=1 && CONFIG_PIC=1 +; x_offset == 0.5. 
We can reuse x_offset reg +%define tempq x_offsetq + add y_offsetq, g_bilin_filterm +%define filter_y_a [y_offsetq] +%define filter_y_b [y_offsetq+16] + mov tempq, g_pw_8m +%define filter_rnd [tempq] %else add y_offsetq, bilin_filter %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] %define filter_rnd [pw_8] %endif +%endif + %if %1 == 16 movu m0, [srcq] movu m3, [srcq+1] @@ -752,12 +826,23 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \ %define filter_x_a m8 %define filter_x_b m9 %define filter_rnd m10 +%else ; x86-32 +%if ARCH_X86=1 && CONFIG_PIC=1 +;y_offset == 0. We can reuse y_offset reg. +%define tempq y_offsetq + add x_offsetq, g_bilin_filterm +%define filter_x_a [x_offsetq] +%define filter_x_b [x_offsetq+16] + mov tempq, g_pw_8m +%define filter_rnd [tempq] %else add x_offsetq, bilin_filter %define filter_x_a [x_offsetq] %define filter_x_b [x_offsetq+16] %define filter_rnd [pw_8] %endif +%endif + .x_other_y_zero_loop: %if %1 == 16 movu m0, [srcq] @@ -873,12 +958,23 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \ %define filter_x_a m8 %define filter_x_b m9 %define filter_rnd m10 +%else ; x86-32 +%if ARCH_X86=1 && CONFIG_PIC=1 +; y_offset == 0.5. We can reuse y_offset reg. +%define tempq y_offsetq + add x_offsetq, g_bilin_filterm +%define filter_x_a [x_offsetq] +%define filter_x_b [x_offsetq+16] + mov tempq, g_pw_8m +%define filter_rnd [tempq] %else add x_offsetq, bilin_filter %define filter_x_a [x_offsetq] %define filter_x_b [x_offsetq+16] %define filter_rnd [pw_8] %endif +%endif + %if %1 == 16 movu m0, [srcq] movu m1, [srcq+1] @@ -1057,6 +1153,21 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \ %define filter_y_a m10 %define filter_y_b m11 %define filter_rnd m12 +%else ; x86-32 +%if ARCH_X86=1 && CONFIG_PIC=1 +; In this case, there is NO unused register. Used src_stride register. 
Later, +; src_stride has to be loaded from stack when it is needed. +%define tempq src_strideq + mov tempq, g_bilin_filterm + add x_offsetq, tempq + add y_offsetq, tempq +%define filter_x_a [x_offsetq] +%define filter_x_b [x_offsetq+16] +%define filter_y_a [y_offsetq] +%define filter_y_b [y_offsetq+16] + + mov tempq, g_pw_8m +%define filter_rnd [tempq] %else add x_offsetq, bilin_filter add y_offsetq, bilin_filter @@ -1066,6 +1177,8 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \ %define filter_y_b [y_offsetq+16] %define filter_rnd [pw_8] %endif +%endif + ; x_offset == bilin interpolation && y_offset == bilin interpolation %if %1 == 16 movu m0, [srcq] @@ -1093,7 +1206,9 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \ %endif psraw m0, 4 psraw m2, 4 - add srcq, src_strideq + + INC_SRC_BY_SRC_STRIDE + packuswb m0, m2 .x_other_y_other_loop: %if cpuflag(ssse3) @@ -1163,7 +1278,7 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \ SUM_SSE m0, m1, m2, m3, m6, m7 mova m0, m4 - add srcq, src_strideq + INC_SRC_BY_SRC_STRIDE add dstq, dst_strideq %else ; %1 < 16 movh m0, [srcq] @@ -1184,12 +1299,17 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \ %if cpuflag(ssse3) packuswb m0, m0 %endif - add srcq, src_strideq + + INC_SRC_BY_SRC_STRIDE + .x_other_y_other_loop: movh m2, [srcq] movh m1, [srcq+1] - movh m4, [srcq+src_strideq] - movh m3, [srcq+src_strideq+1] + + INC_SRC_BY_SRC_STRIDE + movh m4, [srcq] + movh m3, [srcq+1] + %if cpuflag(ssse3) punpcklbw m2, m1 punpcklbw m4, m3 @@ -1253,7 +1373,7 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \ SUM_SSE m0, m1, m2, m3, m6, m7 mova m0, m4 - lea srcq, [srcq+src_strideq*2] + INC_SRC_BY_SRC_STRIDE lea dstq, [dstq+dst_strideq*2] %endif %if %2 == 1 ; avg diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 0badb0855..db36506a0 100644 --- a/vp9/vp9_common.mk +++ 
b/vp9/vp9_common.mk @@ -123,6 +123,7 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct8x8_1_add_neon$(AS VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct8x8_add_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct16x16_1_add_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct16x16_add_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct32x32_1_add_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct32x32_add_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_iht4x4_add_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_iht8x8_add_neon$(ASM) diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 4d3967059..194203967 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -442,8 +442,6 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx, MAP(VP8E_SET_ARNR_TYPE, xcfg.arnr_type); MAP(VP8E_SET_TUNING, xcfg.tuning); MAP(VP8E_SET_CQ_LEVEL, xcfg.cq_level); - MAP(VP9E_SET_MAX_Q, ctx->cfg.rc_max_quantizer); - MAP(VP9E_SET_MIN_Q, ctx->cfg.rc_min_quantizer); MAP(VP8E_SET_MAX_INTRA_BITRATE_PCT, xcfg.rc_max_intra_bitrate_pct); MAP(VP9E_SET_LOSSLESS, xcfg.lossless); MAP(VP9E_SET_FRAME_PARALLEL_DECODING, xcfg.frame_parallel_decoding_mode); @@ -1009,66 +1007,40 @@ static vpx_codec_err_t vp9e_set_scalemode(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t vp9e_set_width(vpx_codec_alg_priv_t *ctx, int ctr_id, - va_list args) { - unsigned int *data = va_arg(args, unsigned int *); - if (data) { - int res; - res = vp9_set_size_literal(ctx->cpi, *data, 0); - if (!res) { - return VPX_CODEC_OK; - } else { - return VPX_CODEC_INVALID_PARAM; - } - } else { - return VPX_CODEC_INVALID_PARAM; - } +static vpx_codec_err_t vp9e_set_svc(vpx_codec_alg_priv_t *ctx, int ctr_id, + va_list args) { + int data = va_arg(args, int); + vp9_set_svc(ctx->cpi, data); + return VPX_CODEC_OK; } -static vpx_codec_err_t 
vp9e_set_height(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { - unsigned int *data = va_arg(args, unsigned int *); - - if (data) { - int res; - res = vp9_set_size_literal(ctx->cpi, 0, *data); +static vpx_codec_err_t vp9e_set_svc_parameters(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + vpx_svc_parameters_t *data = va_arg(args, vpx_svc_parameters_t *); + VP9_COMP *cpi = (VP9_COMP *)ctx->cpi; + vpx_svc_parameters_t params; - if (!res) { - return VPX_CODEC_OK; - } else { - return VPX_CODEC_INVALID_PARAM; - } - } else { + if (data == NULL) { return VPX_CODEC_INVALID_PARAM; } -} - -static vpx_codec_err_t vp9e_set_layer(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { - unsigned int *data = va_arg(args, unsigned int *); - if (data) { - int res; - res = 0; + params = *(vpx_svc_parameters_t *)data; - res = vp9_switch_layer(ctx->cpi, *data); + cpi->current_layer = params.layer; + cpi->lst_fb_idx = params.lst_fb_idx; + cpi->gld_fb_idx = params.gld_fb_idx; + cpi->alt_fb_idx = params.alt_fb_idx; - if (!res) { - return VPX_CODEC_OK; - } else { - return VPX_CODEC_INVALID_PARAM; - } - } else { + if (vp9_set_size_literal(ctx->cpi, params.width, params.height) != 0) { return VPX_CODEC_INVALID_PARAM; } -} -static vpx_codec_err_t vp9e_set_svc(vpx_codec_alg_priv_t *ctx, int ctr_id, - va_list args) { - int data = va_arg(args, int); - vp9_set_svc(ctx->cpi, data); + ctx->cfg.rc_max_quantizer = params.max_quantizer; + ctx->cfg.rc_min_quantizer = params.min_quantizer; + + set_vp9e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg); + vp9_change_config(ctx->cpi, &ctx->oxcf); + return VPX_CODEC_OK; } @@ -1096,16 +1068,12 @@ static vpx_codec_ctrl_fn_map_t vp9e_ctf_maps[] = { {VP8E_SET_ARNR_TYPE, set_param}, {VP8E_SET_TUNING, set_param}, {VP8E_SET_CQ_LEVEL, set_param}, - {VP9E_SET_MAX_Q, set_param}, - {VP9E_SET_MIN_Q, set_param}, {VP8E_SET_MAX_INTRA_BITRATE_PCT, set_param}, {VP9E_SET_LOSSLESS, set_param}, {VP9E_SET_FRAME_PARALLEL_DECODING, set_param}, 
{VP9_GET_REFERENCE, get_reference}, - {VP9E_SET_WIDTH, vp9e_set_width}, - {VP9E_SET_HEIGHT, vp9e_set_height}, - {VP9E_SET_LAYER, vp9e_set_layer}, {VP9E_SET_SVC, vp9e_set_svc}, + {VP9E_SET_SVC_PARAMETERS, vp9e_set_svc_parameters}, { -1, NULL}, }; diff --git a/vp9_spatial_scalable_encoder.c b/vp9_spatial_scalable_encoder.c index 8bb582ffa..9acfa29bc 100644 --- a/vp9_spatial_scalable_encoder.c +++ b/vp9_spatial_scalable_encoder.c @@ -13,61 +13,101 @@ * VP9 encoding scheme based on spatial scalability for video applications * that benefit from a scalable bitstream. */ -#include <stdio.h> -#include <stdlib.h> + #include <stdarg.h> -#include <time.h> +#include <stdlib.h> #include <string.h> -#include <unistd.h> -#include <libgen.h> -#define VPX_CODEC_DISABLE_COMPAT 1 -#include "vpx/vpx_encoder.h" +#include <time.h> +#include "./args.h" +#include "vpx/svc_context.h" #include "vpx/vp8cx.h" -#define interface (vpx_codec_vp9_cx()) -#define fourcc 0x30395056 -#define IVF_FILE_HDR_SZ (32) -#define IVF_FRAME_HDR_SZ (12) -#define NUM_BUFFERS 8 - -char *input_filename; -char *output_filename; -unsigned int number_frames_to_code = 60 * 60; -unsigned int number_frames_to_skip = 0; -unsigned int number_spatial_layers = 5; -unsigned int key_period = 100; - -typedef enum ENCODING_MODE { - INTER_LAYER_PREDICTION_I, - INTER_LAYER_PREDICTION_IP, - USE_GOLDEN_FRAME -} ENCODING_MODE; - -static void mem_put_le16(char *mem, unsigned int val) { +#include "vpx/vpx_encoder.h" + +#define VP90_FOURCC 0x30395056 + +static const struct arg_enum_list encoding_mode_enum[] = { + {"i", INTER_LAYER_PREDICTION_I}, + {"alt-ip", ALT_INTER_LAYER_PREDICTION_IP}, + {"ip", INTER_LAYER_PREDICTION_IP}, + {"gf", USE_GOLDEN_FRAME}, + {NULL, 0} +}; + +static const arg_def_t encoding_mode_arg = ARG_DEF_ENUM( + "m", "encoding-mode", 1, "Encoding mode algorithm", encoding_mode_enum); +static const arg_def_t skip_frames_arg = + ARG_DEF("s", "skip-frames", 1, "input frames to skip"); +static const arg_def_t frames_arg = 
+ ARG_DEF("f", "frames", 1, "number of frames to encode"); +static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width"); +static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height"); +static const arg_def_t timebase_arg = + ARG_DEF("t", "timebase", 1, "timebase (num/den)"); +static const arg_def_t bitrate_arg = ARG_DEF( + "b", "target-bitrate", 1, "encoding bitrate, in kilobits per second"); +static const arg_def_t layers_arg = + ARG_DEF("l", "layers", 1, "number of SVC layers"); +static const arg_def_t kf_dist_arg = + ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes"); +static const arg_def_t scale_factors_arg = + ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)"); +static const arg_def_t quantizers_arg = + ARG_DEF("q", "quantizers", 1, "quantizers (lowest to highest layer)"); +static const arg_def_t dummy_frame_arg = + ARG_DEF("z", "dummy-frame", 1, "make first frame blank and full size"); + +static const arg_def_t *svc_args[] = { + &encoding_mode_arg, &frames_arg, &width_arg, &height_arg, + &timebase_arg, &bitrate_arg, &skip_frames_arg, &layers_arg, + &kf_dist_arg, &scale_factors_arg, &quantizers_arg, &dummy_frame_arg, + NULL +}; + +static const SVC_ENCODING_MODE default_encoding_mode = + INTER_LAYER_PREDICTION_IP; +static const uint32_t default_frames_to_skip = 0; +static const uint32_t default_frames_to_code = 60 * 60; +static const uint32_t default_width = 1920; +static const uint32_t default_height = 1080; +static const uint32_t default_timebase_num = 1; +static const uint32_t default_timebase_den = 60; +static const uint32_t default_bitrate = 1000; +static const uint32_t default_spatial_layers = 5; +static const uint32_t default_kf_dist = 100; +static const int default_use_dummy_frame = 1; + +typedef struct { + char *input_filename; + char *output_filename; + uint32_t frames_to_code; + uint32_t frames_to_skip; +} AppInput; + +static void mem_put_le16(char *mem, uint32_t val) { mem[0] 
= val; mem[1] = val >> 8; } -static void mem_put_le32(char *mem, unsigned int val) { +static void mem_put_le32(char *mem, uint32_t val) { mem[0] = val; mem[1] = val >> 8; mem[2] = val >> 16; mem[3] = val >> 24; } -static void usage(char *program_name) { - printf( - "Usage: %s [-f frames] [-s skip_frames] [-w width] [-h height] \n\t" - "[-n rate_num] [-d rate_den] [-b bitrate] [-l layers] " - "<input_filename> <output_filename>\n", - basename(program_name)); +static void usage(const char *exec_name) { + fprintf(stderr, "Usage: %s <options> input_filename output_filename\n", + exec_name); + fprintf(stderr, "Options:\n"); + arg_show_usage(stderr, svc_args); exit(EXIT_FAILURE); } -static void die(const char *fmt, ...) { +void die(const char *fmt, ...) { va_list ap; va_start(ap, fmt); - vprintf(fmt, ap); + vfprintf(stderr, fmt, ap); if (fmt[strlen(fmt) - 1] != '\n') printf("\n"); exit(EXIT_FAILURE); } @@ -81,407 +121,261 @@ static void die_codec(vpx_codec_ctx_t *ctx, const char *s) { } static int read_frame(FILE *f, vpx_image_t *img) { - size_t nbytes, to_read; + size_t nbytes; int res = 1; + int plane; - to_read = img->w * img->h * 3 / 2; - nbytes = fread(img->planes[0], 1, to_read, f); - if (nbytes != to_read) { - res = 0; - if (nbytes > 0) - printf("Warning: Read partial frame. Check your width & height!\n"); + for (plane = 0; plane < 3; ++plane) { + uint8_t *ptr; + const int w = (plane ? (1 + img->d_w) / 2 : img->d_w); + const int h = (plane ? (1 + img->d_h) / 2 : img->d_h); + int r; + + switch (plane) { + case 1: + ptr = img->planes[VPX_PLANE_U]; + break; + case 2: + ptr = img->planes[VPX_PLANE_V]; + break; + default: + ptr = img->planes[plane]; + } + for (r = 0; r < h; ++r) { + const int to_read = w; + + nbytes = fread(ptr, 1, to_read, f); + if (nbytes != to_read) { + res = 0; + if (nbytes > 0) + printf("Warning: Read partial frame. 
Check your width & height!\n"); + break; + } + ptr += img->stride[plane]; + } + if (!res) break; } return res; } -static int read_dummy_frame(vpx_image_t *img) { - size_t to_read; - - to_read = img->w * img->h * 3 / 2; - memset(img->planes[0], 129, to_read); +static int create_dummy_frame(vpx_image_t *img) { + const size_t buf_size = img->w * img->h * 3 / 2; + memset(img->planes[0], 129, buf_size); return 1; } -static void write_ivf_file_header(FILE *outfile, const vpx_codec_enc_cfg_t *cfg, +static void write_ivf_file_header(FILE *outfile, + uint32_t width, uint32_t height, + int timebase_num, int timebase_den, int frame_cnt) { char header[32]; - if (cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS) return; header[0] = 'D'; header[1] = 'K'; header[2] = 'I'; header[3] = 'F'; - mem_put_le16(header + 4, 0); /* version */ - mem_put_le16(header + 6, 32); /* headersize */ - mem_put_le32(header + 8, fourcc); /* headersize */ - mem_put_le16(header + 12, cfg->g_w); /* width */ - mem_put_le16(header + 14, cfg->g_h); /* height */ - mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */ - mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */ - mem_put_le32(header + 24, frame_cnt); /* length */ - mem_put_le32(header + 28, 0); /* unused */ + mem_put_le16(header + 4, 0); /* version */ + mem_put_le16(header + 6, 32); /* headersize */ + mem_put_le32(header + 8, VP90_FOURCC); /* fourcc */ + mem_put_le16(header + 12, width); /* width */ + mem_put_le16(header + 14, height); /* height */ + mem_put_le32(header + 16, timebase_den); /* rate */ + mem_put_le32(header + 20, timebase_num); /* scale */ + mem_put_le32(header + 24, frame_cnt); /* length */ + mem_put_le32(header + 28, 0); /* unused */ (void)fwrite(header, 1, 32, outfile); } -static void write_ivf_frame_header(FILE *outfile, - const vpx_codec_cx_pkt_t *pkt) { +static void write_ivf_frame_header(FILE *outfile, vpx_codec_pts_t pts, + size_t sz) { char header[12]; - vpx_codec_pts_t pts; - - if (pkt->kind != 
VPX_CODEC_CX_FRAME_PKT) return; - - pts = pkt->data.frame.pts; - mem_put_le32(header, pkt->data.frame.sz); + mem_put_le32(header, (uint32_t)sz); mem_put_le32(header + 4, pts & 0xFFFFFFFF); mem_put_le32(header + 8, pts >> 32); (void)fwrite(header, 1, 12, outfile); } -static void check_parameters() { - if (number_spatial_layers > 5) die("Cannot support more than 5 layers"); -} - -static void parse_command_line(int argc, char **argv, - vpx_codec_enc_cfg_t *cfg) { - unsigned int width = 1920; - unsigned int height = 1080; - unsigned int timebase_num = 1; - unsigned int timebase_den = 60; - unsigned int bitrate = 1000; - int c; +static void parse_command_line(int argc, const char **argv_, + AppInput *app_input, SvcContext *svc_ctx, + vpx_codec_enc_cfg_t *enc_cfg) { + struct arg arg; + char **argv, **argi, **argj; vpx_codec_err_t res; - opterr = 0; - while ((c = getopt(argc, argv, "f:w:h:n:d:b:s:l:p:")) != -1) switch (c) { - case 'f': - number_frames_to_code = atoi(optarg); - break; - case 'w': - width = atoi(optarg); - break; - case 'h': - height = atoi(optarg); - break; - case 'n': - timebase_num = atoi(optarg); - break; - case 'd': - timebase_den = atoi(optarg); - break; - case 'b': - bitrate = atoi(optarg); - break; - case 's': - number_frames_to_skip = atoi(optarg); - break; - case 'l': - number_spatial_layers = atoi(optarg); - break; - case 'p': - key_period = atoi(optarg); - break; - case '?': - usage(argv[0]); - } - - // Parse required parameters - if (argc - optind != 2) { - usage(argv[0]); - } + // initialize SvcContext with parameters that will be passed to vpx_svc_init + svc_ctx->log_level = SVC_LOG_DEBUG; + svc_ctx->spatial_layers = default_spatial_layers; + svc_ctx->encoding_mode = default_encoding_mode; + // when using a dummy frame, that frame is only encoded to be full size + svc_ctx->first_frame_full_size = default_use_dummy_frame; - input_filename = argv[optind]; - output_filename = argv[optind + 1]; - - if (width < 16 || width % 2 || height < 16 || 
height % 2) - die("Invalid resolution: %d x %d", width, height); - - /* Populate encoder configuration */ - res = vpx_codec_enc_config_default(interface, cfg, 0); + // start with default encoder configuration + res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0); if (res) { die("Failed to get config: %s\n", vpx_codec_err_to_string(res)); } - printf( - "Codec %s\nframes: %d, skip: %d, layers: %d\n" - "width %d, height: %d, \n" - "num: %d, den: %d, bitrate: %d, \n" - "key period: %d \n", - vpx_codec_iface_name(interface), number_frames_to_code, - number_frames_to_skip, number_spatial_layers, width, height, timebase_num, - timebase_den, bitrate, key_period); - - // Do minimal check at the application level. Encoder parameters will be - // checked internally - check_parameters(); - - cfg->rc_target_bitrate = bitrate; - cfg->g_w = width; - cfg->g_h = height; - cfg->g_timebase.num = timebase_num; - cfg->g_timebase.den = timebase_den; - cfg->ss_number_layers = number_spatial_layers; -} - -static void set_default_configuration(vpx_codec_enc_cfg_t *cfg) { - /* Real time parameters */ - cfg->rc_dropframe_thresh = 0; - cfg->rc_end_usage = VPX_CBR; - cfg->rc_resize_allowed = 0; - cfg->rc_min_quantizer = 33; - cfg->rc_max_quantizer = 33; - cfg->rc_undershoot_pct = 100; - cfg->rc_overshoot_pct = 15; - cfg->rc_buf_initial_sz = 500; - cfg->rc_buf_optimal_sz = 600; - cfg->rc_buf_sz = 1000; - - /* Enable error resilient mode */ - cfg->g_error_resilient = 1; - cfg->g_lag_in_frames = 0; - - /* Disable automatic keyframe placement */ - cfg->kf_mode = VPX_KF_DISABLED; - cfg->kf_min_dist = cfg->kf_max_dist = 3000; -} - -static void initialize_codec(vpx_codec_ctx_t *codec, vpx_codec_enc_cfg_t *cfg) { - int max_intra_size_pct; - - /* Initialize codec */ - if (vpx_codec_enc_init(codec, interface, cfg, VPX_CODEC_USE_PSNR)) - die_codec(codec, "Failed to initialize encoder"); - - vpx_codec_control(codec, VP9E_SET_SVC, 1); - /* Cap CPU & first I-frame size */ - 
vpx_codec_control(codec, VP8E_SET_CPUUSED, 1); - vpx_codec_control(codec, VP8E_SET_STATIC_THRESHOLD, 1); - vpx_codec_control(codec, VP8E_SET_NOISE_SENSITIVITY, 1); - vpx_codec_control(codec, VP8E_SET_TOKEN_PARTITIONS, 1); - - max_intra_size_pct = - (int)(((double)cfg->rc_buf_optimal_sz * 0.5) * - ((double)cfg->g_timebase.den / cfg->g_timebase.num) / 10.0); - /* printf ("max_intra_size_pct=%d\n", max_intra_size_pct); */ + // update enc_cfg with app default values + enc_cfg->g_w = default_width; + enc_cfg->g_h = default_height; + enc_cfg->g_timebase.num = default_timebase_num; + enc_cfg->g_timebase.den = default_timebase_den; + enc_cfg->rc_target_bitrate = default_bitrate; + enc_cfg->kf_min_dist = default_kf_dist; + enc_cfg->kf_max_dist = default_kf_dist; + + // initialize AppInput with default values + app_input->frames_to_code = default_frames_to_code; + app_input->frames_to_skip = default_frames_to_skip; + + // process command line options + argv = argv_dup(argc - 1, argv_ + 1); + for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) { + arg.argv_step = 1; + + if (arg_match(&arg, &encoding_mode_arg, argi)) { + svc_ctx->encoding_mode = arg_parse_enum_or_int(&arg); + } else if (arg_match(&arg, &frames_arg, argi)) { + app_input->frames_to_code = arg_parse_uint(&arg); + } else if (arg_match(&arg, &width_arg, argi)) { + enc_cfg->g_w = arg_parse_uint(&arg); + } else if (arg_match(&arg, &height_arg, argi)) { + enc_cfg->g_h = arg_parse_uint(&arg); + } else if (arg_match(&arg, &height_arg, argi)) { + enc_cfg->g_h = arg_parse_uint(&arg); + } else if (arg_match(&arg, &timebase_arg, argi)) { + enc_cfg->g_timebase = arg_parse_rational(&arg); + } else if (arg_match(&arg, &bitrate_arg, argi)) { + enc_cfg->rc_target_bitrate = arg_parse_uint(&arg); + } else if (arg_match(&arg, &skip_frames_arg, argi)) { + app_input->frames_to_skip = arg_parse_uint(&arg); + } else if (arg_match(&arg, &layers_arg, argi)) { + svc_ctx->spatial_layers = arg_parse_uint(&arg); + } else if 
(arg_match(&arg, &kf_dist_arg, argi)) { + enc_cfg->kf_min_dist = arg_parse_uint(&arg); + enc_cfg->kf_max_dist = enc_cfg->kf_min_dist; + } else if (arg_match(&arg, &scale_factors_arg, argi)) { + vpx_svc_set_scale_factors(svc_ctx, arg.val); + } else if (arg_match(&arg, &quantizers_arg, argi)) { + vpx_svc_set_quantizers(svc_ctx, arg.val); + } else if (arg_match(&arg, &dummy_frame_arg, argi)) { + svc_ctx->first_frame_full_size = arg_parse_int(&arg); + } else { + ++argj; + } + } - vpx_codec_control(codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, max_intra_size_pct); -} + // Check for unrecognized options + for (argi = argv; *argi; ++argi) + if (argi[0][0] == '-' && strlen(argi[0]) > 1) + die("Error: Unrecognized option %s\n", *argi); -static int calculate_layer(int frame_cnt, int number_spatial_layers) { - if (frame_cnt == 0) - return 0; - else - return (frame_cnt + number_spatial_layers - 1) % number_spatial_layers; -} + if (argv[0] == NULL || argv[1] == 0) { + usage(argv_[0]); + } + app_input->input_filename = argv[0]; + app_input->output_filename = argv[1]; + free(argv); -static void switch_to_layer(int layer, unsigned int initial_width, - unsigned int initial_height, - vpx_codec_ctx_t *codec) { - // Set layer size - int scaling_factor_num[MAX_LAYERS] = {2, 1, 4, 2, 1}; - int scaling_factor_den[MAX_LAYERS] = {9, 3, 9, 3, 1}; - - int quantizer[MAX_LAYERS] = {60, 53, 39, 33, 27}; - - unsigned int current_width; - unsigned int current_height; - - current_width = initial_width * - scaling_factor_num[layer + 5 - number_spatial_layers] / - scaling_factor_den[layer + 5 - number_spatial_layers]; - current_height = initial_height * - scaling_factor_num[layer + 5 - number_spatial_layers] / - scaling_factor_den[layer + 5 - number_spatial_layers]; - - current_width += current_width % 2; - current_height += current_height % 2; - - vpx_codec_control(codec, VP9E_SET_WIDTH, ¤t_width); - vpx_codec_control(codec, VP9E_SET_HEIGHT, ¤t_height); - - // Set layer context - vpx_codec_control(codec, 
VP9E_SET_LAYER, &layer); - vpx_codec_control(codec, VP9E_SET_MAX_Q, - quantizer[layer + 5 - number_spatial_layers]); - vpx_codec_control(codec, VP9E_SET_MIN_Q, - quantizer[layer + 5 - number_spatial_layers]); -} + if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 || + enc_cfg->g_h % 2) + die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h); -static int get_flag(int is_I_frame_in_layer, int layer, ENCODING_MODE mode) { - // First layer - switch (mode) { - case INTER_LAYER_PREDICTION_I: - if (is_I_frame_in_layer && layer == 0) return VPX_EFLAG_FORCE_KF; - if (layer == 0) - return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; - else if (is_I_frame_in_layer) - return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_LAST; - else - return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; - break; - - case INTER_LAYER_PREDICTION_IP: - if (is_I_frame_in_layer && layer == 0) return VPX_EFLAG_FORCE_KF; - if (layer == 0) - return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; - else if (is_I_frame_in_layer) - return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_LAST; - else - return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF; - break; - - case USE_GOLDEN_FRAME: - if (is_I_frame_in_layer && layer == 0) return VPX_EFLAG_FORCE_KF; - if (2 * number_spatial_layers - NUM_BUFFERS <= layer) { - if (layer == 0) - return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_REF_ARF; - else if (is_I_frame_in_layer) - return VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | - VP8_EFLAG_NO_REF_LAST; - else - return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; - } else { - if (layer == 0) - return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; - else if (is_I_frame_in_layer) - return 
VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_LAST; - else - return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; - } - break; - default: - return VPX_EFLAG_FORCE_KF; - } + printf( + "Codec %s\nframes: %d, skip: %d\n" + "mode: %d, layers: %d\n" + "width %d, height: %d,\n" + "num: %d, den: %d, bitrate: %d,\n" + "gop size: %d, use_dummy_frame: %d\n", + vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code, + app_input->frames_to_skip, svc_ctx->encoding_mode, + svc_ctx->spatial_layers, enc_cfg->g_w, enc_cfg->g_h, + enc_cfg->g_timebase.num, enc_cfg->g_timebase.den, + enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist, + svc_ctx->first_frame_full_size); } -int main(int argc, char **argv) { - FILE *infile, *outfile[MAX_LAYERS]; +int main(int argc, const char **argv) { + AppInput app_input = {0}; + FILE *infile, *outfile; vpx_codec_ctx_t codec; - vpx_codec_enc_cfg_t cfg; - int frame_cnt = 0; + vpx_codec_enc_cfg_t enc_cfg; + SvcContext svc_ctx; + uint32_t i; + uint32_t frame_cnt = 0; vpx_image_t raw; - int frame_avail = 1; - int got_data = 0; - int i; - int frames_in_layer[MAX_LAYERS] = {0}; - clock_t before; - clock_t after; + vpx_codec_err_t res; int pts = 0; /* PTS starts at 0 */ int frame_duration = 1; /* 1 timebase tick per frame */ - parse_command_line(argc, argv, &cfg); + memset(&svc_ctx, 0, sizeof(svc_ctx)); + svc_ctx.log_print = 1; + parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg); // Allocate image buffer - if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, cfg.g_w, cfg.g_h, 32)) - die("Failed to allocate image", cfg.g_w, cfg.g_h); - - set_default_configuration(&cfg); - - /* Open input file */ - if (!(infile = fopen(input_filename, "rb"))) - die("Failed to open %s for reading", argv[1]); - - /* Open output file */ - for (i = 0; i < number_spatial_layers; i++) { - char file_name[512]; - snprintf(file_name, sizeof(file_name), "%s_%d.ivf", output_filename, i); - 
if (!(outfile[i] = fopen(file_name, "wb"))) - die("Failed to open %s for writing", file_name); - write_ivf_file_header(outfile[i], &cfg, 0); - } + if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32)) + die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h); + + if (!(infile = fopen(app_input.input_filename, "rb"))) + die("Failed to open %s for reading\n", app_input.input_filename); + + if (!(outfile = fopen(app_input.output_filename, "wb"))) + die("Failed to open %s for writing\n", app_input.output_filename); + + // Initialize codec + if (vpx_svc_init(&svc_ctx, &codec, vpx_codec_vp9_cx(), &enc_cfg) != + VPX_CODEC_OK) + die("Failed to initialize encoder\n"); - initialize_codec(&codec, &cfg); + write_ivf_file_header(outfile, enc_cfg.g_w, enc_cfg.g_h, + enc_cfg.g_timebase.num, enc_cfg.g_timebase.den, 0); // skip initial frames - for (i = 0; i < number_frames_to_skip; i++) { + for (i = 0; i < app_input.frames_to_skip; ++i) { read_frame(infile, &raw); } - before = clock(); - // Encoding frames - while ((frame_avail || got_data) && - frame_cnt <= number_frames_to_code * number_spatial_layers) { - int flags = 0; - vpx_codec_iter_t iter = NULL; - const vpx_codec_cx_pkt_t *pkt; - - int layer = calculate_layer(frame_cnt, number_spatial_layers); - int is_I_frame_in_layer = - (((frame_cnt - 1) / number_spatial_layers % key_period) == 0); - int is_dummy = (frame_cnt == 0); - - if (is_dummy) { // Dummy frame - flags = VPX_EFLAG_FORCE_KF; - frame_avail = read_dummy_frame(&raw); - - } else { // Regular frame - // Read a new frame only at the base layer - if (layer == 0) frame_avail = read_frame(infile, &raw); - switch_to_layer(layer, cfg.g_w, cfg.g_h, &codec); - flags = get_flag(is_I_frame_in_layer, layer, INTER_LAYER_PREDICTION_I); + // Encode frames + while (frame_cnt <= app_input.frames_to_code) { + if (frame_cnt == 0 && svc_ctx.first_frame_full_size) { + create_dummy_frame(&raw); + } else { + if (!read_frame(infile, &raw)) break; } - - // Actual 
Encoding - if (vpx_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags, - VPX_DL_REALTIME)) + res = vpx_svc_encode(&svc_ctx, &codec, &raw, pts, frame_duration, + VPX_DL_REALTIME); + printf("%s", vpx_svc_get_message(&svc_ctx)); + if (res != VPX_CODEC_OK) { die_codec(&codec, "Failed to encode frame"); - - got_data = 0; - // Process data / Get PSNR statistics - while ((pkt = vpx_codec_get_cx_data(&codec, &iter))) { - got_data = 1; - switch (pkt->kind) { - case VPX_CODEC_CX_FRAME_PKT: - for (i = layer; i < number_spatial_layers; i++) { - write_ivf_frame_header(outfile[i], pkt); - (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, - outfile[i]); - frames_in_layer[i]++; - } - break; - case VPX_CODEC_PSNR_PKT: - if (frame_cnt != 0) - printf( - "Processed Frame %d, layer %d, PSNR(Total/Y/U/V): " - "%2.3f %2.3f %2.3f %2.3f \n", - (frame_cnt - 1) / number_spatial_layers + 1, layer, - pkt->data.psnr.psnr[0], pkt->data.psnr.psnr[1], - pkt->data.psnr.psnr[2], pkt->data.psnr.psnr[3]); - break; - default: - break; - } } - frame_cnt++; - // TODO(ivan): Modify ts later if(!layer) + if (vpx_svc_get_frame_size(&svc_ctx) > 0) { + write_ivf_frame_header(outfile, pts, vpx_svc_get_frame_size(&svc_ctx)); + (void)fwrite(vpx_svc_get_buffer(&svc_ctx), 1, + vpx_svc_get_frame_size(&svc_ctx), outfile); + } + ++frame_cnt; pts += frame_duration; } - // end while - after = clock(); - printf("Processed %d frames in different resolutions in %ld ms.\n", - frame_cnt - 1, (int)(after - before) / (CLOCKS_PER_SEC / 1000)); + printf("Processed %d frames\n", frame_cnt - svc_ctx.first_frame_full_size); fclose(infile); - if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); - /* Try to rewrite the output file headers with the actual frame count */ - for (i = 0; i < number_spatial_layers; i++) { - if (!fseek(outfile[i], 0, SEEK_SET)) { - write_ivf_file_header(outfile[i], &cfg, frames_in_layer[i]); - } - fclose(outfile[i]); + // rewrite the output file headers with the 
actual frame count + if (!fseek(outfile, 0, SEEK_SET)) { + write_ivf_file_header(outfile, enc_cfg.g_w, enc_cfg.g_h, + enc_cfg.g_timebase.num, enc_cfg.g_timebase.den, + frame_cnt); } + fclose(outfile); + vpx_img_free(&raw); + + // display average size, psnr + printf("%s", vpx_svc_dump_statistics(&svc_ctx)); + + vpx_svc_release(&svc_ctx); return EXIT_SUCCESS; } diff --git a/vpx/exports_enc b/vpx/exports_enc index 3d5674926..1d9340c67 100644 --- a/vpx/exports_enc +++ b/vpx/exports_enc @@ -6,3 +6,17 @@ text vpx_codec_get_cx_data text vpx_codec_get_global_headers text vpx_codec_get_preview_frame text vpx_codec_set_cx_data_buf +text vpx_svc_dump_statistics +text vpx_svc_encode +text vpx_svc_free +text vpx_svc_get_buffer +text vpx_svc_get_encode_frame_count +text vpx_svc_get_frame_size +text vpx_svc_get_message +text vpx_svc_init +text vpx_svc_is_keyframe +text vpx_svc_release +text vpx_svc_set_keyframe +text vpx_svc_set_options +text vpx_svc_set_quantizers +text vpx_svc_set_scale_factors diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c new file mode 100644 index 000000000..57d21dce5 --- /dev/null +++ b/vpx/src/svc_encodeframe.c @@ -0,0 +1,981 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/** + * @file + * VP9 SVC encoding support via libvpx + */ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#define VPX_DISABLE_CTRL_TYPECHECKS 1 +#define VPX_CODEC_DISABLE_COMPAT 1 +#include "vpx/svc_context.h" +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" + +#if defined(__MINGW32__) && !defined(MINGW_HAS_SECURE_API) +#define strtok_r strtok_s +// proto from /usr/x86_64-w64-mingw32/include/sec_api/string_s.h +_CRTIMP char *__cdecl strtok_s(char *str, const char *delim, char **context); +#endif + +#ifdef _MSC_VER +#define strdup _strdup +#define strtok_r strtok_s +#endif + +#define SVC_REFERENCE_FRAMES 8 +#define SUPERFRAME_SLOTS (8) +#define SUPERFRAME_BUFFER_SIZE (SUPERFRAME_SLOTS * sizeof(uint32_t) + 2) +#define OPTION_BUFFER_SIZE 256 + +static const char *DEFAULT_QUANTIZER_VALUES = "60,53,39,33,27"; +static const char *DEFAULT_SCALE_FACTORS = "4/16,5/16,7/16,11/16,16/16"; + +typedef struct SvcInternal { + char options[OPTION_BUFFER_SIZE]; // set by vpx_svc_set_options + char quantizers[OPTION_BUFFER_SIZE]; // set by vpx_svc_set_quantizers + char scale_factors[OPTION_BUFFER_SIZE]; // set by vpx_svc_set_scale_factors + + // values extracted from option, quantizers + int scaling_factor_num[VPX_SS_MAX_LAYERS]; + int scaling_factor_den[VPX_SS_MAX_LAYERS]; + int quantizer[VPX_SS_MAX_LAYERS]; + + // accumulated statistics + double psnr_in_layer[VPX_SS_MAX_LAYERS]; + uint32_t bytes_in_layer[VPX_SS_MAX_LAYERS]; + + // codec encoding values + int width; // width of highest layer + int height; // height of highest layer + int kf_dist; // distance between keyframes + + // state variables + int encode_frame_count; + int frame_within_gop; + vpx_enc_frame_flags_t enc_frame_flags; + int layers; + int layer; + int is_keyframe; + + size_t frame_size; + size_t buffer_size; + void *buffer; + + char message_buffer[2048]; + vpx_codec_ctx_t *codec_ctx; +} SvcInternal; + +// Superframe is used to generate an index of individual 
frames (i.e., layers) +struct Superframe { + int count; + uint32_t sizes[SUPERFRAME_SLOTS]; + uint32_t magnitude; + uint8_t buffer[SUPERFRAME_BUFFER_SIZE]; + size_t index_size; +}; + +// One encoded frame layer +struct LayerData { + void *buf; // compressed data buffer + size_t size; // length of compressed data + struct LayerData *next; +}; + +// create LayerData from encoder output +static struct LayerData *ld_create(void *buf, size_t size) { + struct LayerData *const layer_data = malloc(sizeof(*layer_data)); + if (layer_data == NULL) { + return NULL; + } + layer_data->buf = malloc(size); + if (layer_data->buf == NULL) { + free(layer_data); + return NULL; + } + memcpy(layer_data->buf, buf, size); + layer_data->size = size; + return layer_data; +} + +// free LayerData +static void ld_free(struct LayerData *layer_data) { + if (layer_data) { + if (layer_data->buf) { + free(layer_data->buf); + layer_data->buf = NULL; + } + free(layer_data); + } +} + +// add layer data to list +static void ld_list_add(struct LayerData **list, struct LayerData *layer_data) { + struct LayerData **p = list; + + while (*p != NULL) p = &(*p)->next; + *p = layer_data; + layer_data->next = NULL; +} + +// get accumulated size of layer data +static size_t ld_list_get_buffer_size(struct LayerData *list) { + struct LayerData *p; + size_t size = 0; + + for (p = list; p != NULL; p = p->next) { + size += p->size; + } + return size; +} + +// copy layer data to buffer +static void ld_list_copy_to_buffer(struct LayerData *list, uint8_t *buffer) { + struct LayerData *p; + + for (p = list; p != NULL; p = p->next) { + buffer[0] = 1; + memcpy(buffer, p->buf, p->size); + buffer += p->size; + } +} + +// free layer data list +static void ld_list_free(struct LayerData *list) { + struct LayerData *p = list; + + while (p) { + list = list->next; + ld_free(p); + p = list; + } +} + +static void sf_create_index(struct Superframe *sf) { + uint8_t marker = 0xc0; + int i; + uint32_t mag, mask; + uint8_t *bufp; + + if 
(sf->count == 0 || sf->count >= 8) return; + + // Add the number of frames to the marker byte + marker |= sf->count - 1; + + // Choose the magnitude + for (mag = 0, mask = 0xff; mag < 4; ++mag) { + if (sf->magnitude < mask) break; + mask <<= 8; + mask |= 0xff; + } + marker |= mag << 3; + + // Write the index + sf->index_size = 2 + (mag + 1) * sf->count; + bufp = sf->buffer; + + *bufp++ = marker; + for (i = 0; i < sf->count; ++i) { + int this_sz = sf->sizes[i]; + uint32_t j; + + for (j = 0; j <= mag; ++j) { + *bufp++ = this_sz & 0xff; + this_sz >>= 8; + } + } + *bufp++ = marker; +} + +static SvcInternal *get_svc_internal(SvcContext *svc_ctx) { + if (svc_ctx == NULL) return NULL; + if (svc_ctx->internal == NULL) { + SvcInternal *const si = malloc(sizeof(*si)); + if (si != NULL) { + memset(si, 0, sizeof(*si)); + } + svc_ctx->internal = si; + } + return svc_ctx->internal; +} + +static const SvcInternal *get_const_svc_internal(const SvcContext *svc_ctx) { + if (svc_ctx == NULL) return NULL; + return svc_ctx->internal; +} + +static void svc_log_reset(SvcContext *svc_ctx) { + SvcInternal *const si = (SvcInternal *)svc_ctx->internal; + si->message_buffer[0] = '\0'; +} + +static int svc_log(SvcContext *svc_ctx, int level, const char *fmt, ...) 
{ + char buf[512]; + int retval = 0; + va_list ap; + SvcInternal *const si = get_svc_internal(svc_ctx); + + if (level > svc_ctx->log_level) { + return retval; + } + + va_start(ap, fmt); + retval = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + if (svc_ctx->log_print) { + printf("%s", buf); + } else { + strncat(si->message_buffer, buf, + sizeof(si->message_buffer) - strlen(si->message_buffer) - 1); + } + + if (level == SVC_LOG_ERROR) { + si->codec_ctx->err_detail = si->message_buffer; + } + return retval; +} + +static vpx_codec_err_t set_option_encoding_mode(SvcContext *svc_ctx, + const char *value_str) { + if (strcmp(value_str, "i") == 0) { + svc_ctx->encoding_mode = INTER_LAYER_PREDICTION_I; + } else if (strcmp(value_str, "alt-ip") == 0) { + svc_ctx->encoding_mode = ALT_INTER_LAYER_PREDICTION_IP; + } else if (strcmp(value_str, "ip") == 0) { + svc_ctx->encoding_mode = INTER_LAYER_PREDICTION_IP; + } else if (strcmp(value_str, "gf") == 0) { + svc_ctx->encoding_mode = USE_GOLDEN_FRAME; + } else { + svc_log(svc_ctx, SVC_LOG_ERROR, "invalid encoding mode: %s", value_str); + return VPX_CODEC_INVALID_PARAM; + } + return VPX_CODEC_OK; +} + +static vpx_codec_err_t parse_quantizer_values(SvcContext *svc_ctx, + const char *quantizer_values) { + char *input_string; + char *token; + const char *delim = ","; + char *save_ptr; + int found = 0; + int i, q; + int res = VPX_CODEC_OK; + SvcInternal *const si = get_svc_internal(svc_ctx); + + if (quantizer_values == NULL || strlen(quantizer_values) == 0) { + input_string = strdup(DEFAULT_QUANTIZER_VALUES); + } else { + input_string = strdup(quantizer_values); + } + + token = strtok_r(input_string, delim, &save_ptr); + for (i = 0; i < svc_ctx->spatial_layers; ++i) { + if (token != NULL) { + q = atoi(token); + if (q <= 0 || q > 100) { + svc_log(svc_ctx, SVC_LOG_ERROR, + "svc-quantizer-values: invalid value %s\n", token); + res = VPX_CODEC_INVALID_PARAM; + break; + } + token = strtok_r(NULL, delim, &save_ptr); + found = i + 1; + 
} else { + q = 0; + } + si->quantizer[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] = q; + } + if (res == VPX_CODEC_OK && found != svc_ctx->spatial_layers) { + svc_log(svc_ctx, SVC_LOG_ERROR, + "svc: quantizers: %d values required, but only %d specified\n", + svc_ctx->spatial_layers, found); + res = VPX_CODEC_INVALID_PARAM; + } + free(input_string); + return res; +} + +static void log_invalid_scale_factor(SvcContext *svc_ctx, const char *value) { + svc_log(svc_ctx, SVC_LOG_ERROR, "svc scale-factors: invalid value %s\n", + value); +} + +static vpx_codec_err_t parse_scale_factors(SvcContext *svc_ctx, + const char *scale_factors) { + char *input_string; + char *token; + const char *delim = ","; + char *save_ptr; + int found = 0; + int i; + int64_t num, den; + int res = VPX_CODEC_OK; + SvcInternal *const si = get_svc_internal(svc_ctx); + + if (scale_factors == NULL || strlen(scale_factors) == 0) { + input_string = strdup(DEFAULT_SCALE_FACTORS); + } else { + input_string = strdup(scale_factors); + } + token = strtok_r(input_string, delim, &save_ptr); + for (i = 0; i < svc_ctx->spatial_layers; ++i) { + num = den = 0; + if (token != NULL) { + num = strtol(token, &token, 10); + if (num <= 0) { + log_invalid_scale_factor(svc_ctx, token); + res = VPX_CODEC_INVALID_PARAM; + break; + } + if (*token++ != '/') { + log_invalid_scale_factor(svc_ctx, token); + res = VPX_CODEC_INVALID_PARAM; + break; + } + den = strtol(token, &token, 10); + if (den <= 0) { + log_invalid_scale_factor(svc_ctx, token); + res = VPX_CODEC_INVALID_PARAM; + break; + } + token = strtok_r(NULL, delim, &save_ptr); + found = i + 1; + } + si->scaling_factor_num[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] = + (int)num; + si->scaling_factor_den[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] = + (int)den; + } + if (res == VPX_CODEC_OK && found != svc_ctx->spatial_layers) { + svc_log(svc_ctx, SVC_LOG_ERROR, + "svc: scale-factors: %d values required, but only %d specified\n", + svc_ctx->spatial_layers, 
found); + res = VPX_CODEC_INVALID_PARAM; + } + free(input_string); + return res; +} + +/** + * Parse SVC encoding options + * Format: encoding-mode=<svc_mode>,layers=<layer_count> + * scale-factors=<n1>/<d1>,<n2>/<d2>,... + * quantizers=<q1>,<q2>,... + * svc_mode = [i|ip|alt_ip|gf] + */ +static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) { + char *input_string; + char *option_name; + char *option_value; + char *input_ptr; + int res = VPX_CODEC_OK; + + if (options == NULL) return VPX_CODEC_OK; + input_string = strdup(options); + + // parse option name + option_name = strtok_r(input_string, "=", &input_ptr); + while (option_name != NULL) { + // parse option value + option_value = strtok_r(NULL, " ", &input_ptr); + if (option_value == NULL) { + svc_log(svc_ctx, SVC_LOG_ERROR, "option missing value: %s\n", + option_name); + res = VPX_CODEC_INVALID_PARAM; + break; + } + if (strcmp("encoding-mode", option_name) == 0) { + res = set_option_encoding_mode(svc_ctx, option_value); + if (res != VPX_CODEC_OK) break; + } else if (strcmp("layers", option_name) == 0) { + svc_ctx->spatial_layers = atoi(option_value); + } else if (strcmp("scale-factors", option_name) == 0) { + res = parse_scale_factors(svc_ctx, option_value); + if (res != VPX_CODEC_OK) break; + } else if (strcmp("quantizers", option_name) == 0) { + res = parse_quantizer_values(svc_ctx, option_value); + if (res != VPX_CODEC_OK) break; + } else { + svc_log(svc_ctx, SVC_LOG_ERROR, "invalid option: %s\n", option_name); + res = VPX_CODEC_INVALID_PARAM; + break; + } + option_name = strtok_r(NULL, "=", &input_ptr); + } + free(input_string); + return res; +} + +vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options) { + SvcInternal *const si = get_svc_internal(svc_ctx); + if (svc_ctx == NULL || options == NULL || si == NULL) { + return VPX_CODEC_INVALID_PARAM; + } + strncpy(si->options, options, sizeof(si->options)); + si->options[sizeof(si->options) - 1] = '\0'; + return 
VPX_CODEC_OK; +} + +vpx_codec_err_t vpx_svc_set_quantizers(SvcContext *svc_ctx, + const char *quantizers) { + SvcInternal *const si = get_svc_internal(svc_ctx); + if (svc_ctx == NULL || quantizers == NULL || si == NULL) { + return VPX_CODEC_INVALID_PARAM; + } + strncpy(si->quantizers, quantizers, sizeof(si->quantizers)); + si->quantizers[sizeof(si->quantizers) - 1] = '\0'; + return VPX_CODEC_OK; +} + +vpx_codec_err_t vpx_svc_set_scale_factors(SvcContext *svc_ctx, + const char *scale_factors) { + SvcInternal *const si = get_svc_internal(svc_ctx); + if (svc_ctx == NULL || scale_factors == NULL || si == NULL) { + return VPX_CODEC_INVALID_PARAM; + } + strncpy(si->scale_factors, scale_factors, sizeof(si->scale_factors)); + si->scale_factors[sizeof(si->scale_factors) - 1] = '\0'; + return VPX_CODEC_OK; +} + +vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, + vpx_codec_iface_t *iface, + vpx_codec_enc_cfg_t *enc_cfg) { + int max_intra_size_pct; + vpx_codec_err_t res; + SvcInternal *const si = get_svc_internal(svc_ctx); + if (svc_ctx == NULL || codec_ctx == NULL || iface == NULL || + enc_cfg == NULL) { + return VPX_CODEC_INVALID_PARAM; + } + if (si == NULL) return VPX_CODEC_MEM_ERROR; + + si->codec_ctx = codec_ctx; + + si->width = enc_cfg->g_w; + si->height = enc_cfg->g_h; + + if (enc_cfg->kf_max_dist < 2) { + svc_log(svc_ctx, SVC_LOG_ERROR, "key frame distance too small: %d\n", + enc_cfg->kf_max_dist); + return VPX_CODEC_INVALID_PARAM; + } + si->kf_dist = enc_cfg->kf_max_dist; + + if (svc_ctx->spatial_layers == 0) + svc_ctx->spatial_layers = VPX_SS_DEFAULT_LAYERS; + if (svc_ctx->spatial_layers < 1 || + svc_ctx->spatial_layers > VPX_SS_MAX_LAYERS) { + svc_log(svc_ctx, SVC_LOG_ERROR, "spatial layers: invalid value: %d\n", + svc_ctx->spatial_layers); + return VPX_CODEC_INVALID_PARAM; + } + // use SvcInternal value for number of layers to enable forcing single layer + // for first frame + si->layers = svc_ctx->spatial_layers; + + res = 
parse_quantizer_values(svc_ctx, si->quantizers); + if (res != VPX_CODEC_OK) return res; + + res = parse_scale_factors(svc_ctx, si->scale_factors); + if (res != VPX_CODEC_OK) return res; + + // parse aggregate command line options + res = parse_options(svc_ctx, si->options); + if (res != VPX_CODEC_OK) return res; + + // modify encoder configuration + enc_cfg->ss_number_layers = si->layers; + enc_cfg->kf_mode = VPX_KF_DISABLED; + enc_cfg->g_pass = VPX_RC_ONE_PASS; + // Lag in frames not currently supported + enc_cfg->g_lag_in_frames = 0; + + // TODO(ivanmaltz): determine if these values need to be set explicitly for + // svc, or if the normal default/override mechanism can be used + enc_cfg->rc_dropframe_thresh = 0; + enc_cfg->rc_end_usage = VPX_CBR; + enc_cfg->rc_resize_allowed = 0; + enc_cfg->rc_min_quantizer = 33; + enc_cfg->rc_max_quantizer = 33; + enc_cfg->rc_undershoot_pct = 100; + enc_cfg->rc_overshoot_pct = 15; + enc_cfg->rc_buf_initial_sz = 500; + enc_cfg->rc_buf_optimal_sz = 600; + enc_cfg->rc_buf_sz = 1000; + enc_cfg->g_error_resilient = 1; + + // Initialize codec + res = vpx_codec_enc_init(codec_ctx, iface, enc_cfg, VPX_CODEC_USE_PSNR); + if (res != VPX_CODEC_OK) { + svc_log(svc_ctx, SVC_LOG_ERROR, "svc_enc_init error\n"); + return res; + } + + vpx_codec_control(codec_ctx, VP9E_SET_SVC, 1); + vpx_codec_control(codec_ctx, VP8E_SET_CPUUSED, 1); + vpx_codec_control(codec_ctx, VP8E_SET_STATIC_THRESHOLD, 1); + vpx_codec_control(codec_ctx, VP8E_SET_NOISE_SENSITIVITY, 1); + vpx_codec_control(codec_ctx, VP8E_SET_TOKEN_PARTITIONS, 1); + + max_intra_size_pct = + (int)(((double)enc_cfg->rc_buf_optimal_sz * 0.5) * + ((double)enc_cfg->g_timebase.den / enc_cfg->g_timebase.num) / 10.0); + vpx_codec_control(codec_ctx, VP8E_SET_MAX_INTRA_BITRATE_PCT, + max_intra_size_pct); + return VPX_CODEC_OK; +} + +// SVC Algorithm flags - these get mapped to VP8_EFLAG_* defined in vp8cx.h + +// encoder should reference the last frame +#define USE_LAST (1 << 0) + +// encoder should 
reference the alt ref frame +#define USE_ARF (1 << 1) + +// encoder should reference the golden frame +#define USE_GF (1 << 2) + +// encoder should copy current frame to the last frame buffer +#define UPDATE_LAST (1 << 3) + +// encoder should copy current frame to the alt ref frame buffer +#define UPDATE_ARF (1 << 4) + +// encoder should copy current frame to the golden frame +#define UPDATE_GF (1 << 5) + +static int map_vp8_flags(int svc_flags) { + int flags = 0; + + if (!(svc_flags & USE_LAST)) flags |= VP8_EFLAG_NO_REF_LAST; + if (!(svc_flags & USE_ARF)) flags |= VP8_EFLAG_NO_REF_ARF; + if (!(svc_flags & USE_GF)) flags |= VP8_EFLAG_NO_REF_GF; + + if (svc_flags & UPDATE_LAST) { + // last is updated automatically + } else { + flags |= VP8_EFLAG_NO_UPD_LAST; + } + if (svc_flags & UPDATE_ARF) { + flags |= VP8_EFLAG_FORCE_ARF; + } else { + flags |= VP8_EFLAG_NO_UPD_ARF; + } + if (svc_flags & UPDATE_GF) { + flags |= VP8_EFLAG_FORCE_GF; + } else { + flags |= VP8_EFLAG_NO_UPD_GF; + } + return flags; +} + +/** + * Helper to check if the current frame is the first, full resolution dummy. 
+ */ +static int vpx_svc_dummy_frame(SvcContext *svc_ctx) { + SvcInternal *const si = get_svc_internal(svc_ctx); + return svc_ctx->first_frame_full_size == 1 && si->encode_frame_count == 0; +} + +static void calculate_enc_frame_flags(SvcContext *svc_ctx) { + vpx_enc_frame_flags_t flags = VPX_EFLAG_FORCE_KF; + SvcInternal *const si = get_svc_internal(svc_ctx); + const int is_keyframe = (si->frame_within_gop == 0); + + // keyframe layer zero is identical for all modes + if ((is_keyframe && si->layer == 0) || vpx_svc_dummy_frame(svc_ctx)) { + si->enc_frame_flags = VPX_EFLAG_FORCE_KF; + return; + } + + switch (svc_ctx->encoding_mode) { + case ALT_INTER_LAYER_PREDICTION_IP: + if (si->layer == 0) { + flags = map_vp8_flags(USE_LAST | UPDATE_LAST); + } else if (is_keyframe) { + if (si->layer == si->layers - 1) { + flags = map_vp8_flags(USE_ARF | UPDATE_LAST); + } else { + flags = map_vp8_flags(USE_ARF | UPDATE_LAST | UPDATE_GF); + } + } else { + flags = map_vp8_flags(USE_LAST | USE_ARF | UPDATE_LAST); + } + break; + case INTER_LAYER_PREDICTION_I: + if (si->layer == 0) { + flags = map_vp8_flags(USE_LAST | UPDATE_LAST); + } else if (is_keyframe) { + flags = map_vp8_flags(USE_ARF | UPDATE_LAST); + } else { + flags = map_vp8_flags(USE_LAST | UPDATE_LAST); + } + break; + case INTER_LAYER_PREDICTION_IP: + if (si->layer == 0) { + flags = map_vp8_flags(USE_LAST | UPDATE_LAST); + } else if (is_keyframe) { + flags = map_vp8_flags(USE_ARF | UPDATE_LAST); + } else { + flags = map_vp8_flags(USE_LAST | USE_ARF | UPDATE_LAST); + } + break; + case USE_GOLDEN_FRAME: + if (2 * si->layers - SVC_REFERENCE_FRAMES <= si->layer) { + if (si->layer == 0) { + flags = map_vp8_flags(USE_LAST | USE_GF | UPDATE_LAST); + } else if (is_keyframe) { + flags = map_vp8_flags(USE_ARF | UPDATE_LAST | UPDATE_GF); + } else { + flags = map_vp8_flags(USE_LAST | USE_ARF | USE_GF | UPDATE_LAST); + } + } else { + if (si->layer == 0) { + flags = map_vp8_flags(USE_LAST | UPDATE_LAST); + } else if (is_keyframe) { + 
flags = map_vp8_flags(USE_ARF | UPDATE_LAST); + } else { + flags = map_vp8_flags(USE_LAST | UPDATE_LAST); + } + } + break; + default: + svc_log(svc_ctx, SVC_LOG_ERROR, "unexpected encoding mode: %d\n", + svc_ctx->encoding_mode); + break; + } + si->enc_frame_flags = flags; +} + +vpx_codec_err_t vpx_svc_get_layer_resolution(const SvcContext *svc_ctx, + int layer, + unsigned int *width, + unsigned int *height) { + int w, h, index, num, den; + const SvcInternal *const si = get_const_svc_internal(svc_ctx); + + if (svc_ctx == NULL || si == NULL || width == NULL || height == NULL) { + return VPX_CODEC_INVALID_PARAM; + } + if (layer < 0 || layer >= si->layers) return VPX_CODEC_INVALID_PARAM; + + index = layer + VPX_SS_MAX_LAYERS - si->layers; + num = si->scaling_factor_num[index]; + den = si->scaling_factor_den[index]; + if (num == 0 || den == 0) return VPX_CODEC_INVALID_PARAM; + + w = si->width * num / den; + h = si->height * num / den; + + // make height and width even to make chrome player happy + w += w % 2; + h += h % 2; + + *width = w; + *height = h; + + return VPX_CODEC_OK; +} + +static void set_svc_parameters(SvcContext *svc_ctx, + vpx_codec_ctx_t *codec_ctx) { + int layer, layer_index; + vpx_svc_parameters_t svc_params; + SvcInternal *const si = get_svc_internal(svc_ctx); + + memset(&svc_params, 0, sizeof(svc_params)); + svc_params.layer = si->layer; + svc_params.flags = si->enc_frame_flags; + + layer = si->layer; + if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP && + si->frame_within_gop == 0) { + // layers 1 & 3 don't exist in this mode, use the higher one + if (layer == 0 || layer == 2) { + layer += 1; + } + } + if (VPX_CODEC_OK != vpx_svc_get_layer_resolution(svc_ctx, layer, + &svc_params.width, + &svc_params.height)) { + svc_log(svc_ctx, SVC_LOG_ERROR, "vpx_svc_get_layer_resolution failed\n"); + } + layer_index = layer + VPX_SS_MAX_LAYERS - si->layers; + svc_params.min_quantizer = si->quantizer[layer_index]; + svc_params.max_quantizer = 
si->quantizer[layer_index]; + svc_params.distance_from_i_frame = si->frame_within_gop; + + // Use buffer i for layer i LST + svc_params.lst_fb_idx = si->layer; + + // Use buffer i-1 for layer i Alt (Inter-layer prediction) + if (si->layer != 0) { + const int use_higher_layer = + svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP && + si->frame_within_gop == 0; + svc_params.alt_fb_idx = use_higher_layer ? si->layer - 2 : si->layer - 1; + } + + if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP) { + svc_params.gld_fb_idx = si->layer + 1; + } else { + if (si->layer < 2 * si->layers - SVC_REFERENCE_FRAMES) + svc_params.gld_fb_idx = svc_params.lst_fb_idx; + else + svc_params.gld_fb_idx = 2 * si->layers - 1 - si->layer; + } + + svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, layer: %d, %dx%d, q: %d\n", + si->encode_frame_count, si->layer, svc_params.width, + svc_params.height, svc_params.min_quantizer); + + if (svc_params.flags == VPX_EFLAG_FORCE_KF) { + svc_log(svc_ctx, SVC_LOG_DEBUG, "flags == VPX_EFLAG_FORCE_KF\n"); + } else { + svc_log( + svc_ctx, SVC_LOG_DEBUG, "Using: LST/GLD/ALT [%2d|%2d|%2d]\n", + svc_params.flags & VP8_EFLAG_NO_REF_LAST ? -1 : svc_params.lst_fb_idx, + svc_params.flags & VP8_EFLAG_NO_REF_GF ? -1 : svc_params.gld_fb_idx, + svc_params.flags & VP8_EFLAG_NO_REF_ARF ? -1 : svc_params.alt_fb_idx); + svc_log( + svc_ctx, SVC_LOG_DEBUG, "Updating: LST/GLD/ALT [%2d|%2d|%2d]\n", + svc_params.flags & VP8_EFLAG_NO_UPD_LAST ? -1 : svc_params.lst_fb_idx, + svc_params.flags & VP8_EFLAG_NO_UPD_GF ? -1 : svc_params.gld_fb_idx, + svc_params.flags & VP8_EFLAG_NO_UPD_ARF ? 
-1 : svc_params.alt_fb_idx); + } + + vpx_codec_control(codec_ctx, VP9E_SET_SVC_PARAMETERS, &svc_params); +} + +/** + * Encode a frame into multiple layers + * Create a superframe containing the individual layers + */ +vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, + struct vpx_image *rawimg, vpx_codec_pts_t pts, + int64_t duration, int deadline) { + vpx_codec_err_t res; + vpx_codec_iter_t iter; + const vpx_codec_cx_pkt_t *cx_pkt; + struct LayerData *cx_layer_list = NULL; + struct LayerData *layer_data; + struct Superframe superframe; + SvcInternal *const si = get_svc_internal(svc_ctx); + if (svc_ctx == NULL || codec_ctx == NULL || rawimg == NULL || si == NULL) { + return VPX_CODEC_INVALID_PARAM; + } + + memset(&superframe, 0, sizeof(superframe)); + svc_log_reset(svc_ctx); + + si->layers = vpx_svc_dummy_frame(svc_ctx) ? 1 : svc_ctx->spatial_layers; + if (si->frame_within_gop >= si->kf_dist || + si->encode_frame_count == 0 || + (si->encode_frame_count == 1 && svc_ctx->first_frame_full_size == 1)) { + si->frame_within_gop = 0; + } + si->is_keyframe = (si->frame_within_gop == 0); + si->frame_size = 0; + + svc_log(svc_ctx, SVC_LOG_DEBUG, + "vpx_svc_encode layers: %d, frame_count: %d, frame_within_gop: %d\n", + si->layers, si->encode_frame_count, si->frame_within_gop); + + // encode each layer + for (si->layer = 0; si->layer < si->layers; ++si->layer) { + if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP && + si->is_keyframe && (si->layer == 1 || si->layer == 3)) { + svc_log(svc_ctx, SVC_LOG_DEBUG, "Skip encoding layer %d\n", si->layer); + continue; + } + calculate_enc_frame_flags(svc_ctx); + + if (vpx_svc_dummy_frame(svc_ctx)) { + // do not set svc parameters, use normal encode + svc_log(svc_ctx, SVC_LOG_DEBUG, "encoding full size first frame\n"); + } else { + set_svc_parameters(svc_ctx, codec_ctx); + } + res = vpx_codec_encode(codec_ctx, rawimg, pts, (uint32_t)duration, + si->enc_frame_flags, deadline); + if (res != 
VPX_CODEC_OK) { + return res; + } + // save compressed data + iter = NULL; + while ((cx_pkt = vpx_codec_get_cx_data(codec_ctx, &iter))) { + switch (cx_pkt->kind) { + case VPX_CODEC_CX_FRAME_PKT: { + const uint32_t frame_pkt_size = (uint32_t)(cx_pkt->data.frame.sz); + if (!vpx_svc_dummy_frame(svc_ctx)) { + si->bytes_in_layer[si->layer] += frame_pkt_size; + svc_log(svc_ctx, SVC_LOG_DEBUG, + "SVC frame: %d, layer: %d, size: %u\n", + si->encode_frame_count, si->layer, frame_pkt_size); + } + layer_data = + ld_create(cx_pkt->data.frame.buf, (size_t)frame_pkt_size); + if (layer_data == NULL) { + svc_log(svc_ctx, SVC_LOG_ERROR, "Error allocating LayerData\n"); + return 0; + } + ld_list_add(&cx_layer_list, layer_data); + + // save layer size in superframe index + superframe.sizes[superframe.count++] = frame_pkt_size; + superframe.magnitude |= frame_pkt_size; + break; + } + case VPX_CODEC_PSNR_PKT: { + if (!vpx_svc_dummy_frame(svc_ctx)) { + svc_log(svc_ctx, SVC_LOG_DEBUG, + "SVC frame: %d, layer: %d, PSNR(Total/Y/U/V): " + "%2.3f %2.3f %2.3f %2.3f \n", + si->encode_frame_count, si->layer, + cx_pkt->data.psnr.psnr[0], cx_pkt->data.psnr.psnr[1], + cx_pkt->data.psnr.psnr[2], cx_pkt->data.psnr.psnr[3]); + si->psnr_in_layer[si->layer] += cx_pkt->data.psnr.psnr[0]; + } + break; + } + default: { + break; + } + } + } + } + // add superframe index to layer data list + if (!vpx_svc_dummy_frame(svc_ctx)) { + sf_create_index(&superframe); + layer_data = ld_create(superframe.buffer, superframe.index_size); + ld_list_add(&cx_layer_list, layer_data); + } + // get accumulated size of layer data + si->frame_size = ld_list_get_buffer_size(cx_layer_list); + if (si->frame_size == 0) return VPX_CODEC_ERROR; + + // all layers encoded, create single buffer with concatenated layers + if (si->frame_size > si->buffer_size) { + free(si->buffer); + si->buffer = malloc(si->frame_size); + if (si->buffer == NULL) { + ld_list_free(cx_layer_list); + return VPX_CODEC_MEM_ERROR; + } + si->buffer_size = 
si->frame_size; + } + // copy layer data into packet + ld_list_copy_to_buffer(cx_layer_list, si->buffer); + + ld_list_free(cx_layer_list); + + svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, pts: %d\n", + si->encode_frame_count, si->is_keyframe, (int)si->frame_size, + (int)pts); + ++si->frame_within_gop; + ++si->encode_frame_count; + + return VPX_CODEC_OK; +} + +const char *vpx_svc_get_message(const SvcContext *svc_ctx) { + const SvcInternal *const si = get_const_svc_internal(svc_ctx); + if (svc_ctx == NULL || si == NULL) return NULL; + return si->message_buffer; +} + +void *vpx_svc_get_buffer(const SvcContext *svc_ctx) { + const SvcInternal *const si = get_const_svc_internal(svc_ctx); + if (svc_ctx == NULL || si == NULL) return NULL; + return si->buffer; +} + +size_t vpx_svc_get_frame_size(const SvcContext *svc_ctx) { + const SvcInternal *const si = get_const_svc_internal(svc_ctx); + if (svc_ctx == NULL || si == NULL) return 0; + return si->frame_size; +} + +int vpx_svc_get_encode_frame_count(const SvcContext *svc_ctx) { + const SvcInternal *const si = get_const_svc_internal(svc_ctx); + if (svc_ctx == NULL || si == NULL) return 0; + return si->encode_frame_count; +} + +int vpx_svc_is_keyframe(const SvcContext *svc_ctx) { + const SvcInternal *const si = get_const_svc_internal(svc_ctx); + if (svc_ctx == NULL || si == NULL) return 0; + return si->is_keyframe; +} + +void vpx_svc_set_keyframe(SvcContext *svc_ctx) { + SvcInternal *const si = get_svc_internal(svc_ctx); + if (svc_ctx == NULL || si == NULL) return; + si->frame_within_gop = 0; +} + +// dump accumulated statistics and reset accumulated values +const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) { + int number_of_frames, number_of_keyframes, encode_frame_count; + int i; + uint32_t bytes_total = 0; + SvcInternal *const si = get_svc_internal(svc_ctx); + if (svc_ctx == NULL || si == NULL) return NULL; + + svc_log_reset(svc_ctx); + + encode_frame_count = si->encode_frame_count; + if 
(svc_ctx->first_frame_full_size) encode_frame_count--; + if (si->encode_frame_count <= 0) return vpx_svc_get_message(svc_ctx); + + svc_log(svc_ctx, SVC_LOG_INFO, "\n"); + number_of_keyframes = encode_frame_count / si->kf_dist + 1; + for (i = 0; i < si->layers; ++i) { + number_of_frames = encode_frame_count; + + if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP && + (i == 1 || i == 3)) { + number_of_frames -= number_of_keyframes; + } + svc_log(svc_ctx, SVC_LOG_INFO, "Layer %d PSNR=[%2.3f], Bytes=[%u]\n", i, + (double)si->psnr_in_layer[i] / number_of_frames, + si->bytes_in_layer[i]); + bytes_total += si->bytes_in_layer[i]; + si->psnr_in_layer[i] = 0; + si->bytes_in_layer[i] = 0; + } + + // only display statistics once + si->encode_frame_count = 0; + + svc_log(svc_ctx, SVC_LOG_INFO, "Total Bytes=[%u]\n", bytes_total); + return vpx_svc_get_message(svc_ctx); +} + +void vpx_svc_release(SvcContext *svc_ctx) { + SvcInternal *si; + if (svc_ctx == NULL) return; + // do not use get_svc_internal as it will unnecessarily allocate an + // SvcInternal if it was not already allocated + si = (SvcInternal *)svc_ctx->internal; + if (si != NULL) { + free(si->buffer); + free(si); + svc_ctx->internal = NULL; + } +} diff --git a/vpx/svc_context.h b/vpx/svc_context.h new file mode 100644 index 000000000..f4933f8d6 --- /dev/null +++ b/vpx/svc_context.h @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/** + * SvcContext - input parameters and state to encode a multi-layered + * spatial SVC frame + */ + +#ifndef VPX_SVC_CONTEXT_H_ +#define VPX_SVC_CONTEXT_H_ + +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum SVC_ENCODING_MODE { + INTER_LAYER_PREDICTION_I, + ALT_INTER_LAYER_PREDICTION_IP, + INTER_LAYER_PREDICTION_IP, + USE_GOLDEN_FRAME +} SVC_ENCODING_MODE; + +typedef enum SVC_LOG_LEVEL { + SVC_LOG_ERROR, + SVC_LOG_INFO, + SVC_LOG_DEBUG +} SVC_LOG_LEVEL; + +typedef struct { + // public interface to svc_command options + int spatial_layers; // number of layers + int first_frame_full_size; // set to one to force first frame full size + SVC_ENCODING_MODE encoding_mode; // svc encoding strategy + SVC_LOG_LEVEL log_level; // amount of information to display + int log_print; // when set, printf log messages instead of returning the + // message with svc_get_message + + // private storage for vpx_svc_encode + void *internal; +} SvcContext; + +/** + * Set SVC options + * options are supplied as a single string separated by spaces + * Format: encoding-mode=<i|ip|alt-ip|gf> + * layers=<layer_count> + * scaling-factors=<n1>/<d1>,<n2>/<d2>,... + * quantizers=<q1>,<q2>,... 
+ */ +vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options); + +/** + * Set SVC quantizer values + * values comma separated, ordered from lowest resolution to highest + * e.g., "60,53,39,33,27" + */ +vpx_codec_err_t vpx_svc_set_quantizers(SvcContext *svc_ctx, + const char *quantizer_values); + +/** + * Set SVC scale factors + * values comma separated, ordered from lowest resolution to highest + * e.g., "4/16,5/16,7/16,11/16,16/16" + */ +vpx_codec_err_t vpx_svc_set_scale_factors(SvcContext *svc_ctx, + const char *scale_factors); + +/** + * initialize SVC encoding + */ +vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, + vpx_codec_iface_t *iface, + vpx_codec_enc_cfg_t *cfg); +/** + * encode a frame of video with multiple layers + */ +vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, + struct vpx_image *rawimg, vpx_codec_pts_t pts, + int64_t duration, int deadline); + +/** + * finished with svc encoding, release allocated resources + */ +void vpx_svc_release(SvcContext *svc_ctx); + +/** + * dump accumulated statistics and reset accumulated values + */ +const char *vpx_svc_dump_statistics(SvcContext *svc_ctx); + +/** + * get status message from previous encode + */ +const char *vpx_svc_get_message(const SvcContext *svc_ctx); + +/** + * return size of encoded data to be returned by vpx_svc_get_buffer + */ +size_t vpx_svc_get_frame_size(const SvcContext *svc_ctx); + +/** + * return buffer with encoded data + */ +void *vpx_svc_get_buffer(const SvcContext *svc_ctx); + +/** + * return spatial resolution of the specified layer + */ +vpx_codec_err_t vpx_svc_get_layer_resolution(const SvcContext *svc_ctx, + int layer, + unsigned int *width, + unsigned int *height); +/** + * return number of frames that have been encoded + */ +int vpx_svc_get_encode_frame_count(const SvcContext *svc_ctx); + +/** + * return 1 if last encoded frame was a keyframe + */ +int vpx_svc_is_keyframe(const SvcContext 
*svc_ctx); + +/** + * force the next frame to be a keyframe + */ +void vpx_svc_set_keyframe(SvcContext *svc_ctx); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* VPX_SVC_CONTEXT_H_ */ diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h index 9f68c38d2..433cc0d8a 100644 --- a/vpx/vp8cx.h +++ b/vpx/vp8cx.h @@ -194,13 +194,8 @@ enum vp8e_enc_control_id { VP9E_SET_TILE_ROWS, VP9E_SET_FRAME_PARALLEL_DECODING, - VP9E_SET_WIDTH = 99, - VP9E_SET_HEIGHT, - VP9E_SET_LAYER, VP9E_SET_SVC, - - VP9E_SET_MAX_Q, - VP9E_SET_MIN_Q + VP9E_SET_SVC_PARAMETERS }; /*!\brief vpx 1-D scaling mode @@ -283,6 +278,23 @@ typedef enum { VP8_TUNE_SSIM } vp8e_tuning; +/*!\brief vp9 svc parameters + * + * This defines parameters for svc encoding. + * + */ +typedef struct vpx_svc_parameters { + unsigned int width; /**< width of current spatial layer */ + unsigned int height; /**< height of current spatial layer */ + int layer; /**< current layer number - 0 = base */ + int flags; /**< encode frame flags */ + int max_quantizer; /**< max quantizer for current layer */ + int min_quantizer; /**< min quantizer for current layer */ + int distance_from_i_frame; /**< frame number within current gop */ + int lst_fb_idx; /**< last frame frame buffer index */ + int gld_fb_idx; /**< golden frame frame buffer index */ + int alt_fb_idx; /**< alt reference frame frame buffer index */ +} vpx_svc_parameters_t; /*!\brief VP8 encoder control function parameter type * @@ -303,11 +315,8 @@ VPX_CTRL_USE_TYPE(VP8E_SET_ROI_MAP, vpx_roi_map_t *) VPX_CTRL_USE_TYPE(VP8E_SET_ACTIVEMAP, vpx_active_map_t *) VPX_CTRL_USE_TYPE(VP8E_SET_SCALEMODE, vpx_scaling_mode_t *) -VPX_CTRL_USE_TYPE(VP9E_SET_LAYER, int *) VPX_CTRL_USE_TYPE(VP9E_SET_SVC, int) - -VPX_CTRL_USE_TYPE(VP9E_SET_WIDTH, unsigned int *) -VPX_CTRL_USE_TYPE(VP9E_SET_HEIGHT, unsigned int *) +VPX_CTRL_USE_TYPE(VP9E_SET_SVC_PARAMETERS, vpx_svc_parameters_t *) VPX_CTRL_USE_TYPE(VP8E_SET_CPUUSED, int) VPX_CTRL_USE_TYPE(VP8E_SET_ENABLEAUTOALTREF, unsigned int) @@ -334,8 +343,6 
@@ VPX_CTRL_USE_TYPE(VP9E_SET_LOSSLESS, unsigned int) VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PARALLEL_DECODING, unsigned int) -VPX_CTRL_USE_TYPE(VP9E_SET_MAX_Q, unsigned int) -VPX_CTRL_USE_TYPE(VP9E_SET_MIN_Q, unsigned int) /*! @} - end defgroup vp8_encoder */ #ifdef __cplusplus } // extern "C" diff --git a/vpx/vpx_codec.mk b/vpx/vpx_codec.mk index 3d5510f66..549c24908 100644 --- a/vpx/vpx_codec.mk +++ b/vpx/vpx_codec.mk @@ -15,6 +15,8 @@ API_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h API_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h +API_SRCS-$(CONFIG_VP9_ENCODER) += src/svc_encodeframe.c +API_SRCS-$(CONFIG_VP9_ENCODER) += svc_context.h API_SRCS-$(CONFIG_VP8_DECODER) += vp8.h API_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h @@ -50,8 +50,6 @@ static const char *exec_name; -#define VP8_FOURCC (0x00385056) -#define VP9_FOURCC (0x00395056) static const struct { char const *name; const vpx_codec_iface_t *(*iface)(void); @@ -59,10 +57,10 @@ static const struct { unsigned int fourcc_mask; } ifaces[] = { #if CONFIG_VP8_DECODER - {"vp8", vpx_codec_vp8_dx, VP8_FOURCC, 0x00FFFFFF}, + {"vp8", vpx_codec_vp8_dx, VP8_FOURCC_MASK, 0x00FFFFFF}, #endif #if CONFIG_VP9_DECODER - {"vp9", vpx_codec_vp9_dx, VP9_FOURCC, 0x00FFFFFF}, + {"vp9", vpx_codec_vp9_dx, VP9_FOURCC_MASK, 0x00FFFFFF}, #endif }; @@ -143,7 +141,7 @@ static const arg_def_t *vp8_pp_args[] = { }; #endif -static void usage_exit() { +void usage_exit() { int i; fprintf(stderr, "Usage: %s <options> filename\n\n" @@ -178,14 +176,6 @@ static void usage_exit() { exit(EXIT_FAILURE); } -void die(const char *fmt, ...) 
{ - va_list ap; - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - usage_exit(); -} - static unsigned int mem_get_le16(const void *vmem) { unsigned int val; const unsigned char *mem = (const unsigned char *)vmem; @@ -575,9 +565,9 @@ file_is_webm(struct input_ctx *input, codec_id = nestegg_track_codec_id(input->nestegg_ctx, i); if (codec_id == NESTEGG_CODEC_VP8) { - *fourcc = VP8_FOURCC; + *fourcc = VP8_FOURCC_MASK; } else if (codec_id == NESTEGG_CODEC_VP9) { - *fourcc = VP9_FOURCC; + *fourcc = VP9_FOURCC_MASK; } else { fprintf(stderr, "Not VPx video, quitting.\n"); exit(1); @@ -34,6 +34,8 @@ #include <unistd.h> #endif +#include "third_party/libyuv/include/libyuv/scale.h" + #if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER #include "vpx/vp8cx.h" #endif @@ -44,10 +46,9 @@ #include "vpx_ports/mem_ops.h" #include "vpx_ports/vpx_timer.h" #include "tools_common.h" +#include "webmenc.h" #include "y4minput.h" -#include "third_party/libmkv/EbmlWriter.h" -#include "third_party/libmkv/EbmlIDs.h" -#include "third_party/libyuv/include/libyuv/scale.h" + /* Need special handling of these functions on Windows */ #if defined(_MSC_VER) @@ -89,8 +90,6 @@ static size_t wrap_fwrite(const void *ptr, size_t size, size_t nmemb, static const char *exec_name; -#define VP8_FOURCC (0x30385056) -#define VP9_FOURCC (0x30395056) static const struct codec_item { char const *name; const vpx_codec_iface_t *(*iface)(void); @@ -109,37 +108,6 @@ static const struct codec_item { #endif }; -static void usage_exit(); - -#define LOG_ERROR(label) do \ - {\ - const char *l=label;\ - va_list ap;\ - va_start(ap, fmt);\ - if(l)\ - fprintf(stderr, "%s: ", l);\ - vfprintf(stderr, fmt, ap);\ - fprintf(stderr, "\n");\ - va_end(ap);\ - } while(0) - -void die(const char *fmt, ...) { - LOG_ERROR(NULL); - usage_exit(); -} - - -void fatal(const char *fmt, ...) { - LOG_ERROR("Fatal"); - exit(EXIT_FAILURE); -} - - -void warn(const char *fmt, ...) 
{ - LOG_ERROR("Warning"); -} - - static void warn_or_exit_on_errorv(vpx_codec_ctx_t *ctx, int fatal, const char *s, va_list ap) { if (ctx->err) { @@ -293,15 +261,6 @@ vpx_fixed_buf_t stats_get(stats_io_t *stats) { return stats->buf; } -/* Stereo 3D packed frame format */ -typedef enum stereo_format { - STEREO_FORMAT_MONO = 0, - STEREO_FORMAT_LEFT_RIGHT = 1, - STEREO_FORMAT_BOTTOM_TOP = 2, - STEREO_FORMAT_TOP_BOTTOM = 3, - STEREO_FORMAT_RIGHT_LEFT = 11 -} stereo_format_t; - enum video_file_type { FILE_TYPE_RAW, FILE_TYPE_IVF, @@ -496,376 +455,6 @@ static void write_ivf_frame_size(FILE *outfile, size_t size) { } -typedef off_t EbmlLoc; - - -struct cue_entry { - unsigned int time; - uint64_t loc; -}; - - -struct EbmlGlobal { - int debug; - - FILE *stream; - int64_t last_pts_ms; - vpx_rational_t framerate; - - /* These pointers are to the start of an element */ - off_t position_reference; - off_t seek_info_pos; - off_t segment_info_pos; - off_t track_pos; - off_t cue_pos; - off_t cluster_pos; - - /* This pointer is to a specific element to be serialized */ - off_t track_id_pos; - - /* These pointers are to the size field of the element */ - EbmlLoc startSegment; - EbmlLoc startCluster; - - uint32_t cluster_timecode; - int cluster_open; - - struct cue_entry *cue_list; - unsigned int cues; - -}; - - -void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len) { - (void) fwrite(buffer_in, 1, len, glob->stream); -} - -#define WRITE_BUFFER(s) \ - for(i = len-1; i>=0; i--)\ - { \ - x = (char)(*(const s *)buffer_in >> (i * CHAR_BIT)); \ - Ebml_Write(glob, &x, 1); \ - } -void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, int buffer_size, unsigned long len) { - char x; - int i; - - /* buffer_size: - * 1 - int8_t; - * 2 - int16_t; - * 3 - int32_t; - * 4 - int64_t; - */ - switch (buffer_size) { - case 1: - WRITE_BUFFER(int8_t) - break; - case 2: - WRITE_BUFFER(int16_t) - break; - case 4: - WRITE_BUFFER(int32_t) - break; - case 8: - WRITE_BUFFER(int64_t) 
- break; - default: - break; - } -} -#undef WRITE_BUFFER - -/* Need a fixed size serializer for the track ID. libmkv provides a 64 bit - * one, but not a 32 bit one. - */ -static void Ebml_SerializeUnsigned32(EbmlGlobal *glob, unsigned long class_id, uint64_t ui) { - unsigned char sizeSerialized = 4 | 0x80; - Ebml_WriteID(glob, class_id); - Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1); - Ebml_Serialize(glob, &ui, sizeof(ui), 4); -} - - -static void -Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, - unsigned long class_id) { - /* todo this is always taking 8 bytes, this may need later optimization */ - /* this is a key that says length unknown */ - uint64_t unknownLen = LITERALU64(0x01FFFFFF, 0xFFFFFFFF); - - Ebml_WriteID(glob, class_id); - *ebmlLoc = ftello(glob->stream); - Ebml_Serialize(glob, &unknownLen, sizeof(unknownLen), 8); -} - -static void -Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc) { - off_t pos; - uint64_t size; - - /* Save the current stream pointer */ - pos = ftello(glob->stream); - - /* Calculate the size of this element */ - size = pos - *ebmlLoc - 8; - size |= LITERALU64(0x01000000, 0x00000000); - - /* Seek back to the beginning of the element and write the new size */ - fseeko(glob->stream, *ebmlLoc, SEEK_SET); - Ebml_Serialize(glob, &size, sizeof(size), 8); - - /* Reset the stream pointer */ - fseeko(glob->stream, pos, SEEK_SET); -} - - -static void -write_webm_seek_element(EbmlGlobal *ebml, unsigned long id, off_t pos) { - uint64_t offset = pos - ebml->position_reference; - EbmlLoc start; - Ebml_StartSubElement(ebml, &start, Seek); - Ebml_SerializeBinary(ebml, SeekID, id); - Ebml_SerializeUnsigned64(ebml, SeekPosition, offset); - Ebml_EndSubElement(ebml, &start); -} - - -static void -write_webm_seek_info(EbmlGlobal *ebml) { - - off_t pos; - - /* Save the current stream pointer */ - pos = ftello(ebml->stream); - - if (ebml->seek_info_pos) - fseeko(ebml->stream, ebml->seek_info_pos, SEEK_SET); - else - 
ebml->seek_info_pos = pos; - - { - EbmlLoc start; - - Ebml_StartSubElement(ebml, &start, SeekHead); - write_webm_seek_element(ebml, Tracks, ebml->track_pos); - write_webm_seek_element(ebml, Cues, ebml->cue_pos); - write_webm_seek_element(ebml, Info, ebml->segment_info_pos); - Ebml_EndSubElement(ebml, &start); - } - { - /* segment info */ - EbmlLoc startInfo; - uint64_t frame_time; - char version_string[64]; - - /* Assemble version string */ - if (ebml->debug) - strcpy(version_string, "vpxenc"); - else { - strcpy(version_string, "vpxenc "); - strncat(version_string, - vpx_codec_version_str(), - sizeof(version_string) - 1 - strlen(version_string)); - } - - frame_time = (uint64_t)1000 * ebml->framerate.den - / ebml->framerate.num; - ebml->segment_info_pos = ftello(ebml->stream); - Ebml_StartSubElement(ebml, &startInfo, Info); - Ebml_SerializeUnsigned(ebml, TimecodeScale, 1000000); - Ebml_SerializeFloat(ebml, Segment_Duration, - (double)(ebml->last_pts_ms + frame_time)); - Ebml_SerializeString(ebml, 0x4D80, version_string); - Ebml_SerializeString(ebml, 0x5741, version_string); - Ebml_EndSubElement(ebml, &startInfo); - } -} - - -static void -write_webm_file_header(EbmlGlobal *glob, - const vpx_codec_enc_cfg_t *cfg, - const struct vpx_rational *fps, - stereo_format_t stereo_fmt, - unsigned int fourcc) { - { - EbmlLoc start; - Ebml_StartSubElement(glob, &start, EBML); - Ebml_SerializeUnsigned(glob, EBMLVersion, 1); - Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); - Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); - Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); - Ebml_SerializeString(glob, DocType, "webm"); - Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); - Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); - Ebml_EndSubElement(glob, &start); - } - { - Ebml_StartSubElement(glob, &glob->startSegment, Segment); - glob->position_reference = ftello(glob->stream); - glob->framerate = *fps; - write_webm_seek_info(glob); - - { - EbmlLoc trackStart; - glob->track_pos 
= ftello(glob->stream); - Ebml_StartSubElement(glob, &trackStart, Tracks); - { - unsigned int trackNumber = 1; - uint64_t trackID = 0; - - EbmlLoc start; - Ebml_StartSubElement(glob, &start, TrackEntry); - Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber); - glob->track_id_pos = ftello(glob->stream); - Ebml_SerializeUnsigned32(glob, TrackUID, trackID); - Ebml_SerializeUnsigned(glob, TrackType, 1); - Ebml_SerializeString(glob, CodecID, - fourcc == VP8_FOURCC ? "V_VP8" : "V_VP9"); - { - unsigned int pixelWidth = cfg->g_w; - unsigned int pixelHeight = cfg->g_h; - - EbmlLoc videoStart; - Ebml_StartSubElement(glob, &videoStart, Video); - Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth); - Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight); - Ebml_SerializeUnsigned(glob, StereoMode, stereo_fmt); - Ebml_EndSubElement(glob, &videoStart); - } - Ebml_EndSubElement(glob, &start); /* Track Entry */ - } - Ebml_EndSubElement(glob, &trackStart); - } - /* segment element is open */ - } -} - - -static void -write_webm_block(EbmlGlobal *glob, - const vpx_codec_enc_cfg_t *cfg, - const vpx_codec_cx_pkt_t *pkt) { - unsigned long block_length; - unsigned char track_number; - unsigned short block_timecode = 0; - unsigned char flags; - int64_t pts_ms; - int start_cluster = 0, is_keyframe; - - /* Calculate the PTS of this frame in milliseconds */ - pts_ms = pkt->data.frame.pts * 1000 - * (uint64_t)cfg->g_timebase.num / (uint64_t)cfg->g_timebase.den; - if (pts_ms <= glob->last_pts_ms) - pts_ms = glob->last_pts_ms + 1; - glob->last_pts_ms = pts_ms; - - /* Calculate the relative time of this block */ - if (pts_ms - glob->cluster_timecode > SHRT_MAX) - start_cluster = 1; - else - block_timecode = (unsigned short)pts_ms - glob->cluster_timecode; - - is_keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY); - if (start_cluster || is_keyframe) { - if (glob->cluster_open) - Ebml_EndSubElement(glob, &glob->startCluster); - - /* Open the new cluster */ - block_timecode = 0; - 
glob->cluster_open = 1; - glob->cluster_timecode = (uint32_t)pts_ms; - glob->cluster_pos = ftello(glob->stream); - Ebml_StartSubElement(glob, &glob->startCluster, Cluster); /* cluster */ - Ebml_SerializeUnsigned(glob, Timecode, glob->cluster_timecode); - - /* Save a cue point if this is a keyframe. */ - if (is_keyframe) { - struct cue_entry *cue, *new_cue_list; - - new_cue_list = realloc(glob->cue_list, - (glob->cues + 1) * sizeof(struct cue_entry)); - if (new_cue_list) - glob->cue_list = new_cue_list; - else - fatal("Failed to realloc cue list."); - - cue = &glob->cue_list[glob->cues]; - cue->time = glob->cluster_timecode; - cue->loc = glob->cluster_pos; - glob->cues++; - } - } - - /* Write the Simple Block */ - Ebml_WriteID(glob, SimpleBlock); - - block_length = (unsigned long)pkt->data.frame.sz + 4; - block_length |= 0x10000000; - Ebml_Serialize(glob, &block_length, sizeof(block_length), 4); - - track_number = 1; - track_number |= 0x80; - Ebml_Write(glob, &track_number, 1); - - Ebml_Serialize(glob, &block_timecode, sizeof(block_timecode), 2); - - flags = 0; - if (is_keyframe) - flags |= 0x80; - if (pkt->data.frame.flags & VPX_FRAME_IS_INVISIBLE) - flags |= 0x08; - Ebml_Write(glob, &flags, 1); - - Ebml_Write(glob, pkt->data.frame.buf, (unsigned long)pkt->data.frame.sz); -} - - -static void -write_webm_file_footer(EbmlGlobal *glob, long hash) { - - if (glob->cluster_open) - Ebml_EndSubElement(glob, &glob->startCluster); - - { - EbmlLoc start; - unsigned int i; - - glob->cue_pos = ftello(glob->stream); - Ebml_StartSubElement(glob, &start, Cues); - for (i = 0; i < glob->cues; i++) { - struct cue_entry *cue = &glob->cue_list[i]; - EbmlLoc start; - - Ebml_StartSubElement(glob, &start, CuePoint); - { - EbmlLoc start; - - Ebml_SerializeUnsigned(glob, CueTime, cue->time); - - Ebml_StartSubElement(glob, &start, CueTrackPositions); - Ebml_SerializeUnsigned(glob, CueTrack, 1); - Ebml_SerializeUnsigned64(glob, CueClusterPosition, - cue->loc - glob->position_reference); - 
Ebml_EndSubElement(glob, &start); - } - Ebml_EndSubElement(glob, &start); - } - Ebml_EndSubElement(glob, &start); - } - - Ebml_EndSubElement(glob, &glob->startSegment); - - /* Patch up the seek info block */ - write_webm_seek_info(glob); - - /* Patch up the track id */ - fseeko(glob->stream, glob->track_id_pos, SEEK_SET); - Ebml_SerializeUnsigned32(glob, TrackUID, glob->debug ? 0xDEADBEEF : hash); - - fseeko(glob->stream, 0, SEEK_END); -} - /* Murmur hash derived from public domain reference implementation at * http:// sites.google.com/site/murmurhash/ @@ -1172,7 +761,7 @@ static const int vp9_arg_ctrl_map[] = { static const arg_def_t *no_args[] = { NULL }; -static void usage_exit() { +void usage_exit() { int i; fprintf(stderr, "Usage: %s <options> -o dst_filename src_filename \n", @@ -1647,7 +1236,7 @@ struct stream_state { struct stream_config config; FILE *file; struct rate_hist rate_hist; - EbmlGlobal ebml; + struct EbmlGlobal ebml; uint32_t hash; uint64_t psnr_sse_total; uint64_t psnr_samples_total; @@ -1820,17 +1409,7 @@ void open_input_file(struct input_state *input) { } else fatal("Unsupported Y4M stream."); } else if (input->detect.buf_read == 4 && file_is_ivf(input, &fourcc)) { - input->file_type = FILE_TYPE_IVF; - switch (fourcc) { - case 0x32315659: - input->use_i420 = 0; - break; - case 0x30323449: - input->use_i420 = 1; - break; - default: - fatal("Unsupported fourcc (%08x) in IVF", fourcc); - } + fatal("IVF is not supported as input."); } else { input->file_type = FILE_TYPE_RAW; } @@ -1844,7 +1423,7 @@ static void close_input_file(struct input_state *input) { } static struct stream_state *new_stream(struct global_config *global, - struct stream_state *prev) { + struct stream_state *prev) { struct stream_state *stream; stream = calloc(1, sizeof(*stream)); diff --git a/webmenc.c b/webmenc.c new file mode 100644 index 000000000..a584e9db9 --- /dev/null +++ b/webmenc.c @@ -0,0 +1,345 @@ +/* + * Copyright (c) 2013 The WebM project authors. 
All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "webmenc.h" + +#include <limits.h> +#include <string.h> + +#include "third_party/libmkv/EbmlWriter.h" +#include "third_party/libmkv/EbmlIDs.h" + +#if defined(_MSC_VER) +/* MSVS uses _f{seek,tell}i64 */ +#define fseeko _fseeki64 +#define ftello _ftelli64 +#elif defined(_WIN32) +/* MinGW defines off_t as long, and uses f{seek,tell}o64/off64_t for large + * files */ +#define fseeko fseeko64 +#define ftello ftello64 +#define off_t off64_t +#endif + +#define LITERALU64(hi, lo) ((((uint64_t)hi) << 32) | lo) + +void Ebml_Write(struct EbmlGlobal *glob, + const void *buffer_in, + unsigned long len) { + (void) fwrite(buffer_in, 1, len, glob->stream); +} + +#define WRITE_BUFFER(s) \ +for (i = len - 1; i >= 0; i--) { \ + x = (char)(*(const s *)buffer_in >> (i * CHAR_BIT)); \ + Ebml_Write(glob, &x, 1); \ +} + +void Ebml_Serialize(struct EbmlGlobal *glob, + const void *buffer_in, + int buffer_size, + unsigned long len) { + char x; + int i; + + /* buffer_size: + * 1 - int8_t; + * 2 - int16_t; + * 3 - int32_t; + * 4 - int64_t; + */ + switch (buffer_size) { + case 1: + WRITE_BUFFER(int8_t) + break; + case 2: + WRITE_BUFFER(int16_t) + break; + case 4: + WRITE_BUFFER(int32_t) + break; + case 8: + WRITE_BUFFER(int64_t) + break; + default: + break; + } +} +#undef WRITE_BUFFER + +/* Need a fixed size serializer for the track ID. libmkv provides a 64 bit + * one, but not a 32 bit one. 
+ */ +static void Ebml_SerializeUnsigned32(struct EbmlGlobal *glob, + unsigned int class_id, + uint64_t ui) { + const unsigned char sizeSerialized = 4 | 0x80; + Ebml_WriteID(glob, class_id); + Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1); + Ebml_Serialize(glob, &ui, sizeof(ui), 4); +} + +static void Ebml_StartSubElement(struct EbmlGlobal *glob, + EbmlLoc *ebmlLoc, + unsigned int class_id) { + const uint64_t kEbmlUnknownLength = LITERALU64(0x01FFFFFF, 0xFFFFFFFF); + Ebml_WriteID(glob, class_id); + *ebmlLoc = ftello(glob->stream); + Ebml_Serialize(glob, &kEbmlUnknownLength, sizeof(kEbmlUnknownLength), 8); +} + +static void Ebml_EndSubElement(struct EbmlGlobal *glob, EbmlLoc *ebmlLoc) { + off_t pos; + uint64_t size; + + /* Save the current stream pointer. */ + pos = ftello(glob->stream); + + /* Calculate the size of this element. */ + size = pos - *ebmlLoc - 8; + size |= LITERALU64(0x01000000, 0x00000000); + + /* Seek back to the beginning of the element and write the new size. */ + fseeko(glob->stream, *ebmlLoc, SEEK_SET); + Ebml_Serialize(glob, &size, sizeof(size), 8); + + /* Reset the stream pointer. */ + fseeko(glob->stream, pos, SEEK_SET); +} + +void write_webm_seek_element(struct EbmlGlobal *ebml, + unsigned int id, + off_t pos) { + uint64_t offset = pos - ebml->position_reference; + EbmlLoc start; + Ebml_StartSubElement(ebml, &start, Seek); + Ebml_SerializeBinary(ebml, SeekID, id); + Ebml_SerializeUnsigned64(ebml, SeekPosition, offset); + Ebml_EndSubElement(ebml, &start); +} + +void write_webm_seek_info(struct EbmlGlobal *ebml) { + off_t pos; + EbmlLoc start; + EbmlLoc startInfo; + uint64_t frame_time; + char version_string[64]; + + /* Save the current stream pointer. 
*/ + pos = ftello(ebml->stream); + + if (ebml->seek_info_pos) + fseeko(ebml->stream, ebml->seek_info_pos, SEEK_SET); + else + ebml->seek_info_pos = pos; + + Ebml_StartSubElement(ebml, &start, SeekHead); + write_webm_seek_element(ebml, Tracks, ebml->track_pos); + write_webm_seek_element(ebml, Cues, ebml->cue_pos); + write_webm_seek_element(ebml, Info, ebml->segment_info_pos); + Ebml_EndSubElement(ebml, &start); + + /* Create and write the Segment Info. */ + if (ebml->debug) { + strcpy(version_string, "vpxenc"); + } else { + strcpy(version_string, "vpxenc "); + strncat(version_string, + vpx_codec_version_str(), + sizeof(version_string) - 1 - strlen(version_string)); + } + + frame_time = (uint64_t)1000 * ebml->framerate.den + / ebml->framerate.num; + ebml->segment_info_pos = ftello(ebml->stream); + Ebml_StartSubElement(ebml, &startInfo, Info); + Ebml_SerializeUnsigned(ebml, TimecodeScale, 1000000); + Ebml_SerializeFloat(ebml, Segment_Duration, + (double)(ebml->last_pts_ms + frame_time)); + Ebml_SerializeString(ebml, 0x4D80, version_string); + Ebml_SerializeString(ebml, 0x5741, version_string); + Ebml_EndSubElement(ebml, &startInfo); +} + +void write_webm_file_header(struct EbmlGlobal *glob, + const vpx_codec_enc_cfg_t *cfg, + const struct vpx_rational *fps, + stereo_format_t stereo_fmt, + unsigned int fourcc) { + EbmlLoc start; + EbmlLoc trackStart; + EbmlLoc videoStart; + unsigned int trackNumber = 1; + uint64_t trackID = 0; + unsigned int pixelWidth = cfg->g_w; + unsigned int pixelHeight = cfg->g_h; + + /* Write the EBML header. 
*/ + Ebml_StartSubElement(glob, &start, EBML); + Ebml_SerializeUnsigned(glob, EBMLVersion, 1); + Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); + Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); + Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); + Ebml_SerializeString(glob, DocType, "webm"); + Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); + Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); + Ebml_EndSubElement(glob, &start); + + /* Open and begin writing the segment element. */ + Ebml_StartSubElement(glob, &glob->startSegment, Segment); + glob->position_reference = ftello(glob->stream); + glob->framerate = *fps; + write_webm_seek_info(glob); + + /* Open and write the Tracks element. */ + glob->track_pos = ftello(glob->stream); + Ebml_StartSubElement(glob, &trackStart, Tracks); + + /* Open and write the Track entry. */ + Ebml_StartSubElement(glob, &start, TrackEntry); + Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber); + glob->track_id_pos = ftello(glob->stream); + Ebml_SerializeUnsigned32(glob, TrackUID, trackID); + Ebml_SerializeUnsigned(glob, TrackType, 1); + Ebml_SerializeString(glob, CodecID, + fourcc == VP8_FOURCC ? "V_VP8" : "V_VP9"); + Ebml_StartSubElement(glob, &videoStart, Video); + Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth); + Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight); + Ebml_SerializeUnsigned(glob, StereoMode, stereo_fmt); + Ebml_EndSubElement(glob, &videoStart); + + /* Close Track entry. */ + Ebml_EndSubElement(glob, &start); + + /* Close Tracks element. */ + Ebml_EndSubElement(glob, &trackStart); + + /* Segment element remains open. */ +} + +void write_webm_block(struct EbmlGlobal *glob, + const vpx_codec_enc_cfg_t *cfg, + const vpx_codec_cx_pkt_t *pkt) { + unsigned int block_length; + unsigned char track_number; + uint16_t block_timecode = 0; + unsigned char flags; + int64_t pts_ms; + int start_cluster = 0, is_keyframe; + + /* Calculate the PTS of this frame in milliseconds. 
*/ + pts_ms = pkt->data.frame.pts * 1000 + * (uint64_t)cfg->g_timebase.num / (uint64_t)cfg->g_timebase.den; + + if (pts_ms <= glob->last_pts_ms) + pts_ms = glob->last_pts_ms + 1; + + glob->last_pts_ms = pts_ms; + + /* Calculate the relative time of this block. */ + if (pts_ms - glob->cluster_timecode > SHRT_MAX) + start_cluster = 1; + else + block_timecode = (uint16_t)pts_ms - glob->cluster_timecode; + + is_keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY); + if (start_cluster || is_keyframe) { + if (glob->cluster_open) + Ebml_EndSubElement(glob, &glob->startCluster); + + /* Open the new cluster. */ + block_timecode = 0; + glob->cluster_open = 1; + glob->cluster_timecode = (uint32_t)pts_ms; + glob->cluster_pos = ftello(glob->stream); + Ebml_StartSubElement(glob, &glob->startCluster, Cluster); + Ebml_SerializeUnsigned(glob, Timecode, glob->cluster_timecode); + + /* Save a cue point if this is a keyframe. */ + if (is_keyframe) { + struct cue_entry *cue, *new_cue_list; + + new_cue_list = realloc(glob->cue_list, + (glob->cues + 1) * sizeof(struct cue_entry)); + if (new_cue_list) + glob->cue_list = new_cue_list; + else + fatal("Failed to realloc cue list."); + + cue = &glob->cue_list[glob->cues]; + cue->time = glob->cluster_timecode; + cue->loc = glob->cluster_pos; + glob->cues++; + } + } + + /* Write the Simple Block. 
*/ + Ebml_WriteID(glob, SimpleBlock); + + block_length = (unsigned int)pkt->data.frame.sz + 4; + block_length |= 0x10000000; + Ebml_Serialize(glob, &block_length, sizeof(block_length), 4); + + track_number = 1; + track_number |= 0x80; + Ebml_Write(glob, &track_number, 1); + + Ebml_Serialize(glob, &block_timecode, sizeof(block_timecode), 2); + + flags = 0; + if (is_keyframe) + flags |= 0x80; + if (pkt->data.frame.flags & VPX_FRAME_IS_INVISIBLE) + flags |= 0x08; + Ebml_Write(glob, &flags, 1); + + Ebml_Write(glob, pkt->data.frame.buf, (unsigned int)pkt->data.frame.sz); +} + +void write_webm_file_footer(struct EbmlGlobal *glob, int hash) { + EbmlLoc start_cues; + EbmlLoc start_cue_point; + EbmlLoc start_cue_tracks; + unsigned int i; + + if (glob->cluster_open) + Ebml_EndSubElement(glob, &glob->startCluster); + + glob->cue_pos = ftello(glob->stream); + Ebml_StartSubElement(glob, &start_cues, Cues); + + for (i = 0; i < glob->cues; i++) { + struct cue_entry *cue = &glob->cue_list[i]; + Ebml_StartSubElement(glob, &start_cue_point, CuePoint); + Ebml_SerializeUnsigned(glob, CueTime, cue->time); + + Ebml_StartSubElement(glob, &start_cue_tracks, CueTrackPositions); + Ebml_SerializeUnsigned(glob, CueTrack, 1); + Ebml_SerializeUnsigned64(glob, CueClusterPosition, + cue->loc - glob->position_reference); + Ebml_EndSubElement(glob, &start_cue_tracks); + + Ebml_EndSubElement(glob, &start_cue_point); + } + + Ebml_EndSubElement(glob, &start_cues); + + /* Close the Segment. */ + Ebml_EndSubElement(glob, &glob->startSegment); + + /* Patch up the seek info block. */ + write_webm_seek_info(glob); + + /* Patch up the track id. */ + fseeko(glob->stream, glob->track_id_pos, SEEK_SET); + Ebml_SerializeUnsigned32(glob, TrackUID, glob->debug ? 0xDEADBEEF : hash); + + fseeko(glob->stream, 0, SEEK_END); +} diff --git a/webmenc.h b/webmenc.h new file mode 100644 index 000000000..f3bc3ecd2 --- /dev/null +++ b/webmenc.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2013 The WebM project authors. 
All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef WEBMENC_H_ +#define WEBMENC_H_ + +#include <stdio.h> +#include <stdlib.h> + +#if defined(_MSC_VER) +/* MSVS doesn't define off_t */ +typedef __int64 off_t; +#else +#include <stdint.h> +#endif + +#include "tools_common.h" +#include "vpx/vpx_encoder.h" + +typedef off_t EbmlLoc; + +struct cue_entry { + unsigned int time; + uint64_t loc; +}; + +struct EbmlGlobal { + int debug; + + FILE *stream; + int64_t last_pts_ms; + vpx_rational_t framerate; + + /* These pointers are to the start of an element */ + off_t position_reference; + off_t seek_info_pos; + off_t segment_info_pos; + off_t track_pos; + off_t cue_pos; + off_t cluster_pos; + + /* This pointer is to a specific element to be serialized */ + off_t track_id_pos; + + /* These pointers are to the size field of the element */ + EbmlLoc startSegment; + EbmlLoc startCluster; + + uint32_t cluster_timecode; + int cluster_open; + + struct cue_entry *cue_list; + unsigned int cues; +}; + +/* Stereo 3D packed frame format */ +typedef enum stereo_format { + STEREO_FORMAT_MONO = 0, + STEREO_FORMAT_LEFT_RIGHT = 1, + STEREO_FORMAT_BOTTOM_TOP = 2, + STEREO_FORMAT_TOP_BOTTOM = 3, + STEREO_FORMAT_RIGHT_LEFT = 11 +} stereo_format_t; + +void write_webm_seek_element(struct EbmlGlobal *ebml, + unsigned int id, + off_t pos); + +void write_webm_file_header(struct EbmlGlobal *glob, + const vpx_codec_enc_cfg_t *cfg, + const struct vpx_rational *fps, + stereo_format_t stereo_fmt, + unsigned int fourcc); + +void write_webm_block(struct EbmlGlobal *glob, + const vpx_codec_enc_cfg_t *cfg, + const vpx_codec_cx_pkt_t *pkt); + +void write_webm_file_footer(struct EbmlGlobal *glob, int 
hash); + +#endif // WEBMENC_H_ |