diff options
40 files changed, 1558 insertions, 1488 deletions
@@ -242,8 +242,6 @@ EXPERIMENT_LIST=" implicit_segmentation newbintramodes comp_interintra_pred - tx64x64 - cnvcontext enable_6tap abovesprefmv " diff --git a/examples.mk b/examples.mk index f1cc42bf7..8426ee769 100644 --- a/examples.mk +++ b/examples.mk @@ -8,6 +8,12 @@ ## be found in the AUTHORS file in the root of the source tree. ## +LIBYUV_SRCS += third_party/libyuv/include/libyuv/basic_types.h \ + third_party/libyuv/include/libyuv/cpu_id.h \ + third_party/libyuv/include/libyuv/scale.h \ + third_party/libyuv/source/row.h \ + third_party/libyuv/source/scale.c \ + third_party/libyuv/source/cpu_id.c # List of examples to build. UTILS are files that are taken from the source # tree directly, and GEN_EXAMPLES are files that are created from the @@ -25,6 +31,7 @@ vpxdec.SRCS += nestegg/halloc/src/hlist.h vpxdec.SRCS += nestegg/halloc/src/macros.h vpxdec.SRCS += nestegg/include/nestegg/nestegg.h vpxdec.SRCS += nestegg/src/nestegg.c +vpxdec.SRCS += $(LIBYUV_SRCS) vpxdec.GUID = BA5FE66F-38DD-E034-F542-B1578C5FB950 vpxdec.DESCRIPTION = Full featured decoder UTILS-$(CONFIG_ENCODERS) += vpxenc.c @@ -36,6 +43,7 @@ vpxenc.SRCS += vpx_ports/vpx_timer.h vpxenc.SRCS += libmkv/EbmlIDs.h vpxenc.SRCS += libmkv/EbmlWriter.c vpxenc.SRCS += libmkv/EbmlWriter.h +vpxenc.SRCS += $(LIBYUV_SRCS) vpxenc.GUID = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1 vpxenc.DESCRIPTION = Full featured encoder UTILS-$(CONFIG_VP8_ENCODER) += vp8_scalable_patterns.c @@ -99,13 +107,7 @@ vp8cx_set_ref.DESCRIPTION = VP8 set encoder reference frame # C file is provided, not generated automatically. UTILS-$(CONFIG_MULTI_RES_ENCODING) += vp8_multi_resolution_encoder.c -vp8_multi_resolution_encoder.SRCS \ - += third_party/libyuv/include/libyuv/basic_types.h \ - third_party/libyuv/include/libyuv/cpu_id.h \ - third_party/libyuv/include/libyuv/scale.h \ - third_party/libyuv/source/row.h \ - third_party/libyuv/source/scale.c \ - third_party/libyuv/source/cpu_id.c +vp8_multi_resolution_encoder.SRCS += $(LIBYUV_SRCS) vp8_multi_resolution_encoder.GUID = 04f8738e-63c8-423b-90fa-7c2703a374de vp8_multi_resolution_encoder.DESCRIPTION = VP8 Multiple-resolution Encoding diff --git a/test/convolve_test.cc b/test/convolve_test.cc index 56b1becb8..83e0e3cce 100644 --- a/test/convolve_test.cc +++ b/test/convolve_test.cc @@ -13,6 +13,7 @@ extern "C" { #include "./vpx_config.h" #include "./vp9_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" } #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/acm_random.h" @@ -430,19 +431,7 @@ TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) { } } -TEST_P(ConvolveTest, ChangeFilterWorks) { - uint8_t* const in = input(); - uint8_t* const out = output(); - - const int16_t filters[][8] = { - { 0, 0, 0, 0, 0, 0, 0, 128}, - { 0, 0, 0, 0, 0, 0, 128}, - { 0, 0, 0, 0, 0, 128}, - { 0, 0, 0, 0, 128}, - { 0, 0, 0, 128}, - { 0, 0, 128}, - { 0, 128}, - { 128}, +DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = { { 0, 0, 0, 0, 0, 0, 0, 128}, { 0, 0, 0, 0, 0, 0, 128}, { 0, 0, 0, 0, 0, 128}, @@ -458,32 +447,45 @@ TEST_P(ConvolveTest, ChangeFilterWorks) { { 0, 0, 0, 128}, { 0, 0, 128}, { 0, 128}, - { 128}, - }; + { 128} +}; + +TEST_P(ConvolveTest, ChangeFilterWorks) { + uint8_t* const in = input(); + uint8_t* const out = output(); REGISTER_STATE_CHECK(UUT_->h8_(in, kInputStride, out, kOutputStride, - filters[0], 17, filters[4], 16, + kChangeFilters[8], 17, kChangeFilters[4], 16, Width(), Height())); - for (int x = 0; x < (Width() > 4 ? 8 : 4); ++x) { - ASSERT_EQ(in[4], out[x]) << "x == " << x; + for (int x = 0; x < Width(); ++x) { + if (x < 8) + ASSERT_EQ(in[4], out[x]) << "x == " << x; + else + ASSERT_EQ(in[12], out[x]) << "x == " << x; } REGISTER_STATE_CHECK(UUT_->v8_(in, kInputStride, out, kOutputStride, - filters[4], 16, filters[0], 17, + kChangeFilters[4], 16, kChangeFilters[8], 17, Width(), Height())); - for (int y = 0; y < (Height() > 4 ? 8 : 4); ++y) { - ASSERT_EQ(in[4 * kInputStride], out[y * kOutputStride]) << "y == " << y; + for (int y = 0; y < Height(); ++y) { + if (y < 8) + ASSERT_EQ(in[4 * kInputStride], out[y * kOutputStride]) << "y == " << y; + else + ASSERT_EQ(in[12 * kInputStride], out[y * kOutputStride]) << "y == " << y; } REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride, - filters[0], 17, filters[0], 17, + kChangeFilters[8], 17, kChangeFilters[8], 17, Width(), Height())); - for (int y = 0; y < (Height() > 4 ? 8 : 4); ++y) { - for (int x = 0; x < (Width() > 4 ? 8 : 4); ++x) { - ASSERT_EQ(in[4 * kInputStride + 4], out[y * kOutputStride + x]) + for (int y = 0; y < Height(); ++y) { + for (int x = 0; x < Width(); ++x) { + const int ref_x = x < 8 ? 4 : 12; + const int ref_y = y < 8 ? 4 : 12; + + ASSERT_EQ(in[ref_y * kInputStride + ref_x], out[y * kOutputStride + x]) << "x == " << x << ", y == " << y; } } @@ -506,10 +508,12 @@ INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values( make_tuple(4, 4, &convolve8_2d_only_c), make_tuple(8, 4, &convolve8_2d_only_c), make_tuple(8, 8, &convolve8_2d_only_c), + make_tuple(16, 8, &convolve8_2d_only_c), make_tuple(16, 16, &convolve8_2d_only_c), make_tuple(4, 4, &convolve8_c), make_tuple(8, 4, &convolve8_c), make_tuple(8, 8, &convolve8_c), + make_tuple(16, 8, &convolve8_c), make_tuple(16, 16, &convolve8_c))); } @@ -523,5 +527,6 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values( make_tuple(4, 4, &convolve8_ssse3), make_tuple(8, 4, &convolve8_ssse3), make_tuple(8, 8, &convolve8_ssse3), + make_tuple(16, 8, &convolve8_ssse3), make_tuple(16, 16, &convolve8_ssse3))); #endif diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc index d14e0aa2f..a56527099 100644 --- a/test/dct32x32_test.cc +++ b/test/dct32x32_test.cc @@ -115,15 +115,6 @@ TEST(VP9Idct32x32Test, AccuracyCheck) { << "Error: 3x32 IDCT has error " << error << " at index " << j; } - - vp9_short_fdct32x32_c(in, out_c, 64); - for (int j = 0; j < 1024; ++j) { - const double diff = coeff[j] - out_c[j]; - const double error = diff * diff; - EXPECT_GE(1.0, error) - << "Error: 32x32 FDCT has error " << error - << " at index " << j; - } } } @@ -157,8 +148,8 @@ TEST(VP9Fdct32x32Test, AccuracyCheck) { EXPECT_GE(1u, max_error) << "Error: 32x32 FDCT/IDCT has an individual roundtrip error > 1"; - EXPECT_GE(count_test_block/10, total_error) - << "Error: 32x32 FDCT/IDCT has average roundtrip error > 1/10 per block"; + EXPECT_GE(count_test_block, total_error) + << "Error: 32x32 FDCT/IDCT has average roundtrip error > 1 per block"; } TEST(VP9Fdct32x32Test, CoeffSizeCheck) { diff --git a/test/resize_test.cc b/test/resize_test.cc index 5e9234c38..2c62dafe3 100644 --- a/test/resize_test.cc +++ b/test/resize_test.cc @@ -101,7 +101,7 @@ TEST_P(ResizeTest, TestExternalResizeWorks) { class ResizeInternalTest : public ResizeTest { protected: - ResizeInternalTest() : ResizeTest() {} + ResizeInternalTest() : ResizeTest(), frame0_psnr_(0.0) {} virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, libvpx_test::Encoder *encoder) { @@ -109,18 +109,33 @@ class ResizeInternalTest : public ResizeTest { struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE}; encoder->Control(VP8E_SET_SCALEMODE, &mode); } + if (video->frame() == 6) { + struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL}; + encoder->Control(VP8E_SET_SCALEMODE, &mode); + } } + + virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) { + if (!frame0_psnr_) + frame0_psnr_ = pkt->data.psnr.psnr[0]; + ASSERT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 0.025*frame0_psnr_); + } + + double frame0_psnr_; }; TEST_P(ResizeInternalTest, TestInternalResizeWorks) { ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 5); + 30, 1, 0, 10); + init_flags_ = VPX_CODEC_USE_PSNR; + // q picked such that initial keyframe on this clip is ~30dB PSNR + cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); for (std::vector<FrameInfo>::iterator info = frame_info_list_.begin(); info != frame_info_list_.end(); ++info) { const vpx_codec_pts_t pts = info->pts; - if (pts >= 3) { + if (pts >= 3 && pts < 6) { ASSERT_EQ(282U, info->w) << "Frame " << pts << " had unexpected width"; ASSERT_EQ(173U, info->h) << "Frame " << pts << " had unexpected height"; } else { diff --git a/test/test.mk b/test/test.mk index d879f9fb2..3d56bd838 100644 --- a/test/test.mk +++ b/test/test.mk @@ -76,7 +76,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc -#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc endif # VP9 diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index d1da5fe0c..2f60e38fa 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -288,6 +288,18 @@ typedef struct superblockd { DECLARE_ALIGNED(16, int16_t, dqcoeff[32*32+16*16*2]); } SUPERBLOCKD; +struct scale_factors { + int x_num; + int x_den; + int x_offset_q4; + int x_step_q4; + int y_num; + int y_den; + int y_offset_q4; + int y_step_q4; + convolve_fn_t predict[2][2][2]; // horiz, vert, avg +}; + typedef struct macroblockd { DECLARE_ALIGNED(16, int16_t, diff[384]); /* from idct diff */ DECLARE_ALIGNED(16, uint8_t, predictor[384]); @@ -303,6 +315,8 @@ typedef struct macroblockd { YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */ YV12_BUFFER_CONFIG second_pre; YV12_BUFFER_CONFIG dst; + struct scale_factors scale_factor[2]; + struct scale_factors scale_factor_uv[2]; MODE_INFO *prev_mode_info_context; MODE_INFO *mode_info_context; diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c index b87c410df..b062e7dc7 100644 --- a/vp9/common/vp9_convolve.c +++ b/vp9/common/vp9_convolve.c @@ -19,7 +19,6 @@ #define VP9_FILTER_WEIGHT 128 #define VP9_FILTER_SHIFT 7 -#define ALIGN_FILTERS_256 0 /* Assume a bank of 16 filters to choose from. There are two implementations * for filter wrapping behavior, since we want to be able to pick which filter @@ -34,8 +33,11 @@ * always 256 byte aligned. * * Implementations 2 and 3 are likely preferable, as they avoid an extra 2 - * parameters, and switching between them is trivial. + * parameters, and switching between them is trivial, with the + * ALIGN_FILTERS_256 macro, below. */ + #define ALIGN_FILTERS_256 1 + static void convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x0, int x_step_q4, @@ -56,11 +58,12 @@ static void convolve_horiz_c(const uint8_t *src, int src_stride, const int16_t *filter_x = filter_x0; /* Initial phase offset */ - int x_q4 = (filter_x - filter_x_base) / taps; + int x0_q4 = (filter_x - filter_x_base) / taps; + int x_q4 = x0_q4; for (x = 0; x < w; ++x) { /* Per-pixel src offset */ - int src_x = x_q4 >> 4; + int src_x = (x_q4 - x0_q4) >> 4; for (sum = 0, k = 0; k < taps; ++k) { sum += src[src_x + k] * filter_x[k]; @@ -97,11 +100,12 @@ static void convolve_avg_horiz_c(const uint8_t *src, int src_stride, const int16_t *filter_x = filter_x0; /* Initial phase offset */ - int x_q4 = (filter_x - filter_x_base) / taps; + int x0_q4 = (filter_x - filter_x_base) / taps; + int x_q4 = x0_q4; for (x = 0; x < w; ++x) { /* Per-pixel src offset */ - int src_x = x_q4 >> 4; + int src_x = (x_q4 - x0_q4) >> 4; for (sum = 0, k = 0; k < taps; ++k) { sum += src[src_x + k] * filter_x[k]; @@ -138,11 +142,12 @@ static void convolve_vert_c(const uint8_t *src, int src_stride, const int16_t *filter_y = filter_y0; /* Initial phase offset */ - int y_q4 = (filter_y - filter_y_base) / taps; + int y0_q4 = (filter_y - filter_y_base) / taps; + int y_q4 = y0_q4; for (y = 0; y < h; ++y) { /* Per-pixel src offset */ - int src_y = y_q4 >> 4; + int src_y = (y_q4 - y0_q4) >> 4; for (sum = 0, k = 0; k < taps; ++k) { sum += src[(src_y + k) * src_stride] * filter_y[k]; @@ -179,11 +184,12 @@ static void convolve_avg_vert_c(const uint8_t *src, int src_stride, const int16_t *filter_y = filter_y0; /* Initial phase offset */ - int y_q4 = (filter_y - filter_y_base) / taps; + int y0_q4 = (filter_y - filter_y_base) / taps; + int y_q4 = y0_q4; for (y = 0; y < h; ++y) { /* Per-pixel src offset */ - int src_y = y_q4 >> 4; + int src_y = (y_q4 - y0_q4) >> 4; for (sum = 0, k = 0; k < taps; ++k) { sum += src[(src_y + k) * src_stride] * filter_y[k]; @@ -206,16 +212,25 @@ static void convolve_c(const uint8_t *src, int src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int taps) { - /* Fixed size intermediate buffer places limits on parameters. */ - uint8_t temp[16 * 23]; + /* Fixed size intermediate buffer places limits on parameters. + * Maximum intermediate_height is 39, for y_step_q4 == 32, + * h == 16, taps == 8. + */ + uint8_t temp[16 * 39]; + int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1; + assert(w <= 16); assert(h <= 16); assert(taps <= 8); + assert(y_step_q4 <= 32); + + if (intermediate_height < h) + intermediate_height = h; convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 16, filter_x, x_step_q4, filter_y, y_step_q4, - w, h + taps - 1, taps); + w, intermediate_height, taps); convolve_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h, taps); @@ -226,16 +241,25 @@ static void convolve_avg_c(const uint8_t *src, int src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int taps) { - /* Fixed size intermediate buffer places limits on parameters. */ - uint8_t temp[16 * 23]; + /* Fixed size intermediate buffer places limits on parameters. + * Maximum intermediate_height is 39, for y_step_q4 == 32, + * h == 16, taps == 8. + */ + uint8_t temp[16 * 39]; + int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1; + assert(w <= 16); assert(h <= 16); assert(taps <= 8); + assert(y_step_q4 <= 32); + + if (intermediate_height < h) + intermediate_height = h; convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 16, filter_x, x_step_q4, filter_y, y_step_q4, - w, h + taps - 1, taps); + w, intermediate_height, taps); convolve_avg_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h, taps); @@ -318,25 +342,17 @@ void vp9_convolve_copy(const uint8_t *src, int src_stride, const int16_t *filter_x, int filter_x_stride, const int16_t *filter_y, int filter_y_stride, int w, int h) { - if (h == 16) { + if (w == 16 && h == 16) { vp9_copy_mem16x16(src, src_stride, dst, dst_stride); - } else if (h == 8) { + } else if (w == 8 && h == 8) { vp9_copy_mem8x8(src, src_stride, dst, dst_stride); - } else if (w == 8) { + } else if (w == 8 && h == 4) { vp9_copy_mem8x4(src, src_stride, dst, dst_stride); } else { - // 4x4 int r; - for (r = 0; r < 4; ++r) { -#if !(CONFIG_FAST_UNALIGNED) - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; -#else - *(uint32_t *)dst = *(const uint32_t *)src; -#endif + for (r = h; r > 0; --r) { + memcpy(dst, src, w); src += src_stride; dst += dst_stride; } diff --git a/vp9/common/vp9_convolve.h b/vp9/common/vp9_convolve.h index 46c935ab7..8c4856187 100644 --- a/vp9/common/vp9_convolve.h +++ b/vp9/common/vp9_convolve.h @@ -33,11 +33,8 @@ void vp9_convolve_avg(const uint8_t *src, int src_stride, int w, int h); struct subpix_fn_table { - convolve_fn_t predict[2][2][2]; // horiz, vert, avg const int16_t (*filter_x)[8]; const int16_t (*filter_y)[8]; - int x_step_q4; - int y_step_q4; }; #endif // VP9_COMMON_CONVOLVE_H_ diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c index 5e425895f..434c63e7e 100644 --- a/vp9/common/vp9_filter.c +++ b/vp9/common/vp9_filter.c @@ -15,7 +15,7 @@ #include "vp9_rtcd.h" #include "vp9/common/vp9_common.h" -DECLARE_ALIGNED(16, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][8]) = { +DECLARE_ALIGNED(256, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][8]) = { { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 120, 8, 0, 0, 0 }, { 0, 0, 0, 112, 16, 0, 0, 0 }, @@ -36,7 +36,8 @@ DECLARE_ALIGNED(16, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][8]) = { #define FILTER_ALPHA 0 #define FILTER_ALPHA_SHARP 1 -DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = { +DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) + = { #if FILTER_ALPHA == 0 /* Lagrangian interpolation filter */ { 0, 0, 0, 128, 0, 0, 0, 0}, @@ -55,6 +56,7 @@ DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = { { -1, 3, -9, 27, 118, -13, 4, -1}, { 0, 2, -6, 18, 122, -10, 3, -1}, { 0, 1, -3, 8, 126, -5, 1, 0} + #elif FILTER_ALPHA == 50 /* Generated using MATLAB: * alpha = 0.5; @@ -82,7 +84,8 @@ DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = { #endif /* FILTER_ALPHA */ }; -DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = { +DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) + = { #if FILTER_ALPHA_SHARP == 1 /* dct based filter */ {0, 0, 0, 128, 0, 0, 0, 0}, @@ -101,6 +104,7 @@ DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = { {-2, 5, -10, 27, 121, -17, 7, -3}, {-1, 3, -6, 17, 125, -13, 5, -2}, {0, 1, -3, 8, 127, -7, 3, -1} + #elif FILTER_ALPHA_SHARP == 75 /* alpha = 0.75 */ {0, 0, 0, 128, 0, 0, 0, 0}, @@ -122,7 +126,7 @@ DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = { #endif /* FILTER_ALPHA_SHARP */ }; -DECLARE_ALIGNED(16, const int16_t, +DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][8]) = { /* 8-tap lowpass filter */ /* Hamming window */ @@ -144,7 +148,8 @@ DECLARE_ALIGNED(16, const int16_t, { 1, -2, -7, 37, 80, 28, -8, -1} }; -DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_6[SUBPEL_SHIFTS][8]) = { +DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_6[SUBPEL_SHIFTS][8]) + = { {0, 0, 0, 128, 0, 0, 0, 0}, {0, 1, -5, 125, 8, -2, 1, 0}, {0, 1, -8, 122, 17, -5, 1, 0}, diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h index d25d0ac2a..3e0ee4b63 100644 --- a/vp9/common/vp9_idct.h +++ b/vp9/common/vp9_idct.h @@ -68,4 +68,11 @@ static INLINE int dct_const_round_shift(int input) { assert(INT16_MIN <= rv && rv <= INT16_MAX); return rv; } + +static INLINE int dct_32_round(int input) { + int rv = (input + DCT_CONST_ROUNDING) >> DCT_CONST_BITS; + assert(-131072 <= rv && rv <= 131071); + return rv; +} + #endif diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c index 19397028b..f34823b36 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@ -115,7 +115,7 @@ void vp9_dc_only_inv_walsh_add_c(int input_dc, uint8_t *pred_ptr, } } -void idct4_1d(int16_t *input, int16_t *output) { +static void idct4_1d(int16_t *input, int16_t *output) { int16_t step[4]; int temp1, temp2; // stage 1 @@ -193,7 +193,7 @@ void vp9_dc_only_idct_add_c(int input_dc, uint8_t *pred_ptr, } } -void idct8_1d(int16_t *input, int16_t *output) { +static void idct8_1d(int16_t *input, int16_t *output) { int16_t step1[8], step2[8]; int temp1, temp2; // stage 1 @@ -313,10 +313,9 @@ static const transform_2d IHT_4[] = { void vp9_short_iht4x4_c(int16_t *input, int16_t *output, int pitch, TX_TYPE tx_type) { + int i, j; int16_t out[4 * 4]; int16_t *outptr = out; - const int half_pitch = pitch >> 1; - int i, j; int16_t temp_in[4], temp_out[4]; const transform_2d ht = IHT_4[tx_type]; @@ -333,7 +332,7 @@ void vp9_short_iht4x4_c(int16_t *input, int16_t *output, temp_in[j] = out[j * 4 + i]; ht.cols(temp_in, temp_out); for (j = 0; j < 4; ++j) - output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 4); + output[j * pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 4); } } @@ -423,10 +422,9 @@ static const transform_2d IHT_8[] = { void vp9_short_iht8x8_c(int16_t *input, int16_t *output, int pitch, TX_TYPE tx_type) { + int i, j; int16_t out[8 * 8]; int16_t *outptr = out; - const int half_pitch = pitch >> 1; - int i, j; int16_t temp_in[8], temp_out[8]; const transform_2d ht = IHT_8[tx_type]; @@ -443,7 +441,7 @@ void vp9_short_iht8x8_c(int16_t *input, int16_t *output, temp_in[j] = out[j * 8 + i]; ht.cols(temp_in, temp_out); for (j = 0; j < 8; ++j) - output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 5); + output[j * pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 5); } } @@ -479,7 +477,7 @@ void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output) { output[0] = ROUND_POWER_OF_TWO(out, 5); } -void idct16_1d(int16_t *input, int16_t *output) { +static void idct16_1d(int16_t *input, int16_t *output) { int16_t step1[16], step2[16]; int temp1, temp2; @@ -846,18 +844,17 @@ static const transform_2d IHT_16[] = { }; void vp9_short_iht16x16_c(int16_t *input, int16_t *output, - int pitch, TX_TYPE tx_type) { + int input_pitch, TX_TYPE tx_type) { + int i, j; int16_t out[16 * 16]; int16_t *outptr = out; - const int half_pitch = pitch >> 1; - int i, j; int16_t temp_in[16], temp_out[16]; const transform_2d ht = IHT_16[tx_type]; // Rows for (i = 0; i < 16; ++i) { ht.rows(input, outptr); - input += half_pitch; + input += input_pitch; outptr += 16; } @@ -905,7 +902,7 @@ void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output) { output[0] = ROUND_POWER_OF_TWO(out, 6); } -void idct32_1d(int16_t *input, int16_t *output) { +static void idct32_1d(int16_t *input, int16_t *output) { int16_t step1[32], step2[32]; int temp1, temp2; diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c index c6b961894..d431ea24b 100644 --- a/vp9/common/vp9_invtrans.c +++ b/vp9/common/vp9_invtrans.c @@ -25,8 +25,7 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) { for (i = 0; i < 16; i++) { TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]); if (tx_type != DCT_DCT) { - vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff, - 32, tx_type); + vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type); } else { vp9_inverse_transform_b_4x4(xd, i, 32); } @@ -58,8 +57,7 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) { for (i = 0; i < 9; i += 8) { TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]); if (tx_type != DCT_DCT) { - vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff, - 32, tx_type); + vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type); } else { vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0], &blockd[i].diff[0], 32); @@ -69,7 +67,7 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) { TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]); if (tx_type != DCT_DCT) { vp9_short_iht8x8(xd->block[i + 2].dqcoeff, xd->block[i].diff, - 32, tx_type); + 16, tx_type); } else { vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0], &blockd[i].diff[0], 32); @@ -101,7 +99,7 @@ void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd) { BLOCKD *bd = &xd->block[0]; TX_TYPE tx_type = get_tx_type_16x16(xd, bd); if (tx_type != DCT_DCT) { - vp9_short_iht16x16(bd->dqcoeff, bd->diff, 32, tx_type); + vp9_short_iht16x16(bd->dqcoeff, bd->diff, 16, tx_type); } else { vp9_inverse_transform_b_16x16(&xd->block[0].dqcoeff[0], &xd->block[0].diff[0], 32); diff --git a/vp9/common/vp9_mv.h b/vp9/common/vp9_mv.h index 8acd4046b..a1eef4649 100644 --- a/vp9/common/vp9_mv.h +++ b/vp9/common/vp9_mv.h @@ -23,4 +23,14 @@ typedef union int_mv { MV as_mv; } int_mv; /* facilitates faster equality tests and copies */ +struct mv32 { + int32_t row; + int32_t col; +}; + +typedef union int_mv32 { + uint64_t as_int; + struct mv32 as_mv; +} int_mv32; /* facilitates faster equality tests and copies */ + #endif // VP9_COMMON_VP9_MV_H_ diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index e952fe933..c4bb12340 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -39,7 +39,11 @@ void vp9_initialize_common(void); #define NUM_REF_FRAMES 3 #define NUM_REF_FRAMES_LG2 2 -#define NUM_YV12_BUFFERS (NUM_REF_FRAMES + 1) + +// 1 scratch frame for the new frame, 3 for scaled references on the encoder +// TODO(jkoleszar): These 3 extra references could probably come from the +// normal reference pool. +#define NUM_YV12_BUFFERS (NUM_REF_FRAMES + 4) #define NUM_FRAME_CONTEXTS_LG2 2 #define NUM_FRAME_CONTEXTS (1 << NUM_FRAME_CONTEXTS_LG2) @@ -128,6 +132,8 @@ typedef struct VP9Common { int Width; int Height; + int last_width; + int last_height; int horiz_scale; int vert_scale; @@ -145,6 +151,7 @@ typedef struct VP9Common { */ int active_ref_idx[3]; /* each frame can reference 3 buffers */ int new_fb_idx; + struct scale_factors active_ref_scale[3]; YV12_BUFFER_CONFIG post_proc_buffer; YV12_BUFFER_CONFIG temp_scale_frame; diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index b75525e2c..30e8951af 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -17,26 +17,97 @@ #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" -void vp9_setup_interp_filters(MACROBLOCKD *xd, - INTERPOLATIONFILTERTYPE mcomp_filter_type, - VP9_COMMON *cm) { +void vp9_setup_scale_factors_for_frame(struct scale_factors *scale, + YV12_BUFFER_CONFIG *other, + int this_w, int this_h) { + int other_w, other_h; + + other_h = other->y_height; + other_w = other->y_width; + scale->x_num = other_w; + scale->x_den = this_w; + scale->x_offset_q4 = 0; // calculated per-mb + scale->x_step_q4 = 16 * other_w / this_w; + scale->y_num = other_h; + scale->y_den = this_h; + scale->y_offset_q4 = 0; // calculated per-mb + scale->y_step_q4 = 16 * other_h / this_h; + // TODO(agrange): Investigate the best choice of functions to use here // for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what // to do at full-pel offsets. The current selection, where the filter is // applied in one direction only, and not at all for 0,0, seems to give the // best quality, but it may be worth trying an additional mode that does // do the filtering on full-pel. - xd->subpix.predict[0][0][0] = vp9_convolve_copy; - xd->subpix.predict[0][0][1] = vp9_convolve_avg; - xd->subpix.predict[0][1][0] = vp9_convolve8_vert; - xd->subpix.predict[0][1][1] = vp9_convolve8_avg_vert; - xd->subpix.predict[1][0][0] = vp9_convolve8_horiz; - xd->subpix.predict[1][0][1] = vp9_convolve8_avg_horiz; - xd->subpix.predict[1][1][0] = vp9_convolve8; - xd->subpix.predict[1][1][1] = vp9_convolve8_avg; - - xd->subpix.x_step_q4 = 16; - xd->subpix.y_step_q4 = 16; + if (scale->x_step_q4 == 16) { + if (scale->y_step_q4 == 16) { + // No scaling in either direction. + scale->predict[0][0][0] = vp9_convolve_copy; + scale->predict[0][0][1] = vp9_convolve_avg; + scale->predict[0][1][0] = vp9_convolve8_vert; + scale->predict[0][1][1] = vp9_convolve8_avg_vert; + scale->predict[1][0][0] = vp9_convolve8_horiz; + scale->predict[1][0][1] = vp9_convolve8_avg_horiz; + } else { + // No scaling in x direction. Must always scale in the y direction. + scale->predict[0][0][0] = vp9_convolve8_vert; + scale->predict[0][0][1] = vp9_convolve8_avg_vert; + scale->predict[0][1][0] = vp9_convolve8_vert; + scale->predict[0][1][1] = vp9_convolve8_avg_vert; + scale->predict[1][0][0] = vp9_convolve8; + scale->predict[1][0][1] = vp9_convolve8_avg; + } + } else { + if (scale->y_step_q4 == 16) { + // No scaling in the y direction. Must always scale in the x direction. + scale->predict[0][0][0] = vp9_convolve8_horiz; + scale->predict[0][0][1] = vp9_convolve8_avg_horiz; + scale->predict[0][1][0] = vp9_convolve8; + scale->predict[0][1][1] = vp9_convolve8_avg; + scale->predict[1][0][0] = vp9_convolve8_horiz; + scale->predict[1][0][1] = vp9_convolve8_avg_horiz; + } else { + // Must always scale in both directions. + scale->predict[0][0][0] = vp9_convolve8; + scale->predict[0][0][1] = vp9_convolve8_avg; + scale->predict[0][1][0] = vp9_convolve8; + scale->predict[0][1][1] = vp9_convolve8_avg; + scale->predict[1][0][0] = vp9_convolve8; + scale->predict[1][0][1] = vp9_convolve8_avg; + } + } + // 2D subpel motion always gets filtered in both directions + scale->predict[1][1][0] = vp9_convolve8; + scale->predict[1][1][1] = vp9_convolve8_avg; +} + +void vp9_setup_interp_filters(MACROBLOCKD *xd, + INTERPOLATIONFILTERTYPE mcomp_filter_type, + VP9_COMMON *cm) { + int i; + + /* Calculate scaling factors for each of the 3 available references */ + for (i = 0; i < 3; ++i) { + if (cm->active_ref_idx[i] >= NUM_YV12_BUFFERS) { + memset(&cm->active_ref_scale[i], 0, sizeof(cm->active_ref_scale[i])); + continue; + } + + vp9_setup_scale_factors_for_frame(&cm->active_ref_scale[i], + &cm->yv12_fb[cm->active_ref_idx[i]], + cm->mb_cols * 16, cm->mb_rows * 16); + } + + if (xd->mode_info_context) { + MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + + set_scale_factors(xd, + mbmi->ref_frame - 1, + mbmi->second_ref_frame - 1, + cm->active_ref_scale); + } + + switch (mcomp_filter_type) { case EIGHTTAP: case SWITCHABLE: @@ -57,6 +128,7 @@ void vp9_setup_interp_filters(MACROBLOCKD *xd, break; #endif } + assert(((intptr_t)xd->subpix.filter_x & 0xff) == 0); } void vp9_copy_mem16x16_c(const uint8_t *src, @@ -146,113 +218,151 @@ void vp9_copy_mem8x4_c(const uint8_t *src, } } -void vp9_build_inter_predictors_b(BLOCKD *d, int pitch, - struct subpix_fn_table *subpix) { - uint8_t *ptr_base; - uint8_t *ptr; - uint8_t *pred_ptr = d->predictor; - int_mv mv; - - ptr_base = *(d->base_pre); - mv.as_int = d->bmi.as_mv[0].as_int; - ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride + - (mv.as_mv.col >> 3); - - subpix->predict[!!(mv.as_mv.col & 7)][!!(mv.as_mv.row & 7)][0]( - ptr, d->pre_stride, pred_ptr, pitch, - subpix->filter_x[(mv.as_mv.col & 7) << 1], subpix->x_step_q4, - subpix->filter_y[(mv.as_mv.row & 7) << 1], subpix->y_step_q4, - 4, 4); +static void set_scaled_offsets(struct scale_factors *scale, + int row, int col) { + const int x_q4 = 16 * col; + const int y_q4 = 16 * row; + + scale->x_offset_q4 = (x_q4 * scale->x_num / scale->x_den) & 0xf; + scale->y_offset_q4 = (y_q4 * scale->y_num / scale->y_den) & 0xf; } -/* - * Similar to vp9_build_inter_predictors_b(), but instead of storing the - * results in d->predictor, we average the contents of d->predictor (which - * come from an earlier call to vp9_build_inter_predictors_b()) with the - * predictor of the second reference frame / motion vector. - */ -void vp9_build_2nd_inter_predictors_b(BLOCKD *d, int pitch, - struct subpix_fn_table *subpix) { - uint8_t *ptr_base; - uint8_t *ptr; - uint8_t *pred_ptr = d->predictor; - int_mv mv; - - ptr_base = *(d->base_second_pre); - mv.as_int = d->bmi.as_mv[1].as_int; - ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride + - (mv.as_mv.col >> 3); - - subpix->predict[!!(mv.as_mv.col & 7)][!!(mv.as_mv.row & 7)][1]( - ptr, d->pre_stride, pred_ptr, pitch, - subpix->filter_x[(mv.as_mv.col & 7) << 1], subpix->x_step_q4, - subpix->filter_y[(mv.as_mv.row & 7) << 1], subpix->y_step_q4, - 4, 4); +static int32_t scale_motion_vector_component_q3(int mv_q3, + int num, + int den, + int offset_q4) { + // returns the scaled and offset value of the mv component. + const int32_t mv_q4 = mv_q3 << 1; + + /* TODO(jkoleszar): make fixed point, or as a second multiply? */ + return mv_q4 * num / den + offset_q4; } -void vp9_build_inter_predictors4b(MACROBLOCKD *xd, BLOCKD *d, int pitch) { - uint8_t *ptr_base; - uint8_t *ptr; - uint8_t *pred_ptr = d->predictor; - int_mv mv; - - ptr_base = *(d->base_pre); - mv.as_int = d->bmi.as_mv[0].as_int; - ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride + - (mv.as_mv.col >> 3); - - xd->subpix.predict[!!(mv.as_mv.col & 7)][!!(mv.as_mv.row & 7)][0]( - ptr, d->pre_stride, pred_ptr, pitch, - xd->subpix.filter_x[(mv.as_mv.col & 7) << 1], xd->subpix.x_step_q4, - xd->subpix.filter_y[(mv.as_mv.row & 7) << 1], xd->subpix.y_step_q4, - 8, 8); +static int32_t scale_motion_vector_component_q4(int mv_q4, + int num, + int den, + int offset_q4) { + // returns the scaled and offset value of the mv component. + + /* TODO(jkoleszar): make fixed point, or as a second multiply? */ + return mv_q4 * num / den + offset_q4; } -/* - * Similar to build_inter_predictors_4b(), but instead of storing the - * results in d->predictor, we average the contents of d->predictor (which - * come from an earlier call to build_inter_predictors_4b()) with the - * predictor of the second reference frame / motion vector. +static int_mv32 scale_motion_vector_q3_to_q4( + const int_mv *src_mv, + const struct scale_factors *scale) { + // returns mv * scale + offset + int_mv32 result; + + result.as_mv.row = scale_motion_vector_component_q3(src_mv->as_mv.row, + scale->y_num, + scale->y_den, + scale->y_offset_q4); + result.as_mv.col = scale_motion_vector_component_q3(src_mv->as_mv.col, + scale->x_num, + scale->x_den, + scale->x_offset_q4); + return result; +} + +void vp9_build_inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int_mv *mv_q3, + const struct scale_factors *scale, + int w, int h, int do_avg, + const struct subpix_fn_table *subpix) { + int_mv32 mv; + + mv = scale_motion_vector_q3_to_q4(mv_q3, scale); + src = src + (mv.as_mv.row >> 4) * src_stride + (mv.as_mv.col >> 4); + + scale->predict[!!(mv.as_mv.col & 15)][!!(mv.as_mv.row & 15)][do_avg]( + src, src_stride, dst, dst_stride, + subpix->filter_x[mv.as_mv.col & 15], scale->x_step_q4, + subpix->filter_y[mv.as_mv.row & 15], scale->y_step_q4, + w, h); +} + +/* Like vp9_build_inter_predictor, but takes the full-pel part of the + * mv separately, and the fractional part as a q4. */ -void vp9_build_2nd_inter_predictors4b(MACROBLOCKD *xd, - BLOCKD *d, int pitch) { - uint8_t *ptr_base; - uint8_t *ptr; - uint8_t *pred_ptr = d->predictor; - int_mv mv; - - ptr_base = *(d->base_second_pre); - mv.as_int = d->bmi.as_mv[1].as_int; - ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride + - (mv.as_mv.col >> 3); - - xd->subpix.predict[!!(mv.as_mv.col & 7)][!!(mv.as_mv.row & 7)][1]( - ptr, d->pre_stride, pred_ptr, pitch, - xd->subpix.filter_x[(mv.as_mv.col & 7) << 1], xd->subpix.x_step_q4, - xd->subpix.filter_y[(mv.as_mv.row & 7) << 1], xd->subpix.y_step_q4, - 8, 8); +void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int_mv *fullpel_mv_q3, + const int_mv *frac_mv_q4, + const struct scale_factors *scale, + int w, int h, int do_avg, + const struct subpix_fn_table *subpix) { + const int mv_row_q4 = ((fullpel_mv_q3->as_mv.row >> 3) << 4) + + (frac_mv_q4->as_mv.row & 0xf); + const int mv_col_q4 = ((fullpel_mv_q3->as_mv.col >> 3) << 4) + + (frac_mv_q4->as_mv.col & 0xf); + const int scaled_mv_row_q4 = + scale_motion_vector_component_q4(mv_row_q4, scale->y_num, scale->y_den, + scale->y_offset_q4); + const int scaled_mv_col_q4 = + scale_motion_vector_component_q4(mv_col_q4, scale->x_num, scale->x_den, + scale->x_offset_q4); + const int subpel_x = scaled_mv_col_q4 & 15; + const int subpel_y = scaled_mv_row_q4 & 15; + + src = src + (scaled_mv_row_q4 >> 4) * src_stride + (scaled_mv_col_q4 >> 4); + scale->predict[!!subpel_x][!!subpel_y][do_avg]( + src, src_stride, dst, dst_stride, + subpix->filter_x[subpel_x], scale->x_step_q4, + subpix->filter_y[subpel_y], scale->y_step_q4, + w, h); } -static void build_inter_predictors2b(MACROBLOCKD *xd, BLOCKD *d, int pitch) { - uint8_t *ptr_base; - uint8_t *ptr; - uint8_t *pred_ptr = d->predictor; - int_mv mv; - - ptr_base = *(d->base_pre); - mv.as_int = d->bmi.as_mv[0].as_int; - ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride + - (mv.as_mv.col >> 3); - - xd->subpix.predict[!!(mv.as_mv.col & 7)][!!(mv.as_mv.row & 7)][0]( - ptr, d->pre_stride, pred_ptr, pitch, - xd->subpix.filter_x[(mv.as_mv.col & 7) << 1], xd->subpix.x_step_q4, - xd->subpix.filter_y[(mv.as_mv.row & 7) << 1], xd->subpix.y_step_q4, - 8, 4); +static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1, + struct scale_factors *scale, + int block_size, int stride, int which_mv, + const struct subpix_fn_table *subpix, + int row, int col) { + assert(d1->predictor - d0->predictor == block_size); + assert(d1->pre == d0->pre + block_size); + + set_scaled_offsets(&scale[which_mv], row, col); + + if (d0->bmi.as_mv[which_mv].as_int == d1->bmi.as_mv[which_mv].as_int) { + uint8_t **base_pre = which_mv ? d0->base_second_pre : d0->base_pre; + + vp9_build_inter_predictor(*base_pre + d0->pre, + d0->pre_stride, + d0->predictor, stride, + &d0->bmi.as_mv[which_mv], + &scale[which_mv], + 2 * block_size, block_size, which_mv, + subpix); + + } else { + uint8_t **base_pre0 = which_mv ? d0->base_second_pre : d0->base_pre; + uint8_t **base_pre1 = which_mv ? d1->base_second_pre : d1->base_pre; + + vp9_build_inter_predictor(*base_pre0 + d0->pre, + d0->pre_stride, + d0->predictor, stride, + &d0->bmi.as_mv[which_mv], + &scale[which_mv], + block_size, block_size, which_mv, + subpix); + + set_scaled_offsets(&scale[which_mv], row, col + block_size); + + vp9_build_inter_predictor(*base_pre1 + d1->pre, + d1->pre_stride, + d1->predictor, stride, + &d1->bmi.as_mv[which_mv], + &scale[which_mv], + block_size, block_size, which_mv, + subpix); + } } /*encoder only*/ -void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) { +void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd, + int mb_row, + int mb_col) { int i, j; BLOCKD *blockd = xd->block; @@ -329,19 +439,17 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) { } for (i = 16; i < 24; i += 2) { + const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; + const int x = 4 * (i & 1); + const int y = ((i - 16) >> 1) * 4; + + int which_mv; BLOCKD *d0 = &blockd[i]; BLOCKD *d1 = &blockd[i + 1]; - if (d0->bmi.as_mv[0].as_int == d1->bmi.as_mv[0].as_int) - build_inter_predictors2b(xd, d0, 8); - else { - vp9_build_inter_predictors_b(d0, 8, &xd->subpix); - vp9_build_inter_predictors_b(d1, 8, &xd->subpix); - } - - if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - vp9_build_2nd_inter_predictors_b(d0, 8, &xd->subpix); - vp9_build_2nd_inter_predictors_b(d1, 8, &xd->subpix); + for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { + build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv, + &xd->subpix, mb_row * 8 + y, mb_col * 8 + x); } } } @@ -383,91 +491,100 @@ static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) { } /*encoder only*/ -void vp9_build_1st_inter16x16_predictors_mby(MACROBLOCKD *xd, - uint8_t *dst_y, - int dst_ystride, - int clamp_mvs) { - uint8_t *ptr_base = xd->pre.y_buffer; - uint8_t *ptr; - int pre_stride = xd->block[0].pre_stride; - int_mv ymv; - - ymv.as_int = xd->mode_info_context->mbmi.mv[0].as_int; - - if (clamp_mvs) - clamp_mv_to_umv_border(&ymv.as_mv, xd); - - ptr = ptr_base + (ymv.as_mv.row >> 3) * pre_stride + (ymv.as_mv.col >> 3); - - xd->subpix.predict[!!(ymv.as_mv.col & 7)][!!(ymv.as_mv.row & 7)][0]( - ptr, pre_stride, dst_y, dst_ystride, - xd->subpix.filter_x[(ymv.as_mv.col & 7) << 1], xd->subpix.x_step_q4, - xd->subpix.filter_y[(ymv.as_mv.row & 7) << 1], xd->subpix.y_step_q4, - 16, 16); -} - -void vp9_build_1st_inter16x16_predictors_mbuv(MACROBLOCKD *xd, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_uvstride) { - int offset; - uint8_t *uptr, *vptr; - int pre_stride = xd->block[0].pre_stride; - int_mv _o16x16mv; - int_mv _16x16mv; - - _16x16mv.as_int = xd->mode_info_context->mbmi.mv[0].as_int; - - if (xd->mode_info_context->mbmi.need_to_clamp_mvs) - clamp_mv_to_umv_border(&_16x16mv.as_mv, xd); - - _o16x16mv = _16x16mv; - /* calc uv motion vectors */ - if (_16x16mv.as_mv.row < 0) - _16x16mv.as_mv.row -= 1; - else - _16x16mv.as_mv.row += 1; - - if (_16x16mv.as_mv.col < 0) - _16x16mv.as_mv.col -= 1; - else - _16x16mv.as_mv.col += 1; - - _16x16mv.as_mv.row /= 2; - _16x16mv.as_mv.col /= 2; - - _16x16mv.as_mv.row &= xd->fullpixel_mask; - _16x16mv.as_mv.col &= xd->fullpixel_mask; - - pre_stride >>= 1; - offset = (_16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3); - uptr = xd->pre.u_buffer + offset; - vptr = xd->pre.v_buffer + offset; - - xd->subpix.predict[!!(_o16x16mv.as_mv.col & 15)] - [!!(_o16x16mv.as_mv.row & 15)][0]( - uptr, pre_stride, dst_u, dst_uvstride, - xd->subpix.filter_x[_o16x16mv.as_mv.col & 15], xd->subpix.x_step_q4, - xd->subpix.filter_y[_o16x16mv.as_mv.row & 15], xd->subpix.y_step_q4, - 8, 8); - - xd->subpix.predict[!!(_o16x16mv.as_mv.col & 15)] - [!!(_o16x16mv.as_mv.row & 15)][0]( - vptr, pre_stride, dst_v, dst_uvstride, - xd->subpix.filter_x[_o16x16mv.as_mv.col & 15], xd->subpix.x_step_q4, - xd->subpix.filter_y[_o16x16mv.as_mv.row & 15], xd->subpix.y_step_q4, - 8, 8); +void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd, + uint8_t *dst_y, + int dst_ystride, + int mb_row, + int mb_col) { + const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; + int which_mv; + + for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { + const int clamp_mvs = + which_mv ? xd->mode_info_context->mbmi.need_to_clamp_secondmv + : xd->mode_info_context->mbmi.need_to_clamp_mvs; + uint8_t *base_pre; + int_mv ymv; + int pre_stride; + + ymv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int; + base_pre = which_mv ? xd->second_pre.y_buffer + : xd->pre.y_buffer; + pre_stride = which_mv ? xd->second_pre.y_stride + : xd->pre.y_stride; + if (clamp_mvs) + clamp_mv_to_umv_border(&ymv.as_mv, xd); + + set_scaled_offsets(&xd->scale_factor[which_mv], mb_row * 16, mb_col * 16); + + vp9_build_inter_predictor(base_pre, pre_stride, + dst_y, dst_ystride, + &ymv, &xd->scale_factor[which_mv], + 16, 16, which_mv, &xd->subpix); + } } - -void vp9_build_1st_inter16x16_predictors_mb(MACROBLOCKD *xd, - uint8_t *dst_y, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_ystride, int dst_uvstride) { - vp9_build_1st_inter16x16_predictors_mby(xd, dst_y, dst_ystride, - xd->mode_info_context->mbmi.need_to_clamp_mvs); - vp9_build_1st_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride); +void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_uvstride, + int mb_row, + int mb_col) { + const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; + int which_mv; + + for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { + const int clamp_mvs = + which_mv ? xd->mode_info_context->mbmi.need_to_clamp_secondmv + : xd->mode_info_context->mbmi.need_to_clamp_mvs; + uint8_t *uptr, *vptr; + int pre_stride = which_mv ? xd->second_pre.y_stride + : xd->pre.y_stride; + int_mv _o16x16mv; + int_mv _16x16mv; + + _16x16mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int; + + if (clamp_mvs) + clamp_mv_to_umv_border(&_16x16mv.as_mv, xd); + + _o16x16mv = _16x16mv; + /* calc uv motion vectors */ + if (_16x16mv.as_mv.row < 0) + _16x16mv.as_mv.row -= 1; + else + _16x16mv.as_mv.row += 1; + + if (_16x16mv.as_mv.col < 0) + _16x16mv.as_mv.col -= 1; + else + _16x16mv.as_mv.col += 1; + + _16x16mv.as_mv.row /= 2; + _16x16mv.as_mv.col /= 2; + + _16x16mv.as_mv.row &= xd->fullpixel_mask; + _16x16mv.as_mv.col &= xd->fullpixel_mask; + + pre_stride >>= 1; + uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer); + vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer); + + set_scaled_offsets(&xd->scale_factor_uv[which_mv], + mb_row * 16, mb_col * 16); + + vp9_build_inter_predictor_q4(uptr, pre_stride, + dst_u, dst_uvstride, + &_16x16mv, &_o16x16mv, + &xd->scale_factor_uv[which_mv], + 8, 8, which_mv, &xd->subpix); + + vp9_build_inter_predictor_q4(vptr, pre_stride, + dst_v, dst_uvstride, + &_16x16mv, &_o16x16mv, + &xd->scale_factor_uv[which_mv], + 8, 8, which_mv, &xd->subpix); + } } void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, @@ -475,7 +592,9 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, uint8_t *dst_u, uint8_t *dst_v, int dst_ystride, - int dst_uvstride) { + int dst_uvstride, + int mb_row, + int mb_col) { uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer; uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer, *v2 = x->second_pre.v_buffer; @@ -488,32 +607,43 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, for (n = 0; n < 4; n++) { const int x_idx = n & 1, y_idx = n >> 1; + int scaled_uv_offset; x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3); x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3); x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3); x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 16) << 3); - x->pre.y_buffer = y1 + y_idx * 16 * x->pre.y_stride + x_idx * 16; - x->pre.u_buffer = u1 + y_idx * 8 * x->pre.uv_stride + x_idx * 8; - x->pre.v_buffer = v1 + y_idx * 8 * x->pre.uv_stride + x_idx * 8; + x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 16, + y_idx * 16, + x->pre.y_stride, + &x->scale_factor[0]); + scaled_uv_offset = scaled_buffer_offset(x_idx * 8, + y_idx * 8, + x->pre.uv_stride, + &x->scale_factor_uv[0]); + x->pre.u_buffer = u1 + scaled_uv_offset; + x->pre.v_buffer = v1 + scaled_uv_offset; - vp9_build_1st_inter16x16_predictors_mb(x, - dst_y + y_idx * 16 * dst_ystride + x_idx * 16, - dst_u + y_idx * 8 * dst_uvstride + x_idx * 8, - dst_v + y_idx * 8 * dst_uvstride + x_idx * 8, - dst_ystride, dst_uvstride); if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.y_buffer = y2 + y_idx * 16 * x->pre.y_stride + x_idx * 16; - x->second_pre.u_buffer = u2 + y_idx * 8 * x->pre.uv_stride + x_idx * 8; - x->second_pre.v_buffer = v2 + y_idx * 8 * x->pre.uv_stride + x_idx * 8; + x->second_pre.y_buffer = y2 + + scaled_buffer_offset(x_idx * 16, + y_idx * 16, + x->second_pre.y_stride, + &x->scale_factor[1]); + scaled_uv_offset = scaled_buffer_offset(x_idx * 8, + y_idx * 8, + x->second_pre.uv_stride, + &x->scale_factor_uv[1]); + x->second_pre.u_buffer = u2 + scaled_uv_offset; + x->second_pre.v_buffer = v2 + scaled_uv_offset; + } - vp9_build_2nd_inter16x16_predictors_mb(x, + vp9_build_inter16x16_predictors_mb(x, dst_y + y_idx * 16 * dst_ystride + x_idx * 16, dst_u + y_idx * 8 * dst_uvstride + x_idx * 8, dst_v + y_idx * 8 * dst_uvstride + x_idx * 8, - dst_ystride, dst_uvstride); - } + dst_ystride, dst_uvstride, mb_row + y_idx, mb_col + x_idx); } x->mb_to_top_edge = edge[0]; @@ -544,7 +674,9 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, uint8_t *dst_u, uint8_t *dst_v, int dst_ystride, - int dst_uvstride) { + int dst_uvstride, + int mb_row, + int mb_col) { uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer; uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer, *v2 = x->second_pre.v_buffer; @@ -557,27 +689,43 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, for (n = 0; n < 4; n++) { const int x_idx = n & 1, y_idx = n >> 1; + int scaled_uv_offset; x->mb_to_top_edge = edge[0] - ((y_idx * 32) << 3); x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 32) << 3); x->mb_to_left_edge = edge[2] - ((x_idx * 32) << 3); x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 32) << 3); - x->pre.y_buffer = y1 + y_idx * 32 * x->pre.y_stride + x_idx * 32; - x->pre.u_buffer = u1 + y_idx * 16 * x->pre.uv_stride + x_idx * 16; - x->pre.v_buffer = v1 + y_idx * 16 * x->pre.uv_stride + x_idx * 16; + x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 32, + y_idx * 32, + x->pre.y_stride, + &x->scale_factor[0]); + scaled_uv_offset = scaled_buffer_offset(x_idx * 16, + y_idx * 16, + x->pre.uv_stride, + &x->scale_factor_uv[0]); + x->pre.u_buffer = u1 + scaled_uv_offset; + x->pre.v_buffer = v1 + scaled_uv_offset; if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.y_buffer = y2 + y_idx * 32 * x->pre.y_stride + x_idx * 32; - x->second_pre.u_buffer = u2 + y_idx * 16 * x->pre.uv_stride + x_idx * 16; - x->second_pre.v_buffer = v2 + y_idx * 16 * x->pre.uv_stride + x_idx * 16; + x->second_pre.y_buffer = y2 + + scaled_buffer_offset(x_idx * 32, + y_idx * 32, + x->second_pre.y_stride, + &x->scale_factor[1]); + scaled_uv_offset = scaled_buffer_offset(x_idx * 16, + y_idx * 16, + x->second_pre.uv_stride, + &x->scale_factor_uv[1]); + x->second_pre.u_buffer = u2 + scaled_uv_offset; + x->second_pre.v_buffer = v2 + scaled_uv_offset; } vp9_build_inter32x32_predictors_sb(x, dst_y + y_idx * 32 * dst_ystride + x_idx * 32, dst_u + y_idx * 16 * dst_uvstride + x_idx * 16, dst_v + y_idx * 16 * dst_uvstride + x_idx * 16, - dst_ystride, dst_uvstride); + dst_ystride, dst_uvstride, mb_row + y_idx * 2, mb_col + x_idx * 2); } x->mb_to_top_edge = edge[0]; @@ -603,171 +751,48 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, #endif } -/* - * The following functions should be called after an initial - * call to vp9_build_1st_inter16x16_predictors_mb() or _mby()/_mbuv(). - * It will run a second filter on a (different) ref - * frame and average the result with the output of the - * first filter. The second reference frame is stored - * in x->second_pre (the reference frame index is in - * x->mode_info_context->mbmi.second_ref_frame). The second - * motion vector is x->mode_info_context->mbmi.second_mv. - * - * This allows blending prediction from two reference frames - * which sometimes leads to better prediction than from a - * single reference framer. - */ -void vp9_build_2nd_inter16x16_predictors_mby(MACROBLOCKD *xd, - uint8_t *dst_y, - int dst_ystride) { - uint8_t *ptr; - - int_mv _16x16mv; - int mv_row; - int mv_col; - - uint8_t *ptr_base = xd->second_pre.y_buffer; - int pre_stride = xd->block[0].pre_stride; - - _16x16mv.as_int = xd->mode_info_context->mbmi.mv[1].as_int; - - if (xd->mode_info_context->mbmi.need_to_clamp_secondmv) - clamp_mv_to_umv_border(&_16x16mv.as_mv, xd); - - mv_row = _16x16mv.as_mv.row; - mv_col = _16x16mv.as_mv.col; - - ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3); - - xd->subpix.predict[!!(mv_col & 7)][!!(mv_row & 7)][1]( - ptr, pre_stride, dst_y, dst_ystride, - xd->subpix.filter_x[(mv_col & 7) << 1], xd->subpix.x_step_q4, - xd->subpix.filter_y[(mv_row & 7) << 1], xd->subpix.y_step_q4, - 16, 16); -} - -void vp9_build_2nd_inter16x16_predictors_mbuv(MACROBLOCKD *xd, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_uvstride) { - int offset; - uint8_t *uptr, *vptr; - - int_mv _16x16mv; - int mv_row; - int mv_col; - int omv_row, omv_col; - - int pre_stride = xd->block[0].pre_stride; - - _16x16mv.as_int = xd->mode_info_context->mbmi.mv[1].as_int; - - if (xd->mode_info_context->mbmi.need_to_clamp_secondmv) - clamp_mv_to_umv_border(&_16x16mv.as_mv, xd); - - mv_row = _16x16mv.as_mv.row; - mv_col = _16x16mv.as_mv.col; - - /* calc uv motion vectors */ - omv_row = mv_row; - omv_col = mv_col; - mv_row = (mv_row + (mv_row > 0)) >> 1; - mv_col = (mv_col + (mv_col > 0)) >> 1; - - mv_row &= xd->fullpixel_mask; - mv_col &= xd->fullpixel_mask; - - pre_stride >>= 1; - offset = (mv_row >> 3) * pre_stride + (mv_col >> 3); - uptr = xd->second_pre.u_buffer + offset; - vptr = xd->second_pre.v_buffer + offset; - - xd->subpix.predict[!!(omv_col & 15)][!!(omv_row & 15)][1]( - uptr, pre_stride, dst_u, dst_uvstride, - xd->subpix.filter_x[omv_col & 15], xd->subpix.x_step_q4, - xd->subpix.filter_y[omv_row & 15], xd->subpix.y_step_q4, - 8, 8); - - xd->subpix.predict[!!(omv_col & 15)][!!(omv_row & 15)][1]( - vptr, pre_stride, dst_v, dst_uvstride, - xd->subpix.filter_x[omv_col & 15], xd->subpix.x_step_q4, - xd->subpix.filter_y[omv_row & 15], xd->subpix.y_step_q4, - 8, 8); -} - -void vp9_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *xd, - uint8_t *dst_y, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_ystride, - int dst_uvstride) { - vp9_build_2nd_inter16x16_predictors_mby(xd, dst_y, dst_ystride); - vp9_build_2nd_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride); -} - -static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) { +static void build_inter4x4_predictors_mb(MACROBLOCKD *xd, + int mb_row, int mb_col) { int i; MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; BLOCKD *blockd = xd->block; + int which_mv = 0; + const int use_second_ref = mbmi->second_ref_frame > 0; if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) { - blockd[ 0].bmi = xd->mode_info_context->bmi[ 0]; - blockd[ 2].bmi = xd->mode_info_context->bmi[ 2]; - blockd[ 8].bmi = xd->mode_info_context->bmi[ 8]; - blockd[10].bmi = xd->mode_info_context->bmi[10]; - - if (mbmi->need_to_clamp_mvs) { - clamp_mv_to_umv_border(&blockd[ 0].bmi.as_mv[0].as_mv, xd); - clamp_mv_to_umv_border(&blockd[ 2].bmi.as_mv[0].as_mv, xd); - clamp_mv_to_umv_border(&blockd[ 8].bmi.as_mv[0].as_mv, xd); - clamp_mv_to_umv_border(&blockd[10].bmi.as_mv[0].as_mv, xd); - if (mbmi->second_ref_frame > 0) { - clamp_mv_to_umv_border(&blockd[ 0].bmi.as_mv[1].as_mv, xd); - clamp_mv_to_umv_border(&blockd[ 2].bmi.as_mv[1].as_mv, xd); - clamp_mv_to_umv_border(&blockd[ 8].bmi.as_mv[1].as_mv, xd); - clamp_mv_to_umv_border(&blockd[10].bmi.as_mv[1].as_mv, xd); - } - } + for (i = 0; i < 16; i += 8) { + BLOCKD *d0 = &blockd[i]; + BLOCKD *d1 = &blockd[i + 2]; + const int y = i & 8; + blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0]; + blockd[i + 2].bmi = xd->mode_info_context->bmi[i + 2]; - vp9_build_inter_predictors4b(xd, &blockd[ 0], 16); - vp9_build_inter_predictors4b(xd, &blockd[ 2], 16); - vp9_build_inter_predictors4b(xd, &blockd[ 8], 16); - vp9_build_inter_predictors4b(xd, &blockd[10], 16); + for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { + if (mbmi->need_to_clamp_mvs) { + clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[which_mv].as_mv, xd); + clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd); + } - if (mbmi->second_ref_frame > 0) { - vp9_build_2nd_inter_predictors4b(xd, &blockd[ 0], 16); - vp9_build_2nd_inter_predictors4b(xd, &blockd[ 2], 16); - vp9_build_2nd_inter_predictors4b(xd, &blockd[ 8], 16); - vp9_build_2nd_inter_predictors4b(xd, &blockd[10], 16); + build_2x1_inter_predictor(d0, d1, xd->scale_factor, 8, 16, + which_mv, &xd->subpix, + mb_row * 16 + y, mb_col * 16); + } } } else { for (i = 0; i < 16; i += 2) { BLOCKD *d0 = &blockd[i]; BLOCKD *d1 = &blockd[i + 1]; + const int x = (i & 3) * 4; + const int y = (i >> 2) * 4; blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0]; blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1]; - if (mbmi->need_to_clamp_mvs) { - clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[0].as_mv, xd); - clamp_mv_to_umv_border(&blockd[i + 1].bmi.as_mv[0].as_mv, xd); - if (mbmi->second_ref_frame > 0) { - clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[1].as_mv, xd); - clamp_mv_to_umv_border(&blockd[i + 1].bmi.as_mv[1].as_mv, xd); - } - } - - if (d0->bmi.as_mv[0].as_int == d1->bmi.as_mv[0].as_int) - build_inter_predictors2b(xd, d0, 16); - else { - vp9_build_inter_predictors_b(d0, 16, &xd->subpix); - vp9_build_inter_predictors_b(d1, 16, &xd->subpix); - } - - if (mbmi->second_ref_frame > 0) { - vp9_build_2nd_inter_predictors_b(d0, 16, &xd->subpix); - vp9_build_2nd_inter_predictors_b(d1, 16, &xd->subpix); + for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { + build_2x1_inter_predictor(d0, d1, xd->scale_factor, 4, 16, + which_mv, &xd->subpix, + mb_row * 16 + y, mb_col * 16 + x); } } } @@ -775,17 +800,13 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) { for (i = 16; i < 24; i += 2) { BLOCKD *d0 = &blockd[i]; BLOCKD *d1 = &blockd[i + 1]; + const int x = 4 * (i & 1); + const int y = ((i - 16) >> 1) * 4; - if (d0->bmi.as_mv[0].as_int == d1->bmi.as_mv[0].as_int) - build_inter_predictors2b(xd, d0, 8); - else { - vp9_build_inter_predictors_b(d0, 8, &xd->subpix); - vp9_build_inter_predictors_b(d1, 8, &xd->subpix); - } - - if (mbmi->second_ref_frame > 0) { - vp9_build_2nd_inter_predictors_b(d0, 8, &xd->subpix); - vp9_build_2nd_inter_predictors_b(d1, 8, &xd->subpix); + for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { + build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, + which_mv, &xd->subpix, + mb_row * 8 + y, mb_col * 8 + x); } } } @@ -882,22 +903,31 @@ void build_4x4uvmvs(MACROBLOCKD *xd) { } } -void vp9_build_inter_predictors_mb(MACROBLOCKD *xd) { +void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd, + uint8_t *dst_y, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_ystride, + int dst_uvstride, + int mb_row, + int mb_col) { + vp9_build_inter16x16_predictors_mby(xd, dst_y, dst_ystride, mb_row, mb_col); + vp9_build_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride, + mb_row, mb_col); +} + + +void vp9_build_inter_predictors_mb(MACROBLOCKD *xd, + int mb_row, + int mb_col) { if (xd->mode_info_context->mbmi.mode != SPLITMV) { - vp9_build_1st_inter16x16_predictors_mb(xd, xd->predictor, - &xd->predictor[256], - &xd->predictor[320], 16, 8); - - if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - /* 256 = offset of U plane in Y+U+V buffer; - * 320 = offset of V plane in Y+U+V buffer. - * (256=16x16, 320=16x16+8x8). */ - vp9_build_2nd_inter16x16_predictors_mb(xd, xd->predictor, - &xd->predictor[256], - &xd->predictor[320], 16, 8); - } + vp9_build_inter16x16_predictors_mb(xd, xd->predictor, + &xd->predictor[256], + &xd->predictor[320], 16, 8, + mb_row, mb_col); + #if CONFIG_COMP_INTERINTRA_PRED - else if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { + if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { vp9_build_interintra_16x16_predictors_mb(xd, xd->predictor, &xd->predictor[256], &xd->predictor[320], 16, 8); @@ -905,6 +935,6 @@ void vp9_build_inter_predictors_mb(MACROBLOCKD *xd) { #endif } else { build_4x4uvmvs(xd); - build_inter4x4_predictors_mb(xd); + build_inter4x4_predictors_mb(xd, mb_row, mb_col); } } diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index 903bd2e86..831ce2a73 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -16,71 +16,126 @@ struct subpix_fn_table; -extern void vp9_build_1st_inter16x16_predictors_mby(MACROBLOCKD *xd, - uint8_t *dst_y, - int dst_ystride, - int clamp_mvs); - -extern void vp9_build_1st_inter16x16_predictors_mbuv(MACROBLOCKD *xd, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_uvstride); - -extern void vp9_build_1st_inter16x16_predictors_mb(MACROBLOCKD *xd, - uint8_t *dst_y, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_ystride, - int dst_uvstride); - -extern void vp9_build_2nd_inter16x16_predictors_mby(MACROBLOCKD *xd, - uint8_t *dst_y, - int dst_ystride); - -extern void vp9_build_2nd_inter16x16_predictors_mbuv(MACROBLOCKD *xd, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_uvstride); - -extern void vp9_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *xd, - uint8_t *dst_y, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_ystride, - int dst_uvstride); - -extern void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, - uint8_t *dst_y, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_ystride, - int dst_uvstride); - -extern void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, - uint8_t *dst_y, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_ystride, - int dst_uvstride); - -extern void vp9_build_inter_predictors_mb(MACROBLOCKD *xd); - -extern void vp9_build_inter_predictors_b(BLOCKD *d, int pitch, - struct subpix_fn_table *sppf); - -extern void vp9_build_2nd_inter_predictors_b(BLOCKD *d, int pitch, - struct subpix_fn_table *sppf); - -extern void vp9_build_inter_predictors4b(MACROBLOCKD *xd, BLOCKD *d, - int pitch); - -extern void vp9_build_2nd_inter_predictors4b(MACROBLOCKD *xd, - BLOCKD *d, int pitch); - -extern void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd); - -extern void vp9_setup_interp_filters(MACROBLOCKD *xd, - INTERPOLATIONFILTERTYPE filter, - VP9_COMMON *cm); +void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd, + uint8_t *dst_y, + int dst_ystride, + int mb_row, + int mb_col); + +void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_uvstride, + int mb_row, + int mb_col); + +void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd, + uint8_t *dst_y, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_ystride, + int dst_uvstride, + int mb_row, + int mb_col); + +void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, + uint8_t *dst_y, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_ystride, + int dst_uvstride, + int mb_row, + int mb_col); + +void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, + uint8_t *dst_y, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_ystride, + int dst_uvstride, + int mb_row, + int mb_col); + +void vp9_build_inter_predictors_mb(MACROBLOCKD *xd, + int mb_row, + int mb_col); + +void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd, + int mb_row, + int mb_col); + +void vp9_setup_interp_filters(MACROBLOCKD *xd, + INTERPOLATIONFILTERTYPE filter, + VP9_COMMON *cm); + +void vp9_setup_scale_factors_for_frame(struct scale_factors *scale, + YV12_BUFFER_CONFIG *other, + int this_w, int this_h); + +void vp9_build_inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int_mv *mv_q3, + const struct scale_factors *scale, + int w, int h, int do_avg, + const struct subpix_fn_table *subpix); + +void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int_mv *fullpel_mv_q3, + const int_mv *frac_mv_q4, + const struct scale_factors *scale, + int w, int h, int do_avg, + const struct subpix_fn_table *subpix); + +static int scale_value_x(int val, const struct scale_factors *scale) { + return val * scale->x_num / scale->x_den; +} + +static int scale_value_y(int val, const struct scale_factors *scale) { + return val * scale->y_num / scale->y_den; +} + +static int scaled_buffer_offset(int x_offset, + int y_offset, + int stride, + const struct scale_factors *scale) { + return scale_value_y(y_offset, scale) * stride + + scale_value_x(x_offset, scale); +} + +static void setup_pred_block(YV12_BUFFER_CONFIG *dst, + const YV12_BUFFER_CONFIG *src, + int mb_row, int mb_col, + const struct scale_factors *scale, + const struct scale_factors *scale_uv) { + const int recon_y_stride = src->y_stride; + const int recon_uv_stride = src->uv_stride; + int recon_yoffset; + int recon_uvoffset; + + if (scale) { + recon_yoffset = scaled_buffer_offset(16 * mb_col, 16 * mb_row, + recon_y_stride, scale); + recon_uvoffset = scaled_buffer_offset(8 * mb_col, 8 * mb_row, + recon_uv_stride, scale_uv); + } else { + recon_yoffset = 16 * mb_row * recon_y_stride + 16 * mb_col; + recon_uvoffset = 8 * mb_row * recon_uv_stride + 8 * mb_col; + } + *dst = *src; + dst->y_buffer += recon_yoffset; + dst->u_buffer += recon_uvoffset; + dst->v_buffer += recon_uvoffset; +} + +static void set_scale_factors(MACROBLOCKD *xd, + int ref0, int ref1, + struct scale_factors scale_factor[MAX_REF_FRAMES]) { + + xd->scale_factor[0] = scale_factor[ref0 >= 0 ? ref0 : 0]; + xd->scale_factor[1] = scale_factor[ref1 >= 0 ? ref1 : 0]; + xd->scale_factor_uv[0] = xd->scale_factor[0]; + xd->scale_factor_uv[1] = xd->scale_factor[1]; +} #endif // VP9_COMMON_VP9_RECONINTER_H_ diff --git a/vp9/common/vp9_reconintra.h b/vp9/common/vp9_reconintra.h index 88584ad3b..3031fb699 100644 --- a/vp9/common/vp9_reconintra.h +++ b/vp9/common/vp9_reconintra.h @@ -14,37 +14,43 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_blockd.h" -extern void vp9_recon_intra_mbuv(MACROBLOCKD *xd); -extern B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, - int stride, int n); -extern B_PREDICTION_MODE vp9_find_bpred_context(BLOCKD *x); +void vp9_recon_intra_mbuv(MACROBLOCKD *xd); + +B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, + int stride, int n); + +B_PREDICTION_MODE vp9_find_bpred_context(BLOCKD *x); + #if CONFIG_COMP_INTERINTRA_PRED -extern void vp9_build_interintra_16x16_predictors_mb(MACROBLOCKD *xd, - uint8_t *ypred, - uint8_t *upred, - uint8_t *vpred, - int ystride, - int uvstride); -extern void vp9_build_interintra_16x16_predictors_mby(MACROBLOCKD *xd, - uint8_t *ypred, - int ystride); -extern void vp9_build_interintra_16x16_predictors_mbuv(MACROBLOCKD *xd, - uint8_t *upred, - uint8_t *vpred, - int uvstride); +void vp9_build_interintra_16x16_predictors_mb(MACROBLOCKD *xd, + uint8_t *ypred, + uint8_t *upred, + uint8_t *vpred, + int ystride, + int uvstride); + +void vp9_build_interintra_16x16_predictors_mby(MACROBLOCKD *xd, + uint8_t *ypred, + int ystride); + +void vp9_build_interintra_16x16_predictors_mbuv(MACROBLOCKD *xd, + uint8_t *upred, + uint8_t *vpred, + int uvstride); #endif // CONFIG_COMP_INTERINTRA_PRED -extern void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd, - uint8_t *ypred, - uint8_t *upred, - uint8_t *vpred, - int ystride, - int uvstride); -extern void vp9_build_interintra_64x64_predictors_sb(MACROBLOCKD *xd, - uint8_t *ypred, - uint8_t *upred, - uint8_t *vpred, - int ystride, - int uvstride); +void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd, + uint8_t *ypred, + uint8_t *upred, + uint8_t *vpred, + int ystride, + int uvstride); + +void vp9_build_interintra_64x64_predictors_sb(MACROBLOCKD *xd, + uint8_t *ypred, + uint8_t *upred, + uint8_t *vpred, + int ystride, + int uvstride); #endif // VP9_COMMON_VP9_RECONINTRA_H_ diff --git a/vp9/common/vp9_rtcd.c b/vp9/common/vp9_rtcd.c index 277d5b217..72613ae07 100644 --- a/vp9/common/vp9_rtcd.c +++ b/vp9/common/vp9_rtcd.c @@ -12,10 +12,9 @@ #include "vp9_rtcd.h" #include "vpx_ports/vpx_once.h" -extern void vpx_scale_rtcd(void); +void vpx_scale_rtcd(void); -void vp9_rtcd() -{ +void vp9_rtcd() { vpx_scale_rtcd(); once(setup_rtcd_internal); } diff --git a/vp9/common/vp9_setupintrarecon.h b/vp9/common/vp9_setupintrarecon.h index 457265528..e389f3c91 100644 --- a/vp9/common/vp9_setupintrarecon.h +++ b/vp9/common/vp9_setupintrarecon.h @@ -13,6 +13,6 @@ #include "vpx_scale/yv12config.h" -extern void vp9_setup_intra_recon(YV12_BUFFER_CONFIG *ybf); +void vp9_setup_intra_recon(YV12_BUFFER_CONFIG *ybf); #endif // VP9_COMMON_VP9_SETUPINTRARECON_H_ diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index a1225f1dc..5893c1132 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -12,6 +12,7 @@ #include "vp9/decoder/vp9_treereader.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_reconinter.h" #include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/common/vp9_findnearmv.h" #include "vp9/common/vp9_common.h" @@ -697,6 +698,9 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, int mb_to_top_edge; int mb_to_bottom_edge; const int mb_size = 1 << mi->mbmi.sb_type; + const int use_prev_in_find_mv_refs = cm->Width == cm->last_width && + cm->Height == cm->last_height && + !cm->error_resilient_mode; mb_to_top_edge = xd->mb_to_top_edge; mb_to_bottom_edge = xd->mb_to_bottom_edge; @@ -749,25 +753,22 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, int_mv nearest_second, nearby_second, best_mv_second; vp9_prob mv_ref_p [VP9_MVREFS - 1]; - int recon_y_stride, recon_yoffset; - int recon_uv_stride, recon_uvoffset; MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame; + xd->scale_factor[0] = cm->active_ref_scale[mbmi->ref_frame - 1]; { int ref_fb_idx; + const int use_prev_in_find_best_ref = + xd->scale_factor[0].x_num == xd->scale_factor[0].x_den && + xd->scale_factor[0].y_num == xd->scale_factor[0].y_den && + !cm->error_resilient_mode && + !cm->frame_parallel_decoding_mode; /* Select the appropriate reference frame for this MB */ ref_fb_idx = cm->active_ref_idx[ref_frame - 1]; - recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride ; - recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; - - recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16); - recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8); - - xd->pre.y_buffer = cm->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset; - xd->pre.u_buffer = cm->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; - xd->pre.v_buffer = cm->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; + setup_pred_block(&xd->pre, &cm->yv12_fb[ref_fb_idx], + mb_row, mb_col, &xd->scale_factor[0], &xd->scale_factor_uv[0]); #ifdef DEC_DEBUG if (dec_debug) @@ -776,7 +777,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, #endif // if (cm->current_video_frame == 1 && mb_row == 4 && mb_col == 5) // printf("Dello\n"); - vp9_find_mv_refs(cm, xd, mi, cm->error_resilient_mode ? 0 : prev_mi, + vp9_find_mv_refs(cm, xd, mi, use_prev_in_find_mv_refs ? prev_mi : NULL, ref_frame, mbmi->ref_mvs[ref_frame], cm->ref_frame_sign_bias); @@ -809,10 +810,9 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, if (mbmi->mode != ZEROMV) { vp9_find_best_ref_mvs(xd, - pbi->common.error_resilient_mode || - pbi->common.frame_parallel_decoding_mode ? - 0 : xd->pre.y_buffer, - recon_y_stride, + use_prev_in_find_best_ref ? + xd->pre.y_buffer : NULL, + xd->pre.y_stride, mbmi->ref_mvs[ref_frame], &nearest, &nearby); @@ -853,27 +853,31 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->second_ref_frame = 1; if (mbmi->second_ref_frame > 0) { int second_ref_fb_idx; + int use_prev_in_find_best_ref; + + xd->scale_factor[1] = cm->active_ref_scale[mbmi->second_ref_frame - 1]; + use_prev_in_find_best_ref = + xd->scale_factor[1].x_num == xd->scale_factor[1].x_den && + xd->scale_factor[1].y_num == xd->scale_factor[1].y_den && + !cm->error_resilient_mode && + !cm->frame_parallel_decoding_mode; + /* Select the appropriate reference frame for this MB */ second_ref_fb_idx = cm->active_ref_idx[mbmi->second_ref_frame - 1]; - xd->second_pre.y_buffer = - cm->yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset; - xd->second_pre.u_buffer = - cm->yv12_fb[second_ref_fb_idx].u_buffer + recon_uvoffset; - xd->second_pre.v_buffer = - cm->yv12_fb[second_ref_fb_idx].v_buffer + recon_uvoffset; + setup_pred_block(&xd->second_pre, &cm->yv12_fb[second_ref_fb_idx], + mb_row, mb_col, &xd->scale_factor[1], &xd->scale_factor_uv[1]); - vp9_find_mv_refs(cm, xd, mi, cm->error_resilient_mode ? 0 : prev_mi, + vp9_find_mv_refs(cm, xd, mi, use_prev_in_find_mv_refs ? prev_mi : NULL, mbmi->second_ref_frame, mbmi->ref_mvs[mbmi->second_ref_frame], cm->ref_frame_sign_bias); if (mbmi->mode != ZEROMV) { vp9_find_best_ref_mvs(xd, - pbi->common.error_resilient_mode || - pbi->common.frame_parallel_decoding_mode ? - 0 : xd->second_pre.y_buffer, - recon_y_stride, + use_prev_in_find_best_ref ? + xd->second_pre.y_buffer : NULL, + xd->second_pre.y_stride, mbmi->ref_mvs[mbmi->second_ref_frame], &nearest_second, &nearby_second); @@ -1089,7 +1093,6 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, break; case NEWMV: - read_nmv(bc, &mv->as_mv, &best_mv.as_mv, nmvc); read_nmv_fp(bc, &mv->as_mv, &best_mv.as_mv, nmvc, xd->allow_high_precision_mv); @@ -1230,8 +1233,12 @@ void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi, MODE_INFO *mi = xd->mode_info_context; MODE_INFO *prev_mi = xd->prev_mode_info_context; - if (pbi->common.frame_type == KEY_FRAME) + if (pbi->common.frame_type == KEY_FRAME) { kfread_modes(pbi, mi, mb_row, mb_col, bc); - else + } else { read_mb_modes_mv(pbi, mi, &mi->mbmi, prev_mi, mb_row, mb_col, bc); + set_scale_factors(xd, + mi->mbmi.ref_frame - 1, mi->mbmi.second_ref_frame - 1, + pbi->common.active_ref_scale); + } } diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 05a1bf9e0..eefdbb92b 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -147,7 +147,8 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) { /* skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it * to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy. */ -static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) { +static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd, + int mb_row, int mb_col) { BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { @@ -168,32 +169,26 @@ static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) { xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, - xd->dst.uv_stride); + xd->dst.uv_stride, + mb_row, mb_col); } else if (sb_type == BLOCK_SIZE_SB32X32) { vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, - xd->dst.uv_stride); + xd->dst.uv_stride, + mb_row, mb_col); } else { - vp9_build_1st_inter16x16_predictors_mb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride); - - if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - vp9_build_2nd_inter16x16_predictors_mb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride); - } + vp9_build_inter16x16_predictors_mb(xd, + xd->dst.y_buffer, + xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.y_stride, + xd->dst.uv_stride, + mb_row, mb_col); #if CONFIG_COMP_INTERINTRA_PRED - else if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { + if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { vp9_build_interintra_16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer, @@ -608,7 +603,7 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, /* Special case: Force the loopfilter to skip when eobtotal and * mb_skip_coeff are zero. */ - skip_recon_mb(pbi, xd); + skip_recon_mb(pbi, xd, mb_row, mb_col); return; } @@ -619,7 +614,8 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, } else { vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride); + xd->dst.y_stride, xd->dst.uv_stride, + mb_row, mb_col); } /* dequantization and idct */ @@ -729,7 +725,7 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, /* Special case: Force the loopfilter to skip when eobtotal and * mb_skip_coeff are zero. */ - skip_recon_mb(pbi, xd); + skip_recon_mb(pbi, xd, mb_row, mb_col); return; } @@ -740,7 +736,8 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, } else { vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride); + xd->dst.y_stride, xd->dst.uv_stride, + mb_row, mb_col); } /* dequantization and idct */ @@ -841,7 +838,7 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, /* Special case: Force the loopfilter to skip when eobtotal and mb_skip_coeff are zero. */ xd->mode_info_context->mbmi.mb_skip_coeff = 1; - skip_recon_mb(pbi, xd); + skip_recon_mb(pbi, xd, mb_row, mb_col); return; } #ifdef DEC_DEBUG @@ -868,7 +865,7 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->mode_info_context->mbmi.mode, tx_size, xd->mode_info_context->mbmi.interp_filter); #endif - vp9_build_inter_predictors_mb(xd); + vp9_build_inter_predictors_mb(xd, mb_row, mb_col); } if (tx_size == TX_16X16) { @@ -975,18 +972,14 @@ static void set_refs(VP9D_COMP *pbi, int block_size, MB_MODE_INFO *const mbmi = &mi->mbmi; if (mbmi->ref_frame > INTRA_FRAME) { - int ref_fb_idx, ref_yoffset, ref_uvoffset, ref_y_stride, ref_uv_stride; + int ref_fb_idx; /* Select the appropriate reference frame for this MB */ ref_fb_idx = cm->active_ref_idx[mbmi->ref_frame - 1]; - - ref_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; - ref_yoffset = mb_row * 16 * ref_y_stride + 16 * mb_col; - xd->pre.y_buffer = cm->yv12_fb[ref_fb_idx].y_buffer + ref_yoffset; - ref_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; - ref_uvoffset = mb_row * 8 * ref_uv_stride + 8 * mb_col; - xd->pre.u_buffer = cm->yv12_fb[ref_fb_idx].u_buffer + ref_uvoffset; - xd->pre.v_buffer = cm->yv12_fb[ref_fb_idx].v_buffer + ref_uvoffset; + xd->scale_factor[0] = cm->active_ref_scale[mbmi->ref_frame - 1]; + xd->scale_factor_uv[0] = cm->active_ref_scale[mbmi->ref_frame - 1]; + setup_pred_block(&xd->pre, &cm->yv12_fb[ref_fb_idx], mb_row, mb_col, + &xd->scale_factor[0], &xd->scale_factor_uv[0]); /* propagate errors from reference frames */ xd->corrupted |= cm->yv12_fb[ref_fb_idx].corrupted; @@ -997,12 +990,9 @@ static void set_refs(VP9D_COMP *pbi, int block_size, /* Select the appropriate reference frame for this MB */ second_ref_fb_idx = cm->active_ref_idx[mbmi->second_ref_frame - 1]; - xd->second_pre.y_buffer = - cm->yv12_fb[second_ref_fb_idx].y_buffer + ref_yoffset; - xd->second_pre.u_buffer = - cm->yv12_fb[second_ref_fb_idx].u_buffer + ref_uvoffset; - xd->second_pre.v_buffer = - cm->yv12_fb[second_ref_fb_idx].v_buffer + ref_uvoffset; + setup_pred_block(&xd->second_pre, &cm->yv12_fb[second_ref_fb_idx], + mb_row, mb_col, + &xd->scale_factor[1], &xd->scale_factor_uv[1]); /* propagate errors from reference frames */ xd->corrupted |= cm->yv12_fb[second_ref_fb_idx].corrupted; @@ -1213,6 +1203,26 @@ static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) { } } +static void update_frame_size(VP9D_COMP *pbi) { + VP9_COMMON *cm = &pbi->common; + + /* our internal buffers are always multiples of 16 */ + int width = (cm->Width + 15) & ~15; + int height = (cm->Height + 15) & ~15; + + cm->mb_rows = height >> 4; + cm->mb_cols = width >> 4; + cm->MBs = cm->mb_rows * cm->mb_cols; + cm->mode_info_stride = cm->mb_cols + 1; + memset(cm->mip, 0, + (cm->mb_cols + 1) * (cm->mb_rows + 1) * sizeof(MODE_INFO)); + vp9_update_mode_info_border(cm, cm->mip); + + cm->mi = cm->mip + cm->mode_info_stride + 1; + cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1; + vp9_update_mode_info_in_image(cm, cm->mi); +} + int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { BOOL_DECODER header_bc, residual_bc; VP9_COMMON *const pc = &pbi->common; @@ -1290,9 +1300,25 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { "Invalid frame height"); } - if (vp9_alloc_frame_buffers(pc, pc->Width, pc->Height)) - vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate frame buffers"); + if (!pbi->initial_width || !pbi->initial_height) { + if (vp9_alloc_frame_buffers(pc, pc->Width, pc->Height)) + vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate frame buffers"); + pbi->initial_width = pc->Width; + pbi->initial_height = pc->Height; + } + + if (pc->Width > pbi->initial_width) { + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Frame width too large"); + } + + if (pc->Height > pbi->initial_height) { + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Frame height too large"); + } + + update_frame_size(pbi); } } } @@ -1304,6 +1330,11 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { init_frame(pbi); + /* Reset the frame pointers to the current frame size */ + vp8_yv12_realloc_frame_buffer(&pc->yv12_fb[pc->new_fb_idx], + pc->mb_cols * 16, pc->mb_rows * 16, + VP9BORDERINPIXELS); + if (vp9_start_decode(&header_bc, data, (unsigned int)first_partition_length_in_bytes)) vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, @@ -1736,6 +1767,10 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { } corrupt_tokens |= xd->corrupted; + // keep track of the last coded dimensions + pc->last_width = pc->Width; + pc->last_height = pc->Height; + /* Collect information about decoder corruption. */ /* 1. Check first boolean decoder for errors. */ pc->yv12_fb[pc->new_fb_idx].corrupted = bool_error(&header_bc); diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 0e205bdda..1d6c66afd 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -65,7 +65,7 @@ void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, for (i = 0; i < 16; i++) input[i] = dq[i] * input[i]; - vp9_short_iht4x4(input, output, 8, tx_type); + vp9_short_iht4x4(input, output, 4, tx_type); vpx_memset(input, 0, 32); add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4); @@ -86,7 +86,7 @@ void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input, for (i = 1; i < 64; i++) input[i] *= dq[1]; - vp9_short_iht8x8(input, output, 16, tx_type); + vp9_short_iht8x8(input, output, 8, tx_type); vpx_memset(input, 0, 128); add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8); @@ -247,7 +247,7 @@ void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input, input[i] *= dq[1]; // inverse hybrid transform - vp9_short_iht16x16(input, output, 32, tx_type); + vp9_short_iht16x16(input, output, 16, tx_type); // the idct halves ( >> 1) the pitch // vp9_short_idct16x16_c(input, output, 32); diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 91042c4fe..599c5bb57 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -110,15 +110,12 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, case TX_8X8: coef_probs = fc->coef_probs_8x8; coef_counts = fc->coef_counts_8x8; -#if CONFIG_CNVCONTEXT above_ec = (A0[aidx] + A0[aidx + 1]) != 0; left_ec = (L0[lidx] + L0[lidx + 1]) != 0; -#endif break; case TX_16X16: coef_probs = fc->coef_probs_16x16; coef_counts = fc->coef_counts_16x16; -#if CONFIG_CNVCONTEXT if (type == PLANE_TYPE_UV) { ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1); ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1); @@ -128,12 +125,10 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, above_ec = (A0[aidx] + A0[aidx + 1] + A0[aidx + 2] + A0[aidx + 3]) != 0; left_ec = (L0[lidx] + L0[lidx + 1] + L0[lidx + 2] + L0[lidx + 3]) != 0; } -#endif break; case TX_32X32: coef_probs = fc->coef_probs_32x32; coef_counts = fc->coef_counts_32x32; -#if CONFIG_CNVCONTEXT if (type == PLANE_TYPE_UV) { ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1); ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1); @@ -153,7 +148,6 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, left_ec = (L0[lidx] + L0[lidx + 1] + L0[lidx + 2] + L0[lidx + 3] + L1[lidx] + L1[lidx + 1] + L1[lidx + 2] + L1[lidx + 3]) != 0; } -#endif break; } diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h index 8c1f76e73..0e6d059af 100644 --- a/vp9/decoder/vp9_onyxd_int.h +++ b/vp9/decoder/vp9_onyxd_int.h @@ -38,6 +38,8 @@ typedef struct VP9Decompressor { int decoded_key_frame; + int initial_width; + int initial_height; } VP9D_COMP; int vp9_decode_frame(VP9D_COMP *cpi, const unsigned char **p_data_end); diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index e2f3e2677..080f4a70b 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -105,7 +105,6 @@ void vp9_short_fht4x4_c(int16_t *input, int16_t *output, int pitch, TX_TYPE tx_type) { int16_t out[4 * 4]; int16_t *outptr = &out[0]; - const int short_pitch = pitch >> 1; int i, j; int16_t temp_in[4], temp_out[4]; @@ -137,7 +136,7 @@ void vp9_short_fht4x4_c(int16_t *input, int16_t *output, // column transform for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) - temp_in[j] = input[j * short_pitch + i] << 4; + temp_in[j] = input[j * pitch + i] << 4; if (i == 0 && temp_in[0]) temp_in[0] += 1; fwdc(temp_in, temp_out); @@ -308,7 +307,6 @@ void vp9_short_fht8x8_c(int16_t *input, int16_t *output, int pitch, TX_TYPE tx_type) { int16_t out[64]; int16_t *outptr = &out[0]; - const int short_pitch = pitch >> 1; int i, j; int16_t temp_in[8], temp_out[8]; @@ -339,7 +337,7 @@ void vp9_short_fht8x8_c(int16_t *input, int16_t *output, // column transform for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) - temp_in[j] = input[j * short_pitch + i] << 2; + temp_in[j] = input[j * pitch + i] << 2; fwdc(temp_in, temp_out); for (j = 0; j < 8; ++j) outptr[j * 8 + i] = temp_out[j]; @@ -697,7 +695,6 @@ void vp9_short_fht16x16_c(int16_t *input, int16_t *output, int pitch, TX_TYPE tx_type) { int16_t out[256]; int16_t *outptr = &out[0]; - const int short_pitch = pitch >> 1; int i, j; int16_t temp_in[16], temp_out[16]; @@ -728,7 +725,7 @@ void vp9_short_fht16x16_c(int16_t *input, int16_t *output, // column transform for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) - temp_in[j] = input[j * short_pitch + i] << 2; + temp_in[j] = input[j * pitch + i] << 2; fwdc(temp_in, temp_out); for (j = 0; j < 16; ++j) outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; @@ -744,412 +741,9 @@ void vp9_short_fht16x16_c(int16_t *input, int16_t *output, } } -#define TEST_INT_32x32_DCT 1 - -#if !TEST_INT_32x32_DCT - -static void dct32_1d(double *input, double *output, int stride) { - static const double C1 = 0.998795456205; // cos(pi * 1 / 64) - static const double C2 = 0.995184726672; // cos(pi * 2 / 64) - static const double C3 = 0.989176509965; // cos(pi * 3 / 64) - static const double C4 = 0.980785280403; // cos(pi * 4 / 64) - static const double C5 = 0.970031253195; // cos(pi * 5 / 64) - static const double C6 = 0.956940335732; // cos(pi * 6 / 64) - static const double C7 = 0.941544065183; // cos(pi * 7 / 64) - static const double C8 = 0.923879532511; // cos(pi * 8 / 64) - static const double C9 = 0.903989293123; // cos(pi * 9 / 64) - static const double C10 = 0.881921264348; // cos(pi * 10 / 64) - static const double C11 = 0.857728610000; // cos(pi * 11 / 64) - static const double C12 = 0.831469612303; // cos(pi * 12 / 64) - static const double C13 = 0.803207531481; // cos(pi * 13 / 64) - static const double C14 = 0.773010453363; // cos(pi * 14 / 64) - static const double C15 = 0.740951125355; // cos(pi * 15 / 64) - static const double C16 = 0.707106781187; // cos(pi * 16 / 64) - static const double C17 = 0.671558954847; // cos(pi * 17 / 64) - static const double C18 = 0.634393284164; // cos(pi * 18 / 64) - static const double C19 = 0.595699304492; // cos(pi * 19 / 64) - static const double C20 = 0.555570233020; // cos(pi * 20 / 64) - static const double C21 = 0.514102744193; // cos(pi * 21 / 64) - static const double C22 = 0.471396736826; // cos(pi * 22 / 64) - static const double C23 = 0.427555093430; // cos(pi * 23 / 64) - static const double C24 = 0.382683432365; // cos(pi * 24 / 64) - static const double C25 = 0.336889853392; // cos(pi * 25 / 64) - static const double C26 = 0.290284677254; // cos(pi * 26 / 64) - static const double C27 = 0.242980179903; // cos(pi * 27 / 64) - static const double C28 = 0.195090322016; // cos(pi * 28 / 64) - static const double C29 = 0.146730474455; // cos(pi * 29 / 64) - static const double C30 = 0.098017140330; // cos(pi * 30 / 64) - static const double C31 = 0.049067674327; // cos(pi * 31 / 64) - - double step[32]; - - // Stage 1 - step[0] = input[stride*0] + input[stride*(32 - 1)]; - step[1] = input[stride*1] + input[stride*(32 - 2)]; - step[2] = input[stride*2] + input[stride*(32 - 3)]; - step[3] = input[stride*3] + input[stride*(32 - 4)]; - step[4] = input[stride*4] + input[stride*(32 - 5)]; - step[5] = input[stride*5] + input[stride*(32 - 6)]; - step[6] = input[stride*6] + input[stride*(32 - 7)]; - step[7] = input[stride*7] + input[stride*(32 - 8)]; - step[8] = input[stride*8] + input[stride*(32 - 9)]; - step[9] = input[stride*9] + input[stride*(32 - 10)]; - step[10] = input[stride*10] + input[stride*(32 - 11)]; - step[11] = input[stride*11] + input[stride*(32 - 12)]; - step[12] = input[stride*12] + input[stride*(32 - 13)]; - step[13] = input[stride*13] + input[stride*(32 - 14)]; - step[14] = input[stride*14] + input[stride*(32 - 15)]; - step[15] = input[stride*15] + input[stride*(32 - 16)]; - step[16] = -input[stride*16] + input[stride*(32 - 17)]; - step[17] = -input[stride*17] + input[stride*(32 - 18)]; - step[18] = -input[stride*18] + input[stride*(32 - 19)]; - step[19] = -input[stride*19] + input[stride*(32 - 20)]; - step[20] = -input[stride*20] + input[stride*(32 - 21)]; - step[21] = -input[stride*21] + input[stride*(32 - 22)]; - step[22] = -input[stride*22] + input[stride*(32 - 23)]; - step[23] = -input[stride*23] + input[stride*(32 - 24)]; - step[24] = -input[stride*24] + input[stride*(32 - 25)]; - step[25] = -input[stride*25] + input[stride*(32 - 26)]; - step[26] = -input[stride*26] + input[stride*(32 - 27)]; - step[27] = -input[stride*27] + input[stride*(32 - 28)]; - step[28] = -input[stride*28] + input[stride*(32 - 29)]; - step[29] = -input[stride*29] + input[stride*(32 - 30)]; - step[30] = -input[stride*30] + input[stride*(32 - 31)]; - step[31] = -input[stride*31] + input[stride*(32 - 32)]; - - // Stage 2 - output[stride*0] = step[0] + step[16 - 1]; - output[stride*1] = step[1] + step[16 - 2]; - output[stride*2] = step[2] + step[16 - 3]; - output[stride*3] = step[3] + step[16 - 4]; - output[stride*4] = step[4] + step[16 - 5]; - output[stride*5] = step[5] + step[16 - 6]; - output[stride*6] = step[6] + step[16 - 7]; - output[stride*7] = step[7] + step[16 - 8]; - output[stride*8] = -step[8] + step[16 - 9]; - output[stride*9] = -step[9] + step[16 - 10]; - output[stride*10] = -step[10] + step[16 - 11]; - output[stride*11] = -step[11] + step[16 - 12]; - output[stride*12] = -step[12] + step[16 - 13]; - output[stride*13] = -step[13] + step[16 - 14]; - output[stride*14] = -step[14] + step[16 - 15]; - output[stride*15] = -step[15] + step[16 - 16]; - - output[stride*16] = step[16]; - output[stride*17] = step[17]; - output[stride*18] = step[18]; - output[stride*19] = step[19]; - - output[stride*20] = (-step[20] + step[27])*C16; - output[stride*21] = (-step[21] + step[26])*C16; - output[stride*22] = (-step[22] + step[25])*C16; - output[stride*23] = (-step[23] + step[24])*C16; - - output[stride*24] = (step[24] + step[23])*C16; - output[stride*25] = (step[25] + step[22])*C16; - output[stride*26] = (step[26] + step[21])*C16; - output[stride*27] = (step[27] + step[20])*C16; - - output[stride*28] = step[28]; - output[stride*29] = step[29]; - output[stride*30] = step[30]; - output[stride*31] = step[31]; - - // Stage 3 - step[0] = output[stride*0] + output[stride*(8 - 1)]; - step[1] = output[stride*1] + output[stride*(8 - 2)]; - step[2] = output[stride*2] + output[stride*(8 - 3)]; - step[3] = output[stride*3] + output[stride*(8 - 4)]; - step[4] = -output[stride*4] + output[stride*(8 - 5)]; - step[5] = -output[stride*5] + output[stride*(8 - 6)]; - step[6] = -output[stride*6] + output[stride*(8 - 7)]; - step[7] = -output[stride*7] + output[stride*(8 - 8)]; - step[8] = output[stride*8]; - step[9] = output[stride*9]; - step[10] = (-output[stride*10] + output[stride*13])*C16; - step[11] = (-output[stride*11] + output[stride*12])*C16; - step[12] = (output[stride*12] + output[stride*11])*C16; - step[13] = (output[stride*13] + output[stride*10])*C16; - step[14] = output[stride*14]; - step[15] = output[stride*15]; - - step[16] = output[stride*16] + output[stride*23]; - step[17] = output[stride*17] + output[stride*22]; - step[18] = output[stride*18] + output[stride*21]; - step[19] = output[stride*19] + output[stride*20]; - step[20] = -output[stride*20] + output[stride*19]; - step[21] = -output[stride*21] + output[stride*18]; - step[22] = -output[stride*22] + output[stride*17]; - step[23] = -output[stride*23] + output[stride*16]; - step[24] = -output[stride*24] + output[stride*31]; - step[25] = -output[stride*25] + output[stride*30]; - step[26] = -output[stride*26] + output[stride*29]; - step[27] = -output[stride*27] + output[stride*28]; - step[28] = output[stride*28] + output[stride*27]; - step[29] = output[stride*29] + output[stride*26]; - step[30] = output[stride*30] + output[stride*25]; - step[31] = output[stride*31] + output[stride*24]; - - // Stage 4 - output[stride*0] = step[0] + step[3]; - output[stride*1] = step[1] + step[2]; - output[stride*2] = -step[2] + step[1]; - output[stride*3] = -step[3] + step[0]; - output[stride*4] = step[4]; - output[stride*5] = (-step[5] + step[6])*C16; - output[stride*6] = (step[6] + step[5])*C16; - output[stride*7] = step[7]; - output[stride*8] = step[8] + step[11]; - output[stride*9] = step[9] + step[10]; - output[stride*10] = -step[10] + step[9]; - output[stride*11] = -step[11] + step[8]; - output[stride*12] = -step[12] + step[15]; - output[stride*13] = -step[13] + step[14]; - output[stride*14] = step[14] + step[13]; - output[stride*15] = step[15] + step[12]; - - output[stride*16] = step[16]; - output[stride*17] = step[17]; - output[stride*18] = step[18]*-C8 + step[29]*C24; - output[stride*19] = step[19]*-C8 + step[28]*C24; - output[stride*20] = step[20]*-C24 + step[27]*-C8; - output[stride*21] = step[21]*-C24 + step[26]*-C8; - output[stride*22] = step[22]; - output[stride*23] = step[23]; - output[stride*24] = step[24]; - output[stride*25] = step[25]; - output[stride*26] = step[26]*C24 + step[21]*-C8; - output[stride*27] = step[27]*C24 + step[20]*-C8; - output[stride*28] = step[28]*C8 + step[19]*C24; - output[stride*29] = step[29]*C8 + step[18]*C24; - output[stride*30] = step[30]; - output[stride*31] = step[31]; - - // Stage 5 - step[0] = (output[stride*0] + output[stride*1]) * C16; - step[1] = (-output[stride*1] + output[stride*0]) * C16; - step[2] = output[stride*2]*C24 + output[stride*3] * C8; - step[3] = output[stride*3]*C24 - output[stride*2] * C8; - step[4] = output[stride*4] + output[stride*5]; - step[5] = -output[stride*5] + output[stride*4]; - step[6] = -output[stride*6] + output[stride*7]; - step[7] = output[stride*7] + output[stride*6]; - step[8] = output[stride*8]; - step[9] = output[stride*9]*-C8 + output[stride*14]*C24; - step[10] = output[stride*10]*-C24 + output[stride*13]*-C8; - step[11] = output[stride*11]; - step[12] = output[stride*12]; - step[13] = output[stride*13]*C24 + output[stride*10]*-C8; - step[14] = output[stride*14]*C8 + output[stride*9]*C24; - step[15] = output[stride*15]; - - step[16] = output[stride*16] + output[stride*19]; - step[17] = output[stride*17] + output[stride*18]; - step[18] = -output[stride*18] + output[stride*17]; - step[19] = -output[stride*19] + output[stride*16]; - step[20] = -output[stride*20] + output[stride*23]; - step[21] = -output[stride*21] + output[stride*22]; - step[22] = output[stride*22] + output[stride*21]; - step[23] = output[stride*23] + output[stride*20]; - step[24] = output[stride*24] + output[stride*27]; - step[25] = output[stride*25] + output[stride*26]; - step[26] = -output[stride*26] + output[stride*25]; - step[27] = -output[stride*27] + output[stride*24]; - step[28] = -output[stride*28] + output[stride*31]; - step[29] = -output[stride*29] + output[stride*30]; - step[30] = output[stride*30] + output[stride*29]; - step[31] = output[stride*31] + output[stride*28]; - - // Stage 6 - output[stride*0] = step[0]; - output[stride*1] = step[1]; - output[stride*2] = step[2]; - output[stride*3] = step[3]; - output[stride*4] = step[4]*C28 + step[7]*C4; - output[stride*5] = step[5]*C12 + step[6]*C20; - output[stride*6] = step[6]*C12 + step[5]*-C20; - output[stride*7] = step[7]*C28 + step[4]*-C4; - output[stride*8] = step[8] + step[9]; - output[stride*9] = -step[9] + step[8]; - output[stride*10] = -step[10] + step[11]; - output[stride*11] = step[11] + step[10]; - output[stride*12] = step[12] + step[13]; - output[stride*13] = -step[13] + step[12]; - output[stride*14] = -step[14] + step[15]; - output[stride*15] = step[15] + step[14]; - - output[stride*16] = step[16]; - output[stride*17] = step[17]*-C4 + step[30]*C28; - output[stride*18] = step[18]*-C28 + step[29]*-C4; - output[stride*19] = step[19]; - output[stride*20] = step[20]; - output[stride*21] = step[21]*-C20 + step[26]*C12; - output[stride*22] = step[22]*-C12 + step[25]*-C20; - output[stride*23] = step[23]; - output[stride*24] = step[24]; - output[stride*25] = step[25]*C12 + step[22]*-C20; - output[stride*26] = step[26]*C20 + step[21]*C12; - output[stride*27] = step[27]; - output[stride*28] = step[28]; - output[stride*29] = step[29]*C28 + step[18]*-C4; - output[stride*30] = step[30]*C4 + step[17]*C28; - output[stride*31] = step[31]; - - // Stage 7 - step[0] = output[stride*0]; - step[1] = output[stride*1]; - step[2] = output[stride*2]; - step[3] = output[stride*3]; - step[4] = output[stride*4]; - step[5] = output[stride*5]; - step[6] = output[stride*6]; - step[7] = output[stride*7]; - step[8] = output[stride*8]*C30 + output[stride*15]*C2; - step[9] = output[stride*9]*C14 + output[stride*14]*C18; - step[10] = output[stride*10]*C22 + output[stride*13]*C10; - step[11] = output[stride*11]*C6 + output[stride*12]*C26; - step[12] = output[stride*12]*C6 + output[stride*11]*-C26; - step[13] = output[stride*13]*C22 + output[stride*10]*-C10; - step[14] = output[stride*14]*C14 + output[stride*9]*-C18; - step[15] = output[stride*15]*C30 + output[stride*8]*-C2; - - step[16] = output[stride*16] + output[stride*17]; - step[17] = -output[stride*17] + output[stride*16]; - step[18] = -output[stride*18] + output[stride*19]; - step[19] = output[stride*19] + output[stride*18]; - step[20] = output[stride*20] + output[stride*21]; - step[21] = -output[stride*21] + output[stride*20]; - step[22] = -output[stride*22] + output[stride*23]; - step[23] = output[stride*23] + output[stride*22]; - step[24] = output[stride*24] + output[stride*25]; - step[25] = -output[stride*25] + output[stride*24]; - step[26] = -output[stride*26] + output[stride*27]; - step[27] = output[stride*27] + output[stride*26]; - step[28] = output[stride*28] + output[stride*29]; - step[29] = -output[stride*29] + output[stride*28]; - step[30] = -output[stride*30] + output[stride*31]; - step[31] = output[stride*31] + output[stride*30]; - - // Final stage --- outputs indices are bit-reversed. - output[stride*0] = step[0]; - output[stride*16] = step[1]; - output[stride*8] = step[2]; - output[stride*24] = step[3]; - output[stride*4] = step[4]; - output[stride*20] = step[5]; - output[stride*12] = step[6]; - output[stride*28] = step[7]; - output[stride*2] = step[8]; - output[stride*18] = step[9]; - output[stride*10] = step[10]; - output[stride*26] = step[11]; - output[stride*6] = step[12]; - output[stride*22] = step[13]; - output[stride*14] = step[14]; - output[stride*30] = step[15]; - - output[stride*1] = step[16]*C31 + step[31]*C1; - output[stride*17] = step[17]*C15 + step[30]*C17; - output[stride*9] = step[18]*C23 + step[29]*C9; - output[stride*25] = step[19]*C7 + step[28]*C25; - output[stride*5] = step[20]*C27 + step[27]*C5; - output[stride*21] = step[21]*C11 + step[26]*C21; - output[stride*13] = step[22]*C19 + step[25]*C13; - output[stride*29] = step[23]*C3 + step[24]*C29; - output[stride*3] = step[24]*C3 + step[23]*-C29; - output[stride*19] = step[25]*C19 + step[22]*-C13; - output[stride*11] = step[26]*C11 + step[21]*-C21; - output[stride*27] = step[27]*C27 + step[20]*-C5; - output[stride*7] = step[28]*C7 + step[19]*-C25; - output[stride*23] = step[29]*C23 + step[18]*-C9; - output[stride*15] = step[30]*C15 + step[17]*-C17; - output[stride*31] = step[31]*C31 + step[16]*-C1; -} - -void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) { - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - int shortpitch = pitch >> 1; - int i, j; - double output[1024]; - // First transform columns - for (i = 0; i < 32; i++) { - double temp_in[32], temp_out[32]; - for (j = 0; j < 32; j++) - temp_in[j] = input[j*shortpitch + i]; - dct32_1d(temp_in, temp_out, 1); - for (j = 0; j < 32; j++) - output[j*32 + i] = temp_out[j]; - } - // Then transform rows - for (i = 0; i < 32; ++i) { - double temp_in[32], temp_out[32]; - for (j = 0; j < 32; ++j) - temp_in[j] = output[j + i*32]; - dct32_1d(temp_in, temp_out, 1); - for (j = 0; j < 32; ++j) - output[j + i*32] = temp_out[j]; - } - // Scale by some magic number - for (i = 0; i < 1024; i++) { - out[i] = (short)round(output[i]/4); - } - } - - vp9_clear_system_state(); // Make it simd safe : __asm emms; -} - -#else - -#define RIGHT_SHIFT 13 -#define ROUNDING (1 << (RIGHT_SHIFT - 1)) - -static void dct32_1d(int *input, int *output, int last_shift_bits) { - static const int16_t C1 = 8182; // 2^13 - static const int16_t C2 = 8153; - static const int16_t C3 = 8103; - static const int16_t C4 = 8035; - static const int16_t C5 = 7946; - static const int16_t C6 = 7839; - static const int16_t C7 = 7713; - static const int16_t C8 = 7568; - static const int16_t C9 = 7405; - static const int16_t C10 = 7225; - static const int16_t C11 = 7027; - static const int16_t C12 = 6811; - static const int16_t C13 = 6580; - static const int16_t C14 = 6333; - static const int16_t C15 = 6070; - static const int16_t C16 = 5793; - static const int16_t C17 = 5501; - static const int16_t C18 = 5197; - static const int16_t C19 = 4880; - static const int16_t C20 = 4551; - static const int16_t C21 = 4212; - static const int16_t C22 = 3862; - static const int16_t C23 = 3503; - static const int16_t C24 = 3135; - static const int16_t C25 = 2760; - static const int16_t C26 = 2378; - static const int16_t C27 = 1990; - static const int16_t C28 = 1598; - static const int16_t C29 = 1202; - static const int16_t C30 = 803; - static const int16_t C31 = 402; +static void dct32_1d(int *input, int *output) { int step[32]; - - int last_rounding = 0; - int final_shift = RIGHT_SHIFT; - int final_rounding = 0; - - if (last_shift_bits > 0) - last_rounding = 1 << (last_shift_bits - 1); - - final_shift += last_shift_bits; - if (final_shift > 0) - final_rounding = 1 << (final_shift - 1); - // Stage 1 step[0] = input[0] + input[(32 - 1)]; step[1] = input[1] + input[(32 - 2)]; @@ -1207,15 +801,15 @@ static void dct32_1d(int *input, int *output, int last_shift_bits) { output[18] = step[18]; output[19] = step[19]; - output[20] = ((-step[20] + step[27]) * C16 + ROUNDING) >> RIGHT_SHIFT; - output[21] = ((-step[21] + step[26]) * C16 + ROUNDING) >> RIGHT_SHIFT; - output[22] = ((-step[22] + step[25]) * C16 + ROUNDING) >> RIGHT_SHIFT; - output[23] = ((-step[23] + step[24]) * C16 + ROUNDING) >> RIGHT_SHIFT; + output[20] = dct_32_round((-step[20] + step[27]) * cospi_16_64); + output[21] = dct_32_round((-step[21] + step[26]) * cospi_16_64); + output[22] = dct_32_round((-step[22] + step[25]) * cospi_16_64); + output[23] = dct_32_round((-step[23] + step[24]) * cospi_16_64); - output[24] = ((step[24] + step[23]) * C16 + ROUNDING) >> RIGHT_SHIFT; - output[25] = ((step[25] + step[22]) * C16 + ROUNDING) >> RIGHT_SHIFT; - output[26] = ((step[26] + step[21]) * C16 + ROUNDING) >> RIGHT_SHIFT; - output[27] = ((step[27] + step[20]) * C16 + ROUNDING) >> RIGHT_SHIFT; + output[24] = dct_32_round((step[24] + step[23]) * cospi_16_64); + output[25] = dct_32_round((step[25] + step[22]) * cospi_16_64); + output[26] = dct_32_round((step[26] + step[21]) * cospi_16_64); + output[27] = dct_32_round((step[27] + step[20]) * cospi_16_64); output[28] = step[28]; output[29] = step[29]; @@ -1233,10 +827,10 @@ static void dct32_1d(int *input, int *output, int last_shift_bits) { step[7] = -output[7] + output[(8 - 8)]; step[8] = output[8]; step[9] = output[9]; - step[10] = ((-output[10] + output[13]) * C16 + ROUNDING) >> RIGHT_SHIFT; - step[11] = ((-output[11] + output[12]) * C16 + ROUNDING) >> RIGHT_SHIFT; - step[12] = ((output[12] + output[11]) * C16 + ROUNDING) >> RIGHT_SHIFT; - step[13] = ((output[13] + output[10]) * C16 + ROUNDING) >> RIGHT_SHIFT; + step[10] = dct_32_round((-output[10] + output[13]) * cospi_16_64); + step[11] = dct_32_round((-output[11] + output[12]) * cospi_16_64); + step[12] = dct_32_round((output[12] + output[11]) * cospi_16_64); + step[13] = dct_32_round((output[13] + output[10]) * cospi_16_64); step[14] = output[14]; step[15] = output[15]; @@ -1263,8 +857,8 @@ static void dct32_1d(int *input, int *output, int last_shift_bits) { output[2] = -step[2] + step[1]; output[3] = -step[3] + step[0]; output[4] = step[4]; - output[5] = ((-step[5] + step[6]) * C16 + ROUNDING) >> RIGHT_SHIFT; - output[6] = ((step[6] + step[5]) * C16 + ROUNDING) >> RIGHT_SHIFT; + output[5] = dct_32_round((-step[5] + step[6]) * cospi_16_64); + output[6] = dct_32_round((step[6] + step[5]) * cospi_16_64); output[7] = step[7]; output[8] = step[8] + step[11]; output[9] = step[9] + step[10]; @@ -1277,37 +871,37 @@ static void dct32_1d(int *input, int *output, int last_shift_bits) { output[16] = step[16]; output[17] = step[17]; - output[18] = (step[18] * -C8 + step[29] * C24 + ROUNDING) >> RIGHT_SHIFT; - output[19] = (step[19] * -C8 + step[28] * C24 + ROUNDING) >> RIGHT_SHIFT; - output[20] = (step[20] * -C24 + step[27] * -C8 + ROUNDING) >> RIGHT_SHIFT; - output[21] = (step[21] * -C24 + step[26] * -C8 + ROUNDING) >> RIGHT_SHIFT; + output[18] = dct_32_round(step[18] * -cospi_8_64 + step[29] * cospi_24_64); + output[19] = dct_32_round(step[19] * -cospi_8_64 + step[28] * cospi_24_64); + output[20] = dct_32_round(step[20] * -cospi_24_64 + step[27] * -cospi_8_64); + output[21] = dct_32_round(step[21] * -cospi_24_64 + step[26] * -cospi_8_64); output[22] = step[22]; output[23] = step[23]; output[24] = step[24]; output[25] = step[25]; - output[26] = (step[26] * C24 + step[21] * -C8 + ROUNDING) >> RIGHT_SHIFT; - output[27] = (step[27] * C24 + step[20] * -C8 + ROUNDING) >> RIGHT_SHIFT; - output[28] = (step[28] * C8 + step[19] * C24 + ROUNDING) >> RIGHT_SHIFT; - output[29] = (step[29] * C8 + step[18] * C24 + ROUNDING) >> RIGHT_SHIFT; + output[26] = dct_32_round(step[26] * cospi_24_64 + step[21] * -cospi_8_64); + output[27] = dct_32_round(step[27] * cospi_24_64 + step[20] * -cospi_8_64); + output[28] = dct_32_round(step[28] * cospi_8_64 + step[19] * cospi_24_64); + output[29] = dct_32_round(step[29] * cospi_8_64 + step[18] * cospi_24_64); output[30] = step[30]; output[31] = step[31]; // Stage 5 - step[0] = ((output[0] + output[1]) * C16 + ROUNDING) >> RIGHT_SHIFT; - step[1] = ((-output[1] + output[0]) * C16 + ROUNDING) >> RIGHT_SHIFT; - step[2] = (output[2] * C24 + output[3] * C8 + ROUNDING) >> RIGHT_SHIFT; - step[3] = (output[3] * C24 - output[2] * C8 + ROUNDING) >> RIGHT_SHIFT; + step[0] = dct_32_round((output[0] + output[1]) * cospi_16_64); + step[1] = dct_32_round((-output[1] + output[0]) * cospi_16_64); + step[2] = dct_32_round(output[2] * cospi_24_64 + output[3] * cospi_8_64); + step[3] = dct_32_round(output[3] * cospi_24_64 - output[2] * cospi_8_64); step[4] = output[4] + output[5]; step[5] = -output[5] + output[4]; step[6] = -output[6] + output[7]; step[7] = output[7] + output[6]; step[8] = output[8]; - step[9] = (output[9] * -C8 + output[14] * C24 + ROUNDING) >> RIGHT_SHIFT; - step[10] = (output[10] * -C24 + output[13] * -C8 + ROUNDING) >> RIGHT_SHIFT; + step[9] = dct_32_round(output[9] * -cospi_8_64 + output[14] * cospi_24_64); + step[10] = dct_32_round(output[10] * -cospi_24_64 + output[13] * -cospi_8_64); step[11] = output[11]; step[12] = output[12]; - step[13] = (output[13] * C24 + output[10] * -C8 + ROUNDING) >> RIGHT_SHIFT; - step[14] = (output[14] * C8 + output[9] * C24 + ROUNDING) >> RIGHT_SHIFT; + step[13] = dct_32_round(output[13] * cospi_24_64 + output[10] * -cospi_8_64); + step[14] = dct_32_round(output[14] * cospi_8_64 + output[9] * cospi_24_64); step[15] = output[15]; step[16] = output[16] + output[19]; @@ -1332,10 +926,10 @@ static void dct32_1d(int *input, int *output, int last_shift_bits) { output[1] = step[1]; output[2] = step[2]; output[3] = step[3]; - output[4] = (step[4] * C28 + step[7] * C4 + ROUNDING) >> RIGHT_SHIFT; - output[5] = (step[5] * C12 + step[6] * C20 + ROUNDING) >> RIGHT_SHIFT; - output[6] = (step[6] * C12 + step[5] * -C20 + ROUNDING) >> RIGHT_SHIFT; - output[7] = (step[7] * C28 + step[4] * -C4 + ROUNDING) >> RIGHT_SHIFT; + output[4] = dct_32_round(step[4] * cospi_28_64 + step[7] * cospi_4_64); + output[5] = dct_32_round(step[5] * cospi_12_64 + step[6] * cospi_20_64); + output[6] = dct_32_round(step[6] * cospi_12_64 + step[5] * -cospi_20_64); + output[7] = dct_32_round(step[7] * cospi_28_64 + step[4] * -cospi_4_64); output[8] = step[8] + step[9]; output[9] = -step[9] + step[8]; output[10] = -step[10] + step[11]; @@ -1346,20 +940,20 @@ static void dct32_1d(int *input, int *output, int last_shift_bits) { output[15] = step[15] + step[14]; output[16] = step[16]; - output[17] = (step[17] * -C4 + step[30] * C28 + ROUNDING) >> RIGHT_SHIFT; - output[18] = (step[18] * -C28 + step[29] * -C4 + ROUNDING) >> RIGHT_SHIFT; + output[17] = dct_32_round(step[17] * -cospi_4_64 + step[30] * cospi_28_64); + output[18] = dct_32_round(step[18] * -cospi_28_64 + step[29] * -cospi_4_64); output[19] = step[19]; output[20] = step[20]; - output[21] = (step[21] * -C20 + step[26] * C12 + ROUNDING) >> RIGHT_SHIFT; - output[22] = (step[22] * -C12 + step[25] * -C20 + ROUNDING) >> RIGHT_SHIFT; + output[21] = dct_32_round(step[21] * -cospi_20_64 + step[26] * cospi_12_64); + output[22] = dct_32_round(step[22] * -cospi_12_64 + step[25] * -cospi_20_64); output[23] = step[23]; output[24] = step[24]; - output[25] = (step[25] * C12 + step[22] * -C20 + ROUNDING) >> RIGHT_SHIFT; - output[26] = (step[26] * C20 + step[21] * C12 + ROUNDING) >> RIGHT_SHIFT; + output[25] = dct_32_round(step[25] * cospi_12_64 + step[22] * -cospi_20_64); + output[26] = dct_32_round(step[26] * cospi_20_64 + step[21] * cospi_12_64); output[27] = step[27]; output[28] = step[28]; - output[29] = (step[29] * C28 + step[18] * -C4 + ROUNDING) >> RIGHT_SHIFT; - output[30] = (step[30] * C4 + step[17] * C28 + ROUNDING) >> RIGHT_SHIFT; + output[29] = dct_32_round(step[29] * cospi_28_64 + step[18] * -cospi_4_64); + output[30] = dct_32_round(step[30] * cospi_4_64 + step[17] * cospi_28_64); output[31] = step[31]; // Stage 7 @@ -1371,14 +965,14 @@ static void dct32_1d(int *input, int *output, int last_shift_bits) { step[5] = output[5]; step[6] = output[6]; step[7] = output[7]; - step[8] = (output[8] * C30 + output[15] * C2 + ROUNDING) >> RIGHT_SHIFT; - step[9] = (output[9] * C14 + output[14] * C18 + ROUNDING) >> RIGHT_SHIFT; - step[10] = (output[10] * C22 + output[13] * C10 + ROUNDING) >> RIGHT_SHIFT; - step[11] = (output[11] * C6 + output[12] * C26 + ROUNDING) >> RIGHT_SHIFT; - step[12] = (output[12] * C6 + output[11] * -C26 + ROUNDING) >> RIGHT_SHIFT; - step[13] = (output[13] * C22 + output[10] * -C10 + ROUNDING) >> RIGHT_SHIFT; - step[14] = (output[14] * C14 + output[9] * -C18 + ROUNDING) >> RIGHT_SHIFT; - step[15] = (output[15] * C30 + output[8] * -C2 + ROUNDING) >> RIGHT_SHIFT; + step[8] = dct_32_round(output[8] * cospi_30_64 + output[15] * cospi_2_64); + step[9] = dct_32_round(output[9] * cospi_14_64 + output[14] * cospi_18_64); + step[10] = dct_32_round(output[10] * cospi_22_64 + output[13] * cospi_10_64); + step[11] = dct_32_round(output[11] * cospi_6_64 + output[12] * cospi_26_64); + step[12] = dct_32_round(output[12] * cospi_6_64 + output[11] * -cospi_26_64); + step[13] = dct_32_round(output[13] * cospi_22_64 + output[10] * -cospi_10_64); + step[14] = dct_32_round(output[14] * cospi_14_64 + output[9] * -cospi_18_64); + step[15] = dct_32_round(output[15] * cospi_30_64 + output[8] * -cospi_2_64); step[16] = output[16] + output[17]; step[17] = -output[17] + output[16]; @@ -1398,62 +992,40 @@ static void dct32_1d(int *input, int *output, int last_shift_bits) { step[31] = output[31] + output[30]; // Final stage --- outputs indices are bit-reversed. - output[0] = (step[0] + last_rounding) >> last_shift_bits; - output[16] = (step[1] + last_rounding) >> last_shift_bits; - output[8] = (step[2] + last_rounding) >> last_shift_bits; - output[24] = (step[3] + last_rounding) >> last_shift_bits; - output[4] = (step[4] + last_rounding) >> last_shift_bits; - output[20] = (step[5] + last_rounding) >> last_shift_bits; - output[12] = (step[6] + last_rounding) >> last_shift_bits; - output[28] = (step[7] + last_rounding) >> last_shift_bits; - output[2] = (step[8] + last_rounding) >> last_shift_bits; - output[18] = (step[9] + last_rounding) >> last_shift_bits; - output[10] = (step[10] + last_rounding) >> last_shift_bits; - output[26] = (step[11] + last_rounding) >> last_shift_bits; - output[6] = (step[12] + last_rounding) >> last_shift_bits; - output[22] = (step[13] + last_rounding) >> last_shift_bits; - output[14] = (step[14] + last_rounding) >> last_shift_bits; - output[30] = (step[15] + last_rounding) >> last_shift_bits; - - output[1] = (step[16] * C31 + step[31] * C1 + final_rounding) >> final_shift; - output[17] = (step[17] * C15 + step[30] * C17 + final_rounding) - >> final_shift; - output[9] = (step[18] * C23 + step[29] * C9 + final_rounding) >> final_shift; - output[25] = (step[19] * C7 + step[28] * C25 + final_rounding) >> final_shift; - output[5] = (step[20] * C27 + step[27] * C5 + final_rounding) >> final_shift; - output[21] = (step[21] * C11 + step[26] * C21 + final_rounding) - >> final_shift; - output[13] = (step[22] * C19 + step[25] * C13 + final_rounding) - >> final_shift; - output[29] = (step[23] * C3 + step[24] * C29 + final_rounding) >> final_shift; - output[3] = (step[24] * C3 + step[23] * -C29 + final_rounding) >> final_shift; - output[19] = (step[25] * C19 + step[22] * -C13 + final_rounding) - >> final_shift; - output[11] = (step[26] * C11 + step[21] * -C21 + final_rounding) - >> final_shift; - output[27] = (step[27] * C27 + step[20] * -C5 + final_rounding) - >> final_shift; - output[7] = (step[28] * C7 + step[19] * -C25 + final_rounding) >> final_shift; - output[23] = (step[29] * C23 + step[18] * -C9 + final_rounding) - >> final_shift; - output[15] = (step[30] * C15 + step[17] * -C17 + final_rounding) - >> final_shift; - output[31] = (step[31] * C31 + step[16] * -C1 + final_rounding) - >> final_shift; - - // Clamp to fit 16-bit. - if (last_shift_bits > 0) { - int i; - - for (i = 0; i < 32; i++) - if (output[i] < -32768) - output[i] = -32768; - else if (output[i] > 32767) - output[i] = 32767; - } + output[0] = step[0]; + output[16] = step[1]; + output[8] = step[2]; + output[24] = step[3]; + output[4] = step[4]; + output[20] = step[5]; + output[12] = step[6]; + output[28] = step[7]; + output[2] = step[8]; + output[18] = step[9]; + output[10] = step[10]; + output[26] = step[11]; + output[6] = step[12]; + output[22] = step[13]; + output[14] = step[14]; + output[30] = step[15]; + + output[1] = dct_32_round(step[16] * cospi_31_64 + step[31] * cospi_1_64); + output[17] = dct_32_round(step[17] * cospi_15_64 + step[30] * cospi_17_64); + output[9] = dct_32_round(step[18] * cospi_23_64 + step[29] * cospi_9_64); + output[25] = dct_32_round(step[19] * cospi_7_64 + step[28] * cospi_25_64); + output[5] = dct_32_round(step[20] * cospi_27_64 + step[27] * cospi_5_64); + output[21] = dct_32_round(step[21] * cospi_11_64 + step[26] * cospi_21_64); + output[13] = dct_32_round(step[22] * cospi_19_64 + step[25] * cospi_13_64); + output[29] = dct_32_round(step[23] * cospi_3_64 + step[24] * cospi_29_64); + output[3] = dct_32_round(step[24] * cospi_3_64 + step[23] * -cospi_29_64); + output[19] = dct_32_round(step[25] * cospi_19_64 + step[22] * -cospi_13_64); + output[11] = dct_32_round(step[26] * cospi_11_64 + step[21] * -cospi_21_64); + output[27] = dct_32_round(step[27] * cospi_27_64 + step[20] * -cospi_5_64); + output[7] = dct_32_round(step[28] * cospi_7_64 + step[19] * -cospi_25_64); + output[23] = dct_32_round(step[29] * cospi_23_64 + step[18] * -cospi_9_64); + output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64); + output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64); } -#undef RIGHT_SHIFT -#undef ROUNDING void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) { int shortpitch = pitch >> 1; @@ -1463,10 +1035,10 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) { for (i = 0; i < 32; i++) { int temp_in[32], temp_out[32]; for (j = 0; j < 32; j++) - temp_in[j] = input[j * shortpitch + i]; - dct32_1d(temp_in, temp_out, 0); + temp_in[j] = input[j * shortpitch + i] << 2; + dct32_1d(temp_in, temp_out); for (j = 0; j < 32; j++) - output[j * 32 + i] = temp_out[j]; + output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; } // Then transform rows @@ -1474,10 +1046,9 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) { int temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32]; - dct32_1d(temp_in, temp_out, 2); + dct32_1d(temp_in, temp_out); for (j = 0; j < 32; ++j) - out[j + i * 32] = temp_out[j]; + out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; } } -#endif diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 880555797..c0fe5ac76 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -654,7 +654,7 @@ static void set_offsets(VP9_COMP *cpi, // Set up destination pointers setup_pred_block(&xd->dst, &cm->yv12_fb[dst_fb_idx], - mb_row, mb_col); + mb_row, mb_col, NULL, NULL); /* Set up limit values for MV components to prevent them from * extending beyond the UMV borders assuming 16x16 block size */ @@ -679,7 +679,7 @@ static void set_offsets(VP9_COMP *cpi, xd->right_available = (mb_col + block_size < cm->cur_tile_mb_col_end); /* set up source buffers */ - setup_pred_block(&x->src, cpi->Source, mb_row, mb_col); + setup_pred_block(&x->src, cpi->Source, mb_row, mb_col, NULL, NULL); /* R/D setup */ x->rddiv = cpi->RDDIV; @@ -1187,7 +1187,7 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { // Copy data over into macro block data structures. x->src = *cpi->Source; - xd->pre = cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]]; + xd->pre = cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]]; xd->dst = cm->yv12_fb[cm->new_fb_idx]; // set up frame for intra coded blocks @@ -1272,9 +1272,6 @@ static void encode_frame_internal(VP9_COMP *cpi) { totalrate = 0; - // Functions setup for all frame types so we can use MC in AltRef - vp9_setup_interp_filters(xd, cm->mcomp_filter_type, cm); - // Reset frame count of inter 0,0 motion vector usage. cpi->inter_zz_count = 0; @@ -2092,55 +2089,50 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, assert(cm->frame_type != KEY_FRAME); if (mbmi->ref_frame == LAST_FRAME) - ref_fb_idx = cpi->common.active_ref_idx[cpi->lst_fb_idx]; + ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; else if (mbmi->ref_frame == GOLDEN_FRAME) - ref_fb_idx = cpi->common.active_ref_idx[cpi->gld_fb_idx]; + ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; else - ref_fb_idx = cpi->common.active_ref_idx[cpi->alt_fb_idx]; + ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; setup_pred_block(&xd->pre, &cpi->common.yv12_fb[ref_fb_idx], - mb_row, mb_col); + mb_row, mb_col, + &xd->scale_factor[0], &xd->scale_factor_uv[0]); if (mbmi->second_ref_frame > 0) { int second_ref_fb_idx; if (mbmi->second_ref_frame == LAST_FRAME) - second_ref_fb_idx = cpi->common.active_ref_idx[cpi->lst_fb_idx]; + second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; else if (mbmi->second_ref_frame == GOLDEN_FRAME) - second_ref_fb_idx = cpi->common.active_ref_idx[cpi->gld_fb_idx]; + second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; else - second_ref_fb_idx = cpi->common.active_ref_idx[cpi->alt_fb_idx]; + second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; setup_pred_block(&xd->second_pre, &cpi->common.yv12_fb[second_ref_fb_idx], - mb_row, mb_col); + mb_row, mb_col, + &xd->scale_factor[1], &xd->scale_factor_uv[1]); } if (!x->skip) { - vp9_encode_inter16x16(x); + vp9_encode_inter16x16(x, mb_row, mb_col); // Clear mb_skip_coeff if mb_no_coeff_skip is not set if (!cpi->common.mb_no_coeff_skip) mbmi->mb_skip_coeff = 0; } else { - vp9_build_1st_inter16x16_predictors_mb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride); - if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - vp9_build_2nd_inter16x16_predictors_mb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride); - } + vp9_build_inter16x16_predictors_mb(xd, + xd->dst.y_buffer, + xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.y_stride, + xd->dst.uv_stride, + mb_row, mb_col); #if CONFIG_COMP_INTERINTRA_PRED - else if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { + if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { vp9_build_interintra_16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer, @@ -2327,34 +2319,37 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, assert(cm->frame_type != KEY_FRAME); if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) - ref_fb_idx = cpi->common.active_ref_idx[cpi->lst_fb_idx]; + ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) - ref_fb_idx = cpi->common.active_ref_idx[cpi->gld_fb_idx]; + ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; else - ref_fb_idx = cpi->common.active_ref_idx[cpi->alt_fb_idx]; + ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; setup_pred_block(&xd->pre, &cpi->common.yv12_fb[ref_fb_idx], - mb_row, mb_col); + mb_row, mb_col, + &xd->scale_factor[0], &xd->scale_factor_uv[0]); if (xd->mode_info_context->mbmi.second_ref_frame > 0) { int second_ref_fb_idx; if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME) - second_ref_fb_idx = cpi->common.active_ref_idx[cpi->lst_fb_idx]; + second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME) - second_ref_fb_idx = cpi->common.active_ref_idx[cpi->gld_fb_idx]; + second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; else - second_ref_fb_idx = cpi->common.active_ref_idx[cpi->alt_fb_idx]; + second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; setup_pred_block(&xd->second_pre, &cpi->common.yv12_fb[second_ref_fb_idx], - mb_row, mb_col); + mb_row, mb_col, + &xd->scale_factor[1], &xd->scale_factor_uv[1]); } vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride); + xd->dst.y_stride, xd->dst.uv_stride, + mb_row, mb_col); } if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { @@ -2553,34 +2548,37 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, assert(cm->frame_type != KEY_FRAME); if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) - ref_fb_idx = cpi->common.active_ref_idx[cpi->lst_fb_idx]; + ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) - ref_fb_idx = cpi->common.active_ref_idx[cpi->gld_fb_idx]; + ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; else - ref_fb_idx = cpi->common.active_ref_idx[cpi->alt_fb_idx]; + ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; setup_pred_block(&xd->pre, &cpi->common.yv12_fb[ref_fb_idx], - mb_row, mb_col); + mb_row, mb_col, + &xd->scale_factor[0], &xd->scale_factor_uv[0]); if (xd->mode_info_context->mbmi.second_ref_frame > 0) { int second_ref_fb_idx; if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME) - second_ref_fb_idx = cpi->common.active_ref_idx[cpi->lst_fb_idx]; + second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME) - second_ref_fb_idx = cpi->common.active_ref_idx[cpi->gld_fb_idx]; + second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; else - second_ref_fb_idx = cpi->common.active_ref_idx[cpi->alt_fb_idx]; + second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; setup_pred_block(&xd->second_pre, &cpi->common.yv12_fb[second_ref_fb_idx], - mb_row, mb_col); + mb_row, mb_col, + &xd->scale_factor[1], &xd->scale_factor_uv[1]); } vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride); + xd->dst.y_stride, xd->dst.uv_stride, + mb_row, mb_col); } if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index ef64db1db..43bb4640c 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -54,9 +54,9 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) { tx_type = get_tx_type_4x4(&x->e_mbd, b); if (tx_type != DCT_DCT) { - vp9_short_fht4x4(be->src_diff, be->coeff, 32, tx_type); + vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type); vp9_ht_quantize_b_4x4(be, b, tx_type); - vp9_short_iht4x4(b->dqcoeff, b->diff, 32, tx_type); + vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type); } else { x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(be, b) ; @@ -149,10 +149,10 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { tx_type = get_tx_type_8x8(xd, &xd->block[ib]); if (tx_type != DCT_DCT) { - vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 32, tx_type); + vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type); x->quantize_b_8x8(x->block + idx, xd->block + idx); vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, - 32, tx_type); + 16, tx_type); } else { x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32); x->quantize_b_8x8(x->block + idx, xd->block + idx); @@ -164,9 +164,9 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { be = &x->block[ib + iblock[i]]; tx_type = get_tx_type_4x4(xd, b); if (tx_type != DCT_DCT) { - vp9_short_fht4x4(be->src_diff, be->coeff, 32, tx_type); + vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type); vp9_ht_quantize_b_4x4(be, b, tx_type); - vp9_short_iht4x4(b->dqcoeff, b->diff, 32, tx_type); + vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type); } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) { x->fwd_txm8x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4_pair(be, be + 1, b, b + 1); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 9ff5dd96a..ee08d263c 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -174,7 +174,7 @@ void vp9_transform_mby_4x4(MACROBLOCK *x) { BLOCK *b = &x->block[i]; TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]); if (tx_type != DCT_DCT) { - vp9_short_fht4x4(b->src_diff, b->coeff, 32, tx_type); + vp9_short_fht4x4(b->src_diff, b->coeff, 16, tx_type); } else if (!(i & 1) && get_tx_type_4x4(xd, &xd->block[i + 1]) == DCT_DCT) { x->fwd_txm8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); @@ -209,7 +209,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) { BLOCK *b = &x->block[i]; tx_type = get_tx_type_8x8(xd, &xd->block[i]); if (tx_type != DCT_DCT) { - vp9_short_fht8x8(b->src_diff, b->coeff, 32, tx_type); + vp9_short_fht8x8(b->src_diff, b->coeff, 16, tx_type); } else { x->fwd_txm8x8(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); @@ -219,7 +219,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) { BLOCK *b = &x->block[i]; tx_type = get_tx_type_8x8(xd, &xd->block[i]); if (tx_type != DCT_DCT) { - vp9_short_fht8x8(b->src_diff, (b + 2)->coeff, 32, tx_type); + vp9_short_fht8x8(b->src_diff, (b + 2)->coeff, 16, tx_type); } else { x->fwd_txm8x8(&x->block[i].src_diff[0], &x->block[i + 2].coeff[0], 32); @@ -247,7 +247,7 @@ void vp9_transform_mby_16x16(MACROBLOCK *x) { TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]); vp9_clear_system_state(); if (tx_type != DCT_DCT) { - vp9_short_fht16x16(b->src_diff, b->coeff, 32, tx_type); + vp9_short_fht16x16(b->src_diff, b->coeff, 16, tx_type); } else { x->fwd_txm16x16(&x->block[0].src_diff[0], &x->block[0].coeff[0], 32); @@ -597,13 +597,8 @@ void vp9_optimize_mby_8x8(MACROBLOCK *x) { for (b = 0; b < 16; b += 4) { ENTROPY_CONTEXT *const a = ta + vp9_block2above[TX_8X8][b]; ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b]; -#if CONFIG_CNVCONTEXT ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; -#else - ENTROPY_CONTEXT above_ec = a[0]; - ENTROPY_CONTEXT left_ec = l[0]; -#endif optimize_b(x, b, PLANE_TYPE_Y_WITH_DC, &above_ec, &left_ec, TX_8X8); a[1] = a[0] = above_ec; l[1] = l[0] = left_ec; @@ -621,13 +616,8 @@ void vp9_optimize_mbuv_8x8(MACROBLOCK *x) { for (b = 16; b < 24; b += 4) { ENTROPY_CONTEXT *const a = ta + vp9_block2above[TX_8X8][b]; ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b]; -#if CONFIG_CNVCONTEXT ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; -#else - ENTROPY_CONTEXT above_ec = a[0]; - ENTROPY_CONTEXT left_ec = l[0]; -#endif optimize_b(x, b, PLANE_TYPE_UV, &above_ec, &left_ec, TX_8X8); } } @@ -645,13 +635,8 @@ void vp9_optimize_mby_16x16(MACROBLOCK *x) { if (!t_above || !t_left) return; -#if CONFIG_CNVCONTEXT ta = (t_above->y1[0] + t_above->y1[1] + t_above->y1[2] + t_above->y1[3]) != 0; tl = (t_left->y1[0] + t_left->y1[1] + t_left->y1[2] + t_left->y1[3]) != 0; -#else - ta = t_above->y1[0]; - tl = t_left->y1[0]; -#endif optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, &ta, &tl, TX_16X16); } @@ -699,21 +684,21 @@ void vp9_fidct_mb(MACROBLOCK *x) { } } -void vp9_encode_inter16x16(MACROBLOCK *x) { +void vp9_encode_inter16x16(MACROBLOCK *x, int mb_row, int mb_col) { MACROBLOCKD *const xd = &x->e_mbd; - vp9_build_inter_predictors_mb(xd); + vp9_build_inter_predictors_mb(xd, mb_row, mb_col); subtract_mb(x); vp9_fidct_mb(x); vp9_recon_mb(xd); } /* this function is used by first pass only */ -void vp9_encode_inter16x16y(MACROBLOCK *x) { +void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col) { MACROBLOCKD *xd = &x->e_mbd; BLOCK *b = &x->block[0]; - vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0); + vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, mb_row, mb_col); vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride); diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index f3c679227..6356df215 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -23,14 +23,14 @@ typedef struct { #include "vp9/encoder/vp9_onyx_int.h" struct VP9_ENCODER_RTCD; -void vp9_encode_inter16x16(MACROBLOCK *x); +void vp9_encode_inter16x16(MACROBLOCK *x, int mb_row, int mb_col); void vp9_transform_mbuv_4x4(MACROBLOCK *x); void vp9_transform_mby_4x4(MACROBLOCK *x); void vp9_optimize_mby_4x4(MACROBLOCK *x); void vp9_optimize_mbuv_4x4(MACROBLOCK *x); -void vp9_encode_inter16x16y(MACROBLOCK *x); +void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col); void vp9_transform_mb_8x8(MACROBLOCK *mb); void vp9_transform_mby_8x8(MACROBLOCK *x); diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index a4924874d..4d0a299e8 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -436,10 +436,10 @@ void vp9_first_pass(VP9_COMP *cpi) { int recon_yoffset, recon_uvoffset; YV12_BUFFER_CONFIG *lst_yv12 = - &cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]]; + &cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]]; YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx]; YV12_BUFFER_CONFIG *gld_yv12 = - &cm->yv12_fb[cm->active_ref_idx[cpi->gld_fb_idx]]; + &cm->yv12_fb[cm->ref_frame_map[cpi->gld_fb_idx]]; int recon_y_stride = lst_yv12->y_stride; int recon_uv_stride = lst_yv12->uv_stride; int64_t intra_error = 0; @@ -613,7 +613,7 @@ void vp9_first_pass(VP9_COMP *cpi) { this_error = motion_error; vp9_set_mbmode_and_mvs(x, NEWMV, &mv); xd->mode_info_context->mbmi.txfm_size = TX_4X4; - vp9_encode_inter16x16y(x); + vp9_encode_inter16x16y(x, mb_row, mb_col); sum_mvr += mv.as_mv.row; sum_mvr_abs += abs(mv.as_mv.row); sum_mvc += mv.as_mv.col; @@ -1663,8 +1663,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Clip cpi->twopass.gf_group_bits based on user supplied data rate // variability limit (cpi->oxcf.two_pass_vbrmax_section) - if (cpi->twopass.gf_group_bits > max_bits * cpi->baseline_gf_interval) - cpi->twopass.gf_group_bits = max_bits * cpi->baseline_gf_interval; + if (cpi->twopass.gf_group_bits > + (int64_t)max_bits * cpi->baseline_gf_interval) + cpi->twopass.gf_group_bits = (int64_t)max_bits * cpi->baseline_gf_interval; // Reset the file position reset_fpf_position(cpi, start_pos); diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index 218a47a8e..d6644c2aa 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -20,7 +20,9 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, int_mv *ref_mv, - int_mv *dst_mv) { + int_mv *dst_mv, + int mb_row, + int mb_col) { MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; BLOCK *b = &x->block[0]; @@ -72,7 +74,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, } vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv); - vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0); + vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, mb_row, mb_col); best_err = vp9_sad16x16(xd->dst.y_buffer, xd->dst.y_stride, xd->predictor, 16, INT_MAX); @@ -93,8 +95,9 @@ static int do_16x16_motion_search YV12_BUFFER_CONFIG *buf, int buf_mb_y_offset, YV12_BUFFER_CONFIG *ref, - int mb_y_offset -) { + int mb_y_offset, + int mb_row, + int mb_col) { MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; unsigned int err, tmp_err; @@ -124,7 +127,7 @@ static int do_16x16_motion_search // Test last reference frame using the previous best mv as the // starting point (best reference) for the search - tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv); + tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv, mb_row, mb_col); if (tmp_err < err) { err = tmp_err; dst_mv->as_int = tmp_mv.as_int; @@ -136,7 +139,8 @@ static int do_16x16_motion_search int_mv zero_ref_mv, tmp_mv; zero_ref_mv.as_int = 0; - tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv); + tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv, + mb_row, mb_col); if (tmp_err < err) { dst_mv->as_int = tmp_mv.as_int; err = tmp_err; @@ -229,7 +233,9 @@ static void update_mbgraph_mb_stats int gld_y_offset, YV12_BUFFER_CONFIG *alt_ref, int_mv *prev_alt_ref_mv, - int arf_y_offset + int arf_y_offset, + int mb_row, + int mb_col ) { MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; @@ -249,7 +255,8 @@ static void update_mbgraph_mb_stats int g_motion_error = do_16x16_motion_search(cpi, prev_golden_ref_mv, &stats->ref[GOLDEN_FRAME].m.mv, buf, mb_y_offset, - golden_ref, gld_y_offset); + golden_ref, gld_y_offset, + mb_row, mb_col); stats->ref[GOLDEN_FRAME].err = g_motion_error; } else { stats->ref[GOLDEN_FRAME].err = INT_MAX; @@ -292,6 +299,9 @@ static void update_mbgraph_frame_stats int_mv arf_top_mv, gld_top_mv; MODE_INFO mi_local; + // Make sure the mi context starts in a consistent state. + memset(&mi_local, 0, sizeof(mi_local)); + // Set up limit values for motion vectors to prevent them extending outside the UMV borders arf_top_mv.as_int = 0; gld_top_mv.as_int = 0; @@ -323,7 +333,8 @@ static void update_mbgraph_frame_stats update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset, golden_ref, &gld_left_mv, gld_y_in_offset, - alt_ref, &arf_left_mv, arf_y_in_offset); + alt_ref, &arf_left_mv, arf_y_in_offset, + mb_row, mb_col); arf_left_mv.as_int = mb_stats->ref[ALTREF_FRAME].m.mv.as_int; gld_left_mv.as_int = mb_stats->ref[GOLDEN_FRAME].m.mv.as_int; if (mb_col == 0) { @@ -434,7 +445,7 @@ void vp9_update_mbgraph_stats VP9_COMMON *const cm = &cpi->common; int i, n_frames = vp9_lookahead_depth(cpi->lookahead); YV12_BUFFER_CONFIG *golden_ref = - &cm->yv12_fb[cm->active_ref_idx[cpi->gld_fb_idx]]; + &cm->yv12_fb[cm->ref_frame_map[cpi->gld_fb_idx]]; // we need to look ahead beyond where the ARF transitions into // being a GF - so exit if we don't look ahead beyond that diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index feb1e36c0..ced6eddca 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -10,6 +10,7 @@ #include "vpx_config.h" +#include "vp9/common/vp9_filter.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/encoder/vp9_onyx_int.h" @@ -832,7 +833,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { } { - int y_stride = cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]].y_stride; + int y_stride = cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]].y_stride; if (cpi->sf.search_method == NSTEP) { vp9_init3smotion_compensation(&cpi->mb, y_stride); @@ -1753,7 +1754,7 @@ void vp9_remove_compressor(VP9_PTR *ptr) { #endif if (cpi->b_calculate_psnr) { YV12_BUFFER_CONFIG *lst_yv12 = - &cpi->common.yv12_fb[cpi->common.active_ref_idx[cpi->lst_fb_idx]]; + &cpi->common.yv12_fb[cpi->common.ref_frame_map[cpi->lst_fb_idx]]; double samples = 3.0 / 2 * cpi->count * lst_yv12->y_width * lst_yv12->y_height; double total_psnr = vp9_mse2psnr(samples, 255.0, cpi->total_sq_error); double total_psnr2 = vp9_mse2psnr(samples, 255.0, cpi->total_sq_error2); @@ -2098,11 +2099,11 @@ int vp9_get_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, int ref_fb_idx; if (ref_frame_flag == VP9_LAST_FLAG) - ref_fb_idx = cm->active_ref_idx[cpi->lst_fb_idx]; + ref_fb_idx = cm->ref_frame_map[cpi->lst_fb_idx]; else if (ref_frame_flag == VP9_GOLD_FLAG) - ref_fb_idx = cm->active_ref_idx[cpi->gld_fb_idx]; + ref_fb_idx = cm->ref_frame_map[cpi->gld_fb_idx]; else if (ref_frame_flag == VP9_ALT_FLAG) - ref_fb_idx = cm->active_ref_idx[cpi->alt_fb_idx]; + ref_fb_idx = cm->ref_frame_map[cpi->alt_fb_idx]; else return -1; @@ -2119,11 +2120,11 @@ int vp9_set_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, int ref_fb_idx; if (ref_frame_flag == VP9_LAST_FLAG) - ref_fb_idx = cm->active_ref_idx[cpi->lst_fb_idx]; + ref_fb_idx = cm->ref_frame_map[cpi->lst_fb_idx]; else if (ref_frame_flag == VP9_GOLD_FLAG) - ref_fb_idx = cm->active_ref_idx[cpi->gld_fb_idx]; + ref_fb_idx = cm->ref_frame_map[cpi->gld_fb_idx]; else if (ref_frame_flag == VP9_ALT_FLAG) - ref_fb_idx = cm->active_ref_idx[cpi->alt_fb_idx]; + ref_fb_idx = cm->ref_frame_map[cpi->alt_fb_idx]; else return -1; @@ -2198,6 +2199,69 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) { } #endif +static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb, + YV12_BUFFER_CONFIG *dst_fb) { + const int in_w = src_fb->y_width; + const int in_h = src_fb->y_height; + const int out_w = dst_fb->y_width; + const int out_h = dst_fb->y_height; + int x, y; + + for (y = 0; y < out_h; y += 16) { + for (x = 0; x < out_w; x += 16) { + int x_q4 = x * 16 * in_w / out_w; + int y_q4 = y * 16 * in_h / out_h; + uint8_t *src, *dst; + int src_stride, dst_stride; + + + src = src_fb->y_buffer + + y * in_h / out_h * src_fb->y_stride + + x * in_w / out_w; + dst = dst_fb->y_buffer + + y * dst_fb->y_stride + + x; + src_stride = src_fb->y_stride; + dst_stride = dst_fb->y_stride; + + vp9_convolve8(src, src_stride, dst, dst_stride, + vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w, + vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h, + 16, 16); + + x_q4 >>= 1; + y_q4 >>= 1; + src_stride = src_fb->uv_stride; + dst_stride = dst_fb->uv_stride; + + src = src_fb->u_buffer + + y / 2 * in_h / out_h * src_fb->uv_stride + + x / 2 * in_w / out_w; + dst = dst_fb->u_buffer + + y / 2 * dst_fb->uv_stride + + x / 2; + vp9_convolve8(src, src_stride, dst, dst_stride, + vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w, + vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h, + 8, 8); + + src = src_fb->v_buffer + + y / 2 * in_h / out_h * src_fb->uv_stride + + x / 2 * in_w / out_w; + dst = dst_fb->v_buffer + + y / 2 * dst_fb->uv_stride + + x / 2; + vp9_convolve8(src, src_stride, dst, dst_stride, + vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w, + vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h, + 8, 8); + } + } + + vp8_yv12_extend_frame_borders(dst_fb); +} + + static void update_alt_ref_frame_stats(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; @@ -2416,9 +2480,9 @@ static void update_reference_frames(VP9_COMP * const cpi) { // If any buffer copy / swapping is signaled it should be done here. if (cm->frame_type == KEY_FRAME) { ref_cnt_fb(cm->fb_idx_ref_cnt, - &cm->active_ref_idx[cpi->gld_fb_idx], cm->new_fb_idx); + &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx); ref_cnt_fb(cm->fb_idx_ref_cnt, - &cm->active_ref_idx[cpi->alt_fb_idx], cm->new_fb_idx); + &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); } else if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) { /* Preserve the previously existing golden frame and update the frame in * the alt ref slot instead. This is highly specific to the current use of @@ -2432,7 +2496,7 @@ static void update_reference_frames(VP9_COMP * const cpi) { int tmp; ref_cnt_fb(cm->fb_idx_ref_cnt, - &cm->active_ref_idx[cpi->alt_fb_idx], cm->new_fb_idx); + &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); tmp = cpi->alt_fb_idx; cpi->alt_fb_idx = cpi->gld_fb_idx; @@ -2440,18 +2504,18 @@ static void update_reference_frames(VP9_COMP * const cpi) { } else { /* For non key/golden frames */ if (cpi->refresh_alt_ref_frame) { ref_cnt_fb(cm->fb_idx_ref_cnt, - &cm->active_ref_idx[cpi->alt_fb_idx], cm->new_fb_idx); + &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); } if (cpi->refresh_golden_frame) { ref_cnt_fb(cm->fb_idx_ref_cnt, - &cm->active_ref_idx[cpi->gld_fb_idx], cm->new_fb_idx); + &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx); } } if (cpi->refresh_last_frame) { ref_cnt_fb(cm->fb_idx_ref_cnt, - &cm->active_ref_idx[cpi->lst_fb_idx], cm->new_fb_idx); + &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx); } } @@ -2535,6 +2599,38 @@ static void select_interintra_mode(VP9_COMP *cpi) { } #endif +static void scale_references(VP9_COMP *cpi) { + VP9_COMMON *cm = &cpi->common; + int i; + + for (i = 0; i < 3; i++) { + YV12_BUFFER_CONFIG *ref = &cm->yv12_fb[cm->ref_frame_map[i]]; + + if (ref->y_width != cm->Width || ref->y_height != cm->Height) { + int new_fb = get_free_fb(cm); + + vp8_yv12_realloc_frame_buffer(&cm->yv12_fb[new_fb], + cm->mb_cols * 16, + cm->mb_rows * 16, + VP9BORDERINPIXELS); + scale_and_extend_frame(ref, &cm->yv12_fb[new_fb]); + cpi->scaled_ref_idx[i] = new_fb; + } else { + cpi->scaled_ref_idx[i] = cm->ref_frame_map[i]; + cm->fb_idx_ref_cnt[cm->ref_frame_map[i]]++; + } + } +} + +static void release_scaled_references(VP9_COMP *cpi) { + VP9_COMMON *cm = &cpi->common; + int i; + + for (i = 0; i < 3; i++) { + cm->fb_idx_ref_cnt[cpi->scaled_ref_idx[i]]--; + } +} + static void encode_frame_to_data_rate(VP9_COMP *cpi, unsigned long *size, unsigned char *dest, @@ -2583,6 +2679,17 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, int mcomp_filter_index = 0; int64_t mcomp_filter_cost[4]; + /* Scale the source buffer, if required */ + if (cm->Width != cpi->un_scaled_source->y_width || + cm->Height != cpi->un_scaled_source->y_height) { + scale_and_extend_frame(cpi->un_scaled_source, &cpi->scaled_source); + cpi->Source = &cpi->scaled_source; + } else { + cpi->Source = cpi->un_scaled_source; + } + + scale_references(cpi); + // Clear down mmx registers to allow floating point in what follows vp9_clear_system_state(); @@ -3231,6 +3338,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, update_reference_segmentation_map(cpi); } + release_scaled_references(cpi); update_reference_frames(cpi); vp9_copy(cpi->common.fc.coef_counts_4x4, cpi->coef_counts_4x4); vp9_copy(cpi->common.fc.coef_counts_8x8, cpi->coef_counts_8x8); @@ -3373,7 +3481,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, if (cpi->twopass.total_left_stats->coded_error != 0.0) fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d" "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f" - "%6d %5d %5d %5d %8d %8.2f %10d %10.3f" + "%6d %5d %5d %5d %8.2f %10d %10.3f" "%10.3f %8d %10d %10d %10d\n", cpi->common.current_video_frame, cpi->this_frame_target, cpi->projected_frame_size, 0, //loop_size_estimate, @@ -3400,7 +3508,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, else fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d" "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f" - "%5d %5d %8d %8.2f %10d %10.3f" + "%5d %5d %8d %8d %8.2f %10d %10.3f" "%8d %10d %10d %10d\n", cpi->common.current_video_frame, cpi->this_frame_target, cpi->projected_frame_size, @@ -3516,6 +3624,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, xd->update_mb_segmentation_data = 0; xd->mode_ref_lf_delta_update = 0; + // keep track of the last coded dimensions + cm->last_width = cm->Width; + cm->last_height = cm->Height; // Dont increment frame counters if this was an altref buffer update not a real frame if (cm->show_frame) { @@ -3533,8 +3644,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, FILE *recon_file; sprintf(filename, "enc%04d.yuv", (int) cm->current_video_frame); recon_file = fopen(filename, "wb"); - fwrite(cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]].buffer_alloc, - cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]].frame_size, + fwrite(cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]].buffer_alloc, + cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]].frame_size, 1, recon_file); fclose(recon_file); } @@ -3756,28 +3867,16 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, cm->fb_idx_ref_cnt[cm->new_fb_idx]--; cm->new_fb_idx = get_free_fb(cm); + /* Get the mapping of L/G/A to the reference buffer pool */ + cm->active_ref_idx[0] = cm->ref_frame_map[cpi->lst_fb_idx]; + cm->active_ref_idx[1] = cm->ref_frame_map[cpi->gld_fb_idx]; + cm->active_ref_idx[2] = cm->ref_frame_map[cpi->alt_fb_idx]; + /* Reset the frame pointers to the current frame size */ vp8_yv12_realloc_frame_buffer(&cm->yv12_fb[cm->new_fb_idx], cm->mb_cols * 16, cm->mb_rows * 16, VP9BORDERINPIXELS); - /* Disable any references that have different size */ - if ((cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]].y_width != - cm->yv12_fb[cm->new_fb_idx].y_width) || - (cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]].y_height != - cm->yv12_fb[cm->new_fb_idx].y_height)) - cpi->ref_frame_flags &= ~VP9_LAST_FLAG; - if ((cm->yv12_fb[cm->active_ref_idx[cpi->gld_fb_idx]].y_width != - cm->yv12_fb[cm->new_fb_idx].y_width) || - (cm->yv12_fb[cm->active_ref_idx[cpi->gld_fb_idx]].y_height != - cm->yv12_fb[cm->new_fb_idx].y_height)) - cpi->ref_frame_flags &= ~VP9_GOLD_FLAG; - if ((cm->yv12_fb[cm->active_ref_idx[cpi->alt_fb_idx]].y_width != - cm->yv12_fb[cm->new_fb_idx].y_width) || - (cm->yv12_fb[cm->active_ref_idx[cpi->alt_fb_idx]].y_height != - cm->yv12_fb[cm->new_fb_idx].y_height)) - cpi->ref_frame_flags &= ~VP9_ALT_FLAG; - vp9_setup_interp_filters(&cpi->mb.e_mbd, DEFAULT_INTERP_FILTER, cm); if (cpi->pass == 1) { Pass1Encode(cpi, size, dest, frame_flags); @@ -4027,18 +4126,31 @@ int vp9_set_active_map(VP9_PTR comp, unsigned char *map, int vp9_set_internal_size(VP9_PTR comp, VPX_SCALING horiz_mode, VPX_SCALING vert_mode) { VP9_COMP *cpi = (VP9_COMP *) comp; + VP9_COMMON *cm = &cpi->common; - if (horiz_mode <= ONETWO) - cpi->horiz_scale = horiz_mode; - else + if (horiz_mode > ONETWO) return -1; - if (vert_mode <= ONETWO) - cpi->vert_scale = vert_mode; - else + if (vert_mode > ONETWO) return -1; - vp9_change_config(comp, &cpi->oxcf); + if (cm->horiz_scale != horiz_mode || cm->vert_scale != vert_mode) { + int UNINITIALIZED_IS_SAFE(hr), UNINITIALIZED_IS_SAFE(hs); + int UNINITIALIZED_IS_SAFE(vr), UNINITIALIZED_IS_SAFE(vs); + + cm->horiz_scale = horiz_mode; + cm->vert_scale = vert_mode; + + Scale2Ratio(cm->horiz_scale, &hr, &hs); + Scale2Ratio(cm->vert_scale, &vr, &vs); + + // always go to the next whole number + cm->Width = (hs - 1 + cpi->oxcf.Width * hr) / hs; + cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs; + } + assert(cm->Width <= cpi->initial_width); + assert(cm->Height <= cpi->initial_height); + update_frame_size(cpi); return 0; } diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 9b509ea0b..02a371964 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -332,6 +332,7 @@ typedef struct VP9_COMP { int alt_is_last; // Alt reference frame same as last ( short circuit altref search) int gold_is_alt; // don't do both alt and gold search ( just do gold). + int scaled_ref_idx[3]; int lst_fb_idx; int gld_fb_idx; int alt_fb_idx; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index be091eee2..4f843005a 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -402,6 +402,10 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref]; ENTROPY_CONTEXT a_ec = *a, l_ec = *l; + ENTROPY_CONTEXT *const a1 = a + + sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT); + ENTROPY_CONTEXT *const l1 = l + + sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT); switch (tx_size) { case TX_4X4: @@ -416,6 +420,8 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, } break; case TX_8X8: + a_ec = (a[0] + a[1]) != 0; + l_ec = (l[0] + l[1]) != 0; scan = vp9_default_zig_zag1d_8x8; seg_eob = 64; break; @@ -425,12 +431,21 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, if (type == PLANE_TYPE_UV) { const int uv_idx = ib - 16; qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * uv_idx; + a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; + l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; + } else { + a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; + l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; } break; case TX_32X32: scan = vp9_default_zig_zag1d_32x32; seg_eob = 1024; qcoeff_ptr = xd->sb_coeff_data.qcoeff; + a_ec = (a[0] + a[1] + a[2] + a[3] + + a1[0] + a1[1] + a1[2] + a1[3]) != 0; + l_ec = (l[0] + l[1] + l[2] + l[3] + + l1[0] + l1[1] + l1[2] + l1[3]) != 0; break; default: abort(); @@ -459,6 +474,20 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, // is eob first coefficient; pt = (c > 0); *a = *l = pt; + if (tx_size >= TX_8X8) { + a[1] = l[1] = pt; + if (tx_size >= TX_16X16) { + if (type == PLANE_TYPE_UV) { + a1[0] = a1[1] = l1[0] = l1[1] = pt; + } else { + a[2] = a[3] = l[2] = l[3] = pt; + if (tx_size >= TX_32X32) { + a1[0] = a1[1] = a1[2] = a1[3] = pt; + l1[0] = l1[1] = l1[2] = l1[3] = pt; + } + } + } + } return cost; } @@ -701,15 +730,15 @@ static void copy_predictor(uint8_t *dst, const uint8_t *predictor) { static int rdcost_sby_32x32(MACROBLOCK *x, int backup) { MACROBLOCKD * const xd = &x->e_mbd; - ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; ENTROPY_CONTEXT *ta, *tl; if (backup) { ta = (ENTROPY_CONTEXT *) &t_above, tl = (ENTROPY_CONTEXT *) &t_left; - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES) * 2); + vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES) * 2); } else { ta = (ENTROPY_CONTEXT *) xd->above_context; tl = (ENTROPY_CONTEXT *) xd->left_context; @@ -1013,7 +1042,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, b->bmi.as_mode.first = mode; tx_type = get_tx_type_4x4(xd, b); if (tx_type != DCT_DCT) { - vp9_short_fht4x4(be->src_diff, be->coeff, 32, tx_type); + vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type); vp9_ht_quantize_b_4x4(be, b, tx_type); } else { x->fwd_txm4x4(be->src_diff, be->coeff, 32); @@ -1046,7 +1075,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, // inverse transform if (best_tx_type != DCT_DCT) - vp9_short_iht4x4(best_dqcoeff, b->diff, 32, best_tx_type); + vp9_short_iht4x4(best_dqcoeff, b->diff, 16, best_tx_type); else xd->inv_txm4x4(best_dqcoeff, b->diff, 32); @@ -1279,8 +1308,9 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, int distortion = 0, rate = 0; BLOCK *be = x->block + ib; BLOCKD *b = xd->block + ib; - ENTROPY_CONTEXT ta0, ta1, besta0 = 0, besta1 = 0; - ENTROPY_CONTEXT tl0, tl1, bestl0 = 0, bestl1 = 0; + ENTROPY_CONTEXT_PLANES ta, tl; + ENTROPY_CONTEXT *ta0, *ta1, besta0 = 0, besta1 = 0; + ENTROPY_CONTEXT *tl0, *tl1, bestl0 = 0, bestl1 = 0; /* * The predictor buffer is a 2d buffer with a stride of 16. Create @@ -1309,7 +1339,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { TX_TYPE tx_type = get_tx_type_8x8(xd, b); if (tx_type != DCT_DCT) - vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 32, tx_type); + vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type); else x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32); x->quantize_b_8x8(x->block + idx, xd->block + idx); @@ -1317,23 +1347,29 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, // compute quantization mse of 8x8 block distortion = vp9_block_error_c((x->block + idx)->coeff, (xd->block + idx)->dqcoeff, 64); - ta0 = a[vp9_block2above[TX_8X8][idx]]; - tl0 = l[vp9_block2left[TX_8X8][idx]]; + + vpx_memcpy(&ta, a, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&tl, l, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta0 = ((ENTROPY_CONTEXT*)&ta) + vp9_block2above[TX_8X8][idx]; + tl0 = ((ENTROPY_CONTEXT*)&tl) + vp9_block2left[TX_8X8][idx]; + ta1 = ta0 + 1; + tl1 = tl0 + 1; rate_t = cost_coeffs(x, xd->block + idx, PLANE_TYPE_Y_WITH_DC, - &ta0, &tl0, TX_8X8); + ta0, tl0, TX_8X8); rate += rate_t; - ta1 = ta0; - tl1 = tl0; } else { static const int iblock[4] = {0, 1, 4, 5}; TX_TYPE tx_type; int i; - ta0 = a[vp9_block2above[TX_4X4][ib]]; - ta1 = a[vp9_block2above[TX_4X4][ib + 1]]; - tl0 = l[vp9_block2left[TX_4X4][ib]]; - tl1 = l[vp9_block2left[TX_4X4][ib + 4]]; + vpx_memcpy(&ta, a, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&tl, l, sizeof(ENTROPY_CONTEXT_PLANES)); + ta0 = ((ENTROPY_CONTEXT*)&ta) + vp9_block2above[TX_4X4][ib]; + tl0 = ((ENTROPY_CONTEXT*)&tl) + vp9_block2left[TX_4X4][ib]; + ta1 = ta0 + 1; + tl1 = tl0 + 1; distortion = 0; rate_t = 0; for (i = 0; i < 4; ++i) { @@ -1342,7 +1378,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, be = &x->block[ib + iblock[i]]; tx_type = get_tx_type_4x4(xd, b); if (tx_type != DCT_DCT) { - vp9_short_fht4x4(be->src_diff, be->coeff, 32, tx_type); + vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type); vp9_ht_quantize_b_4x4(be, b, tx_type); } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) { x->fwd_txm8x4(be->src_diff, be->coeff, 32); @@ -1354,15 +1390,13 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, } distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two); rate_t += cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, - // i&1 ? &ta1 : &ta0, i&2 ? &tl1 : &tl0, - &ta0, &tl0, + i&1 ? ta1 : ta0, i&2 ? tl1 : tl0, TX_4X4); if (do_two) { + i++; rate_t += cost_coeffs(x, b + 1, PLANE_TYPE_Y_WITH_DC, - // i&1 ? &ta1 : &ta0, i&2 ? &tl1 : &tl0, - &ta0, &tl0, + i&1 ? ta1 : ta0, i&2 ? tl1 : tl0, TX_4X4); - i++; } } b = &xd->block[ib]; @@ -1376,10 +1410,10 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, *bestrate = rate; *bestratey = rate_t; *bestdistortion = distortion; - besta0 = ta0; - besta1 = ta1; - bestl0 = tl0; - bestl1 = tl1; + besta0 = *ta0; + besta1 = *ta1; + bestl0 = *tl0; + bestl1 = *tl1; best_rd = this_rd; *best_mode = mode; copy_predictor_8x8(best_predictor, b->predictor); @@ -1532,12 +1566,12 @@ static int rd_cost_sbuv_16x16(MACROBLOCK *x, int backup) { int b; int cost = 0; MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; ENTROPY_CONTEXT *ta, *tl; if (backup) { - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES) * 2); + vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES) * 2); ta = (ENTROPY_CONTEXT *) &t_above; tl = (ENTROPY_CONTEXT *) &t_left; @@ -1637,8 +1671,9 @@ static int64_t rd_inter64x64_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, } static int64_t rd_inter4x4_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, - int *distortion, int *skip, int fullpixel) { - vp9_build_inter4x4_predictors_mbuv(&x->e_mbd); + int *distortion, int *skip, int fullpixel, + int mb_row, int mb_col) { + vp9_build_inter4x4_predictors_mbuv(&x->e_mbd, mb_row, mb_col); vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride); return rd_inter16x16_uv_4x4(cpi, x, rate, distortion, fullpixel, skip, 1); @@ -2115,9 +2150,22 @@ static int64_t encode_inter_mb_segment(MACROBLOCK *x, BLOCK *be = &x->block[i]; int thisdistortion; - vp9_build_inter_predictors_b(bd, 16, &xd->subpix); - if (xd->mode_info_context->mbmi.second_ref_frame > 0) - vp9_build_2nd_inter_predictors_b(bd, 16, &xd->subpix); + vp9_build_inter_predictor(*(bd->base_pre) + bd->pre, + bd->pre_stride, + bd->predictor, 16, + &bd->bmi.as_mv[0], + &xd->scale_factor[0], + 4, 4, 0 /* no avg */, &xd->subpix); + + if (xd->mode_info_context->mbmi.second_ref_frame > 0) { + vp9_build_inter_predictor(*(bd->base_second_pre) + bd->pre, + bd->pre_stride, + bd->predictor, 16, + &bd->bmi.as_mv[1], + &xd->scale_factor[1], + 4, 4, 1 /* avg */, &xd->subpix); + } + vp9_subtract_b(be, bd, 16); x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(be, bd); @@ -2159,14 +2207,25 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, int ib = vp9_i8x8_block[i]; if (labels[ib] == which_label) { + const int use_second_ref = + xd->mode_info_context->mbmi.second_ref_frame > 0; + int which_mv; int idx = (ib & 8) + ((ib & 2) << 1); BLOCKD *bd = &xd->block[ib], *bd2 = &xd->block[idx]; BLOCK *be = &x->block[ib], *be2 = &x->block[idx]; int thisdistortion; - vp9_build_inter_predictors4b(xd, bd, 16); - if (xd->mode_info_context->mbmi.second_ref_frame > 0) - vp9_build_2nd_inter_predictors4b(xd, bd, 16); + for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { + uint8_t **base_pre = which_mv ? bd->base_second_pre : bd->base_pre; + + vp9_build_inter_predictor(*base_pre + bd->pre, + bd->pre_stride, + bd->predictor, 16, + &bd->bmi.as_mv[which_mv], + &xd->scale_factor[which_mv], + 8, 8, which_mv, &xd->subpix); + } + vp9_subtract_4b_c(be, bd, 16); if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) { @@ -3050,26 +3109,45 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, int_mv frame_nearest_mv[MAX_REF_FRAMES], int_mv frame_near_mv[MAX_REF_FRAMES], int frame_mdcounts[4][4], - YV12_BUFFER_CONFIG yv12_mb[4]) { - YV12_BUFFER_CONFIG *yv12 = &cpi->common.yv12_fb[idx]; + YV12_BUFFER_CONFIG yv12_mb[4], + struct scale_factors scale[MAX_REF_FRAMES]) { + VP9_COMMON *cm = &cpi->common; + YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]]; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; + int use_prev_in_find_mv_refs, use_prev_in_find_best_ref; - setup_pred_block(&yv12_mb[frame_type], yv12, mb_row, mb_col); + // set up scaling factors + scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1]; + scale[frame_type].x_offset_q4 = + (mb_col * 16 * scale[frame_type].x_num / scale[frame_type].x_den) & 0xf; + scale[frame_type].y_offset_q4 = + (mb_row * 16 * scale[frame_type].y_num / scale[frame_type].y_den) & 0xf; + + // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this + // use the UV scaling factors. + setup_pred_block(&yv12_mb[frame_type], yv12, mb_row, mb_col, + &scale[frame_type], &scale[frame_type]); // Gets an initial list of candidate vectors from neighbours and orders them + use_prev_in_find_mv_refs = cm->Width == cm->last_width && + cm->Height == cm->last_height && + !cpi->common.error_resilient_mode; vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context, - cpi->common.error_resilient_mode ? - 0 : xd->prev_mode_info_context, + use_prev_in_find_mv_refs ? xd->prev_mode_info_context : NULL, frame_type, mbmi->ref_mvs[frame_type], cpi->common.ref_frame_sign_bias); // Candidate refinement carried out at encoder and decoder + use_prev_in_find_best_ref = + scale[frame_type].x_num == scale[frame_type].x_den && + scale[frame_type].y_num == scale[frame_type].y_den && + !cm->error_resilient_mode && + !cm->frame_parallel_decoding_mode; vp9_find_best_ref_mvs(xd, - cpi->common.error_resilient_mode || - cpi->common.frame_parallel_decoding_mode ? - 0 : yv12_mb[frame_type].y_buffer, + use_prev_in_find_best_ref ? + yv12_mb[frame_type].y_buffer : NULL, yv12->y_stride, mbmi->ref_mvs[frame_type], &frame_nearest_mv[frame_type], @@ -3140,7 +3218,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int mode_index, INTERPOLATIONFILTERTYPE *best_filter, int_mv frame_mv[MB_MODE_COUNT] - [MAX_REF_FRAMES]) { + [MAX_REF_FRAMES], + YV12_BUFFER_CONFIG *scaled_ref_frame, + int mb_row, int mb_col) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; @@ -3184,6 +3264,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, x->nmvjointcost, x->mvcost, 96, x->e_mbd.allow_high_precision_mv); } else { + YV12_BUFFER_CONFIG backup_yv12 = xd->pre; int bestsme = INT_MAX; int further_steps, step_param = cpi->sf.first_step; int sadpb = x->sadperbit16; @@ -3195,6 +3276,16 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int tmp_row_min = x->mv_row_min; int tmp_row_max = x->mv_row_max; + if (scaled_ref_frame) { + // Swap out the reference frame for a version that's been scaled to + // match the resolution of the current frame, allowing the existing + // motion search code to be used without additional modifications. + xd->pre = *scaled_ref_frame; + xd->pre.y_buffer += mb_row * 16 * xd->pre.y_stride + mb_col * 16; + xd->pre.u_buffer += mb_row * 8 * xd->pre.uv_stride + mb_col * 8; + xd->pre.v_buffer += mb_row * 8 * xd->pre.uv_stride + mb_col * 8; + } + vp9_clamp_mv_min_max(x, &ref_mv[0]); // mvp_full.as_int = ref_mv[0].as_int; @@ -3237,6 +3328,11 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, *rate2 += vp9_mv_bit_cost(&tmp_mv, &ref_mv[0], x->nmvjointcost, x->mvcost, 96, xd->allow_high_precision_mv); + + // restore the predictor, if required + if (scaled_ref_frame) { + xd->pre = backup_yv12; + } } break; case NEARMV: @@ -3318,7 +3414,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, - xd->dst.uv_stride); + xd->dst.uv_stride, + mb_row, mb_col); var = vp9_variance64x64(*(b->base_src), b->src_stride, xd->dst.y_buffer, xd->dst.y_stride, &sse); // Note our transform coeffs are 8 times an orthogonal transform. @@ -3402,7 +3499,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, - xd->dst.uv_stride); + xd->dst.uv_stride, + mb_row, mb_col); var = vp9_variance32x32(*(b->base_src), b->src_stride, xd->dst.y_buffer, xd->dst.y_stride, &sse); // Note our transform coeffs are 8 times an orthogonal transform. @@ -3482,19 +3580,21 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, unsigned int sse, var; int tmp_rate_y, tmp_rate_u, tmp_rate_v; int tmp_dist_y, tmp_dist_u, tmp_dist_v; - vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0); - if (is_comp_pred) - vp9_build_2nd_inter16x16_predictors_mby(xd, xd->predictor, 16); + // TODO(jkoleszar): these 2 y/uv should be replaced with one call to + // vp9_build_interintra_16x16_predictors_mb(). + vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, + mb_row, mb_col); + #if CONFIG_COMP_INTERINTRA_PRED if (is_comp_interintra_pred) { vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16); } #endif - vp9_build_1st_inter16x16_predictors_mbuv(xd, xd->predictor + 256, - xd->predictor + 320, 8); - if (is_comp_pred) - vp9_build_2nd_inter16x16_predictors_mbuv(xd, xd->predictor + 256, - xd->predictor + 320, 8); + + vp9_build_inter16x16_predictors_mbuv(xd, xd->predictor + 256, + xd->predictor + 320, 8, + mb_row, mb_col); + #if CONFIG_COMP_INTERINTRA_PRED if (is_comp_interintra_pred) { vp9_build_interintra_16x16_predictors_mbuv(xd, xd->predictor + 256, @@ -3589,28 +3689,29 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, - xd->dst.uv_stride); + xd->dst.uv_stride, + mb_row, mb_col); } else if (block_size == BLOCK_32X32) { vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, - xd->dst.uv_stride); + xd->dst.uv_stride, + mb_row, mb_col); } else { - vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0); - if (is_comp_pred) - vp9_build_2nd_inter16x16_predictors_mby(xd, xd->predictor, 16); + // TODO(jkoleszar): These y/uv fns can be replaced with their mb + // equivalent + vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, + mb_row, mb_col); #if CONFIG_COMP_INTERINTRA_PRED if (is_comp_interintra_pred) { vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16); } #endif - vp9_build_1st_inter16x16_predictors_mbuv(xd, &xd->predictor[256], - &xd->predictor[320], 8); - if (is_comp_pred) - vp9_build_2nd_inter16x16_predictors_mbuv(xd, &xd->predictor[256], - &xd->predictor[320], 8); + vp9_build_inter16x16_predictors_mbuv(xd, &xd->predictor[256], + &xd->predictor[320], 8, + mb_row, mb_col); #if CONFIG_COMP_INTERINTRA_PRED if (is_comp_interintra_pred) { vp9_build_interintra_16x16_predictors_mbuv(xd, &xd->predictor[256], @@ -3805,6 +3906,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex, cpi->common.y1dc_delta_q); + struct scale_factors scale_factor[4]; + vpx_memset(mode8x8, 0, sizeof(mode8x8)); vpx_memset(&frame_mv, 0, sizeof(frame_mv)); vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); @@ -3828,24 +3931,24 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (cpi->ref_frame_flags & VP9_LAST_FLAG) { - setup_buffer_inter(cpi, x, cpi->common.active_ref_idx[cpi->lst_fb_idx], + setup_buffer_inter(cpi, x, cpi->lst_fb_idx, LAST_FRAME, BLOCK_16X16, mb_row, mb_col, frame_mv[NEARESTMV], frame_mv[NEARMV], - frame_mdcounts, yv12_mb); + frame_mdcounts, yv12_mb, scale_factor); } if (cpi->ref_frame_flags & VP9_GOLD_FLAG) { - setup_buffer_inter(cpi, x, cpi->common.active_ref_idx[cpi->gld_fb_idx], + setup_buffer_inter(cpi, x, cpi->gld_fb_idx, GOLDEN_FRAME, BLOCK_16X16, mb_row, mb_col, frame_mv[NEARESTMV], frame_mv[NEARMV], - frame_mdcounts, yv12_mb); + frame_mdcounts, yv12_mb, scale_factor); } if (cpi->ref_frame_flags & VP9_ALT_FLAG) { - setup_buffer_inter(cpi, x, cpi->common.active_ref_idx[cpi->alt_fb_idx], + setup_buffer_inter(cpi, x, cpi->alt_fb_idx, ALTREF_FRAME, BLOCK_16X16, mb_row, mb_col, frame_mv[NEARESTMV], frame_mv[NEARMV], - frame_mdcounts, yv12_mb); + frame_mdcounts, yv12_mb, scale_factor); } *returnintra = INT64_MAX; @@ -3884,6 +3987,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, #endif int mode_excluded = 0; int64_t txfm_cache[NB_TXFM_MODES] = { 0 }; + YV12_BUFFER_CONFIG *scaled_ref_frame; // These variables hold are rolling total cost and distortion for this mode rate2 = 0; @@ -3900,6 +4004,10 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame; mbmi->interp_filter = cm->mcomp_filter_type; + + set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame, + scale_factor); + vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); // Test best rd so far against threshold for trying this mode. @@ -3915,6 +4023,18 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, !(cpi->ref_frame_flags & flag_list[mbmi->second_ref_frame])) continue; + // only scale on zeromv. + if (mbmi->ref_frame > 0 && + (yv12_mb[mbmi->ref_frame].y_width != cm->mb_cols * 16 || + yv12_mb[mbmi->ref_frame].y_height != cm->mb_rows * 16) && + this_mode != ZEROMV) + continue; + if (mbmi->second_ref_frame > 0 && + (yv12_mb[mbmi->second_ref_frame].y_width != cm->mb_cols * 16 || + yv12_mb[mbmi->second_ref_frame].y_height != cm->mb_rows * 16) && + this_mode != ZEROMV) + continue; + // current coding mode under rate-distortion optimization test loop #if CONFIG_COMP_INTERINTRA_PRED mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); @@ -3947,12 +4067,25 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } /* everything but intra */ + scaled_ref_frame = NULL; if (mbmi->ref_frame) { int ref = mbmi->ref_frame; + int fb; xd->pre = yv12_mb[ref]; best_ref_mv = mbmi->ref_mvs[ref][0]; vpx_memcpy(mdcounts, frame_mdcounts[ref], sizeof(mdcounts)); + + if (mbmi->ref_frame == LAST_FRAME) { + fb = cpi->lst_fb_idx; + } else if (mbmi->ref_frame == GOLDEN_FRAME) { + fb = cpi->gld_fb_idx; + } else { + fb = cpi->alt_fb_idx; + } + + if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb]) + scaled_ref_frame = &cm->yv12_fb[cpi->scaled_ref_idx[fb]]; } if (mbmi->second_ref_frame > 0) { @@ -4233,7 +4366,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int uv_skippable; rd_inter4x4_uv(cpi, x, &rate_uv, &distortion_uv, &uv_skippable, - cpi->common.full_pixel); + cpi->common.full_pixel, mb_row, mb_col); rate2 += rate_uv; distortion2 += distortion_uv; skippable = skippable && uv_skippable; @@ -4275,7 +4408,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, &rate_y, &distortion, &rate_uv, &distortion_uv, &mode_excluded, &disable_skip, - mode_index, &tmp_best_filter, frame_mv); + mode_index, &tmp_best_filter, frame_mv, + scaled_ref_frame, mb_row, mb_col); if (this_rd == INT64_MAX) continue; } @@ -4526,6 +4660,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0; mbmi->partitioning = 0; + set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame, + scale_factor); vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff)); vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff)); @@ -4578,6 +4714,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } end: + set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame, + scale_factor); store_coding_context(x, &x->mb_context[xd->sb_index][xd->mb_index], best_mode_index, &best_partition, &mbmi->ref_mvs[mbmi->ref_frame][0], @@ -4791,9 +4929,9 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; int idx_list[4] = {0, - cpi->common.active_ref_idx[cpi->lst_fb_idx], - cpi->common.active_ref_idx[cpi->gld_fb_idx], - cpi->common.active_ref_idx[cpi->alt_fb_idx]}; + cpi->lst_fb_idx, + cpi->gld_fb_idx, + cpi->alt_fb_idx}; int mdcounts[4]; int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; int saddone = 0; @@ -4820,6 +4958,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int rate_uv_16x16 = 0, rate_uv_tokenonly_16x16 = 0; int dist_uv_16x16 = 0, uv_skip_16x16 = 0; MB_PREDICTION_MODE mode_uv_16x16 = NEARESTMV; + struct scale_factors scale_factor[4]; xd->mode_info_context->mbmi.segment_id = segment_id; estimate_ref_frame_costs(cpi, segment_id, ref_costs); @@ -4835,7 +4974,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size, mb_row, mb_col, frame_mv[NEARESTMV], frame_mv[NEARMV], frame_mdcounts, - yv12_mb); + yv12_mb, scale_factor); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; @@ -4914,6 +5053,8 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } mbmi->ref_frame = ref_frame; mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame; + set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame, + scale_factor); comp_pred = mbmi->second_ref_frame > INTRA_FRAME; mbmi->mode = this_mode; mbmi->uv_mode = DC_PRED; @@ -4921,6 +5062,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); #endif + // Evaluate all sub-pel filters irrespective of whether we can use // them for this frame. mbmi->interp_filter = cm->mcomp_filter_type; @@ -4945,6 +5087,8 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (!(cpi->ref_frame_flags & flag_list[second_ref])) continue; mbmi->second_ref_frame = second_ref; + set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame, + scale_factor); xd->second_pre = yv12_mb[second_ref]; mode_excluded = @@ -5022,6 +5166,20 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, rate2 = rate_y + x->mbmode_cost[cm->frame_type][mbmi->mode] + rate_uv; distortion2 = distortion_y + distortion_uv; } else { + YV12_BUFFER_CONFIG *scaled_ref_frame = NULL; + int fb; + + if (mbmi->ref_frame == LAST_FRAME) { + fb = cpi->lst_fb_idx; + } else if (mbmi->ref_frame == GOLDEN_FRAME) { + fb = cpi->gld_fb_idx; + } else { + fb = cpi->alt_fb_idx; + } + + if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb]) + scaled_ref_frame = &cm->yv12_fb[cpi->scaled_ref_idx[fb]]; + #if CONFIG_COMP_INTERINTRA_PRED if (mbmi->second_ref_frame == INTRA_FRAME) { if (best_intra16_mode == DC_PRED - 1) continue; @@ -5043,7 +5201,8 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, &rate_y, &distortion_y, &rate_uv, &distortion_uv, &mode_excluded, &disable_skip, - mode_index, &tmp_best_filter, frame_mv); + mode_index, &tmp_best_filter, frame_mv, + scaled_ref_frame, mb_row, mb_col); if (this_rd == INT64_MAX) continue; } @@ -5296,6 +5455,8 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } end: + set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame, + scale_factor); { PICK_MODE_CONTEXT *p = (block_size == BLOCK_32X32) ? &x->sb32_context[xd->sb_index] : diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index 710ae58fe..01b156044 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -45,18 +45,4 @@ extern void vp9_init_me_luts(); extern void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv); -static void setup_pred_block(YV12_BUFFER_CONFIG *dst, - const YV12_BUFFER_CONFIG *src, - int mb_row, int mb_col) { - const int recon_y_stride = src->y_stride; - const int recon_uv_stride = src->uv_stride; - const int recon_yoffset = 16 * mb_row * recon_y_stride + 16 * mb_col; - const int recon_uvoffset = 8 * mb_row * recon_uv_stride + 8 * mb_col; - - *dst = *src; - dst->y_buffer += recon_yoffset; - dst->u_buffer += recon_uvoffset; - dst->v_buffer += recon_uvoffset; -} - #endif // VP9_ENCODER_VP9_RDOPT_H_ diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index d016e52cc..a6cd1c0c3 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -12,6 +12,7 @@ #include <limits.h> #include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_reconinter.h" #include "vp9/encoder/vp9_onyx_int.h" #include "vp9/common/vp9_systemdependent.h" #include "vp9/encoder/vp9_quantize.h" @@ -42,40 +43,35 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, int mv_row, int mv_col, uint8_t *pred) { - int offset; - uint8_t *yptr, *uptr, *vptr; - int omv_row, omv_col; - - // Y - yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3); - - xd->subpix.predict[!!(mv_col & 7)][!!(mv_row & 7)][0]( - yptr, stride, &pred[0], 16, - xd->subpix.filter_x[(mv_col & 7) << 1], xd->subpix.x_step_q4, - xd->subpix.filter_y[(mv_row & 7) << 1], xd->subpix.y_step_q4, - 16, 16); - - // U & V - omv_row = mv_row; - omv_col = mv_col; - mv_row >>= 1; - mv_col >>= 1; + const int which_mv = 0; + int_mv subpel_mv; + int_mv fullpel_mv; + + subpel_mv.as_mv.row = mv_row; + subpel_mv.as_mv.col = mv_col; + // TODO(jkoleszar): Make this rounding consistent with the rest of the code + fullpel_mv.as_mv.row = (mv_row >> 1) & ~7; + fullpel_mv.as_mv.col = (mv_col >> 1) & ~7; + + vp9_build_inter_predictor(y_mb_ptr, stride, + &pred[0], 16, + &subpel_mv, + &xd->scale_factor[which_mv], + 16, 16, which_mv, &xd->subpix); + stride = (stride + 1) >> 1; - offset = (mv_row >> 3) * stride + (mv_col >> 3); - uptr = u_mb_ptr + offset; - vptr = v_mb_ptr + offset; - - xd->subpix.predict[!!(omv_col & 15)][!!(omv_row & 15)][0]( - uptr, stride, &pred[256], 8, - xd->subpix.filter_x[(omv_col & 15)], xd->subpix.x_step_q4, - xd->subpix.filter_y[(omv_row & 15)], xd->subpix.y_step_q4, - 8, 8); - - xd->subpix.predict[!!(omv_col & 15)][!!(omv_row & 15)][0]( - vptr, stride, &pred[320], 8, - xd->subpix.filter_x[(omv_col & 15)], xd->subpix.x_step_q4, - xd->subpix.filter_y[(omv_row & 15)], xd->subpix.y_step_q4, - 8, 8); + + vp9_build_inter_predictor_q4(u_mb_ptr, stride, + &pred[256], 8, + &fullpel_mv, &subpel_mv, + &xd->scale_factor_uv[which_mv], + 8, 8, which_mv, &xd->subpix); + + vp9_build_inter_predictor_q4(v_mb_ptr, stride, + &pred[320], 8, + &fullpel_mv, &subpel_mv, + &xd->scale_factor_uv[which_mv], + 8, 8, which_mv, &xd->subpix); } void vp9_temporal_filter_apply_c(uint8_t *frame1, @@ -460,6 +456,13 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { , start_frame); #endif + // Setup scaling factors. Scaling on each of the arnr frames is not supported + vp9_setup_scale_factors_for_frame(&cpi->mb.e_mbd.scale_factor[0], + &cpi->common.yv12_fb[cpi->common.new_fb_idx], + 16 * cpi->common.mb_cols, + 16 * cpi->common.mb_rows); + cpi->mb.e_mbd.scale_factor_uv[0] = cpi->mb.e_mbd.scale_factor[0]; + // Setup frame pointers, NULL indicates frame not included in filter vpx_memset(cpi->frames, 0, max_frames * sizeof(YV12_BUFFER_CONFIG *)); for (frame = 0; frame < frames_to_blur; frame++) { diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 8efc97697..1b07359da 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -145,17 +145,14 @@ static void tokenize_b(VP9_COMP *cpi, probs = cpi->common.fc.coef_probs_4x4; break; case TX_8X8: -#if CONFIG_CNVCONTEXT a_ec = (a[0] + a[1]) != 0; l_ec = (l[0] + l[1]) != 0; -#endif seg_eob = 64; scan = vp9_default_zig_zag1d_8x8; counts = cpi->coef_counts_8x8; probs = cpi->common.fc.coef_probs_8x8; break; case TX_16X16: -#if CONFIG_CNVCONTEXT if (type != PLANE_TYPE_UV) { a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; @@ -163,7 +160,6 @@ static void tokenize_b(VP9_COMP *cpi, a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; } -#endif seg_eob = 256; scan = vp9_default_zig_zag1d_16x16; counts = cpi->coef_counts_16x16; @@ -174,14 +170,12 @@ static void tokenize_b(VP9_COMP *cpi, } break; case TX_32X32: -#if CONFIG_CNVCONTEXT a_ec = a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]; l_ec = l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]; a_ec = a_ec != 0; l_ec = l_ec != 0; -#endif seg_eob = 1024; scan = vp9_default_zig_zag1d_32x32; counts = cpi->coef_counts_32x32; @@ -635,15 +629,12 @@ static INLINE void stuff_b(VP9_COMP *cpi, probs = cpi->common.fc.coef_probs_4x4; break; case TX_8X8: -#if CONFIG_CNVCONTEXT a_ec = (a[0] + a[1]) != 0; l_ec = (l[0] + l[1]) != 0; -#endif counts = cpi->coef_counts_8x8; probs = cpi->common.fc.coef_probs_8x8; break; case TX_16X16: -#if CONFIG_CNVCONTEXT if (type != PLANE_TYPE_UV) { a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; @@ -651,19 +642,16 @@ static INLINE void stuff_b(VP9_COMP *cpi, a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; } -#endif counts = cpi->coef_counts_16x16; probs = cpi->common.fc.coef_probs_16x16; break; case TX_32X32: -#if CONFIG_CNVCONTEXT a_ec = a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]; l_ec = l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]; a_ec = a_ec != 0; l_ec = l_ec != 0; -#endif counts = cpi->coef_counts_32x32; probs = cpi->common.fc.coef_probs_32x32; break; @@ -30,6 +30,7 @@ #endif #include "tools_common.h" #include "nestegg/include/nestegg/nestegg.h" +#include "third_party/libyuv/include/libyuv/scale.h" #if CONFIG_OS_SUPPORT #if defined(_MSC_VER) @@ -93,6 +94,8 @@ static const arg_def_t verbosearg = ARG_DEF("v", "verbose", 0, "Show version string"); static const arg_def_t error_concealment = ARG_DEF(NULL, "error-concealment", 0, "Enable decoder error-concealment"); +static const arg_def_t scalearg = ARG_DEF("S", "scale", 0, + "Scale output frames uniformly"); #if CONFIG_MD5 @@ -102,7 +105,7 @@ static const arg_def_t md5arg = ARG_DEF(NULL, "md5", 0, static const arg_def_t *all_args[] = { &codecarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg, &progressarg, &limitarg, &skiparg, &postprocarg, &summaryarg, &outputfile, - &threadsarg, &verbosearg, + &threadsarg, &verbosearg, &scalearg, #if CONFIG_MD5 &md5arg, #endif @@ -708,6 +711,9 @@ int main(int argc, const char **argv_) { struct input_ctx input = {0}; int frames_corrupted = 0; int dec_flags = 0; + int do_scale; + int stream_w = 0, stream_h = 0; + vpx_image_t *scaled_img = NULL; /* Parse command line */ exec_name = argv_[0]; @@ -757,6 +763,8 @@ int main(int argc, const char **argv_) { cfg.threads = arg_parse_uint(&arg); else if (arg_match(&arg, &verbosearg, argi)) quiet = 0; + else if (arg_match(&arg, &scalearg, argi)) + do_scale = 1; #if CONFIG_VP8_DECODER else if (arg_match(&arg, &addnoise_level, argi)) { @@ -1015,6 +1023,30 @@ int main(int argc, const char **argv_) { show_progress(frame_in, frame_out, dx_time); if (!noblit) { + if (do_scale) { + if (frame_out == 1) { + stream_w = img->d_w; + stream_h = img->d_h; + scaled_img = vpx_img_alloc(NULL, VPX_IMG_FMT_I420, + stream_w, stream_h, 16); + } + if (img && (img->d_w != stream_w || img->d_h != stream_h)) { + I420Scale(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y], + img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U], + img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V], + img->d_w, img->d_h, + scaled_img->planes[VPX_PLANE_Y], + scaled_img->stride[VPX_PLANE_Y], + scaled_img->planes[VPX_PLANE_U], + scaled_img->stride[VPX_PLANE_U], + scaled_img->planes[VPX_PLANE_V], + scaled_img->stride[VPX_PLANE_V], + stream_w, stream_h, + kFilterBox); + img = scaled_img; + } + } + if (img) { unsigned int y; char out_fn[PATH_MAX]; @@ -47,6 +47,7 @@ #include "y4minput.h" #include "libmkv/EbmlWriter.h" #include "libmkv/EbmlIDs.h" +#include "third_party/libyuv/include/libyuv/scale.h" /* Need special handling of these functions on Windows */ #if defined(_MSC_VER) @@ -1642,6 +1643,7 @@ struct stream_state { uint64_t cx_time; size_t nbytes; stats_io_t stats; + struct vpx_image *img; vpx_codec_ctx_t decoder; vpx_ref_frame_t ref_enc; vpx_ref_frame_t ref_dec; @@ -2061,11 +2063,15 @@ static void validate_stream_config(struct stream_state *stream) { static void set_stream_dimensions(struct stream_state *stream, unsigned int w, unsigned int h) { - if ((stream->config.cfg.g_w && stream->config.cfg.g_w != w) - || (stream->config.cfg.g_h && stream->config.cfg.g_h != h)) - fatal("Stream %d: Resizing not yet supported", stream->index); - stream->config.cfg.g_w = w; - stream->config.cfg.g_h = h; + if (!stream->config.cfg.g_w) { + if (!stream->config.cfg.g_h) + stream->config.cfg.g_w = w; + else + stream->config.cfg.g_w = w * stream->config.cfg.g_h / h; + } + if (!stream->config.cfg.g_h) { + stream->config.cfg.g_h = h * stream->config.cfg.g_w / w; + } } @@ -2258,6 +2264,28 @@ static void encode_frame(struct stream_state *stream, next_frame_start = (cfg->g_timebase.den * (int64_t)(frames_in) * global->framerate.den) / cfg->g_timebase.num / global->framerate.num; + + /* Scale if necessary */ + if (img && (img->d_w != cfg->g_w || img->d_h != cfg->g_h)) { + if (!stream->img) + stream->img = vpx_img_alloc(NULL, VPX_IMG_FMT_I420, + cfg->g_w, cfg->g_h, 16); + I420Scale(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y], + img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U], + img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V], + img->d_w, img->d_h, + stream->img->planes[VPX_PLANE_Y], + stream->img->stride[VPX_PLANE_Y], + stream->img->planes[VPX_PLANE_U], + stream->img->stride[VPX_PLANE_U], + stream->img->planes[VPX_PLANE_V], + stream->img->stride[VPX_PLANE_V], + stream->img->d_w, stream->img->d_h, + kFilterBox); + + img = stream->img; + } + vpx_usec_timer_start(&timer); vpx_codec_encode(&stream->encoder, img, frame_start, (unsigned long)(next_frame_start - frame_start), @@ -2518,6 +2546,9 @@ int main(int argc, const char **argv_) { }); /* Update stream configurations from the input file's parameters */ + if (!input.w || !input.h) + fatal("Specify stream dimensions with --width (-w) " + " and --height (-h)"); FOREACH_STREAM(set_stream_dimensions(stream, input.w, input.h)); FOREACH_STREAM(validate_stream_config(stream)); |