diff options
40 files changed, 475 insertions, 472 deletions
@@ -246,6 +246,7 @@ EXPERIMENT_LIST=" enable_6tap abovesprefmv code_nonzerocount + useselectrefmv " CONFIG_LIST=" external_build diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc index f7709ecd4..dfb64c3a2 100644 --- a/test/fdct4x4_test.cc +++ b/test/fdct4x4_test.cc @@ -120,7 +120,7 @@ TEST(Vp9Fdct4x4Test, RoundTripErrorCheck) { } // Because the bitstream is not frozen yet, use the idct in the codebase. - vp9_short_idct4x4llm_c(test_temp_block, test_output_block, pitch); + vp9_short_idct4x4_c(test_temp_block, test_output_block, pitch); for (int j = 0; j < 16; ++j) { const int diff = test_input_block[j] - test_output_block[j]; diff --git a/test/idctllm_test.cc b/test/idct_test.cc index d6fdffea5..51fb65a43 100644 --- a/test/idctllm_test.cc +++ b/test/idct_test.cc @@ -10,8 +10,8 @@ extern "C" { -#include "vpx_config.h" -#include "vp8_rtcd.h" +#include "./vpx_config.h" +#include "./vp8_rtcd.h" } #include "test/register_state_check.h" #include "third_party/googletest/src/include/gtest/gtest.h" @@ -20,18 +20,16 @@ typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride); namespace { -class IDCTTest : public ::testing::TestWithParam<idct_fn_t> -{ +class IDCTTest : public ::testing::TestWithParam<idct_fn_t> { protected: - virtual void SetUp() - { + virtual void SetUp() { int i; UUT = GetParam(); memset(input, 0, sizeof(input)); /* Set up guard blocks */ - for(i=0; i<256; i++) - output[i] = ((i&0xF)<4&&(i<64))?0:-1; + for (i = 0; i < 256; i++) + output[i] = ((i & 0xF) < 4 && (i < 64)) ? 0 : -1; } idct_fn_t UUT; @@ -40,78 +38,72 @@ class IDCTTest : public ::testing::TestWithParam<idct_fn_t> unsigned char predict[256]; }; -TEST_P(IDCTTest, TestGuardBlocks) -{ +TEST_P(IDCTTest, TestGuardBlocks) { int i; - for(i=0; i<256; i++) - if((i&0xF) < 4 && i<64) + for (i = 0; i < 256; i++) + if ((i & 0xF) < 4 && i < 64) EXPECT_EQ(0, output[i]) << i; else EXPECT_EQ(255, output[i]); } -TEST_P(IDCTTest, TestAllZeros) -{ +TEST_P(IDCTTest, TestAllZeros) { int i; REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16)); - for(i=0; i<256; i++) - if((i&0xF) < 4 && i<64) + for (i = 0; i < 256; i++) + if ((i & 0xF) < 4 && i < 64) EXPECT_EQ(0, output[i]) << "i==" << i; else EXPECT_EQ(255, output[i]) << "i==" << i; } -TEST_P(IDCTTest, TestAllOnes) -{ +TEST_P(IDCTTest, TestAllOnes) { int i; input[0] = 4; REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16)); - for(i=0; i<256; i++) - if((i&0xF) < 4 && i<64) + for (i = 0; i < 256; i++) + if ((i & 0xF) < 4 && i < 64) EXPECT_EQ(1, output[i]) << "i==" << i; else EXPECT_EQ(255, output[i]) << "i==" << i; } -TEST_P(IDCTTest, TestAddOne) -{ +TEST_P(IDCTTest, TestAddOne) { int i; - for(i=0; i<256; i++) + for (i = 0; i < 256; i++) predict[i] = i; - input[0] = 4; REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16)); - for(i=0; i<256; i++) - if((i&0xF) < 4 && i<64) + for (i = 0; i < 256; i++) + if ((i & 0xF) < 4 && i < 64) EXPECT_EQ(i+1, output[i]) << "i==" << i; else EXPECT_EQ(255, output[i]) << "i==" << i; } -TEST_P(IDCTTest, TestWithData) -{ +TEST_P(IDCTTest, TestWithData) { int i; - for(i=0; i<16; i++) + for (i = 0; i < 16; i++) input[i] = i; REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16)); - for(i=0; i<256; i++) - if((i&0xF) > 3 || i>63) + for (i = 0; i < 256; i++) + if ((i & 0xF) > 3 || i > 63) EXPECT_EQ(255, output[i]) << "i==" << i; - else if(i == 0) + else if (i == 0) EXPECT_EQ(11, output[i]) << "i==" << i; - else if(i == 34) + else if (i == 34) EXPECT_EQ(1, output[i]) << "i==" << i; - else if(i == 2 || i == 17 || i == 32) + else if (i == 2 || i == 17 || i == 32) EXPECT_EQ(3, output[i]) << "i==" << i; else EXPECT_EQ(0, output[i]) << "i==" << i; diff --git a/test/test.mk b/test/test.mk index 37e4ee793..793fbf8b2 100644 --- a/test/test.mk +++ b/test/test.mk @@ -47,7 +47,7 @@ ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes) LIBVPX_TEST_SRCS-yes += vp8_boolcoder_test.cc endif -LIBVPX_TEST_SRCS-yes += idctllm_test.cc +LIBVPX_TEST_SRCS-yes += idct_test.cc LIBVPX_TEST_SRCS-yes += intrapred_test.cc LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc diff --git a/vp9/common/ppc/vp9_idctllm_altivec.asm b/vp9/common/ppc/vp9_idct_altivec.asm index 117d9cfc8..b87aa4200 100644 --- a/vp9/common/ppc/vp9_idctllm_altivec.asm +++ b/vp9/common/ppc/vp9_idct_altivec.asm @@ -9,7 +9,7 @@ ; - .globl short_idct4x4llm_ppc + .globl short_idct4x4_ppc .macro load_c V, LABEL, OFF, R0, R1 lis \R0, \LABEL@ha @@ -21,7 +21,7 @@ ;# r4 short *output ;# r5 int pitch .align 2 -short_idct4x4llm_ppc: +short_idct4x4_ppc: mfspr r11, 256 ;# get old VRSAVE oris r12, r11, 0xfff8 mtspr 256, r12 ;# set VRSAVE diff --git a/vp9/common/ppc/vp9_systemdependent.c b/vp9/common/ppc/vp9_systemdependent.c index 02035191f..ac13722d4 100644 --- a/vp9/common/ppc/vp9_systemdependent.c +++ b/vp9/common/ppc/vp9_systemdependent.c @@ -63,7 +63,7 @@ void recon_b_ppc(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_pt void recon2b_ppc(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride); void recon4b_ppc(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride); -extern void short_idct4x4llm_ppc(short *input, short *output, int pitch); +extern void short_idct4x4_ppc(short *input, short *output, int pitch); // Generic C extern subpixel_predict_function vp9_sixtap_predict_c; @@ -83,8 +83,8 @@ void vp9_recon_b_c(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ void vp9_recon2b_c(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride); void vp9_recon4b_c(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride); -extern void vp9_short_idct4x4llm_1_c(short *input, short *output, int pitch); -extern void vp9_short_idct4x4llm_c(short *input, short *output, int pitch); +extern void vp9_short_idct4x4_1_c(short *input, short *output, int pitch); +extern void vp9_short_idct4x4_c(short *input, short *output, int pitch); extern void vp8_dc_only_idct_c(short input_dc, short *output, int pitch); // PPC @@ -139,8 +139,8 @@ void vp9_machine_specific_config(void) { vp9_sixtap_predict8x4 = sixtap_predict8x4_ppc; vp9_sixtap_predict = sixtap_predict_ppc; - vp8_short_idct4x4_1 = vp9_short_idct4x4llm_1_c; - vp8_short_idct4x4 = short_idct4x4llm_ppc; + vp8_short_idct4x4_1 = vp9_short_idct4x4_1_c; + vp8_short_idct4x4 = short_idct4x4_ppc; vp8_dc_only_idct = vp8_dc_only_idct_c; vp8_lf_mbvfull = loop_filter_mbv_ppc; diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c index c3d6dae93..15c8c0d64 100644 --- a/vp9/common/vp9_alloccommon.c +++ b/vp9/common/vp9_alloccommon.c @@ -67,16 +67,13 @@ void vp9_de_alloc_frame_buffers(VP9_COMMON *oci) { int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) { int i; + int aligned_width, aligned_height; vp9_de_alloc_frame_buffers(oci); /* our internal buffers are always multiples of 16 */ - if ((width & 0xf) != 0) - width += 16 - (width & 0xf); - - if ((height & 0xf) != 0) - height += 16 - (height & 0xf); - + aligned_width = (width + 15) & ~15; + aligned_height = (height + 15) & ~15; for (i = 0; i < NUM_YV12_BUFFERS; i++) { oci->fb_idx_ref_cnt[i] = 0; @@ -110,8 +107,8 @@ int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) { return 1; } - oci->mb_rows = height >> 4; - oci->mb_cols = width >> 4; + oci->mb_rows = aligned_height >> 4; + oci->mb_cols = aligned_width >> 4; oci->MBs = oci->mb_rows * oci->mb_cols; oci->mode_info_stride = oci->mb_cols + 1; oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO)); diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c index 8409885a0..f6d6932cc 100644 --- a/vp9/common/vp9_findnearmv.c +++ b/vp9/common/vp9_findnearmv.c @@ -118,10 +118,12 @@ unsigned int vp9_sub_pixel_variance2x16_c(const uint8_t *src_ptr, return vp9_variance2x16_c(temp2, 2, dst_ptr, dst_pixels_per_line, sse); } +#if CONFIG_USESELECTREFMV /* check a list of motion vectors by sad score using a number rows of pixels * above and a number cols of pixels in the left to select the one with best * score to use as ref motion vector */ + void vp9_find_best_ref_mvs(MACROBLOCKD *xd, uint8_t *ref_y_buffer, int ref_y_stride, @@ -298,3 +300,20 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, // Copy back the re-ordered mv list vpx_memcpy(mvlist, sorted_mvs, sizeof(sorted_mvs)); } +#else +void vp9_find_best_ref_mvs(MACROBLOCKD *xd, + uint8_t *ref_y_buffer, + int ref_y_stride, + int_mv *mvlist, + int_mv *nearest, + int_mv *near) { + int i; + // Make sure all the candidates are properly clamped etc + for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { + lower_mv_precision(&mvlist[i], xd->allow_high_precision_mv); + clamp_mv2(&mvlist[i], xd); + } + *nearest = mvlist[0]; + *near = mvlist[1]; +} +#endif diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idct.c index e2106250f..3ec093f73 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idct.c @@ -8,20 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - -/**************************************************************************** - * Notes: - * - * This implementation makes use of 16 bit fixed point verio of two multiply - * constants: - * 1. sqrt(2) * cos (pi/8) - * 2. sqrt(2) * sin (pi/8) - * Becuase the first constant is bigger than 1, to maintain the same 16 bit - * fixed point precision as the second one, we use a trick of - * x * a = x + x*(a-1) - * so - * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1). - **************************************************************************/ #include <assert.h> #include <math.h> @@ -32,7 +18,7 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_idct.h" -void vp9_short_inv_walsh4x4_x8_c(int16_t *input, int16_t *output, int pitch) { +void vp9_short_iwalsh4x4_c(int16_t *input, int16_t *output, int pitch) { int i; int a1, b1, c1, d1; int16_t *ip = input; @@ -73,7 +59,7 @@ void vp9_short_inv_walsh4x4_x8_c(int16_t *input, int16_t *output, int pitch) { } } -void vp9_short_inv_walsh4x4_1_x8_c(int16_t *in, int16_t *out, int pitch) { +void vp9_short_iwalsh4x4_1_c(int16_t *in, int16_t *out, int pitch) { int i; int16_t tmp[4]; int16_t *ip = in; @@ -99,7 +85,7 @@ void vp9_dc_only_inv_walsh_add_c(int input_dc, uint8_t *pred_ptr, int r, c; int16_t dc = input_dc; int16_t tmp[4 * 4]; - vp9_short_inv_walsh4x4_1_x8_c(&dc, tmp, 4 << 1); + vp9_short_iwalsh4x4_1_c(&dc, tmp, 4 << 1); for (r = 0; r < 4; r++) { for (c = 0; c < 4; c++) @@ -130,7 +116,7 @@ void vp9_idct4_1d_c(int16_t *input, int16_t *output) { output[3] = step[0] - step[3]; } -void vp9_short_idct4x4llm_c(int16_t *input, int16_t *output, int pitch) { +void vp9_short_idct4x4_c(int16_t *input, int16_t *output, int pitch) { int16_t out[4 * 4]; int16_t *outptr = out; const int half_pitch = pitch >> 1; @@ -156,7 +142,7 @@ void vp9_short_idct4x4llm_c(int16_t *input, int16_t *output, int pitch) { } } -void vp9_short_idct4x4llm_1_c(int16_t *input, int16_t *output, int pitch) { +void vp9_short_idct4x4_1_c(int16_t *input, int16_t *output, int pitch) { int i; int a1; int16_t *op = output; diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h index 79d060945..55bcccb0e 100644 --- a/vp9/common/vp9_onyx.h +++ b/vp9/common/vp9_onyx.h @@ -211,8 +211,10 @@ extern "C" int vp9_update_reference(VP9_PTR comp, int ref_frame_flags); - int vp9_get_reference_enc(VP9_PTR comp, VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd); + int vp9_copy_reference_enc(VP9_PTR comp, VP9_REFFRAME ref_frame_flag, + YV12_BUFFER_CONFIG *sd); + + int vp9_get_reference_enc(VP9_PTR ptr, int index, YV12_BUFFER_CONFIG **fb); int vp9_set_reference_enc(VP9_PTR comp, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index a0867ae7f..f98ec442d 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -20,8 +20,8 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale, YV12_BUFFER_CONFIG *other, int this_w, int this_h) { - int other_h = other->y_height; - int other_w = other->y_width; + int other_h = other->y_crop_height; + int other_w = other->y_crop_width; scale->x_num = other_w; scale->x_den = this_w; @@ -95,7 +95,7 @@ void vp9_setup_interp_filters(MACROBLOCKD *xd, vp9_setup_scale_factors_for_frame(&cm->active_ref_scale[i], &cm->yv12_fb[cm->active_ref_idx[i]], - cm->mb_cols * 16, cm->mb_rows * 16); + cm->Width, cm->Height); } if (xd->mode_info_context) { diff --git a/vp9/common/vp9_reconintra.h b/vp9/common/vp9_reconintra.h index 3031fb699..b97b6089d 100644 --- a/vp9/common/vp9_reconintra.h +++ b/vp9/common/vp9_reconintra.h @@ -17,9 +17,10 @@ void vp9_recon_intra_mbuv(MACROBLOCKD *xd); B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, - int stride, int n); + int stride, int n, + int tx, int ty); -B_PREDICTION_MODE vp9_find_bpred_context(BLOCKD *x); +B_PREDICTION_MODE vp9_find_bpred_context(MACROBLOCKD *xd, BLOCKD *x); #if CONFIG_COMP_INTERINTRA_PRED void vp9_build_interintra_16x16_predictors_mb(MACROBLOCKD *xd, diff --git a/vp9/common/vp9_reconintra4x4.c b/vp9/common/vp9_reconintra4x4.c index 7fbee7c32..eab5ab495 100644 --- a/vp9/common/vp9_reconintra4x4.c +++ b/vp9/common/vp9_reconintra4x4.c @@ -15,17 +15,17 @@ #include "vp9_rtcd.h" #if CONFIG_NEWBINTRAMODES -static int find_grad_measure(uint8_t *x, int stride, int n, int t, +static int find_grad_measure(uint8_t *x, int stride, int n, int tx, int ty, int dx, int dy) { int i, j; int count = 0, gsum = 0, gdiv; /* TODO: Make this code more efficient by breaking up into two loops */ - for (i = -t; i < n; ++i) - for (j = -t; j < n; ++j) { + for (i = -ty; i < n; ++i) + for (j = -tx; j < n; ++j) { int g; if (i >= 0 && j >= 0) continue; if (i + dy >= 0 && j + dx >= 0) continue; - if (i + dy < -t || i + dy >= n || j + dx < -t || j + dx >= n) continue; + if (i + dy < -ty || i + dy >= n || j + dx < -tx || j + dx >= n) continue; g = abs(x[(i + dy) * stride + j + dx] - x[i * stride + j]); gsum += g * g; count++; @@ -36,14 +36,15 @@ static int find_grad_measure(uint8_t *x, int stride, int n, int t, #if CONTEXT_PRED_REPLACEMENTS == 6 B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, - int stride, int n) { + int stride, int n, + int tx, int ty) { int g[8], i, imin, imax; - g[1] = find_grad_measure(ptr, stride, n, 4, 2, 1); - g[2] = find_grad_measure(ptr, stride, n, 4, 1, 1); - g[3] = find_grad_measure(ptr, stride, n, 4, 1, 2); - g[5] = find_grad_measure(ptr, stride, n, 4, -1, 2); - g[6] = find_grad_measure(ptr, stride, n, 4, -1, 1); - g[7] = find_grad_measure(ptr, stride, n, 4, -2, 1); + g[1] = find_grad_measure(ptr, stride, n, tx, ty, 2, 1); + g[2] = find_grad_measure(ptr, stride, n, tx, ty, 1, 1); + g[3] = find_grad_measure(ptr, stride, n, tx, ty, 1, 2); + g[5] = find_grad_measure(ptr, stride, n, tx, ty, -1, 2); + g[6] = find_grad_measure(ptr, stride, n, tx, ty, -1, 1); + g[7] = find_grad_measure(ptr, stride, n, tx, ty, -2, 1); imin = 1; for (i = 2; i < 8; i += 1 + (i == 3)) imin = (g[i] < g[imin] ? i : imin); @@ -73,12 +74,13 @@ B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, } #elif CONTEXT_PRED_REPLACEMENTS == 4 B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, - int stride, int n) { + int stride, int n, + int tx, int ty) { int g[8], i, imin, imax; - g[1] = find_grad_measure(ptr, stride, n, 4, 2, 1); - g[3] = find_grad_measure(ptr, stride, n, 4, 1, 2); - g[5] = find_grad_measure(ptr, stride, n, 4, -1, 2); - g[7] = find_grad_measure(ptr, stride, n, 4, -2, 1); + g[1] = find_grad_measure(ptr, stride, n, tx, ty, 2, 1); + g[3] = find_grad_measure(ptr, stride, n, tx, ty, 1, 2); + g[5] = find_grad_measure(ptr, stride, n, tx, ty, -1, 2); + g[7] = find_grad_measure(ptr, stride, n, tx, ty, -2, 1); imin = 1; for (i = 3; i < 8; i+=2) imin = (g[i] < g[imin] ? i : imin); @@ -104,16 +106,17 @@ B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, } #elif CONTEXT_PRED_REPLACEMENTS == 0 B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, - int stride, int n) { + int stride, int n, + int tx, int ty) { int g[8], i, imin, imax; - g[0] = find_grad_measure(ptr, stride, n, 4, 1, 0); - g[1] = find_grad_measure(ptr, stride, n, 4, 2, 1); - g[2] = find_grad_measure(ptr, stride, n, 4, 1, 1); - g[3] = find_grad_measure(ptr, stride, n, 4, 1, 2); - g[4] = find_grad_measure(ptr, stride, n, 4, 0, 1); - g[5] = find_grad_measure(ptr, stride, n, 4, -1, 2); - g[6] = find_grad_measure(ptr, stride, n, 4, -1, 1); - g[7] = find_grad_measure(ptr, stride, n, 4, -2, 1); + g[0] = find_grad_measure(ptr, stride, n, tx, ty, 1, 0); + g[1] = find_grad_measure(ptr, stride, n, tx, ty, 2, 1); + g[2] = find_grad_measure(ptr, stride, n, tx, ty, 1, 1); + g[3] = find_grad_measure(ptr, stride, n, tx, ty, 1, 2); + g[4] = find_grad_measure(ptr, stride, n, tx, ty, 0, 1); + g[5] = find_grad_measure(ptr, stride, n, tx, ty, -1, 2); + g[6] = find_grad_measure(ptr, stride, n, tx, ty, -1, 1); + g[7] = find_grad_measure(ptr, stride, n, tx, ty, -2, 1); imax = 0; for (i = 1; i < 8; i++) imax = (g[i] > g[imax] ? i : imax); @@ -144,10 +147,17 @@ B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, } #endif -B_PREDICTION_MODE vp9_find_bpred_context(BLOCKD *x) { +B_PREDICTION_MODE vp9_find_bpred_context(MACROBLOCKD *xd, BLOCKD *x) { + const int block_idx = x - xd->block; + const int have_top = (block_idx >> 2) || xd->up_available; + const int have_left = (block_idx & 3) || xd->left_available; uint8_t *ptr = *(x->base_dst) + x->dst; int stride = x->dst_stride; - return vp9_find_dominant_direction(ptr, stride, 4); + int tx = have_left ? 4 : 0; + int ty = have_top ? 4 : 0; + if (!have_left && !have_top) + return B_DC_PRED; + return vp9_find_dominant_direction(ptr, stride, 4, tx, ty); } #endif diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 911fcc55e..0c2a5c94a 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -279,11 +279,11 @@ specialize vp9_convolve8_avg_vert ssse3 # # dct # -prototype void vp9_short_idct4x4llm_1 "int16_t *input, int16_t *output, int pitch" -specialize vp9_short_idct4x4llm_1 +prototype void vp9_short_idct4x4_1 "int16_t *input, int16_t *output, int pitch" +specialize vp9_short_idct4x4_1 -prototype void vp9_short_idct4x4llm "int16_t *input, int16_t *output, int pitch" -specialize vp9_short_idct4x4llm sse2 +prototype void vp9_short_idct4x4 "int16_t *input, int16_t *output, int pitch" +specialize vp9_short_idct4x4 sse2 prototype void vp9_short_idct8x8 "int16_t *input, int16_t *output, int pitch" specialize vp9_short_idct8x8 @@ -330,10 +330,10 @@ specialize vp9_idct4_1d sse2 prototype void vp9_dc_only_idct_add "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride" specialize vp9_dc_only_idct_add sse2 -prototype void vp9_short_inv_walsh4x4_1_x8 "int16_t *input, int16_t *output, int pitch" -specialize vp9_short_inv_walsh4x4_1_x8 -prototype void vp9_short_inv_walsh4x4_x8 "int16_t *input, int16_t *output, int pitch" -specialize vp9_short_inv_walsh4x4_x8 +prototype void vp9_short_iwalsh4x4_1 "int16_t *input, int16_t *output, int pitch" +specialize vp9_short_iwalsh4x4_1 +prototype void vp9_short_iwalsh4x4 "int16_t *input, int16_t *output, int pitch" +specialize vp9_short_iwalsh4x4 prototype void vp9_dc_only_inv_walsh_add "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride" specialize vp9_dc_only_inv_walsh_add @@ -600,11 +600,11 @@ specialize vp9_short_fdct32x32 prototype void vp9_short_fdct16x16 "int16_t *InputData, int16_t *OutputData, int pitch" specialize vp9_short_fdct16x16 sse2 -prototype void vp9_short_walsh4x4_x8 "int16_t *InputData, int16_t *OutputData, int pitch" -specialize vp9_short_walsh4x4_x8 +prototype void vp9_short_walsh4x4 "int16_t *InputData, int16_t *OutputData, int pitch" +specialize vp9_short_walsh4x4 -prototype void vp9_short_walsh8x4_x8 "int16_t *InputData, int16_t *OutputData, int pitch" -specialize vp9_short_walsh8x4_x8 +prototype void vp9_short_walsh8x4 "int16_t *InputData, int16_t *OutputData, int pitch" +specialize vp9_short_walsh8x4 # # Motion search diff --git a/vp9/common/x86/vp9_idctllm_sse2.asm b/vp9/common/x86/vp9_idct_sse2.asm index 8f3c6dfc3..8f3c6dfc3 100644 --- a/vp9/common/x86/vp9_idctllm_sse2.asm +++ b/vp9/common/x86/vp9_idct_sse2.asm diff --git a/vp9/common/x86/vp9_idctllm_x86.c b/vp9/common/x86/vp9_idct_x86.c index 3d7a1481c..6a35823bd 100644 --- a/vp9/common/x86/vp9_idctllm_x86.c +++ b/vp9/common/x86/vp9_idct_x86.c @@ -74,7 +74,7 @@ void vp9_dc_only_idct_add_sse2(int input_dc, uint8_t *pred_ptr, *(int *)dst_ptr = _mm_cvtsi128_si32(p1); } -void vp9_short_idct4x4llm_sse2(int16_t *input, int16_t *output, int pitch) { +void vp9_short_idct4x4_sse2(int16_t *input, int16_t *output, int pitch) { const __m128i zero = _mm_setzero_si128(); const __m128i eight = _mm_set1_epi16(8); const __m128i cst = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64, diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index bfc0a9dde..b53e419b5 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -138,14 +138,14 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *mb) { if (mb->lossless) { assert(qindex == 0); - mb->inv_txm4x4_1 = vp9_short_inv_walsh4x4_1_x8; - mb->inv_txm4x4 = vp9_short_inv_walsh4x4_x8; + mb->inv_txm4x4_1 = vp9_short_iwalsh4x4_1; + mb->inv_txm4x4 = vp9_short_iwalsh4x4; mb->itxm_add = vp9_dequant_idct_add_lossless_c; mb->itxm_add_y_block = vp9_dequant_idct_add_y_block_lossless_c; mb->itxm_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c; } else { - mb->inv_txm4x4_1 = vp9_short_idct4x4llm_1; - mb->inv_txm4x4 = vp9_short_idct4x4llm; + mb->inv_txm4x4_1 = vp9_short_idct4x4_1; + mb->inv_txm4x4 = vp9_short_idct4x4; mb->itxm_add = vp9_dequant_idct_add; mb->itxm_add_y_block = vp9_dequant_idct_add_y_block; mb->itxm_add_uv_block = vp9_dequant_idct_add_uv_block; @@ -377,7 +377,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, int b_mode = xd->mode_info_context->bmi[i].as_mode.first; #if CONFIG_NEWBINTRAMODES xd->mode_info_context->bmi[i].as_mode.context = b->bmi.as_mode.context = - vp9_find_bpred_context(b); + vp9_find_bpred_context(xd, b); #endif if (!xd->mode_info_context->mbmi.mb_skip_coeff) eobtotal += vp9_decode_coefs_4x4(pbi, xd, bc, PLANE_TYPE_Y_WITH_DC, i); @@ -1364,7 +1364,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { /* Reset the frame pointers to the current frame size */ vp8_yv12_realloc_frame_buffer(&pc->yv12_fb[pc->new_fb_idx], - pc->mb_cols * 16, pc->mb_rows * 16, + pc->Width, pc->Height, VP9BORDERINPIXELS); if (vp9_start_decode(&header_bc, data, diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 92b78ed19..cb4601a15 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -126,7 +126,7 @@ void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred, input[i] *= dq[i]; // the idct halves ( >> 1) the pitch - vp9_short_idct4x4llm(input, output, 4 << 1); + vp9_short_idct4x4(input, output, 4 << 1); vpx_memset(input, 0, 32); @@ -148,7 +148,7 @@ void vp9_dequant_dc_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred, input[i] *= dq[i]; // the idct halves ( >> 1) the pitch - vp9_short_idct4x4llm(input, output, 4 << 1); + vp9_short_idct4x4(input, output, 4 << 1); vpx_memset(input, 0, 32); vp9_add_residual_4x4(output, pred, pitch, dest, stride); } @@ -163,7 +163,7 @@ void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq, for (i = 0; i < 16; i++) input[i] *= dq[i]; - vp9_short_inv_walsh4x4_x8_c(input, output, 4 << 1); + vp9_short_iwalsh4x4_c(input, output, 4 << 1); vpx_memset(input, 0, 32); @@ -186,7 +186,7 @@ void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq, for (i = 1; i < 16; i++) input[i] *= dq[i]; - vp9_short_inv_walsh4x4_x8_c(input, output, 4 << 1); + vp9_short_iwalsh4x4_c(input, output, 4 << 1); vpx_memset(input, 0, 32); vp9_add_residual_4x4(output, pred, pitch, dest, stride); } diff --git a/vp9/decoder/vp9_onyxd.h b/vp9/decoder/vp9_onyxd.h index 748fc7ea3..cd71166e4 100644 --- a/vp9/decoder/vp9_onyxd.h +++ b/vp9/decoder/vp9_onyxd.h @@ -46,14 +46,16 @@ extern "C" { int64_t *time_stamp, int64_t *time_end_stamp, vp9_ppflags_t *flags); - vpx_codec_err_t vp9_get_reference_dec(VP9D_PTR comp, - VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd); + vpx_codec_err_t vp9_copy_reference_dec(VP9D_PTR comp, + VP9_REFFRAME ref_frame_flag, + YV12_BUFFER_CONFIG *sd); vpx_codec_err_t vp9_set_reference_dec(VP9D_PTR comp, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); + int vp9_get_reference_dec(VP9D_PTR ptr, int index, YV12_BUFFER_CONFIG **fb); + VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf); void vp9_remove_decompressor(VP9D_PTR comp); diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c index 63895800d..5cb2a095b 100644 --- a/vp9/decoder/vp9_onyxd_if.c +++ b/vp9/decoder/vp9_onyxd_if.c @@ -164,8 +164,9 @@ void vp9_remove_decompressor(VP9D_PTR ptr) { } -vpx_codec_err_t vp9_get_reference_dec(VP9D_PTR ptr, VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd) { +vpx_codec_err_t vp9_copy_reference_dec(VP9D_PTR ptr, + VP9_REFFRAME ref_frame_flag, + YV12_BUFFER_CONFIG *sd) { VP9D_COMP *pbi = (VP9D_COMP *) ptr; VP9_COMMON *cm = &pbi->common; int ref_fb_idx; @@ -242,6 +243,17 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_PTR ptr, VP9_REFFRAME ref_frame_flag, } +int vp9_get_reference_dec(VP9D_PTR ptr, int index, YV12_BUFFER_CONFIG **fb) { + VP9D_COMP *pbi = (VP9D_COMP *) ptr; + VP9_COMMON *cm = &pbi->common; + + if (index < 0 || index >= NUM_REF_FRAMES) + return -1; + + *fb = &cm->yv12_fb[cm->ref_frame_map[index]]; + return 0; +} + /* If any buffer updating is signalled it should be done here. */ static void swap_frame_buffers(VP9D_COMP *pbi) { int ref_index = 0, mask; diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index bb6e44fea..6365ed9a2 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -553,7 +553,7 @@ void vp9_short_fht8x8_c(int16_t *input, int16_t *output, } } -void vp9_short_walsh4x4_x8_c(short *input, short *output, int pitch) { +void vp9_short_walsh4x4_c(short *input, short *output, int pitch) { int i; int a1, b1, c1, d1; short *ip = input; @@ -593,9 +593,9 @@ void vp9_short_walsh4x4_x8_c(short *input, short *output, int pitch) { } } -void vp9_short_walsh8x4_x8_c(short *input, short *output, int pitch) { - vp9_short_walsh4x4_x8_c(input, output, pitch); - vp9_short_walsh4x4_x8_c(input + 4, output + 16, pitch); +void vp9_short_walsh8x4_c(short *input, short *output, int pitch) { + vp9_short_walsh4x4_c(input, output, pitch); + vp9_short_walsh4x4_c(input + 4, output + 16, pitch); } diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index b7b270031..428e585e1 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -594,9 +594,6 @@ static void update_state(VP9_COMP *cpi, [vp9_switchable_interp_map[mbmi->interp_filter]]; } - cpi->prediction_error += ctx->distortion; - cpi->intra_error += ctx->intra_error; - cpi->rd_comp_pred_diff[SINGLE_PREDICTION_ONLY] += ctx->single_pred_diff; cpi->rd_comp_pred_diff[COMP_PREDICTION_ONLY] += ctx->comp_pred_diff; cpi->rd_comp_pred_diff[HYBRID_PREDICTION] += ctx->hybrid_pred_diff; @@ -1217,10 +1214,10 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { if (lossless) { - cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4_x8; - cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4_x8; - cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_inv_walsh4x4_1_x8; - cpi->mb.e_mbd.inv_txm4x4 = vp9_short_inv_walsh4x4_x8; + cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4; + cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4; + cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_iwalsh4x4_1; + cpi->mb.e_mbd.inv_txm4x4 = vp9_short_iwalsh4x4; cpi->mb.optimize = 0; cpi->common.filter_level = 0; cpi->zbin_mode_boost_enabled = FALSE; @@ -1228,8 +1225,8 @@ static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { } else { cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4; cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4; - cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_idct4x4llm_1; - cpi->mb.e_mbd.inv_txm4x4 = vp9_short_idct4x4llm; + cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_idct4x4_1; + cpi->mb.e_mbd.inv_txm4x4 = vp9_short_idct4x4; } } @@ -1265,8 +1262,6 @@ static void encode_frame_internal(VP9_COMP *cpi) { // Reset frame count of inter 0,0 motion vector usage. cpi->inter_zz_count = 0; - cpi->prediction_error = 0; - cpi->intra_error = 0; cpi->skip_true_count[0] = cpi->skip_true_count[1] = cpi->skip_true_count[2] = 0; cpi->skip_false_count[0] = cpi->skip_false_count[1] = cpi->skip_false_count[2] = 0; diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index 3c98d4aa6..9e5bcea16 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -44,7 +44,7 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) { TX_TYPE tx_type; #if CONFIG_NEWBINTRAMODES - b->bmi.as_mode.context = vp9_find_bpred_context(b); + b->bmi.as_mode.context = vp9_find_bpred_context(&x->e_mbd, b); #endif vp9_intra4x4_predict(&x->e_mbd, b, b->bmi.as_mode.first, b->predictor); diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index f30403cda..0b907b361 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -859,6 +859,8 @@ static double calc_correction_factor(double err_per_mb, power_term = (power_term > pt_high) ? pt_high : power_term; // Calculate correction factor + if (power_term < 1.0) + assert(error_term >= 0.0); correction_factor = pow(error_term, power_term); // Clip range @@ -920,15 +922,19 @@ static int estimate_max_q(VP9_COMP *cpi, // Look at the drop in prediction quality between the last frame // and the GF buffer (which contained an older frame). - sr_err_diff = - (fpstats->sr_coded_error - fpstats->coded_error) / - (fpstats->count * cpi->common.MBs); - sr_correction = (sr_err_diff / 32.0); - sr_correction = pow(sr_correction, 0.25); - if (sr_correction < 0.75) + if (fpstats->sr_coded_error > fpstats->coded_error) { + sr_err_diff = + (fpstats->sr_coded_error - fpstats->coded_error) / + (fpstats->count * cpi->common.MBs); + sr_correction = (sr_err_diff / 32.0); + sr_correction = pow(sr_correction, 0.25); + if (sr_correction < 0.75) + sr_correction = 0.75; + else if (sr_correction > 1.25) + sr_correction = 1.25; + } else { sr_correction = 0.75; - else if (sr_correction > 1.25) - sr_correction = 1.25; + } // Calculate a corrective factor based on a rolling ratio of bits spent // vs target bits @@ -1031,15 +1037,19 @@ static int estimate_cq(VP9_COMP *cpi, // Look at the drop in prediction quality between the last frame // and the GF buffer (which contained an older frame). - sr_err_diff = - (fpstats->sr_coded_error - fpstats->coded_error) / - (fpstats->count * cpi->common.MBs); - sr_correction = (sr_err_diff / 32.0); - sr_correction = pow(sr_correction, 0.25); - if (sr_correction < 0.75) + if (fpstats->sr_coded_error > fpstats->coded_error) { + sr_err_diff = + (fpstats->sr_coded_error - fpstats->coded_error) / + (fpstats->count * cpi->common.MBs); + sr_correction = (sr_err_diff / 32.0); + sr_correction = pow(sr_correction, 0.25); + if (sr_correction < 0.75) + sr_correction = 0.75; + else if (sr_correction > 1.25) + sr_correction = 1.25; + } else { sr_correction = 0.75; - else if (sr_correction > 1.25) - sr_correction = 1.25; + } // II ratio correction factor for clip as a whole clip_iiratio = cpi->twopass.total_stats->intra_error / @@ -1178,12 +1188,16 @@ static double get_prediction_decay_rate(VP9_COMP *cpi, mb_sr_err_diff = (next_frame->sr_coded_error - next_frame->coded_error) / (cpi->common.MBs); - second_ref_decay = 1.0 - (mb_sr_err_diff / 512.0); - second_ref_decay = pow(second_ref_decay, 0.5); - if (second_ref_decay < 0.85) + if (mb_sr_err_diff <= 512.0) { + second_ref_decay = 1.0 - (mb_sr_err_diff / 512.0); + second_ref_decay = pow(second_ref_decay, 0.5); + if (second_ref_decay < 0.85) + second_ref_decay = 0.85; + else if (second_ref_decay > 1.0) + second_ref_decay = 1.0; + } else { second_ref_decay = 0.85; - else if (second_ref_decay > 1.0) - second_ref_decay = 1.0; + } if (second_ref_decay < prediction_decay_rate) prediction_decay_rate = second_ref_decay; diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c index 2214ac99b..a89d2547e 100644 --- a/vp9/encoder/vp9_lookahead.c +++ b/vp9/encoder/vp9_lookahead.c @@ -62,10 +62,6 @@ struct lookahead_ctx * vp9_lookahead_init(unsigned int width, // Clamp the lookahead queue depth depth = clamp(depth, 1, MAX_LAG_BUFFERS); - // Align the buffer dimensions - width = (width + 15) &~15; - height = (height + 15) &~15; - // Allocate the lookahead structures ctx = calloc(1, sizeof(*ctx)); if (ctx) { diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 4901e6a90..cd8e74624 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -843,8 +843,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4; cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4; if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) { - cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4_x8; - cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4_x8; + cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4; + cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4; } cpi->mb.quantize_b_4x4 = vp9_regular_quantize_b_4x4; @@ -873,9 +873,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) { } static void alloc_raw_frame_buffers(VP9_COMP *cpi) { - int width = (cpi->oxcf.Width + 15) & ~15; - int height = (cpi->oxcf.Height + 15) & ~15; - cpi->lookahead = vp9_lookahead_init(cpi->oxcf.Width, cpi->oxcf.Height, cpi->oxcf.lag_in_frames); if (!cpi->lookahead) @@ -885,7 +882,8 @@ static void alloc_raw_frame_buffers(VP9_COMP *cpi) { #if VP9_TEMPORAL_ALT_REF if (vp8_yv12_alloc_frame_buffer(&cpi->alt_ref_buffer, - width, height, VP9BORDERINPIXELS)) + cpi->oxcf.Width, cpi->oxcf.Height, + VP9BORDERINPIXELS)) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate altref buffer"); @@ -909,10 +907,7 @@ static int alloc_partition_data(VP9_COMP *cpi) { void vp9_alloc_compressor_data(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; - int width = cm->Width; - int height = cm->Height; - - if (vp9_alloc_frame_buffers(cm, width, height)) + if (vp9_alloc_frame_buffers(cm, cm->Width, cm->Height)) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate frame buffers"); @@ -920,21 +915,13 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate partition data"); - - if ((width & 0xf) != 0) - width += 16 - (width & 0xf); - - if ((height & 0xf) != 0) - height += 16 - (height & 0xf); - - if (vp8_yv12_alloc_frame_buffer(&cpi->last_frame_uf, - width, height, VP9BORDERINPIXELS)) + cm->Width, cm->Height, VP9BORDERINPIXELS)) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate last frame buffer"); if (vp8_yv12_alloc_frame_buffer(&cpi->scaled_source, - width, height, VP9BORDERINPIXELS)) + cm->Width, cm->Height, VP9BORDERINPIXELS)) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate scaled source buffer"); @@ -996,11 +983,11 @@ static void update_frame_size(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; /* our internal buffers are always multiples of 16 */ - int width = (cm->Width + 15) & ~15; - int height = (cm->Height + 15) & ~15; + int aligned_width = (cm->Width + 15) & ~15; + int aligned_height = (cm->Height + 15) & ~15; - cm->mb_rows = height >> 4; - cm->mb_cols = width >> 4; + cm->mb_rows = aligned_height >> 4; + cm->mb_cols = aligned_width >> 4; cm->MBs = cm->mb_rows * cm->mb_cols; cm->mode_info_stride = cm->mb_cols + 1; memset(cm->mip, 0, @@ -1013,12 +1000,12 @@ static void update_frame_size(VP9_COMP *cpi) { /* Update size of buffers local to this frame */ if (vp8_yv12_realloc_frame_buffer(&cpi->last_frame_uf, - width, height, VP9BORDERINPIXELS)) + cm->Width, cm->Height, VP9BORDERINPIXELS)) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to reallocate last frame buffer"); if (vp8_yv12_realloc_frame_buffer(&cpi->scaled_source, - width, height, VP9BORDERINPIXELS)) + cm->Width, cm->Height, VP9BORDERINPIXELS)) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to reallocate scaled source buffer"); @@ -1217,11 +1204,11 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->oxcf.lossless = oxcf->lossless; if (cpi->oxcf.lossless) { - cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_inv_walsh4x4_1_x8; - cpi->mb.e_mbd.inv_txm4x4 = vp9_short_inv_walsh4x4_x8; + cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_iwalsh4x4_1; + cpi->mb.e_mbd.inv_txm4x4 = vp9_short_iwalsh4x4; } else { - cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_idct4x4llm_1; - cpi->mb.e_mbd.inv_txm4x4 = vp9_short_idct4x4llm; + cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_idct4x4_1; + cpi->mb.e_mbd.inv_txm4x4 = vp9_short_idct4x4; } cpi->baseline_gf_interval = DEFAULT_GF_INTERVAL; @@ -1315,9 +1302,6 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cm->Width = cpi->oxcf.Width; cm->Height = cpi->oxcf.Height; - cm->horiz_scale = cpi->horiz_scale; - cm->vert_scale = cpi->vert_scale; - // VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs) if (cpi->oxcf.Sharpness > 7) cpi->oxcf.Sharpness = 7; @@ -2103,8 +2087,8 @@ int vp9_update_reference(VP9_PTR ptr, int ref_frame_flags) { return 0; } -int vp9_get_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd) { +int vp9_copy_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, + YV12_BUFFER_CONFIG *sd) { VP9_COMP *cpi = (VP9_COMP *)(ptr); VP9_COMMON *cm = &cpi->common; int ref_fb_idx; @@ -2123,6 +2107,17 @@ int vp9_get_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, return 0; } +int vp9_get_reference_enc(VP9_PTR ptr, int index, YV12_BUFFER_CONFIG **fb) { + VP9_COMP *cpi = (VP9_COMP *)(ptr); + VP9_COMMON *cm = &cpi->common; + + if (index < 0 || index >= NUM_REF_FRAMES) + return -1; + + *fb = &cm->yv12_fb[cm->ref_frame_map[index]]; + return 0; +} + int vp9_set_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { VP9_COMP *cpi = (VP9_COMP *)(ptr); @@ -2212,10 +2207,10 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) { static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb, YV12_BUFFER_CONFIG *dst_fb) { - const int in_w = src_fb->y_width; - const int in_h = src_fb->y_height; - const int out_w = dst_fb->y_width; - const int out_h = dst_fb->y_height; + const int in_w = src_fb->y_crop_width; + const int in_h = src_fb->y_crop_height; + const int out_w = dst_fb->y_crop_width; + const int out_h = dst_fb->y_crop_height; int x, y; for (y = 0; y < out_h; y += 16) { @@ -2617,12 +2612,12 @@ static void scale_references(VP9_COMP *cpi) { for (i = 0; i < 3; i++) { YV12_BUFFER_CONFIG *ref = &cm->yv12_fb[cm->ref_frame_map[i]]; - if (ref->y_width != cm->mb_cols * 16 || ref->y_height != cm->mb_rows * 16) { + if (ref->y_crop_width != cm->Width || + ref->y_crop_height != cm->Height) { int new_fb = get_free_fb(cm); vp8_yv12_realloc_frame_buffer(&cm->yv12_fb[new_fb], - cm->mb_cols * 16, - cm->mb_rows * 16, + cm->Width, cm->Height, VP9BORDERINPIXELS); scale_and_extend_frame(ref, &cm->yv12_fb[new_fb]); cpi->scaled_ref_idx[i] = new_fb; @@ -3897,7 +3892,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, /* Reset the frame pointers to the current frame size */ vp8_yv12_realloc_frame_buffer(&cm->yv12_fb[cm->new_fb_idx], - cm->mb_cols * 16, cm->mb_rows * 16, + cm->Width, cm->Height, VP9BORDERINPIXELS); vp9_setup_interp_filters(&cpi->mb.e_mbd, DEFAULT_INTERP_FILTER, cm); diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 5de6a7ad2..7a1a9b249 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -390,11 +390,6 @@ typedef struct VP9_COMP { CODING_CONTEXT coding_context; // Rate targetting variables - int64_t prediction_error; - int64_t last_prediction_error; - int64_t intra_error; - int64_t last_intra_error; - int this_frame_target; int projected_frame_size; int last_q[2]; // Separate values for Intra/Inter @@ -546,8 +541,6 @@ typedef struct VP9_COMP { int goldfreq; int auto_worst_q; int cpu_used; - int horiz_scale; - int vert_scale; int pass; vp9_prob last_skip_false_probs[3][MBSKIP_CONTEXTS]; diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c index 6f9333521..d80ea02c1 100644 --- a/vp9/encoder/vp9_picklpf.c +++ b/vp9/encoder/vp9_picklpf.c @@ -247,7 +247,7 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi) { int Bias = 0; // Bias against raising loop filter and in favour of lowering it // Make a copy of the unfiltered / processed recon buffer - vp8_yv12_copy_frame(cm->frame_to_show, &cpi->last_frame_uf); + vp8_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf); if (cm->frame_type == KEY_FRAME) cm->sharpness_level = 0; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 2f03a264c..a8ea3956e 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1165,7 +1165,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16); #if CONFIG_NEWBINTRAMODES - b->bmi.as_mode.context = vp9_find_bpred_context(b); + b->bmi.as_mode.context = vp9_find_bpred_context(xd, b); #endif xd->mode_info_context->mbmi.txfm_size = TX_4X4; for (mode = B_DC_PRED; mode < LEFT4X4; mode++) { @@ -1276,7 +1276,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, bmode_costs = mb->bmode_costs[A][L]; } #if CONFIG_NEWBINTRAMODES - mic->bmi[i].as_mode.context = vp9_find_bpred_context(xd->block + i); + mic->bmi[i].as_mode.context = vp9_find_bpred_context(xd, xd->block + i); #endif total_rd += rd_pick_intra4x4block( @@ -3441,9 +3441,11 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, // Further refinement that is encode side only to test the top few candidates // in full and choose the best as the centre point for subsequent searches. - mv_pred(cpi, x, yv12_mb[frame_type].y_buffer, yv12->y_stride, - frame_type, block_size); - + // The current implementation doesn't support scaling. + if (scale[frame_type].x_num == scale[frame_type].x_den && + scale[frame_type].y_num == scale[frame_type].y_den) + mv_pred(cpi, x, yv12_mb[frame_type].y_buffer, yv12->y_stride, + frame_type, block_size); } static void model_rd_from_var_lapndz(int var, int n, int qstep, diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index fb61ece09..5df4d2562 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -457,8 +457,8 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { // Setup scaling factors. Scaling on each of the arnr frames is not supported vp9_setup_scale_factors_for_frame(&cpi->mb.e_mbd.scale_factor[0], &cpi->common.yv12_fb[cpi->common.new_fb_idx], - 16 * cpi->common.mb_cols, - 16 * cpi->common.mb_rows); + cpi->common.Width, + cpi->common.Height); cpi->mb.e_mbd.scale_factor_uv[0] = cpi->mb.e_mbd.scale_factor[0]; // Setup frame pointers, NULL indicates frame not included in filter diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index f330b464a..ea8631711 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -9,6 +9,7 @@ ## VP9_COMMON_SRCS-yes += vp9_common.mk +VP9_COMMON_SRCS-yes += vp9_iface_common.h VP9_COMMON_SRCS-yes += common/vp9_pragmas.h VP9_COMMON_SRCS-yes += common/vp9_ppflags.h VP9_COMMON_SRCS-yes += common/vp9_onyx.h @@ -28,7 +29,7 @@ VP9_COMMON_SRCS-yes += common/vp9_filter.c VP9_COMMON_SRCS-yes += common/vp9_filter.h VP9_COMMON_SRCS-yes += common/vp9_findnearmv.c VP9_COMMON_SRCS-yes += common/generic/vp9_systemdependent.c -VP9_COMMON_SRCS-yes += common/vp9_idctllm.c +VP9_COMMON_SRCS-yes += common/vp9_idct.c VP9_COMMON_SRCS-yes += common/vp9_alloccommon.h VP9_COMMON_SRCS-yes += common/vp9_blockd.h VP9_COMMON_SRCS-yes += common/vp9_common.h @@ -91,7 +92,7 @@ VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.c VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_iwalsh_mmx.asm VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_recon_mmx.asm VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm -VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idctllm_sse2.asm +VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_iwalsh_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_loopfilter_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_recon_sse2.asm @@ -110,13 +111,13 @@ VP9_COMMON_SRCS-yes += common/vp9_maskingmv.c VP9_COMMON_SRCS-$(HAVE_SSE3) += common/x86/vp9_mask_sse3.asm endif -VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_idctllm_x86.c +VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_idct_x86.c VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_sadmxn_x86.c ifeq ($(HAVE_SSE2),yes) -vp9/common/x86/vp9_idctllm_x86.c.o: CFLAGS += -msse2 +vp9/common/x86/vp9_idct_x86.c.o: CFLAGS += -msse2 vp9/common/x86/vp9_loopfilter_x86.c.o: CFLAGS += -msse2 vp9/common/x86/vp9_sadmxn_x86.c.o: CFLAGS += -msse2 -vp9/common/x86/vp9_idctllm_x86.c.d: CFLAGS += -msse2 +vp9/common/x86/vp9_idct_x86.c.d: CFLAGS += -msse2 vp9/common/x86/vp9_loopfilter_x86.c.d: CFLAGS += -msse2 vp9/common/x86/vp9_sadmxn_x86.c.d: CFLAGS += -msse2 endif diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 708cec602..1eeec6b5a 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -16,6 +16,7 @@ #include "vpx/vp8cx.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/common/vp9_onyx.h" +#include "vp9/vp9_iface_common.h" #include <stdlib.h> #include <string.h> @@ -544,6 +545,8 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, yv12->u_buffer = img->planes[VPX_PLANE_U]; yv12->v_buffer = img->planes[VPX_PLANE_V]; + yv12->y_crop_width = img->d_w; + yv12->y_crop_height = img->d_h; yv12->y_width = img->d_w; yv12->y_height = img->d_h; yv12->uv_width = (1 + yv12->y_width) / 2; @@ -867,9 +870,9 @@ static vpx_codec_err_t vp8e_set_reference(vpx_codec_alg_priv_t *ctx, } -static vpx_codec_err_t vp8e_get_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { +static vpx_codec_err_t vp8e_copy_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, + va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); @@ -878,12 +881,28 @@ static vpx_codec_err_t vp8e_get_reference(vpx_codec_alg_priv_t *ctx, YV12_BUFFER_CONFIG sd; image2yuvconfig(&frame->img, &sd); - vp9_get_reference_enc(ctx->cpi, frame->frame_type, &sd); + vp9_copy_reference_enc(ctx->cpi, frame->frame_type, &sd); return VPX_CODEC_OK; } else return VPX_CODEC_INVALID_PARAM; } +static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, + va_list args) { + vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *); + + if (data) { + YV12_BUFFER_CONFIG* fb; + + vp9_get_reference_enc(ctx->cpi, data->idx, &fb); + yuvconfig2image(&data->img, fb, NULL); + return VPX_CODEC_OK; + } else { + return VPX_CODEC_INVALID_PARAM; + } +} + static vpx_codec_err_t vp8e_set_previewpp(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { @@ -1038,7 +1057,7 @@ static vpx_codec_err_t vp8e_set_scalemode(vpx_codec_alg_priv_t *ctx, static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] = { {VP8_SET_REFERENCE, vp8e_set_reference}, - {VP8_COPY_REFERENCE, vp8e_get_reference}, + {VP8_COPY_REFERENCE, vp8e_copy_reference}, {VP8_SET_POSTPROC, vp8e_set_previewpp}, {VP8E_UPD_ENTROPY, vp8e_update_entropy}, {VP8E_UPD_REFERENCE, vp8e_update_reference}, @@ -1062,6 +1081,7 @@ static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] = { {VP8E_SET_CQ_LEVEL, set_param}, {VP8E_SET_MAX_INTRA_BITRATE_PCT, set_param}, {VP9E_SET_LOSSLESS, set_param}, + {VP9_GET_REFERENCE, get_reference}, { -1, NULL}, }; diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index eabdb8556..66c89b5a9 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -17,6 +17,7 @@ #include "vpx_version.h" #include "decoder/vp9_onyxd.h" #include "decoder/vp9_onyxd_int.h" +#include "vp9/vp9_iface_common.h" #define VP8_CAP_POSTPROC (CONFIG_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0) typedef vpx_codec_stream_info_t vp8_stream_info_t; @@ -273,36 +274,6 @@ update_error_state(vpx_codec_alg_priv_t *ctx, return res; } -static void yuvconfig2image(vpx_image_t *img, - const YV12_BUFFER_CONFIG *yv12, - void *user_priv) { - /** vpx_img_wrap() doesn't allow specifying independent strides for - * the Y, U, and V planes, nor other alignment adjustments that - * might be representable by a YV12_BUFFER_CONFIG, so we just - * initialize all the fields.*/ - img->fmt = yv12->clrtype == REG_YUV ? - VPX_IMG_FMT_I420 : VPX_IMG_FMT_VPXI420; - img->w = yv12->y_stride; - img->h = (yv12->y_height + 2 * VP9BORDERINPIXELS + 15) & ~15; - img->d_w = yv12->y_width; - img->d_h = yv12->y_height; - img->x_chroma_shift = 1; - img->y_chroma_shift = 1; - img->planes[VPX_PLANE_Y] = yv12->y_buffer; - img->planes[VPX_PLANE_U] = yv12->u_buffer; - img->planes[VPX_PLANE_V] = yv12->v_buffer; - img->planes[VPX_PLANE_ALPHA] = NULL; - img->stride[VPX_PLANE_Y] = yv12->y_stride; - img->stride[VPX_PLANE_U] = yv12->uv_stride; - img->stride[VPX_PLANE_V] = yv12->uv_stride; - img->stride[VPX_PLANE_ALPHA] = yv12->y_stride; - img->bps = 12; - img->user_priv = user_priv; - img->img_data = yv12->buffer_alloc; - img->img_data_owner = 0; - img->self_allocd = 0; -} - static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, const uint8_t **data, unsigned int data_sz, @@ -613,6 +584,8 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, yv12->u_buffer = img->planes[VPX_PLANE_U]; yv12->v_buffer = img->planes[VPX_PLANE_V]; + yv12->y_crop_width = img->d_w; + yv12->y_crop_height = img->d_h; yv12->y_width = img->d_w; yv12->y_height = img->d_h; yv12->uv_width = yv12->y_width / 2; @@ -648,9 +621,9 @@ static vpx_codec_err_t vp9_set_reference(vpx_codec_alg_priv_t *ctx, } -static vpx_codec_err_t vp9_get_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { +static vpx_codec_err_t vp9_copy_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, + va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); @@ -660,13 +633,29 @@ static vpx_codec_err_t vp9_get_reference(vpx_codec_alg_priv_t *ctx, image2yuvconfig(&frame->img, &sd); - return vp9_get_reference_dec(ctx->pbi, - (VP9_REFFRAME)frame->frame_type, &sd); + return vp9_copy_reference_dec(ctx->pbi, + (VP9_REFFRAME)frame->frame_type, &sd); } else return VPX_CODEC_INVALID_PARAM; } +static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, + va_list args) { + vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *); + + if (data) { + YV12_BUFFER_CONFIG* fb; + + vp9_get_reference_dec(ctx->pbi, data->idx, &fb); + yuvconfig2image(&data->img, fb, NULL); + return VPX_CODEC_OK; + } else { + return VPX_CODEC_INVALID_PARAM; + } +} + static vpx_codec_err_t vp8_set_postproc(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { @@ -739,7 +728,7 @@ static vpx_codec_err_t vp8_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, static vpx_codec_ctrl_fn_map_t ctf_maps[] = { {VP8_SET_REFERENCE, vp9_set_reference}, - {VP8_COPY_REFERENCE, vp9_get_reference}, + {VP8_COPY_REFERENCE, vp9_copy_reference}, {VP8_SET_POSTPROC, vp8_set_postproc}, {VP8_SET_DBG_COLOR_REF_FRAME, vp8_set_dbg_options}, {VP8_SET_DBG_COLOR_MB_MODES, vp8_set_dbg_options}, @@ -747,6 +736,7 @@ static vpx_codec_ctrl_fn_map_t ctf_maps[] = { {VP8_SET_DBG_DISPLAY_MV, vp8_set_dbg_options}, {VP8D_GET_LAST_REF_UPDATES, vp8_get_last_ref_updates}, {VP8D_GET_FRAME_CORRUPTED, vp8_get_frame_corrupted}, + {VP9_GET_REFERENCE, get_reference}, { -1, NULL}, }; diff --git a/vp9/vp9_iface_common.h b/vp9/vp9_iface_common.h new file mode 100644 index 000000000..450be7dfd --- /dev/null +++ b/vp9/vp9_iface_common.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef VP9_VP9_IFACE_COMMON_H_ +#define VP9_VP9_IFACE_COMMON_H_ + +static void yuvconfig2image(vpx_image_t *img, + const YV12_BUFFER_CONFIG *yv12, + void *user_priv) { + /** vpx_img_wrap() doesn't allow specifying independent strides for + * the Y, U, and V planes, nor other alignment adjustments that + * might be representable by a YV12_BUFFER_CONFIG, so we just + * initialize all the fields.*/ + img->fmt = yv12->clrtype == REG_YUV ? + VPX_IMG_FMT_I420 : VPX_IMG_FMT_VPXI420; + img->w = yv12->y_stride; + img->h = (yv12->y_height + 2 * VP9BORDERINPIXELS + 15) & ~15; + img->d_w = yv12->y_width; + img->d_h = yv12->y_height; + img->x_chroma_shift = 1; + img->y_chroma_shift = 1; + img->planes[VPX_PLANE_Y] = yv12->y_buffer; + img->planes[VPX_PLANE_U] = yv12->u_buffer; + img->planes[VPX_PLANE_V] = yv12->v_buffer; + img->planes[VPX_PLANE_ALPHA] = NULL; + img->stride[VPX_PLANE_Y] = yv12->y_stride; + img->stride[VPX_PLANE_U] = yv12->uv_stride; + img->stride[VPX_PLANE_V] = yv12->uv_stride; + img->stride[VPX_PLANE_ALPHA] = yv12->y_stride; + img->bps = 12; + img->user_priv = user_priv; + img->img_data = yv12->buffer_alloc; + img->img_data_owner = 0; + img->self_allocd = 0; +} + +#endif @@ -44,6 +44,12 @@ enum vp8_com_control_id { VP8_SET_DBG_COLOR_MB_MODES = 5, /**< set which macro block modes to color */ VP8_SET_DBG_COLOR_B_MODES = 6, /**< set which blocks modes to color */ VP8_SET_DBG_DISPLAY_MV = 7, /**< set which motion vector modes to draw */ + + /* TODO(jkoleszar): The encoder incorrectly reuses some of these values (5+) + * for its control ids. These should be migrated to something like the + * VP8_DECODER_CTRL_ID_START range next time we're ready to break the ABI. + */ + VP9_GET_REFERENCE = 128, /**< get a pointer to a reference frame */ VP8_COMMON_CTRL_ID_MAX, VP8_DECODER_CTRL_ID_START = 256 }; @@ -97,6 +103,10 @@ typedef struct vpx_ref_frame { vpx_image_t img; /**< reference frame data in image format */ } vpx_ref_frame_t; +typedef struct vp9_ref_frame { + int idx; /**< frame index to get (input) */ + vpx_image_t img; /**< img structure to populate (output) */ +} vp9_ref_frame_t; /*!\brief vp8 decoder control function parameter type * @@ -110,6 +120,7 @@ VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_REF_FRAME, int) VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_MB_MODES, int) VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_B_MODES, int) VPX_CTRL_USE_TYPE(VP8_SET_DBG_DISPLAY_MV, int) +VPX_CTRL_USE_TYPE(VP9_GET_REFERENCE, vp9_ref_frame_t *) /*! @} - end defgroup vp8 */ diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c index 267d55f40..fc7f82881 100644 --- a/vpx_scale/generic/yv12config.c +++ b/vpx_scale/generic/yv12config.c @@ -38,10 +38,12 @@ vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) { int vp8_yv12_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) { if (ybf) { - int y_stride = ((width + 2 * border) + 31) & ~31; - int yplane_size = (height + 2 * border) * y_stride; - int uv_width = width >> 1; - int uv_height = height >> 1; + int aligned_width = (width + 15) & ~15; + int aligned_height = (height + 15) & ~15; + int y_stride = ((aligned_width + 2 * border) + 31) & ~31; + int yplane_size = (aligned_height + 2 * border) * y_stride; + int uv_width = aligned_width >> 1; + int uv_height = aligned_height >> 1; /** There is currently a bunch of code which assumes * uv_stride == y_stride/2, so enforce this here. */ int uv_stride = y_stride >> 1; @@ -56,17 +58,18 @@ int vp8_yv12_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, if (!ybf->buffer_alloc || ybf->buffer_alloc_sz < frame_size) return -1; - /** Only support allocating buffers that have a height and width that - * are multiples of 16, and a border that's a multiple of 32. - * The border restriction is required to get 16-byte alignment of the - * start of the chroma rows without intoducing an arbitrary gap - * between planes, which would break the semantics of things like - * vpx_img_set_rect(). */ - if ((width & 0xf) | (height & 0xf) | (border & 0x1f)) + /* Only support allocating buffers that have a border that's a multiple + * of 32. The border restriction is required to get 16-byte alignment of + * the start of the chroma rows without intoducing an arbitrary gap + * between planes, which would break the semantics of things like + * vpx_img_set_rect(). */ + if (border & 0x1f) return -3; - ybf->y_width = width; - ybf->y_height = height; + ybf->y_crop_width = width; + ybf->y_crop_height = height; + ybf->y_width = aligned_width; + ybf->y_height = aligned_height; ybf->y_stride = y_stride; ybf->uv_width = uv_width; diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c index d733bd49d..49d7e8e56 100644 --- a/vpx_scale/generic/yv12extend.c +++ b/vpx_scale/generic/yv12extend.c @@ -20,180 +20,81 @@ /**************************************************************************** * ****************************************************************************/ -void -vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) { +static void extend_plane(uint8_t *s, /* source */ + int sp, /* source pitch */ + int w, /* width */ + int h, /* height */ + int et, /* extend top border */ + int el, /* extend left border */ + int eb, /* extend bottom border */ + int er) { /* extend right border */ int i; - unsigned char *src_ptr1, *src_ptr2; - unsigned char *dest_ptr1, *dest_ptr2; - - unsigned int Border; - int plane_stride; - int plane_height; - int plane_width; - - /***********/ - /* Y Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - plane_height = ybf->y_height; - plane_width = ybf->y_width; + uint8_t *src_ptr1, *src_ptr2; + uint8_t *dest_ptr1, *dest_ptr2; + int linesize; /* copy the left and right most columns out */ - src_ptr1 = ybf->y_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) { - vpx_memset(dest_ptr1, src_ptr1[0], Border); - vpx_memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; + src_ptr1 = s; + src_ptr2 = s + w - 1; + dest_ptr1 = s - el; + dest_ptr2 = s + w; + + for (i = 0; i < h; i++) { + vpx_memset(dest_ptr1, src_ptr1[0], el); + vpx_memset(dest_ptr2, src_ptr2[0], er); + src_ptr1 += sp; + src_ptr2 += sp; + dest_ptr1 += sp; + dest_ptr2 += sp; } - /* Now copy the top and bottom source lines into each line of the respective borders */ - src_ptr1 = ybf->y_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)Border; i++) { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; + /* Now copy the top and bottom lines into each line of the respective + * borders + */ + src_ptr1 = s - el; + src_ptr2 = s + sp * (h - 1) - el; + dest_ptr1 = s + sp * (-et) - el; + dest_ptr2 = s + sp * (h) - el; + linesize = el + er + w; + + for (i = 0; i < et; i++) { + vpx_memcpy(dest_ptr1, src_ptr1, linesize); + dest_ptr1 += sp; } - - /***********/ - /* U Plane */ - /***********/ - plane_stride = ybf->uv_stride; - plane_height = ybf->uv_height; - plane_width = ybf->uv_width; - Border /= 2; - - /* copy the left and right most columns out */ - src_ptr1 = ybf->u_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) { - vpx_memset(dest_ptr1, src_ptr1[0], Border); - vpx_memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /* Now copy the top and bottom source lines into each line of the respective borders */ - src_ptr1 = ybf->u_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /***********/ - /* V Plane */ - /***********/ - - /* copy the left and right most columns out */ - src_ptr1 = ybf->v_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) { - vpx_memset(dest_ptr1, src_ptr1[0], Border); - vpx_memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /* Now copy the top and bottom source lines into each line of the respective borders */ - src_ptr1 = ybf->v_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; + for (i = 0; i < eb; i++) { + vpx_memcpy(dest_ptr2, src_ptr2, linesize); + dest_ptr2 += sp; } } - -static void -extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf) { - int i; - unsigned char *src_ptr1, *src_ptr2; - unsigned char *dest_ptr1, *dest_ptr2; - - unsigned int Border; - int plane_stride; - int plane_height; - int plane_width; - - /***********/ - /* Y Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - plane_height = ybf->y_height; - plane_width = ybf->y_width; - - /* copy the left and right most columns out */ - src_ptr1 = ybf->y_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) { - vpx_memset(dest_ptr1, src_ptr1[0], Border); - vpx_memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /* Now copy the top and bottom source lines into each line of the respective borders */ - src_ptr1 = ybf->y_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)Border; i++) { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - plane_stride /= 2; - plane_height /= 2; - plane_width /= 2; - Border /= 2; - +void +vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) { + assert(ybf->y_height - ybf->y_crop_height < 16); + assert(ybf->y_width - ybf->y_crop_width < 16); + assert(ybf->y_height - ybf->y_crop_height >= 0); + assert(ybf->y_width - ybf->y_crop_width >= 0); + + extend_plane(ybf->y_buffer, ybf->y_stride, + ybf->y_crop_width, ybf->y_crop_height, + ybf->border, ybf->border, + ybf->border + ybf->y_height - ybf->y_crop_height, + ybf->border + ybf->y_width - ybf->y_crop_width); + + extend_plane(ybf->u_buffer, ybf->uv_stride, + (ybf->y_crop_width + 1) / 2, (ybf->y_crop_height + 1) / 2, + ybf->border / 2, ybf->border / 2, + (ybf->border + ybf->y_height - ybf->y_crop_height + 1) / 2, + (ybf->border + ybf->y_width - ybf->y_crop_width + 1) / 2); + + extend_plane(ybf->v_buffer, ybf->uv_stride, + (ybf->y_crop_width + 1) / 2, (ybf->y_crop_height + 1) / 2, + ybf->border / 2, ybf->border / 2, + (ybf->border + ybf->y_height - ybf->y_crop_height + 1) / 2, + (ybf->border + ybf->y_width - ybf->y_crop_width + 1) / 2); } - /**************************************************************************** * * ROUTINE : vp8_yv12_copy_frame diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h index 45e57f401..14b6e278b 100644 --- a/vpx_scale/yv12config.h +++ b/vpx_scale/yv12config.h @@ -42,6 +42,8 @@ extern "C" { typedef struct yv12_buffer_config { int y_width; int y_height; + int y_crop_width; + int y_crop_height; int y_stride; /* int yinternal_width; */ @@ -711,7 +711,7 @@ int main(int argc, const char **argv_) { struct input_ctx input = {0}; int frames_corrupted = 0; int dec_flags = 0; - int do_scale; + int do_scale = 0; int stream_w = 0, stream_h = 0; vpx_image_t *scaled_img = NULL; @@ -1645,8 +1645,6 @@ struct stream_state { stats_io_t stats; struct vpx_image *img; vpx_codec_ctx_t decoder; - vpx_ref_frame_t ref_enc; - vpx_ref_frame_t ref_dec; int mismatch_seen; }; @@ -2235,16 +2233,7 @@ static void initialize_encoder(struct stream_state *stream, #if CONFIG_DECODERS if (global->test_decode != TEST_DECODE_OFF) { - int width, height; - vpx_codec_dec_init(&stream->decoder, global->codec->dx_iface(), NULL, 0); - - width = (stream->config.cfg.g_w + 15) & ~15; - height = (stream->config.cfg.g_h + 15) & ~15; - vpx_img_alloc(&stream->ref_enc.img, VPX_IMG_FMT_I420, width, height, 1); - vpx_img_alloc(&stream->ref_dec.img, VPX_IMG_FMT_I420, width, height, 1); - stream->ref_enc.frame_type = VP8_LAST_FRAME; - stream->ref_dec.frame_type = VP8_LAST_FRAME; } #endif } @@ -2429,19 +2418,44 @@ static float usec_to_fps(uint64_t usec, unsigned int frames) { static void test_decode(struct stream_state *stream, - enum TestDecodeFatality fatal) { + enum TestDecodeFatality fatal, + const struct codec_item *codec) { + vpx_image_t enc_img, dec_img; + if (stream->mismatch_seen) return; - vpx_codec_control(&stream->encoder, VP8_COPY_REFERENCE, &stream->ref_enc); + /* Get the internal reference frame */ + if (codec->fourcc == VP8_FOURCC) { + struct vpx_ref_frame ref_enc, ref_dec; + int width, height; + + width = (stream->config.cfg.g_w + 15) & ~15; + height = (stream->config.cfg.g_h + 15) & ~15; + vpx_img_alloc(&ref_enc.img, VPX_IMG_FMT_I420, width, height, 1); + enc_img = ref_enc.img; + vpx_img_alloc(&ref_dec.img, VPX_IMG_FMT_I420, width, height, 1); + dec_img = ref_dec.img; + + ref_enc.frame_type = VP8_LAST_FRAME; + ref_dec.frame_type = VP8_LAST_FRAME; + vpx_codec_control(&stream->encoder, VP8_COPY_REFERENCE, &ref_enc); + vpx_codec_control(&stream->decoder, VP8_COPY_REFERENCE, &ref_dec); + } else { + struct vp9_ref_frame ref; + + ref.idx = 0; + vpx_codec_control(&stream->encoder, VP9_GET_REFERENCE, &ref); + enc_img = ref.img; + vpx_codec_control(&stream->decoder, VP9_GET_REFERENCE, &ref); + dec_img = ref.img; + } ctx_exit_on_error(&stream->encoder, "Failed to get encoder reference frame"); - vpx_codec_control(&stream->decoder, VP8_COPY_REFERENCE, &stream->ref_dec); ctx_exit_on_error(&stream->decoder, "Failed to get decoder reference frame"); - if (!compare_img(&stream->ref_enc.img, &stream->ref_dec.img)) { + if (!compare_img(&enc_img, &dec_img)) { int y[2], u[2], v[2]; - find_mismatch(&stream->ref_enc.img, &stream->ref_dec.img, - y, u, v); + find_mismatch(&enc_img, &dec_img, y, u, v); stream->decoder.err = 1; warn_or_exit_on_error(&stream->decoder, fatal == TEST_DECODE_FATAL, "Stream %d: Encode/decode mismatch on frame %d" @@ -2450,6 +2464,9 @@ static void test_decode(struct stream_state *stream, y[0], y[1], u[0], u[1], v[0], v[1]); stream->mismatch_seen = stream->frames_out; } + + vpx_img_free(&enc_img); + vpx_img_free(&dec_img); } @@ -2671,7 +2688,7 @@ int main(int argc, const char **argv_) { } if (got_data && global.test_decode != TEST_DECODE_OFF) - FOREACH_STREAM(test_decode(stream, global.test_decode)); + FOREACH_STREAM(test_decode(stream, global.test_decode, global.codec)); } fflush(stdout); @@ -2703,8 +2720,6 @@ int main(int argc, const char **argv_) { if (global.test_decode != TEST_DECODE_OFF) { FOREACH_STREAM(vpx_codec_destroy(&stream->decoder)); - FOREACH_STREAM(vpx_img_free(&stream->ref_enc.img)); - FOREACH_STREAM(vpx_img_free(&stream->ref_dec.img)); } close_input_file(&input); |