diff options
-rw-r--r-- | examples.mk | 16 | ||||
-rw-r--r-- | vp9/common/vp9_idct.h | 7 | ||||
-rw-r--r-- | vp9/common/vp9_idctllm.c | 7 | ||||
-rw-r--r-- | vp9/common/vp9_rtcd_defs.sh | 2 | ||||
-rw-r--r-- | vp9/common/x86/vp9_idctllm_x86.c | 76 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodframe.c | 2 | ||||
-rw-r--r-- | vp9/decoder/vp9_idct_blk.c | 6 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 38 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_mbgraph.c | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_if.c | 43 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 6 | ||||
-rw-r--r-- | vp9/vp9_common.mk | 3 | ||||
-rw-r--r-- | vpxdec.c | 34 | ||||
-rw-r--r-- | vpxenc.c | 41 |
15 files changed, 218 insertions, 69 deletions
diff --git a/examples.mk b/examples.mk index f1cc42bf7..8426ee769 100644 --- a/examples.mk +++ b/examples.mk @@ -8,6 +8,12 @@ ## be found in the AUTHORS file in the root of the source tree. ## +LIBYUV_SRCS += third_party/libyuv/include/libyuv/basic_types.h \ + third_party/libyuv/include/libyuv/cpu_id.h \ + third_party/libyuv/include/libyuv/scale.h \ + third_party/libyuv/source/row.h \ + third_party/libyuv/source/scale.c \ + third_party/libyuv/source/cpu_id.c # List of examples to build. UTILS are files that are taken from the source # tree directly, and GEN_EXAMPLES are files that are created from the @@ -25,6 +31,7 @@ vpxdec.SRCS += nestegg/halloc/src/hlist.h vpxdec.SRCS += nestegg/halloc/src/macros.h vpxdec.SRCS += nestegg/include/nestegg/nestegg.h vpxdec.SRCS += nestegg/src/nestegg.c +vpxdec.SRCS += $(LIBYUV_SRCS) vpxdec.GUID = BA5FE66F-38DD-E034-F542-B1578C5FB950 vpxdec.DESCRIPTION = Full featured decoder UTILS-$(CONFIG_ENCODERS) += vpxenc.c @@ -36,6 +43,7 @@ vpxenc.SRCS += vpx_ports/vpx_timer.h vpxenc.SRCS += libmkv/EbmlIDs.h vpxenc.SRCS += libmkv/EbmlWriter.c vpxenc.SRCS += libmkv/EbmlWriter.h +vpxenc.SRCS += $(LIBYUV_SRCS) vpxenc.GUID = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1 vpxenc.DESCRIPTION = Full featured encoder UTILS-$(CONFIG_VP8_ENCODER) += vp8_scalable_patterns.c @@ -99,13 +107,7 @@ vp8cx_set_ref.DESCRIPTION = VP8 set encoder reference frame # C file is provided, not generated automatically. UTILS-$(CONFIG_MULTI_RES_ENCODING) += vp8_multi_resolution_encoder.c -vp8_multi_resolution_encoder.SRCS \ - += third_party/libyuv/include/libyuv/basic_types.h \ - third_party/libyuv/include/libyuv/cpu_id.h \ - third_party/libyuv/include/libyuv/scale.h \ - third_party/libyuv/source/row.h \ - third_party/libyuv/source/scale.c \ - third_party/libyuv/source/cpu_id.c +vp8_multi_resolution_encoder.SRCS += $(LIBYUV_SRCS) vp8_multi_resolution_encoder.GUID = 04f8738e-63c8-423b-90fa-7c2703a374de vp8_multi_resolution_encoder.DESCRIPTION = VP8 Multiple-resolution Encoding diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h index 2d128e155..3e0ee4b63 100644 --- a/vp9/common/vp9_idct.h +++ b/vp9/common/vp9_idct.h @@ -13,6 +13,13 @@ #include "./vpx_config.h" +#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n)) + +/* If we don't want to use ROUND_POWER_OF_TWO macro +static INLINE int16_t round_power_of_two(int16_t value, int n) { + return (value + (1 << (n - 1))) >> n; +}*/ + // Constants and Macros used by all idct/dct functions #define DCT_CONST_BITS 14 #define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1)) diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c index 632dae8fd..f34823b36 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@ -31,13 +31,6 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_idct.h" -#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n)) - -/* If we don't want to use ROUND_POWER_OF_TWO macro -static INLINE int16_t round_power_of_two(int16_t value, int n) { - return (value + (1 << (n - 1))) >> n; -}*/ - typedef void (*transform_1d)(int16_t*, int16_t*); typedef struct { diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 700af7fa7..02a6711e5 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -296,7 +296,7 @@ specialize vp9_short_iht16x16 # dct and add prototype void vp9_dc_only_idct_add "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride" -specialize vp9_dc_only_idct_add +specialize vp9_dc_only_idct_add sse2 prototype void vp9_short_inv_walsh4x4_1_x8 "int16_t *input, int16_t *output, int pitch" specialize vp9_short_inv_walsh4x4_1_x8 diff --git a/vp9/common/x86/vp9_idctllm_x86.c b/vp9/common/x86/vp9_idctllm_x86.c new file mode 100644 index 000000000..667f5c1d3 --- /dev/null +++ b/vp9/common/x86/vp9_idctllm_x86.c @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> +#include <emmintrin.h> // SSE2 +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_idct.h" + +#if HAVE_SSE2 +// In order to improve performance, clip absolute diff values to [0, 255], +// which allows to keep the additions/subtractions in 8 bits. +void vp9_dc_only_idct_add_sse2(int input_dc, uint8_t *pred_ptr, + uint8_t *dst_ptr, int pitch, int stride) { + int a1; + int16_t out; + uint8_t abs_diff; + __m128i p0, p1, p2, p3; + unsigned int extended_diff; + __m128i diff; + + out = dct_const_round_shift(input_dc * cospi_16_64); + out = dct_const_round_shift(out * cospi_16_64); + a1 = ROUND_POWER_OF_TWO(out, 4); + + // Read prediction data. + p0 = _mm_cvtsi32_si128 (*(const int *)(pred_ptr + 0 * pitch)); + p1 = _mm_cvtsi32_si128 (*(const int *)(pred_ptr + 1 * pitch)); + p2 = _mm_cvtsi32_si128 (*(const int *)(pred_ptr + 2 * pitch)); + p3 = _mm_cvtsi32_si128 (*(const int *)(pred_ptr + 3 * pitch)); + + // Unpack prediction data, and store 4x4 array in 1 XMM register. + p0 = _mm_unpacklo_epi32(p0, p1); + p2 = _mm_unpacklo_epi32(p2, p3); + p0 = _mm_unpacklo_epi64(p0, p2); + + // Clip dc value to [0, 255] range. Then, do addition or subtraction + // according to its sign. + if (a1 >= 0) { + abs_diff = (a1 > 255) ? 255 : a1; + extended_diff = abs_diff * 0x01010101u; + diff = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_diff), 0); + + p1 = _mm_adds_epu8(p0, diff); + } else { + abs_diff = (a1 < -255) ? 255 : -a1; + extended_diff = abs_diff * 0x01010101u; + diff = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_diff), 0); + + p1 = _mm_subs_epu8(p0, diff); + } + + // Store results to dst. + *(int *)dst_ptr = _mm_cvtsi128_si32(p1); + dst_ptr += stride; + + p1 = _mm_srli_si128(p1, 4); + *(int *)dst_ptr = _mm_cvtsi128_si32(p1); + dst_ptr += stride; + + p1 = _mm_srli_si128(p1, 4); + *(int *)dst_ptr = _mm_cvtsi128_si32(p1); + dst_ptr += stride; + + p1 = _mm_srli_si128(p1, 4); + *(int *)dst_ptr = _mm_cvtsi128_si32(p1); +} +#endif diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 170202ba4..8e9e5ad7d 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -126,7 +126,7 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) { xd->inv_txm4x4_1 = vp9_short_idct4x4llm_1; xd->inv_txm4x4 = vp9_short_idct4x4llm; xd->itxm_add = vp9_dequant_idct_add; - xd->dc_only_itxm_add = vp9_dc_only_idct_add_c; + xd->dc_only_itxm_add = vp9_dc_only_idct_add; xd->itxm_add_y_block = vp9_dequant_idct_add_y_block; xd->itxm_add_uv_block = vp9_dequant_idct_add_uv_block; if (xd->lossless) { diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c index c330bf97d..0ec5036e4 100644 --- a/vp9/decoder/vp9_idct_blk.c +++ b/vp9/decoder/vp9_idct_blk.c @@ -47,7 +47,7 @@ void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq, if (xd->eobs[i * 4 + j] > 1) vp9_dequant_idct_add_c(q, dq, pre, dst, 16, stride); else { - vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dst, 16, stride); + vp9_dc_only_idct_add(q[0]*dq[0], pre, dst, 16, stride); ((int *)q)[0] = 0; } @@ -72,7 +72,7 @@ void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq, if (xd->eobs[16 + i * 2 + j] > 1) vp9_dequant_idct_add_c(q, dq, pre, dstu, 8, stride); else { - vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dstu, 8, stride); + vp9_dc_only_idct_add(q[0]*dq[0], pre, dstu, 8, stride); ((int *)q)[0] = 0; } @@ -90,7 +90,7 @@ void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq, if (xd->eobs[20 + i * 2 + j] > 1) vp9_dequant_idct_add_c(q, dq, pre, dstv, 8, stride); else { - vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dstv, 8, stride); + vp9_dc_only_idct_add(q[0]*dq[0], pre, dstv, 8, stride); ((int *)q)[0] = 0; } diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index eaed1a964..c0fe5ac76 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1187,7 +1187,7 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { // Copy data over into macro block data structures. x->src = *cpi->Source; - xd->pre = cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]]; + xd->pre = cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]]; xd->dst = cm->yv12_fb[cm->new_fb_idx]; // set up frame for intra coded blocks @@ -2089,11 +2089,11 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, assert(cm->frame_type != KEY_FRAME); if (mbmi->ref_frame == LAST_FRAME) - ref_fb_idx = cpi->common.active_ref_idx[cpi->lst_fb_idx]; + ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; else if (mbmi->ref_frame == GOLDEN_FRAME) - ref_fb_idx = cpi->common.active_ref_idx[cpi->gld_fb_idx]; + ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; else - ref_fb_idx = cpi->common.active_ref_idx[cpi->alt_fb_idx]; + ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; setup_pred_block(&xd->pre, &cpi->common.yv12_fb[ref_fb_idx], @@ -2104,11 +2104,11 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, int second_ref_fb_idx; if (mbmi->second_ref_frame == LAST_FRAME) - second_ref_fb_idx = cpi->common.active_ref_idx[cpi->lst_fb_idx]; + second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; else if (mbmi->second_ref_frame == GOLDEN_FRAME) - second_ref_fb_idx = cpi->common.active_ref_idx[cpi->gld_fb_idx]; + second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; else - second_ref_fb_idx = cpi->common.active_ref_idx[cpi->alt_fb_idx]; + second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; setup_pred_block(&xd->second_pre, &cpi->common.yv12_fb[second_ref_fb_idx], @@ -2319,11 +2319,11 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, assert(cm->frame_type != KEY_FRAME); if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) - ref_fb_idx = cpi->common.active_ref_idx[cpi->lst_fb_idx]; + ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) - ref_fb_idx = cpi->common.active_ref_idx[cpi->gld_fb_idx]; + ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; else - ref_fb_idx = cpi->common.active_ref_idx[cpi->alt_fb_idx]; + ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; setup_pred_block(&xd->pre, &cpi->common.yv12_fb[ref_fb_idx], @@ -2334,11 +2334,11 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, int second_ref_fb_idx; if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME) - second_ref_fb_idx = cpi->common.active_ref_idx[cpi->lst_fb_idx]; + second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME) - second_ref_fb_idx = cpi->common.active_ref_idx[cpi->gld_fb_idx]; + second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; else - second_ref_fb_idx = cpi->common.active_ref_idx[cpi->alt_fb_idx]; + second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; setup_pred_block(&xd->second_pre, &cpi->common.yv12_fb[second_ref_fb_idx], @@ -2548,11 +2548,11 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, assert(cm->frame_type != KEY_FRAME); if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) - ref_fb_idx = cpi->common.active_ref_idx[cpi->lst_fb_idx]; + ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) - ref_fb_idx = cpi->common.active_ref_idx[cpi->gld_fb_idx]; + ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; else - ref_fb_idx = cpi->common.active_ref_idx[cpi->alt_fb_idx]; + ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; setup_pred_block(&xd->pre, &cpi->common.yv12_fb[ref_fb_idx], @@ -2563,11 +2563,11 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, int second_ref_fb_idx; if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME) - second_ref_fb_idx = cpi->common.active_ref_idx[cpi->lst_fb_idx]; + second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME) - second_ref_fb_idx = cpi->common.active_ref_idx[cpi->gld_fb_idx]; + second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; else - second_ref_fb_idx = cpi->common.active_ref_idx[cpi->alt_fb_idx]; + second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; setup_pred_block(&xd->second_pre, &cpi->common.yv12_fb[second_ref_fb_idx], diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 05a0f6f04..4d0a299e8 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -436,10 +436,10 @@ void vp9_first_pass(VP9_COMP *cpi) { int recon_yoffset, recon_uvoffset; YV12_BUFFER_CONFIG *lst_yv12 = - &cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]]; + &cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]]; YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx]; YV12_BUFFER_CONFIG *gld_yv12 = - &cm->yv12_fb[cm->active_ref_idx[cpi->gld_fb_idx]]; + &cm->yv12_fb[cm->ref_frame_map[cpi->gld_fb_idx]]; int recon_y_stride = lst_yv12->y_stride; int recon_uv_stride = lst_yv12->uv_stride; int64_t intra_error = 0; diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index bc06c9458..d6644c2aa 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -445,7 +445,7 @@ void vp9_update_mbgraph_stats VP9_COMMON *const cm = &cpi->common; int i, n_frames = vp9_lookahead_depth(cpi->lookahead); YV12_BUFFER_CONFIG *golden_ref = - &cm->yv12_fb[cm->active_ref_idx[cpi->gld_fb_idx]]; + &cm->yv12_fb[cm->ref_frame_map[cpi->gld_fb_idx]]; // we need to look ahead beyond where the ARF transitions into // being a GF - so exit if we don't look ahead beyond that diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 45ab6cd8c..ced6eddca 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -833,7 +833,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { } { - int y_stride = cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]].y_stride; + int y_stride = cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]].y_stride; if (cpi->sf.search_method == NSTEP) { vp9_init3smotion_compensation(&cpi->mb, y_stride); @@ -1754,7 +1754,7 @@ void vp9_remove_compressor(VP9_PTR *ptr) { #endif if (cpi->b_calculate_psnr) { YV12_BUFFER_CONFIG *lst_yv12 = - &cpi->common.yv12_fb[cpi->common.active_ref_idx[cpi->lst_fb_idx]]; + &cpi->common.yv12_fb[cpi->common.ref_frame_map[cpi->lst_fb_idx]]; double samples = 3.0 / 2 * cpi->count * lst_yv12->y_width * lst_yv12->y_height; double total_psnr = vp9_mse2psnr(samples, 255.0, cpi->total_sq_error); double total_psnr2 = vp9_mse2psnr(samples, 255.0, cpi->total_sq_error2); @@ -2099,11 +2099,11 @@ int vp9_get_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, int ref_fb_idx; if (ref_frame_flag == VP9_LAST_FLAG) - ref_fb_idx = cm->active_ref_idx[cpi->lst_fb_idx]; + ref_fb_idx = cm->ref_frame_map[cpi->lst_fb_idx]; else if (ref_frame_flag == VP9_GOLD_FLAG) - ref_fb_idx = cm->active_ref_idx[cpi->gld_fb_idx]; + ref_fb_idx = cm->ref_frame_map[cpi->gld_fb_idx]; else if (ref_frame_flag == VP9_ALT_FLAG) - ref_fb_idx = cm->active_ref_idx[cpi->alt_fb_idx]; + ref_fb_idx = cm->ref_frame_map[cpi->alt_fb_idx]; else return -1; @@ -2120,11 +2120,11 @@ int vp9_set_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, int ref_fb_idx; if (ref_frame_flag == VP9_LAST_FLAG) - ref_fb_idx = cm->active_ref_idx[cpi->lst_fb_idx]; + ref_fb_idx = cm->ref_frame_map[cpi->lst_fb_idx]; else if (ref_frame_flag == VP9_GOLD_FLAG) - ref_fb_idx = cm->active_ref_idx[cpi->gld_fb_idx]; + ref_fb_idx = cm->ref_frame_map[cpi->gld_fb_idx]; else if (ref_frame_flag == VP9_ALT_FLAG) - ref_fb_idx = cm->active_ref_idx[cpi->alt_fb_idx]; + ref_fb_idx = cm->ref_frame_map[cpi->alt_fb_idx]; else return -1; @@ -2480,9 +2480,9 @@ static void update_reference_frames(VP9_COMP * const cpi) { // If any buffer copy / swapping is signaled it should be done here. if (cm->frame_type == KEY_FRAME) { ref_cnt_fb(cm->fb_idx_ref_cnt, - &cm->active_ref_idx[cpi->gld_fb_idx], cm->new_fb_idx); + &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx); ref_cnt_fb(cm->fb_idx_ref_cnt, - &cm->active_ref_idx[cpi->alt_fb_idx], cm->new_fb_idx); + &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); } else if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) { /* Preserve the previously existing golden frame and update the frame in * the alt ref slot instead. This is highly specific to the current use of @@ -2496,7 +2496,7 @@ static void update_reference_frames(VP9_COMP * const cpi) { int tmp; ref_cnt_fb(cm->fb_idx_ref_cnt, - &cm->active_ref_idx[cpi->alt_fb_idx], cm->new_fb_idx); + &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); tmp = cpi->alt_fb_idx; cpi->alt_fb_idx = cpi->gld_fb_idx; @@ -2504,18 +2504,18 @@ static void update_reference_frames(VP9_COMP * const cpi) { } else { /* For non key/golden frames */ if (cpi->refresh_alt_ref_frame) { ref_cnt_fb(cm->fb_idx_ref_cnt, - &cm->active_ref_idx[cpi->alt_fb_idx], cm->new_fb_idx); + &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); } if (cpi->refresh_golden_frame) { ref_cnt_fb(cm->fb_idx_ref_cnt, - &cm->active_ref_idx[cpi->gld_fb_idx], cm->new_fb_idx); + &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx); } } if (cpi->refresh_last_frame) { ref_cnt_fb(cm->fb_idx_ref_cnt, - &cm->active_ref_idx[cpi->lst_fb_idx], cm->new_fb_idx); + &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx); } } @@ -2604,7 +2604,7 @@ static void scale_references(VP9_COMP *cpi) { int i; for (i = 0; i < 3; i++) { - YV12_BUFFER_CONFIG *ref = &cm->yv12_fb[cm->active_ref_idx[i]]; + YV12_BUFFER_CONFIG *ref = &cm->yv12_fb[cm->ref_frame_map[i]]; if (ref->y_width != cm->Width || ref->y_height != cm->Height) { int new_fb = get_free_fb(cm); @@ -2616,8 +2616,8 @@ static void scale_references(VP9_COMP *cpi) { scale_and_extend_frame(ref, &cm->yv12_fb[new_fb]); cpi->scaled_ref_idx[i] = new_fb; } else { - cpi->scaled_ref_idx[i] = cm->active_ref_idx[i]; - cm->fb_idx_ref_cnt[cm->active_ref_idx[i]]++; + cpi->scaled_ref_idx[i] = cm->ref_frame_map[i]; + cm->fb_idx_ref_cnt[cm->ref_frame_map[i]]++; } } } @@ -3644,8 +3644,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, FILE *recon_file; sprintf(filename, "enc%04d.yuv", (int) cm->current_video_frame); recon_file = fopen(filename, "wb"); - fwrite(cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]].buffer_alloc, - cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]].frame_size, + fwrite(cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]].buffer_alloc, + cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]].frame_size, 1, recon_file); fclose(recon_file); } @@ -3867,6 +3867,11 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, cm->fb_idx_ref_cnt[cm->new_fb_idx]--; cm->new_fb_idx = get_free_fb(cm); + /* Get the mapping of L/G/A to the reference buffer pool */ + cm->active_ref_idx[0] = cm->ref_frame_map[cpi->lst_fb_idx]; + cm->active_ref_idx[1] = cm->ref_frame_map[cpi->gld_fb_idx]; + cm->active_ref_idx[2] = cm->ref_frame_map[cpi->alt_fb_idx]; + /* Reset the frame pointers to the current frame size */ vp8_yv12_realloc_frame_buffer(&cm->yv12_fb[cm->new_fb_idx], cm->mb_cols * 16, cm->mb_rows * 16, diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index a2e6c34b5..496be950c 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -3111,7 +3111,7 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, YV12_BUFFER_CONFIG yv12_mb[4], struct scale_factors scale[MAX_REF_FRAMES]) { VP9_COMMON *cm = &cpi->common; - YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.active_ref_idx[idx]]; + YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]]; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; int use_prev_in_find_mv_refs, use_prev_in_find_best_ref; @@ -4083,7 +4083,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, fb = cpi->alt_fb_idx; } - if (cpi->scaled_ref_idx[fb] != cm->active_ref_idx[fb]) + if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb]) scaled_ref_frame = &cm->yv12_fb[cpi->scaled_ref_idx[fb]]; } @@ -5176,7 +5176,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, fb = cpi->alt_fb_idx; } - if (cpi->scaled_ref_idx[fb] != cm->active_ref_idx[fb]) + if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb]) scaled_ref_frame = &cm->yv12_fb[cpi->scaled_ref_idx[fb]]; #if CONFIG_COMP_INTERINTRA_PRED diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index eb152f521..f330b464a 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -110,10 +110,13 @@ VP9_COMMON_SRCS-yes += common/vp9_maskingmv.c VP9_COMMON_SRCS-$(HAVE_SSE3) += common/x86/vp9_mask_sse3.asm endif +VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_idctllm_x86.c VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_sadmxn_x86.c ifeq ($(HAVE_SSE2),yes) +vp9/common/x86/vp9_idctllm_x86.c.o: CFLAGS += -msse2 vp9/common/x86/vp9_loopfilter_x86.c.o: CFLAGS += -msse2 vp9/common/x86/vp9_sadmxn_x86.c.o: CFLAGS += -msse2 +vp9/common/x86/vp9_idctllm_x86.c.d: CFLAGS += -msse2 vp9/common/x86/vp9_loopfilter_x86.c.d: CFLAGS += -msse2 vp9/common/x86/vp9_sadmxn_x86.c.d: CFLAGS += -msse2 endif @@ -30,6 +30,7 @@ #endif #include "tools_common.h" #include "nestegg/include/nestegg/nestegg.h" +#include "third_party/libyuv/include/libyuv/scale.h" #if CONFIG_OS_SUPPORT #if defined(_MSC_VER) @@ -93,6 +94,8 @@ static const arg_def_t verbosearg = ARG_DEF("v", "verbose", 0, "Show version string"); static const arg_def_t error_concealment = ARG_DEF(NULL, "error-concealment", 0, "Enable decoder error-concealment"); +static const arg_def_t scalearg = ARG_DEF("S", "scale", 0, + "Scale output frames uniformly"); #if CONFIG_MD5 @@ -102,7 +105,7 @@ static const arg_def_t md5arg = ARG_DEF(NULL, "md5", 0, static const arg_def_t *all_args[] = { &codecarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg, &progressarg, &limitarg, &skiparg, &postprocarg, &summaryarg, &outputfile, - &threadsarg, &verbosearg, + &threadsarg, &verbosearg, &scalearg, #if CONFIG_MD5 &md5arg, #endif @@ -708,6 +711,9 @@ int main(int argc, const char **argv_) { struct input_ctx input = {0}; int frames_corrupted = 0; int dec_flags = 0; + int do_scale; + int stream_w = 0, stream_h = 0; + vpx_image_t *scaled_img = NULL; /* Parse command line */ exec_name = argv_[0]; @@ -757,6 +763,8 @@ int main(int argc, const char **argv_) { cfg.threads = arg_parse_uint(&arg); else if (arg_match(&arg, &verbosearg, argi)) quiet = 0; + else if (arg_match(&arg, &scalearg, argi)) + do_scale = 1; #if CONFIG_VP8_DECODER else if (arg_match(&arg, &addnoise_level, argi)) { @@ -1015,6 +1023,30 @@ int main(int argc, const char **argv_) { show_progress(frame_in, frame_out, dx_time); if (!noblit) { + if (do_scale) { + if (frame_out == 1) { + stream_w = img->d_w; + stream_h = img->d_h; + scaled_img = vpx_img_alloc(NULL, VPX_IMG_FMT_I420, + stream_w, stream_h, 16); + } + if (img && (img->d_w != stream_w || img->d_h != stream_h)) { + I420Scale(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y], + img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U], + img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V], + img->d_w, img->d_h, + scaled_img->planes[VPX_PLANE_Y], + scaled_img->stride[VPX_PLANE_Y], + scaled_img->planes[VPX_PLANE_U], + scaled_img->stride[VPX_PLANE_U], + scaled_img->planes[VPX_PLANE_V], + scaled_img->stride[VPX_PLANE_V], + stream_w, stream_h, + kFilterBox); + img = scaled_img; + } + } + if (img) { unsigned int y; char out_fn[PATH_MAX]; @@ -47,6 +47,7 @@ #include "y4minput.h" #include "libmkv/EbmlWriter.h" #include "libmkv/EbmlIDs.h" +#include "third_party/libyuv/include/libyuv/scale.h" /* Need special handling of these functions on Windows */ #if defined(_MSC_VER) @@ -1642,6 +1643,7 @@ struct stream_state { uint64_t cx_time; size_t nbytes; stats_io_t stats; + struct vpx_image *img; vpx_codec_ctx_t decoder; vpx_ref_frame_t ref_enc; vpx_ref_frame_t ref_dec; @@ -2061,11 +2063,15 @@ static void validate_stream_config(struct stream_state *stream) { static void set_stream_dimensions(struct stream_state *stream, unsigned int w, unsigned int h) { - if ((stream->config.cfg.g_w && stream->config.cfg.g_w != w) - || (stream->config.cfg.g_h && stream->config.cfg.g_h != h)) - fatal("Stream %d: Resizing not yet supported", stream->index); - stream->config.cfg.g_w = w; - stream->config.cfg.g_h = h; + if (!stream->config.cfg.g_w) { + if (!stream->config.cfg.g_h) + stream->config.cfg.g_w = w; + else + stream->config.cfg.g_w = w * stream->config.cfg.g_h / h; + } + if (!stream->config.cfg.g_h) { + stream->config.cfg.g_h = h * stream->config.cfg.g_w / w; + } } @@ -2258,6 +2264,28 @@ static void encode_frame(struct stream_state *stream, next_frame_start = (cfg->g_timebase.den * (int64_t)(frames_in) * global->framerate.den) / cfg->g_timebase.num / global->framerate.num; + + /* Scale if necessary */ + if (img && (img->d_w != cfg->g_w || img->d_h != cfg->g_h)) { + if (!stream->img) + stream->img = vpx_img_alloc(NULL, VPX_IMG_FMT_I420, + cfg->g_w, cfg->g_h, 16); + I420Scale(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y], + img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U], + img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V], + img->d_w, img->d_h, + stream->img->planes[VPX_PLANE_Y], + stream->img->stride[VPX_PLANE_Y], + stream->img->planes[VPX_PLANE_U], + stream->img->stride[VPX_PLANE_U], + stream->img->planes[VPX_PLANE_V], + stream->img->stride[VPX_PLANE_V], + stream->img->d_w, stream->img->d_h, + kFilterBox); + + img = stream->img; + } + vpx_usec_timer_start(&timer); vpx_codec_encode(&stream->encoder, img, frame_start, (unsigned long)(next_frame_start - frame_start), @@ -2518,6 +2546,9 @@ int main(int argc, const char **argv_) { }); /* Update stream configurations from the input file's parameters */ + if (!input.w || !input.h) + fatal("Specify stream dimensions with --width (-w) " + " and --height (-h)"); FOREACH_STREAM(set_stream_dimensions(stream, input.w, input.h)); FOREACH_STREAM(validate_stream_config(stream)); |