diff options
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/common/vp9_idct.c | 90 | ||||
-rw-r--r-- | vp9/common/vp9_idct.h | 19 | ||||
-rw-r--r-- | vp9/common/vp9_rtcd_defs.sh | 16 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodframe.c | 10 | ||||
-rw-r--r-- | vp9/decoder/vp9_idct_blk.c | 101 | ||||
-rw-r--r-- | vp9/decoder/vp9_idct_blk.h | 29 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodemb.c | 61 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodemv.c | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_mbgraph.c | 15 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_if.c | 90 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_int.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 69 | ||||
-rw-r--r-- | vp9/vp9dx.mk | 2 |
14 files changed, 205 insertions, 305 deletions
diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c index 10b83f58b..99d84c9ca 100644 --- a/vp9/common/vp9_idct.c +++ b/vp9/common/vp9_idct.c @@ -1284,3 +1284,93 @@ void vp9_short_idct32x32_1_add_c(int16_t *input, uint8_t *dest, dest += dest_stride; } } + +// idct +void vp9_idct_add(int16_t *input, uint8_t *dest, int stride, int eob) { + if (eob > 1) + vp9_short_idct4x4_add(input, dest, stride); + else + vp9_short_idct4x4_1_add(input, dest, stride); +} + + +void vp9_idct_add_lossless(int16_t *input, uint8_t *dest, int stride, + int eob) { + if (eob > 1) + vp9_short_iwalsh4x4_add(input, dest, stride); + else + vp9_short_iwalsh4x4_1_add_c(input, dest, stride); +} + +void vp9_idct_add_8x8(int16_t *input, uint8_t *dest, int stride, int eob) { + // If dc is 1, then input[0] is the reconstructed value, do not need + // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. + + // The calculation can be simplified if there are not many non-zero dct + // coefficients. Use eobs to decide what to do. + // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. + // Combine that with code here. + if (eob) { + if (eob == 1) + // DC only DCT coefficient + vp9_short_idct8x8_1_add(input, dest, stride); + else if (eob <= 10) + vp9_short_idct8x8_10_add(input, dest, stride); + else + vp9_short_idct8x8_add(input, dest, stride); + } +} + +void vp9_idct_add_16x16(int16_t *input, uint8_t *dest, int stride, int eob) { + /* The calculation can be simplified if there are not many non-zero dct + * coefficients. Use eobs to separate different cases. */ + if (eob) { + if (eob == 1) + /* DC only DCT coefficient. */ + vp9_short_idct16x16_1_add(input, dest, stride); + else if (eob <= 10) + vp9_short_idct16x16_10_add(input, dest, stride); + else + vp9_short_idct16x16_add(input, dest, stride); + } +} + +void vp9_idct_add_32x32(int16_t *input, uint8_t *dest, int stride, int eob) { + if (eob) { + if (eob == 1) + vp9_short_idct32x32_1_add(input, dest, stride); + else + vp9_short_idct32x32_add(input, dest, stride); + } +} + +// iht +void vp9_iht_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride, + int eob) { + if (tx_type == DCT_DCT) + vp9_idct_add(input, dest, stride, eob); + else + vp9_short_iht4x4_add(input, dest, stride, tx_type); +} + +void vp9_iht_add_8x8(TX_TYPE tx_type, int16_t *input, uint8_t *dest, + int stride, int eob) { + if (tx_type == DCT_DCT) { + vp9_idct_add_8x8(input, dest, stride, eob); + } else { + if (eob > 0) { + vp9_short_iht8x8_add(input, dest, stride, tx_type); + } + } +} + +void vp9_iht_add_16x16(TX_TYPE tx_type, int16_t *input, uint8_t *dest, + int stride, int eob) { + if (tx_type == DCT_DCT) { + vp9_idct_add_16x16(input, dest, stride, eob); + } else { + if (eob > 0) { + vp9_short_iht16x16_add(input, dest, stride, tx_type); + } + } +} diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h index 59892cd03..0ef905cc8 100644 --- a/vp9/common/vp9_idct.h +++ b/vp9/common/vp9_idct.h @@ -16,6 +16,7 @@ #include "./vpx_config.h" #include "vpx/vpx_integer.h" #include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_enums.h" // Constants and Macros used by all idct/dct functions @@ -86,4 +87,22 @@ typedef struct { transform_1d cols, rows; // vertical and horizontal } transform_2d; + +void vp9_idct_add(int16_t *input, uint8_t *dest, int stride, int eob); +void vp9_idct_add_lossless(int16_t *input, uint8_t *dest, + int stride, int eob); +void vp9_idct_add_8x8(int16_t *input, uint8_t *dest, int stride, int eob); +void vp9_idct_add_16x16(int16_t *input, uint8_t *dest, int stride, int eob); +void vp9_idct_add_32x32(int16_t *input, uint8_t *dest, int stride, int eob); + +void vp9_iht_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest, + int stride, int eob); + +void vp9_iht_add_8x8(TX_TYPE tx_type, int16_t *input, uint8_t *dest, + int stride, int eob); + +void vp9_iht_add_16x16(TX_TYPE tx_type, int16_t *input, uint8_t *dest, + int stride, int eob); + + #endif // VP9_COMMON_VP9_IDCT_H_ diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index e5439ec8c..61be7c6b7 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -28,22 +28,6 @@ forward_decls vp9_common_forward_decls [ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 && ssse3_x86_64=ssse3 # -# Dequant -# - -prototype void vp9_idct_add_16x16 "int16_t *input, uint8_t *dest, int stride, int eob" -specialize vp9_idct_add_16x16 - -prototype void vp9_idct_add_8x8 "int16_t *input, uint8_t *dest, int stride, int eob" -specialize vp9_idct_add_8x8 - -prototype void vp9_idct_add "int16_t *input, uint8_t *dest, int stride, int eob" -specialize vp9_idct_add - -prototype void vp9_idct_add_32x32 "int16_t *q, uint8_t *dst, int stride, int eob" -specialize vp9_idct_add_32x32 - -# # RECON # prototype void vp9_d207_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 5b422d1c8..8b23c731e 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -19,6 +19,7 @@ #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_extend.h" +#include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_reconintra.h" @@ -31,7 +32,6 @@ #include "vp9/decoder/vp9_detokenize.h" #include "vp9/decoder/vp9_decodemv.h" #include "vp9/decoder/vp9_dsubexp.h" -#include "vp9/decoder/vp9_idct_blk.h" #include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/decoder/vp9_read_bit_buffer.h" #include "vp9/decoder/vp9_thread.h" @@ -101,15 +101,15 @@ static void decode_block(int plane, int block, BLOCK_SIZE plane_bsize, if (tx_type == DCT_DCT) xd->itxm_add(qcoeff, dst, stride, eob); else - vp9_iht_add_c(tx_type, qcoeff, dst, stride, eob); + vp9_iht_add(tx_type, qcoeff, dst, stride, eob); break; case TX_8X8: tx_type = get_tx_type_8x8(pd->plane_type, xd); - vp9_iht_add_8x8_c(tx_type, qcoeff, dst, stride, eob); + vp9_iht_add_8x8(tx_type, qcoeff, dst, stride, eob); break; case TX_16X16: tx_type = get_tx_type_16x16(pd->plane_type, xd); - vp9_iht_add_16x16_c(tx_type, qcoeff, dst, stride, eob); + vp9_iht_add_16x16(tx_type, qcoeff, dst, stride, eob); break; case TX_32X32: tx_type = DCT_DCT; @@ -490,7 +490,7 @@ static void setup_quantization(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) { cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; - xd->itxm_add = xd->lossless ? vp9_idct_add_lossless_c + xd->itxm_add = xd->lossless ? vp9_idct_add_lossless : vp9_idct_add; } diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c deleted file mode 100644 index a0af000b0..000000000 --- a/vp9/decoder/vp9_idct_blk.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vp9_rtcd.h" -#include "vp9/common/vp9_blockd.h" -#include "vp9/decoder/vp9_idct_blk.h" - -void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride, - int eob) { - if (tx_type == DCT_DCT) - vp9_idct_add(input, dest, stride, eob); - else - vp9_short_iht4x4_add(input, dest, stride, tx_type); -} - -void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, - int stride, int eob) { - if (tx_type == DCT_DCT) { - vp9_idct_add_8x8(input, dest, stride, eob); - } else { - if (eob > 0) { - vp9_short_iht8x8_add(input, dest, stride, tx_type); - } - } -} - -void vp9_idct_add_c(int16_t *input, uint8_t *dest, int stride, int eob) { - if (eob > 1) - vp9_short_idct4x4_add(input, dest, stride); - else - vp9_short_idct4x4_1_add(input, dest, stride); -} - -void vp9_idct_add_lossless_c(int16_t *input, uint8_t *dest, int stride, - int eob) { - if (eob > 1) - vp9_short_iwalsh4x4_add(input, dest, stride); - else - vp9_short_iwalsh4x4_1_add_c(input, dest, stride); -} - -void vp9_idct_add_8x8_c(int16_t *input, uint8_t *dest, int stride, int eob) { - // If dc is 1, then input[0] is the reconstructed value, do not need - // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. - - // The calculation can be simplified if there are not many non-zero dct - // coefficients. Use eobs to decide what to do. - // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. - // Combine that with code here. - if (eob) { - if (eob == 1) - // DC only DCT coefficient - vp9_short_idct8x8_1_add(input, dest, stride); - else if (eob <= 10) - vp9_short_idct8x8_10_add(input, dest, stride); - else - vp9_short_idct8x8_add(input, dest, stride); - } -} - -void vp9_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, - int stride, int eob) { - if (tx_type == DCT_DCT) { - vp9_idct_add_16x16(input, dest, stride, eob); - } else { - if (eob > 0) { - vp9_short_iht16x16_add(input, dest, stride, tx_type); - } - } -} - -void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) { - /* The calculation can be simplified if there are not many non-zero dct - * coefficients. Use eobs to separate different cases. */ - if (eob) { - if (eob == 1) - /* DC only DCT coefficient. */ - vp9_short_idct16x16_1_add(input, dest, stride); - else if (eob <= 10) - vp9_short_idct16x16_10_add(input, dest, stride); - else - vp9_short_idct16x16_add(input, dest, stride); - } -} - -void vp9_idct_add_32x32_c(int16_t *input, uint8_t *dest, int stride, int eob) { - if (eob) { - if (eob == 1) - vp9_short_idct32x32_1_add(input, dest, stride); - else - vp9_short_idct32x32_add(input, dest, stride); - } -} - diff --git a/vp9/decoder/vp9_idct_blk.h b/vp9/decoder/vp9_idct_blk.h deleted file mode 100644 index 00f1bc6a6..000000000 --- a/vp9/decoder/vp9_idct_blk.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_DECODER_VP9_IDCT_BLK_H_ -#define VP9_DECODER_VP9_IDCT_BLK_H_ - -#include "vp9/common/vp9_blockd.h" - -void vp9_idct_add_lossless_c(int16_t *input, uint8_t *dest, - int stride, int eob); - -void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, - int stride, int eob); - -void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, - int stride, int eob); - -void vp9_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, - int stride, int eob); - -#endif // VP9_DECODER_VP9_IDCT_BLK_H_ diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 76a5d33e7..a610d6340 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -8,16 +8,21 @@ * be found in the AUTHORS file in the root of the source tree. */ + +#include "./vp9_rtcd.h" #include "./vpx_config.h" -#include "vp9/encoder/vp9_encodemb.h" + +#include "vpx_mem/vpx_mem.h" + +#include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_reconinter.h" -#include "vp9/encoder/vp9_quantize.h" -#include "vp9/encoder/vp9_tokenize.h" #include "vp9/common/vp9_reconintra.h" -#include "vpx_mem/vpx_mem.h" -#include "vp9/encoder/vp9_rdopt.h" #include "vp9/common/vp9_systemdependent.h" -#include "vp9_rtcd.h" + +#include "vp9/encoder/vp9_encodemb.h" +#include "vp9/encoder/vp9_quantize.h" +#include "vp9/encoder/vp9_rdopt.h" +#include "vp9/encoder/vp9_tokenize.h" DECLARE_ALIGNED(16, extern const uint8_t, vp9_pt_energy_class[MAX_ENTROPY_TOKENS]); @@ -47,28 +52,6 @@ static void inverse_transform_b_4x4_add(MACROBLOCKD *xd, int eob, xd->inv_txm4x4_add(dqcoeff, dest, stride); } -static void inverse_transform_b_8x8_add(int eob, - int16_t *dqcoeff, uint8_t *dest, - int stride) { - if (eob <= 1) - vp9_short_idct8x8_1_add(dqcoeff, dest, stride); - else if (eob <= 10) - vp9_short_idct8x8_10_add(dqcoeff, dest, stride); - else - vp9_short_idct8x8_add(dqcoeff, dest, stride); -} - -static void inverse_transform_b_16x16_add(int eob, - int16_t *dqcoeff, uint8_t *dest, - int stride) { - if (eob <= 1) - vp9_short_idct16x16_1_add(dqcoeff, dest, stride); - else if (eob <= 10) - vp9_short_idct16x16_10_add(dqcoeff, dest, stride); - else - vp9_short_idct16x16_add(dqcoeff, dest, stride); -} - static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { struct macroblock_plane *const p = &x->plane[plane]; const MACROBLOCKD *const xd = &x->e_mbd; @@ -476,12 +459,10 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride); break; case TX_16X16: - inverse_transform_b_16x16_add(pd->eobs[block], dqcoeff, dst, - pd->dst.stride); + vp9_idct_add_16x16(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); break; case TX_8X8: - inverse_transform_b_8x8_add(pd->eobs[block], dqcoeff, dst, - pd->dst.stride); + vp9_idct_add_8x8(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); break; case TX_4X4: // this is like vp9_short_idct4x4 but has a special case around eob<=1 @@ -597,12 +578,8 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, p->zbin_extra, eob, scan, iscan); - if (!x->skip_encode && *eob) { - if (tx_type == DCT_DCT) - inverse_transform_b_16x16_add(*eob, dqcoeff, dst, pd->dst.stride); - else - vp9_short_iht16x16_add(dqcoeff, dst, pd->dst.stride, tx_type); - } + if (!x->skip_encode && *eob) + vp9_iht_add_16x16(tx_type, dqcoeff, dst, pd->dst.stride, *eob); break; case TX_8X8: tx_type = get_tx_type_8x8(pd->plane_type, xd); @@ -626,12 +603,8 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, p->zbin_extra, eob, scan, iscan); - if (!x->skip_encode && *eob) { - if (tx_type == DCT_DCT) - inverse_transform_b_8x8_add(*eob, dqcoeff, dst, pd->dst.stride); - else - vp9_short_iht8x8_add(dqcoeff, dst, pd->dst.stride, tx_type); - } + if (!x->skip_encode && *eob) + vp9_iht_add_8x8(tx_type, dqcoeff, dst, pd->dst.stride, *eob); break; case TX_4X4: tx_type = get_tx_type_4x4(pd->plane_type, xd, block); diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c index db08ee856..04a4172a5 100644 --- a/vp9/encoder/vp9_encodemv.c +++ b/vp9/encoder/vp9_encodemv.c @@ -8,13 +8,13 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include <math.h> #include "vp9/common/vp9_common.h" -#include "vp9/encoder/vp9_encodemv.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_systemdependent.h" +#include "vp9/encoder/vp9_encodemv.h" -#include <math.h> #ifdef ENTROPY_STATS extern unsigned int active_section; diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index eaa3bd183..471931349 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -569,7 +569,7 @@ void vp9_first_pass(VP9_COMP *cpi) { mb_row << 1, 1 << mi_height_log2(xd->this_mi->mbmi.sb_type), mb_col << 1, - 1 << mi_height_log2(xd->this_mi->mbmi.sb_type)); + 1 << mi_width_log2(xd->this_mi->mbmi.sb_type)); // do intra 16x16 prediction this_error = vp9_encode_intra(x, use_dc_pred); diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index 0a6576eb5..f83fcc531 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -103,7 +103,8 @@ static int do_16x16_motion_search(VP9_COMP *cpi, int_mv *ref_mv, int_mv *dst_mv, dst_mv->as_int = tmp_mv.as_int; } - // If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well + // If the current best reference mv is not centered on 0,0 then do a 0,0 + // based search as well. if (ref_mv->as_int) { unsigned int tmp_err; int_mv zero_ref_mv, tmp_mv; @@ -217,7 +218,8 @@ static void update_mbgraph_mb_stats stats->ref[GOLDEN_FRAME].m.mv.as_int = 0; } - // Alt-ref frame MV search, if it exists and is different than last/golden frame + // Do an Alt-ref frame MV search, if it exists and is different than + // last/golden frame. if (alt_ref) { int a_motion_error; xd->plane[0].pre[0].buf = alt_ref->y_buffer + mb_y_offset; @@ -246,7 +248,8 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, int_mv arf_top_mv, gld_top_mv; MODE_INFO mi_local = { { 0 } }; - // Set up limit values for motion vectors to prevent them extending outside the UMV borders + // Set up limit values for motion vectors to prevent them extending outside + // the UMV borders. arf_top_mv.as_int = 0; gld_top_mv.as_int = 0; x->mv_row_min = -BORDER_MV_PIXELS_B16; @@ -266,7 +269,8 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, int arf_y_in_offset = arf_y_offset; int gld_y_in_offset = gld_y_offset; - // Set up limit values for motion vectors to prevent them extending outside the UMV borders + // Set up limit values for motion vectors to prevent them extending outside + // the UMV borders. arf_left_mv.as_int = arf_top_mv.as_int; gld_left_mv.as_int = gld_top_mv.as_int; x->mv_col_min = -BORDER_MV_PIXELS_B16; @@ -407,7 +411,8 @@ void vp9_update_mbgraph_stats(VP9_COMP *cpi) { for (i = 0; i < n_frames; i++) { MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i]; vpx_memset(frame_stats->mb_stats, 0, - cm->mb_rows * cm->mb_cols * sizeof(*cpi->mbgraph_stats[i].mb_stats)); + cm->mb_rows * cm->mb_cols * + sizeof(*cpi->mbgraph_stats[i].mb_stats)); } // do motion search to find contribution of each reference to data diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 03a90ffae..e7384bab5 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -62,6 +62,12 @@ static void set_default_lf_deltas(struct loopfilter *lf); now so that HIGH_PRECISION is always chosen */ +// Masks for partially or completely disabling split mode +#define DISABLE_ALL_SPLIT 0x3F +#define DISABLE_ALL_INTER_SPLIT 0x1F +#define DISABLE_COMPOUND_SPLIT 0x18 +#define LAST_AND_INTRA_SPLIT_ONLY 0x1E + #if CONFIG_INTERNAL_STATS #include "math.h" @@ -681,6 +687,12 @@ static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi, int mode) { sf->thresh_mult_sub8x8[THR_COMP_LA] += 4500; sf->thresh_mult_sub8x8[THR_COMP_GA] += 4500; + // Check for masked out split cases. + for (i = 0; i < MAX_REFS; i++) { + if (sf->disable_split_mask & (1 << i)) + sf->thresh_mult_sub8x8[i] = INT_MAX; + } + // disable mode test if frame flag is not set if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) sf->thresh_mult_sub8x8[THR_LAST] = INT_MAX; @@ -694,14 +706,6 @@ static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi, int mode) { if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != (VP9_GOLD_FLAG | VP9_ALT_FLAG)) sf->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; - - if (sf->disable_splitmv == 1) { - sf->thresh_mult_sub8x8[THR_LAST] = INT_MAX; - sf->thresh_mult_sub8x8[THR_GOLD] = INT_MAX; - sf->thresh_mult_sub8x8[THR_ALTR] = INT_MAX; - sf->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX; - sf->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; - } } void vp9_set_speed_features(VP9_COMP *cpi) { @@ -744,7 +748,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->min_partition_size = BLOCK_4X4; sf->adjust_partitioning_from_last_frame = 0; sf->last_partitioning_redo_frequency = 4; - sf->disable_splitmv = 0; + sf->disable_split_mask = 0; sf->mode_search_skip_flags = 0; sf->disable_split_var_thresh = 0; sf->disable_filter_search_var_thresh = 0; @@ -785,8 +789,11 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME || cpi->common.intra_only) ? USE_FULL_RD : USE_LARGESTALL); - sf->disable_splitmv = - (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0; + + if (MIN(cpi->common.width, cpi->common.height) >= 720) + sf->disable_split_mask = DISABLE_ALL_SPLIT; + else + sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; @@ -799,8 +806,11 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME || cpi->common.intra_only) ? USE_FULL_RD : USE_LARGESTALL); - sf->disable_splitmv = - (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0; + + if (MIN(cpi->common.width, cpi->common.height) >= 720) + sf->disable_split_mask = DISABLE_ALL_SPLIT; + else + sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | @@ -825,8 +835,11 @@ void vp9_set_speed_features(VP9_COMP *cpi) { if (speed == 3) { sf->use_square_partition_only = 1; sf->tx_size_search_method = USE_LARGESTALL; - sf->disable_splitmv = - (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0; + + if (MIN(cpi->common.width, cpi->common.height) >= 720) + sf->disable_split_mask = DISABLE_ALL_SPLIT; + else + sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | @@ -855,42 +868,43 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->mode_skip_start = 6; } if (speed == 4) { - sf->less_rectangular_check = 1; sf->use_square_partition_only = 1; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME || - cpi->common.intra_only || - cpi->common.show_frame == 0) ? - USE_FULL_RD : - USE_LARGESTALL); + sf->tx_size_search_method = USE_LARGESTALL; + sf->disable_split_mask = DISABLE_ALL_SPLIT; + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_COMP_REFMISMATCH | FLAG_SKIP_INTRA_LOWVAR | FLAG_EARLY_TERMINATE; - sf->intra_y_mode_mask = INTRA_DC_ONLY; - sf->intra_uv_mode_mask = INTRA_DC_ONLY; - sf->use_uv_intra_rd_estimate = 1; + sf->use_rd_breakout = 1; - sf->skip_encode_sb = 1; - sf->use_lp32x32fdct = 1; sf->adaptive_motion_search = 1; - sf->using_small_partition_info = 0; - sf->disable_splitmv = 1; sf->auto_mv_step_size = 1; - sf->search_method = BIGDIA; - sf->subpel_iters_per_step = 1; - sf->use_fast_lpf_pick = 1; + + sf->disable_filter_search_var_thresh = 16; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + sf->auto_min_max_partition_size = 1; - sf->disable_split_var_thresh = 64; - sf->disable_filter_search_var_thresh = 64; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; + sf->adjust_partitioning_from_last_frame = 1; + sf->last_partitioning_redo_frequency = 3; + + sf->use_uv_intra_rd_estimate = 1; + sf->skip_encode_sb = 1; + sf->use_lp32x32fdct = 1; + sf->subpel_iters_per_step = 1; sf->use_fast_coef_updates = 2; + sf->adaptive_rd_thresh = 4; sf->mode_skip_start = 6; + + /* sf->intra_y_mode_mask = INTRA_DC_ONLY; + sf->intra_uv_mode_mask = INTRA_DC_ONLY; + sf->search_method = BIGDIA; + sf->disable_split_var_thresh = 64; + sf->disable_filter_search_var_thresh = 64; */ } if (speed == 5) { sf->comp_inter_joint_search_thresh = BLOCK_SIZES; @@ -914,7 +928,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { // sf->reduce_first_step_size = 1; // sf->reference_masking = 1; - sf->disable_splitmv = 1; + sf->disable_split_mask = DISABLE_ALL_SPLIT; sf->search_method = HEX; sf->subpel_iters_per_step = 1; sf->disable_split_var_thresh = 64; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 7c73f65b4..2652929ce 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -271,7 +271,7 @@ typedef struct { BLOCK_SIZE max_partition_size; int adjust_partitioning_from_last_frame; int last_partitioning_redo_frequency; - int disable_splitmv; + int disable_split_mask; int using_small_partition_info; // TODO(jingning): combine the related motion search speed features int adaptive_motion_search; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 0e62d3118..26bbc825e 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -667,6 +667,7 @@ static void txfm_rd_in_plane(MACROBLOCK *x, const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); const int num_4x4_w = num_4x4_blocks_wide_lookup[bs]; const int num_4x4_h = num_4x4_blocks_high_lookup[bs]; + const uint8_t *band_translate; // just for the get_scan_and_band call struct rdcost_block_args args = { x, { 0 }, { 0 }, tx_size, num_4x4_w, num_4x4_h, @@ -678,26 +679,9 @@ static void txfm_rd_in_plane(MACROBLOCK *x, vp9_get_entropy_contexts(tx_size, args.t_above, args.t_left, pd->above_context, pd->left_context, num_4x4_w, num_4x4_h); - switch (tx_size) { - case TX_4X4: - get_scan_nb_4x4(get_tx_type_4x4(pd->plane_type, xd, 0), - &args.scan, &args.nb); - break; - case TX_8X8: - get_scan_nb_8x8(get_tx_type_8x8(pd->plane_type, xd), - &args.scan, &args.nb); - break; - case TX_16X16: - get_scan_nb_16x16(get_tx_type_16x16(pd->plane_type, xd), - &args.scan, &args.nb); - break; - case TX_32X32: - args.scan = vp9_default_scan_32x32; - args.nb = vp9_default_scan_32x32_neighbors; - break; - default: - assert(0); - } + + get_scan_and_band(xd, tx_size, pd->plane_type, 0, &args.scan, &args.nb, + &band_translate); foreach_transformed_block_in_plane(xd, bsize, plane, block_yrd_txfm, &args); if (args.skip) { @@ -3926,33 +3910,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, ref_frame = vp9_ref_order[mode_index].ref_frame; second_ref_frame = vp9_ref_order[mode_index].second_ref_frame; - // FIXME(jingning): this was temporarily disabled for sub8x8 blocks. - // Look at the reference frame of the best mode so far and set the - // skip mask to look at a subset of the remaining modes. - if (0 && mode_index > cpi->sf.mode_skip_start) { - if (mode_index == (cpi->sf.mode_skip_start + 1)) { - switch (vp9_ref_order[best_mode_index].ref_frame) { - case INTRA_FRAME: - cpi->mode_skip_mask = 0; - break; - case LAST_FRAME: - cpi->mode_skip_mask = LAST_FRAME_MODE_MASK; - break; - case GOLDEN_FRAME: - cpi->mode_skip_mask = GOLDEN_FRAME_MODE_MASK; - break; - case ALTREF_FRAME: - cpi->mode_skip_mask = ALT_REF_MODE_MASK; - break; - case NONE: - case MAX_REF_FRAMES: - assert(!"Invalid Reference frame"); - } - } - if (cpi->mode_skip_mask & ((int64_t)1 << mode_index)) - continue; - } - // Skip if the current reference frame has been masked off if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask && (cpi->ref_frame_mask & (1 << ref_frame))) @@ -4089,7 +4046,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < TX_MODES; ++i) tx_cache[i] = tx_cache[ONLY_4X4]; } else { - const int is_comp_pred = second_ref_frame > 0; int rate; int64_t distortion; int64_t this_rd_thresh; @@ -4098,23 +4054,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse; int tmp_best_skippable = 0; int switchable_filter_index; - int_mv *second_ref = is_comp_pred ? - &mbmi->ref_mvs[second_ref_frame][0] : NULL; + int_mv *second_ref = comp_pred ? + &mbmi->ref_mvs[second_ref_frame][0] : NULL; b_mode_info tmp_best_bmodes[16]; MB_MODE_INFO tmp_best_mbmode; PARTITION_INFO tmp_best_partition; BEST_SEG_INFO bsi[SWITCHABLE_FILTERS]; int pred_exists = 0; int uv_skippable; - if (is_comp_pred) { - if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) - if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME) - continue; - if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) - if (ref_frame != best_inter_ref_frame && - second_ref_frame != best_inter_ref_frame) - continue; - } this_rd_thresh = (ref_frame == LAST_FRAME) ? cpi->rd_thresh_sub8x8[bsize][THR_LAST] : @@ -4239,12 +4186,12 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, rate2 += get_switchable_rate(x); if (!mode_excluded) { - if (is_comp_pred) + if (comp_pred) mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY; else mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY; } - compmode_cost = vp9_cost_bit(comp_mode_p, is_comp_pred); + compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred); tmp_best_rdu = best_rd - MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2), diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk index 54c9706a5..3a27cdd04 100644 --- a/vp9/vp9dx.mk +++ b/vp9/vp9dx.mk @@ -32,8 +32,6 @@ VP9_DX_SRCS-yes += decoder/vp9_thread.c VP9_DX_SRCS-yes += decoder/vp9_thread.h VP9_DX_SRCS-yes += decoder/vp9_treereader.h VP9_DX_SRCS-yes += decoder/vp9_onyxd_if.c -VP9_DX_SRCS-yes += decoder/vp9_idct_blk.c -VP9_DX_SRCS-yes += decoder/vp9_idct_blk.h VP9_DX_SRCS-yes += decoder/vp9_dsubexp.c VP9_DX_SRCS-yes += decoder/vp9_dsubexp.h |