diff options
-rw-r--r-- | build/make/Android.mk | 21 | ||||
-rw-r--r-- | build/make/configure.sh | 55 | ||||
-rwxr-xr-x | configure | 3 | ||||
-rw-r--r-- | test/android/README | 4 | ||||
-rw-r--r-- | test/svc_datarate_test.cc | 2 | ||||
-rw-r--r-- | vp9/common/vp9_blockd.h | 22 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodeframe.c | 55 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 30 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodemb.c | 67 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodemb.h | 8 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 9 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 57 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 6 | ||||
-rw-r--r-- | vpx_dsp/vpx_dsp.mk | 2 | ||||
-rw-r--r-- | vpx_util/vpx_debug_util.c | 230 | ||||
-rw-r--r-- | vpx_util/vpx_debug_util.h | 29 | ||||
-rw-r--r-- | vpx_util/vpx_util.mk | 4 |
17 files changed, 445 insertions, 159 deletions
diff --git a/build/make/Android.mk b/build/make/Android.mk index a88f90056..4969aa91e 100644 --- a/build/make/Android.mk +++ b/build/make/Android.mk @@ -14,7 +14,7 @@ # Run the configure script from the jni directory. Base libvpx # encoder/decoder configuration will look similar to: # ./libvpx/configure --target=armv7-android-gcc --disable-examples \ -# --sdk-path=/opt/android-ndk-r6b/ +# --enable-external-build # # When targeting Android, realtime-only is enabled by default. This can # be overridden by adding the command line flag: @@ -41,25 +41,6 @@ # Running ndk-build will build libvpx and include it in your project. # -# Alternatively, building the examples and unit tests can be accomplished in the -# following way: -# -# Create a standalone toolchain from the NDK: -# https://developer.android.com/ndk/guides/standalone_toolchain.html -# -# For example - to test on arm64 devices with clang: -# $NDK/build/tools/make_standalone_toolchain.py \ -# --arch arm64 --install-dir=/tmp/my-android-toolchain -# export PATH=/tmp/my-android-toolchain/bin:$PATH -# CROSS=aarch64-linux-android- CC=clang CXX=clang++ /path/to/libvpx/configure \ -# --target=arm64-android-gcc -# -# Push the resulting binaries to a device and run them: -# adb push test_libvpx /data/tmp/test_libvpx -# adb shell /data/tmp/test_libvpx --gtest_filter=\*Sixtap\* -# -# Make sure to push the test data as well and set LIBVPX_TEST_DATA - CONFIG_DIR := $(LOCAL_PATH)/ LIBVPX_PATH := $(LOCAL_PATH)/libvpx ASM_CNV_PATH_LOCAL := $(TARGET_ARCH_ABI)/ads2gas diff --git a/build/make/configure.sh b/build/make/configure.sh index ce3ba5567..8f2928a9d 100644 --- a/build/make/configure.sh +++ b/build/make/configure.sh @@ -646,11 +646,7 @@ process_common_cmdline() { --libdir=*) libdir="${optval}" ;; - --sdk-path=*) - [ -d "${optval}" ] || die "Not a directory: ${optval}" - sdk_path="${optval}" - ;; - --libc|--as|--prefix|--libdir|--sdk-path) + --libc|--as|--prefix|--libdir) die "Option ${opt} requires argument" ;; --help|-h) @@ -1101,51 +1097,10 @@ EOF ;; android*) - if [ -n "${sdk_path}" ]; then - SDK_PATH=${sdk_path} - COMPILER_LOCATION=`find "${SDK_PATH}" \ - -name "arm-linux-androideabi-gcc*" -print -quit` - TOOLCHAIN_PATH=${COMPILER_LOCATION%/*}/arm-linux-androideabi- - CC=${TOOLCHAIN_PATH}gcc - CXX=${TOOLCHAIN_PATH}g++ - AR=${TOOLCHAIN_PATH}ar - LD=${TOOLCHAIN_PATH}gcc - AS=${TOOLCHAIN_PATH}as - STRIP=${TOOLCHAIN_PATH}strip - NM=${TOOLCHAIN_PATH}nm - - if [ -z "${alt_libc}" ]; then - alt_libc=`find "${SDK_PATH}" -name arch-arm -print | \ - awk '{n = split($0,a,"/"); \ - split(a[n-1],b,"-"); \ - print $0 " " b[2]}' | \ - sort -g -k 2 | \ - awk '{ print $1 }' | tail -1` - fi - - if [ -d "${alt_libc}" ]; then - add_cflags "--sysroot=${alt_libc}" - add_ldflags "--sysroot=${alt_libc}" - fi - - # linker flag that routes around a CPU bug in some - # Cortex-A8 implementations (NDK Dev Guide) - add_ldflags "-Wl,--fix-cortex-a8" - - enable_feature pic - soft_enable realtime_only - if [ ${tgt_isa} = "armv7" ]; then - soft_enable runtime_cpu_detect - fi - if enabled runtime_cpu_detect; then - add_cflags "-I${SDK_PATH}/sources/android/cpufeatures" - fi - else - echo "Assuming standalone build with NDK toolchain." - echo "See build/make/Android.mk for details." - check_add_ldflags -static - soft_enable unit_tests - fi + echo "Assuming standalone build with NDK toolchain." + echo "See build/make/Android.mk for details." + check_add_ldflags -static + soft_enable unit_tests ;; darwin*) @@ -31,7 +31,6 @@ Advanced options: --libc=PATH path to alternate libc --size-limit=WxH max size to allow in the decoder --as={yasm|nasm|auto} use specified assembler [auto, yasm preferred] - --sdk-path=PATH path to root of sdk (android builds only) ${toggle_codec_srcs} in/exclude codec library source code ${toggle_debug_libs} in/exclude debug version of libraries ${toggle_static_msvcrt} use static MSVCRT (VS builds only) @@ -329,6 +328,7 @@ CONFIG_LIST=" size_limit always_adjust_bpm bitstream_debug + mismatch_debug ${EXPERIMENT_LIST} " CMDLINE_SELECT=" @@ -391,6 +391,7 @@ CMDLINE_SELECT=" experimental always_adjust_bpm bitstream_debug + mismatch_debug " process_cmdline() { diff --git a/test/android/README b/test/android/README index 4a1adcf7f..ee21f9b65 100644 --- a/test/android/README +++ b/test/android/README @@ -3,12 +3,12 @@ Android.mk will build vpx unittests on android. ./libvpx/configure --target=armv7-android-gcc --enable-external-build \ --enable-postproc --disable-install-srcs --enable-multi-res-encoding \ --enable-temporal-denoising --disable-unit-tests --disable-install-docs \ - --disable-examples --disable-runtime-cpu-detect --sdk-path=$NDK + --disable-examples --disable-runtime-cpu-detect 2) From the parent directory, invoke ndk-build: NDK_PROJECT_PATH=. ndk-build APP_BUILD_SCRIPT=./libvpx/test/android/Android.mk \ APP_ABI=armeabi-v7a APP_PLATFORM=android-18 APP_OPTIM=release \ - APP_STL=gnustl_static + APP_STL=c++_static Note: Both adb and ndk-build are available prebuilt at: https://chromium.googlesource.com/android_tools diff --git a/test/svc_datarate_test.cc b/test/svc_datarate_test.cc index 6936b5649..bb9b0755f 100644 --- a/test/svc_datarate_test.cc +++ b/test/svc_datarate_test.cc @@ -937,7 +937,7 @@ TEST_P(DatarateOnePassCbrSvcFrameDropMultiBR, OnePassCbrSvc2SL3TL4Threads) { layer_framedrop_ = GET_PARAM(2); AssignLayerBitrates(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.64, + CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.65, 1.45); #if CONFIG_VP9_DECODER // The non-reference frames are expected to be mismatched frames as the diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 504342fdf..2ddc0f121 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -286,6 +286,28 @@ void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, int aoff, int loff); +#if CONFIG_MISMATCH_DEBUG +#define TX_UNIT_SIZE_LOG2 2 +static INLINE void mi_to_pixel_loc(int *pixel_c, int *pixel_r, int mi_col, + int mi_row, int tx_blk_col, int tx_blk_row, + int subsampling_x, int subsampling_y) { + *pixel_c = ((mi_col << MI_SIZE_LOG2) >> subsampling_x) + + (tx_blk_col << TX_UNIT_SIZE_LOG2); + *pixel_r = ((mi_row << MI_SIZE_LOG2) >> subsampling_y) + + (tx_blk_row << TX_UNIT_SIZE_LOG2); +} + +static INLINE int get_block_width(BLOCK_SIZE bsize) { + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; + return 4 * num_4x4_w; +} + +static INLINE int get_block_height(BLOCK_SIZE bsize) { + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; + return 4 * num_4x4_h; +} +#endif + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 22ae0fac9..7d66cb2b2 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -23,9 +23,9 @@ #include "vpx_ports/mem_ops.h" #include "vpx_scale/vpx_scale.h" #include "vpx_util/vpx_thread.h" -#if CONFIG_BITSTREAM_DEBUG +#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG #include "vpx_util/vpx_debug_util.h" -#endif // CONFIG_BITSTREAM_DEBUG +#endif // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_common.h" @@ -389,19 +389,32 @@ static void predict_and_reconstruct_intra_block_row_mt(TileWorkerData *twd, } static int reconstruct_inter_block(TileWorkerData *twd, MODE_INFO *const mi, - int plane, int row, int col, - TX_SIZE tx_size) { + int plane, int row, int col, TX_SIZE tx_size, + int mi_row, int mi_col) { MACROBLOCKD *const xd = &twd->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; const scan_order *sc = &vp9_default_scan_orders[tx_size]; const int eob = vp9_decode_block_tokens(twd, plane, sc, col, row, tx_size, mi->segment_id); + uint8_t *dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; if (eob > 0) { - inverse_transform_block_inter( - xd, plane, tx_size, &pd->dst.buf[4 * row * pd->dst.stride + 4 * col], - pd->dst.stride, eob); + inverse_transform_block_inter(xd, plane, tx_size, dst, pd->dst.stride, eob); } +#if CONFIG_MISMATCH_DEBUG + { + int pixel_c, pixel_r; + int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2); + int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2); + mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row, + pd->subsampling_x, pd->subsampling_y); + mismatch_check_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r, blk_w, + blk_h, xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); + } +#else + (void)mi_row; + (void)mi_col; +#endif return eob; } @@ -952,6 +965,24 @@ static void decode_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, } else { // Prediction dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col); +#if CONFIG_MISMATCH_DEBUG + { + int plane; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *pd = &xd->plane[plane]; + int pixel_c, pixel_r; + const BLOCK_SIZE plane_bsize = + get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), &xd->plane[plane]); + const int bw = get_block_width(plane_bsize); + const int bh = get_block_height(plane_bsize); + mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0, + pd->subsampling_x, pd->subsampling_y); + mismatch_check_block_pre(pd->dst.buf, pd->dst.stride, plane, pixel_c, + pixel_r, bw, bh, + xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); + } + } +#endif // Reconstruction if (!mi->skip) { @@ -980,8 +1011,8 @@ static void decode_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, for (row = 0; row < max_blocks_high; row += step) for (col = 0; col < max_blocks_wide; col += step) - eobtotal += - reconstruct_inter_block(twd, mi, plane, row, col, tx_size); + eobtotal += reconstruct_inter_block(twd, mi, plane, row, col, + tx_size, mi_row, mi_col); } if (!less8x8 && eobtotal == 0) mi->skip = 1; // skip loopfilter @@ -2923,10 +2954,12 @@ void vp9_decode_frame(VP9Decoder *pbi, const uint8_t *data, const int tile_rows = 1 << cm->log2_tile_rows; const int tile_cols = 1 << cm->log2_tile_cols; YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); -#if CONFIG_BITSTREAM_DEBUG +#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG bitstream_queue_set_frame_read(cm->current_video_frame * 2 + cm->show_frame); #endif - +#if CONFIG_MISMATCH_DEBUG + mismatch_move_frame_idx_r(); +#endif xd->cur_buf = new_fb; if (!first_partition_size) { diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 28a900514..e510ee1fd 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -22,6 +22,10 @@ #include "vpx_ports/vpx_timer.h" #include "vpx_ports/system_state.h" +#if CONFIG_MISMATCH_DEBUG +#include "vpx_util/vpx_debug_util.h" +#endif // CONFIG_MISMATCH_DEBUG + #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymode.h" @@ -6105,6 +6109,10 @@ void vp9_encode_frame(VP9_COMP *cpi) { restore_encode_params(cpi); #endif +#if CONFIG_MISMATCH_DEBUG + mismatch_reset_frame(MAX_MB_PLANE); +#endif + // In the longer term the encoder should be generalized to match the // decoder such that we allow compound where one of the 3 buffers has a // different sign bias and that buffer is then the fixed ref. However, this @@ -6348,7 +6356,27 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, VPXMAX(bsize, BLOCK_8X8)); - vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8)); +#if CONFIG_MISMATCH_DEBUG + if (output_enabled) { + int plane; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *pd = &xd->plane[plane]; + int pixel_c, pixel_r; + const BLOCK_SIZE plane_bsize = + get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), &xd->plane[plane]); + const int bw = get_block_width(plane_bsize); + const int bh = get_block_height(plane_bsize); + mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0, + pd->subsampling_x, pd->subsampling_y); + + mismatch_record_block_pre(pd->dst.buf, pd->dst.stride, plane, pixel_c, + pixel_r, bw, bh, + xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); + } + } +#endif + + vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8), mi_row, mi_col, output_enabled); vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip, VPXMAX(bsize, BLOCK_8X8)); } diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 83cb37c2d..7630a8110 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -16,6 +16,10 @@ #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" +#if CONFIG_MISMATCH_DEBUG +#include "vpx_util/vpx_debug_util.h" +#endif + #include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" @@ -579,6 +583,11 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col, static void encode_block(int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct encode_b_args *const args = arg; +#if CONFIG_MISMATCH_DEBUG + int mi_row = args->mi_row; + int mi_col = args->mi_col; + int output_enabled = args->output_enabled; +#endif MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *const p = &x->plane[plane]; @@ -595,7 +604,11 @@ static void encode_block(int plane, int block, int row, int col, if (x->zcoeff_blk[tx_size][block] && plane == 0) { p->eobs[block] = 0; *a = *l = 0; +#if CONFIG_MISMATCH_DEBUG + goto encode_block_end; +#else return; +#endif } if (!x->skip_recode) { @@ -605,7 +618,11 @@ static void encode_block(int plane, int block, int row, int col, // skip forward transform p->eobs[block] = 0; *a = *l = 0; +#if CONFIG_MISMATCH_DEBUG + goto encode_block_end; +#else return; +#endif } else { vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size); } @@ -622,7 +639,11 @@ static void encode_block(int plane, int block, int row, int col, // skip forward transform p->eobs[block] = 0; *a = *l = 0; +#if CONFIG_MISMATCH_DEBUG + goto encode_block_end; +#else return; +#endif } } else { vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); @@ -639,7 +660,13 @@ static void encode_block(int plane, int block, int row, int col, if (p->eobs[block]) *(args->skip) = 0; - if (x->skip_encode || p->eobs[block] == 0) return; + if (x->skip_encode || p->eobs[block] == 0) { +#if CONFIG_MISMATCH_DEBUG + goto encode_block_end; +#else + return; +#endif + } #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst); @@ -665,7 +692,11 @@ static void encode_block(int plane, int block, int row, int col, xd->bd); break; } +#if CONFIG_MISMATCH_DEBUG + goto encode_block_end; +#else return; +#endif } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -687,6 +718,19 @@ static void encode_block(int plane, int block, int row, int col, x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); break; } +#if CONFIG_MISMATCH_DEBUG +encode_block_end: + if (output_enabled) { + int pixel_c, pixel_r; + int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2); + int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2); + mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row, + pd->subsampling_x, pd->subsampling_y); + mismatch_record_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r, + blk_w, blk_h, + xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); + } +#endif } static void encode_block_pass1(int plane, int block, int row, int col, @@ -720,12 +764,21 @@ void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) { encode_block_pass1, x); } -void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { +void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, + int output_enabled) { MACROBLOCKD *const xd = &x->e_mbd; struct optimize_ctx ctx; MODE_INFO *mi = xd->mi[0]; - struct encode_b_args arg = { x, 1, NULL, NULL, &mi->skip }; int plane; +#if CONFIG_MISMATCH_DEBUG + struct encode_b_args arg = { x, 1, NULL, NULL, + &mi->skip, mi_row, mi_col, output_enabled }; +#else + struct encode_b_args arg = { x, 1, NULL, NULL, &mi->skip }; + (void)mi_row; + (void)mi_col; + (void)output_enabled; +#endif mi->skip = 1; @@ -986,8 +1039,16 @@ void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane, int enable_optimize_b) { const MACROBLOCKD *const xd = &x->e_mbd; struct optimize_ctx ctx; +#if CONFIG_MISMATCH_DEBUG + // TODO(angiebird): make mismatch_debug support intra mode + struct encode_b_args arg = { + x, enable_optimize_b, ctx.ta[plane], ctx.tl[plane], &xd->mi[0]->skip, 0, 0, + 0 + }; +#else struct encode_b_args arg = { x, enable_optimize_b, ctx.ta[plane], ctx.tl[plane], &xd->mi[0]->skip }; +#endif if (enable_optimize_b && x->optimize && (!x->skip_recode || !x->skip_optimize)) { diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index fa41f70ef..1975ee73a 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -24,10 +24,16 @@ struct encode_b_args { ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; int8_t *skip; +#if CONFIG_MISMATCH_DEBUG + int mi_row; + int mi_col; + int output_enabled; +#endif }; int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, int ctx); -void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize); +void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, + int output_enabled); void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize); void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size); diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index a5b89ee96..fde91fadc 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -26,9 +26,9 @@ #include "vpx_ports/mem.h" #include "vpx_ports/system_state.h" #include "vpx_ports/vpx_timer.h" -#if CONFIG_BITSTREAM_DEBUG +#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG #include "vpx_util/vpx_debug_util.h" -#endif // CONFIG_BITSTREAM_DEBUG +#endif // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_filter.h" @@ -5255,6 +5255,9 @@ static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, unsigned int *frame_flags) { cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; +#if CONFIG_MISMATCH_DEBUG + mismatch_move_frame_idx_w(); +#endif encode_frame_to_data_rate(cpi, size, dest, frame_flags); vp9_twopass_postencode_update(cpi); @@ -7374,6 +7377,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, assert(cpi->oxcf.max_threads == 0 && "bitstream debug tool does not support multithreading"); bitstream_queue_record_write(); +#endif +#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG bitstream_queue_set_frame_write(cm->current_video_frame * 2 + cm->show_frame); #endif diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index b483489d3..a431e4ca6 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -376,50 +376,8 @@ static TX_SIZE calculate_tx_size(VP9_COMP *const cpi, BLOCK_SIZE bsize, tx_size = VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); } - return tx_size; -} -static void compute_intra_yprediction(PREDICTION_MODE mode, BLOCK_SIZE bsize, - MACROBLOCK *x, MACROBLOCKD *xd) { - struct macroblockd_plane *const pd = &xd->plane[0]; - struct macroblock_plane *const p = &x->plane[0]; - uint8_t *const src_buf_base = p->src.buf; - uint8_t *const dst_buf_base = pd->dst.buf; - const int src_stride = p->src.stride; - const int dst_stride = pd->dst.stride; - // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") - // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 - const TX_SIZE tx_size = max_txsize_lookup[bsize]; - const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; - const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; - int row, col; - // If mb_to_right_edge is < 0 we are in a situation in which - // the current block size extends into the UMV and we won't - // visit the sub blocks that are wholly within the UMV. - const int max_blocks_wide = - num_4x4_w + (xd->mb_to_right_edge >= 0 - ? 0 - : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); - const int max_blocks_high = - num_4x4_h + (xd->mb_to_bottom_edge >= 0 - ? 0 - : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); - - // Keep track of the row and column of the blocks we use so that we know - // if we are in the unrestricted motion border. - for (row = 0; row < max_blocks_high; row += (1 << tx_size)) { - // Skip visiting the sub blocks that are wholly within the UMV. - for (col = 0; col < max_blocks_wide; col += (1 << tx_size)) { - p->src.buf = &src_buf_base[4 * (row * src_stride + col)]; - pd->dst.buf = &dst_buf_base[4 * (row * dst_stride + col)]; - vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, - x->skip_encode ? p->src.buf : pd->dst.buf, - x->skip_encode ? src_stride : dst_stride, - pd->dst.buf, dst_stride, col, row, 0); - } - } - p->src.buf = src_buf_base; - pd->dst.buf = dst_buf_base; + return tx_size; } static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize, @@ -1065,6 +1023,7 @@ static void estimate_block_intra(int plane, int block, int row, int col, if (plane == 0) { int64_t this_sse = INT64_MAX; + // TODO(jingning): This needs further refactoring. block_yrd(cpi, x, &this_rdc, &args->skippable, &this_sse, bsize_tx, VPXMIN(tx_size, TX_16X16), 0, 1); } else { @@ -2513,12 +2472,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, bsize <= cpi->sf.max_intra_bsize && !x->skip_low_source_sad && !x->lowvar_highsumdiff)) { struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 }; - int64_t this_sse = INT64_MAX; int i; PRED_BUFFER *const best_pred = best_pickmode.best_pred; TX_SIZE intra_tx_size = VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); + if (cpi->oxcf.content != VP9E_CONTENT_SCREEN && intra_tx_size > TX_16X16) + intra_tx_size = TX_16X16; if (reuse_inter_pred && best_pred != NULL) { if (best_pred->data == orig_dst.buf) { @@ -2579,13 +2539,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, args.skippable = 1; args.rdc = &this_rdc; mi->tx_size = intra_tx_size; - - compute_intra_yprediction(this_mode, bsize, x, xd); - model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist, - &var_y, &sse_y, 1); - block_yrd(cpi, x, &this_rdc, &args.skippable, &this_sse, bsize, - VPXMIN(mi->tx_size, TX_16X16), 1, 1); - + vp9_foreach_transformed_block_in_plane(xd, bsize, 0, estimate_block_intra, + &args); // Check skip cost here since skippable is not set for for uv, this // mirrors the behavior used by inter if (args.skippable) { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index ec7d2fac4..6c7c4e0f8 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -751,8 +751,14 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, if (args->exit_early) return; if (!is_inter_block(mi)) { +#if CONFIG_MISMATCH_DEBUG + struct encode_b_args intra_arg = { + x, x->block_qcoeff_opt, args->t_above, args->t_left, &mi->skip, 0, 0, 0 + }; +#else struct encode_b_args intra_arg = { x, x->block_qcoeff_opt, args->t_above, args->t_left, &mi->skip }; +#endif vp9_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size, &intra_arg); if (recon) { diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk index 91ce96bb6..343250702 100644 --- a/vpx_dsp/vpx_dsp.mk +++ b/vpx_dsp/vpx_dsp.mk @@ -116,6 +116,7 @@ DSP_SRCS-$(HAVE_NEON) += arm/vpx_scaled_convolve8_neon.c ifeq ($(HAVE_NEON_ASM),yes) DSP_SRCS-yes += arm/vpx_convolve_copy_neon_asm$(ASM) +ifeq ($(CONFIG_VP9),yes) DSP_SRCS-yes += arm/vpx_convolve8_horiz_filter_type2_neon$(ASM) DSP_SRCS-yes += arm/vpx_convolve8_vert_filter_type2_neon$(ASM) DSP_SRCS-yes += arm/vpx_convolve8_horiz_filter_type1_neon$(ASM) @@ -128,6 +129,7 @@ DSP_SRCS-yes += arm/vpx_convolve_avg_neon_asm$(ASM) DSP_SRCS-yes += arm/vpx_convolve8_neon_asm.c DSP_SRCS-yes += arm/vpx_convolve8_neon_asm.h DSP_SRCS-yes += arm/vpx_convolve_neon.c +endif # CONFIG_VP9 else ifeq ($(HAVE_NEON),yes) DSP_SRCS-yes += arm/vpx_convolve_copy_neon.c diff --git a/vpx_util/vpx_debug_util.c b/vpx_util/vpx_debug_util.c index ea8646fba..3ce4065ba 100644 --- a/vpx_util/vpx_debug_util.c +++ b/vpx_util/vpx_debug_util.c @@ -13,16 +13,7 @@ #include <string.h> #include "vpx_util/vpx_debug_util.h" -#if CONFIG_BITSTREAM_DEBUG -#define QUEUE_MAX_SIZE 2000000 -static int result_queue[QUEUE_MAX_SIZE]; -static int prob_queue[QUEUE_MAX_SIZE]; - -static int queue_r = 0; -static int queue_w = 0; -static int queue_prev_w = -1; -static int skip_r = 0; -static int skip_w = 0; +#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG static int frame_idx_w = 0; static int frame_idx_r = 0; @@ -33,7 +24,18 @@ int bitstream_queue_get_frame_write(void) { return frame_idx_w; } void bitstream_queue_set_frame_read(int frame_idx) { frame_idx_r = frame_idx; } int bitstream_queue_get_frame_read(void) { return frame_idx_r; } +#endif + +#if CONFIG_BITSTREAM_DEBUG +#define QUEUE_MAX_SIZE 2000000 +static int result_queue[QUEUE_MAX_SIZE]; +static int prob_queue[QUEUE_MAX_SIZE]; +static int queue_r = 0; +static int queue_w = 0; +static int queue_prev_w = -1; +static int skip_r = 0; +static int skip_w = 0; void bitstream_queue_set_skip_write(int skip) { skip_w = skip; } void bitstream_queue_set_skip_read(int skip) { skip_r = skip; } @@ -70,3 +72,211 @@ void bitstream_queue_push(int result, const int prob) { } } #endif // CONFIG_BITSTREAM_DEBUG + +#if CONFIG_MISMATCH_DEBUG +static int frame_buf_idx_r = 0; +static int frame_buf_idx_w = 0; +#define MAX_FRAME_BUF_NUM 20 +#define MAX_FRAME_STRIDE 1920 +#define MAX_FRAME_HEIGHT 1080 +static uint16_t + frame_pre[MAX_FRAME_BUF_NUM][3] + [MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT]; // prediction only +static uint16_t + frame_tx[MAX_FRAME_BUF_NUM][3] + [MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT]; // prediction + txfm +static int frame_stride = MAX_FRAME_STRIDE; +static int frame_height = MAX_FRAME_HEIGHT; +static int frame_size = MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT; +void mismatch_move_frame_idx_w(void) { + frame_buf_idx_w = (frame_buf_idx_w + 1) % MAX_FRAME_BUF_NUM; + if (frame_buf_idx_w == frame_buf_idx_r) { + printf("frame_buf overflow\n"); + assert(0); + } +} + +void mismatch_reset_frame(int num_planes) { + int plane; + for (plane = 0; plane < num_planes; ++plane) { + memset(frame_pre[frame_buf_idx_w][plane], 0, + sizeof(frame_pre[frame_buf_idx_w][plane][0]) * frame_size); + memset(frame_tx[frame_buf_idx_w][plane], 0, + sizeof(frame_tx[frame_buf_idx_w][plane][0]) * frame_size); + } +} + +void mismatch_move_frame_idx_r(void) { + if (frame_buf_idx_w == frame_buf_idx_r) { + printf("frame_buf underflow\n"); + assert(0); + } + frame_buf_idx_r = (frame_buf_idx_r + 1) % MAX_FRAME_BUF_NUM; +} + +void mismatch_record_block_pre(const uint8_t *src, int src_stride, int plane, + int pixel_c, int pixel_r, int blk_w, int blk_h, + int highbd) { + const uint16_t *src16 = highbd ? CONVERT_TO_SHORTPTR(src) : NULL; + int r, c; + + if (pixel_c + blk_w >= frame_stride || pixel_r + blk_h >= frame_height) { + printf("frame_buf undersized\n"); + assert(0); + } + + for (r = 0; r < blk_h; ++r) { + for (c = 0; c < blk_w; ++c) { + frame_pre[frame_buf_idx_w][plane] + [(r + pixel_r) * frame_stride + c + pixel_c] = + src16 ? src16[r * src_stride + c] : src[r * src_stride + c]; + } + } +#if 0 + { + int ref_frame_idx = 3; + int ref_plane = 1; + int ref_pixel_c = 162; + int ref_pixel_r = 16; + if (frame_idx_w == ref_frame_idx && plane == ref_plane && + ref_pixel_c >= pixel_c && ref_pixel_c < pixel_c + blk_w && + ref_pixel_r >= pixel_r && ref_pixel_r < pixel_r + blk_h) { + printf( + "\nrecord_block_pre frame_idx %d plane %d pixel_c %d pixel_r %d blk_w" + " %d blk_h %d\n", + frame_idx_w, plane, pixel_c, pixel_r, blk_w, blk_h); + } + } +#endif +} +void mismatch_record_block_tx(const uint8_t *src, int src_stride, int plane, + int pixel_c, int pixel_r, int blk_w, int blk_h, + int highbd) { + const uint16_t *src16 = highbd ? CONVERT_TO_SHORTPTR(src) : NULL; + int r, c; + if (pixel_c + blk_w >= frame_stride || pixel_r + blk_h >= frame_height) { + printf("frame_buf undersized\n"); + assert(0); + } + + for (r = 0; r < blk_h; ++r) { + for (c = 0; c < blk_w; ++c) { + frame_tx[frame_buf_idx_w][plane] + [(r + pixel_r) * frame_stride + c + pixel_c] = + src16 ? src16[r * src_stride + c] : src[r * src_stride + c]; + } + } +#if 0 + { + int ref_frame_idx = 3; + int ref_plane = 1; + int ref_pixel_c = 162; + int ref_pixel_r = 16; + if (frame_idx_w == ref_frame_idx && plane == ref_plane && + ref_pixel_c >= pixel_c && ref_pixel_c < pixel_c + blk_w && + ref_pixel_r >= pixel_r && ref_pixel_r < pixel_r + blk_h) { + printf( + "\nrecord_block_tx frame_idx %d plane %d pixel_c %d pixel_r %d blk_w " + "%d blk_h %d\n", + frame_idx_w, plane, pixel_c, pixel_r, blk_w, blk_h); + } + } +#endif +} +void mismatch_check_block_pre(const uint8_t *src, int src_stride, int plane, + int pixel_c, int pixel_r, int blk_w, int blk_h, + int highbd) { + const uint16_t *src16 = highbd ? CONVERT_TO_SHORTPTR(src) : NULL; + int mismatch = 0; + int r, c; + if (pixel_c + blk_w >= frame_stride || pixel_r + blk_h >= frame_height) { + printf("frame_buf undersized\n"); + assert(0); + } + + for (r = 0; r < blk_h; ++r) { + for (c = 0; c < blk_w; ++c) { + if (frame_pre[frame_buf_idx_r][plane] + [(r + pixel_r) * frame_stride + c + pixel_c] != + (uint16_t)(src16 ? src16[r * src_stride + c] + : src[r * src_stride + c])) { + mismatch = 1; + } + } + } + if (mismatch) { + int rr, cc; + printf( + "\ncheck_block_pre failed frame_idx %d plane %d " + "pixel_c %d pixel_r " + "%d blk_w %d blk_h %d\n", + frame_idx_r, plane, pixel_c, pixel_r, blk_w, blk_h); + printf("enc\n"); + for (rr = 0; rr < blk_h; ++rr) { + for (cc = 0; cc < blk_w; ++cc) { + printf("%d ", frame_pre[frame_buf_idx_r][plane] + [(rr + pixel_r) * frame_stride + cc + pixel_c]); + } + printf("\n"); + } + + printf("dec\n"); + for (rr = 0; rr < blk_h; ++rr) { + for (cc = 0; cc < blk_w; ++cc) { + printf("%d ", + src16 ? src16[rr * src_stride + cc] : src[rr * src_stride + cc]); + } + printf("\n"); + } + assert(0); + } +} +void mismatch_check_block_tx(const uint8_t *src, int src_stride, int plane, + int pixel_c, int pixel_r, int blk_w, int blk_h, + int highbd) { + const uint16_t *src16 = highbd ? CONVERT_TO_SHORTPTR(src) : NULL; + int mismatch = 0; + int r, c; + if (pixel_c + blk_w >= frame_stride || pixel_r + blk_h >= frame_height) { + printf("frame_buf undersized\n"); + assert(0); + } + + for (r = 0; r < blk_h; ++r) { + for (c = 0; c < blk_w; ++c) { + if (frame_tx[frame_buf_idx_r][plane] + [(r + pixel_r) * frame_stride + c + pixel_c] != + (uint16_t)(src16 ? src16[r * src_stride + c] + : src[r * src_stride + c])) { + mismatch = 1; + } + } + } + if (mismatch) { + int rr, cc; + printf( + "\ncheck_block_tx failed frame_idx %d plane %d pixel_c " + "%d pixel_r " + "%d blk_w %d blk_h %d\n", + frame_idx_r, plane, pixel_c, pixel_r, blk_w, blk_h); + printf("enc\n"); + for (rr = 0; rr < blk_h; ++rr) { + for (cc = 0; cc < blk_w; ++cc) { + printf("%d ", frame_tx[frame_buf_idx_r][plane] + [(rr + pixel_r) * frame_stride + cc + pixel_c]); + } + printf("\n"); + } + + printf("dec\n"); + for (rr = 0; rr < blk_h; ++rr) { + for (cc = 0; cc < blk_w; ++cc) { + printf("%d ", + src16 ? src16[rr * src_stride + cc] : src[rr * src_stride + cc]); + } + printf("\n"); + } + assert(0); + } +} +#endif // CONFIG_MISMATCH_DEBUG diff --git a/vpx_util/vpx_debug_util.h b/vpx_util/vpx_debug_util.h index e628f4305..df1a1aab2 100644 --- a/vpx_util/vpx_debug_util.h +++ b/vpx_util/vpx_debug_util.h @@ -19,6 +19,13 @@ extern "C" { #endif +#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG +void bitstream_queue_set_frame_write(int frame_idx); +int bitstream_queue_get_frame_write(void); +void bitstream_queue_set_frame_read(int frame_idx); +int bitstream_queue_get_frame_read(void); +#endif + #if CONFIG_BITSTREAM_DEBUG /* This is a debug tool used to detect bitstream error. On encoder side, it * pushes each bit and probability into a queue before the bit is written into @@ -28,10 +35,6 @@ extern "C" { * an error. This tool can be used to pin down the bitstream error precisely. * By combining gdb's backtrace method, we can detect which module causes the * bitstream error. */ -void bitstream_queue_set_frame_write(int frame_idx); -int bitstream_queue_get_frame_write(void); -void bitstream_queue_set_frame_read(int frame_idx); -int bitstream_queue_get_frame_read(void); int bitstream_queue_get_write(void); int bitstream_queue_get_read(void); void bitstream_queue_record_write(void); @@ -42,6 +45,24 @@ void bitstream_queue_set_skip_write(int skip); void bitstream_queue_set_skip_read(int skip); #endif // CONFIG_BITSTREAM_DEBUG +#if CONFIG_MISMATCH_DEBUG +void mismatch_move_frame_idx_w(void); +void mismatch_move_frame_idx_r(void); +void mismatch_reset_frame(int num_planes); +void mismatch_record_block_pre(const uint8_t *src, int src_stride, int plane, + int pixel_c, int pixel_r, int blk_w, int blk_h, + int highbd); +void mismatch_record_block_tx(const uint8_t *src, int src_stride, int plane, + int pixel_c, int pixel_r, int blk_w, int blk_h, + int highbd); +void mismatch_check_block_pre(const uint8_t *src, int src_stride, int plane, + int pixel_c, int pixel_r, int blk_w, int blk_h, + int highbd); +void mismatch_check_block_tx(const uint8_t *src, int src_stride, int plane, + int pixel_c, int pixel_r, int blk_w, int blk_h, + int highbd); +#endif // CONFIG_MISMATCH_DEBUG + #ifdef __cplusplus } // extern "C" #endif diff --git a/vpx_util/vpx_util.mk b/vpx_util/vpx_util.mk index e83a4c4ec..c97f3f322 100644 --- a/vpx_util/vpx_util.mk +++ b/vpx_util/vpx_util.mk @@ -15,5 +15,5 @@ UTIL_SRCS-yes += vpx_thread.h UTIL_SRCS-yes += endian_inl.h UTIL_SRCS-yes += vpx_write_yuv_frame.h UTIL_SRCS-yes += vpx_write_yuv_frame.c -UTIL_SRCS-$(CONFIG_BITSTREAM_DEBUG) += vpx_debug_util.h -UTIL_SRCS-$(CONFIG_BITSTREAM_DEBUG) += vpx_debug_util.c +UTIL_SRCS-$(or $(CONFIG_BITSTREAM_DEBUG),$(CONFIG_MISMATCH_DEBUG)) += vpx_debug_util.h +UTIL_SRCS-$(or $(CONFIG_BITSTREAM_DEBUG),$(CONFIG_MISMATCH_DEBUG)) += vpx_debug_util.c |