17 files changed, 445 insertions, 159 deletions
diff --git a/build/make/Android.mk b/build/make/Android.mk
index a88f90056..4969aa91e 100644
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -14,7 +14,7 @@
 # Run the configure script from the jni directory.  Base libvpx
 # encoder/decoder configuration will look similar to:
 # ./libvpx/configure --target=armv7-android-gcc --disable-examples \
-#                    --sdk-path=/opt/android-ndk-r6b/
+#                    --enable-external-build
 #
 # When targeting Android, realtime-only is enabled by default.  This can
 # be overridden by adding the command line flag:
@@ -41,25 +41,6 @@
 # Running ndk-build will build libvpx and include it in your project.
 #
 
-# Alternatively, building the examples and unit tests can be accomplished in the
-# following way:
-#
-# Create a standalone toolchain from the NDK:
-# https://developer.android.com/ndk/guides/standalone_toolchain.html
-#
-# For example - to test on arm64 devices with clang:
-# $NDK/build/tools/make_standalone_toolchain.py \
-#   --arch arm64 --install-dir=/tmp/my-android-toolchain
-# export PATH=/tmp/my-android-toolchain/bin:$PATH
-# CROSS=aarch64-linux-android- CC=clang CXX=clang++ /path/to/libvpx/configure \
-#   --target=arm64-android-gcc
-#
-# Push the resulting binaries to a device and run them:
-# adb push test_libvpx /data/tmp/test_libvpx
-# adb shell /data/tmp/test_libvpx --gtest_filter=\*Sixtap\*
-#
-# Make sure to push the test data as well and set LIBVPX_TEST_DATA
-
 CONFIG_DIR := $(LOCAL_PATH)/
 LIBVPX_PATH := $(LOCAL_PATH)/libvpx
 ASM_CNV_PATH_LOCAL := $(TARGET_ARCH_ABI)/ads2gas
diff --git a/build/make/configure.sh b/build/make/configure.sh
index ce3ba5567..8f2928a9d 100644
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -646,11 +646,7 @@ process_common_cmdline() {
       --libdir=*)
         libdir="${optval}"
         ;;
-      --sdk-path=*)
-        [ -d "${optval}" ] || die "Not a directory: ${optval}"
-        sdk_path="${optval}"
-        ;;
-      --libc|--as|--prefix|--libdir|--sdk-path)
+      --libc|--as|--prefix|--libdir)
         die "Option ${opt} requires argument"
         ;;
       --help|-h)
@@ -1101,51 +1097,10 @@ EOF
           ;;
 
         android*)
-          if [ -n "${sdk_path}" ]; then
-            SDK_PATH=${sdk_path}
-            COMPILER_LOCATION=`find "${SDK_PATH}" \
-              -name "arm-linux-androideabi-gcc*" -print -quit`
-            TOOLCHAIN_PATH=${COMPILER_LOCATION%/*}/arm-linux-androideabi-
-            CC=${TOOLCHAIN_PATH}gcc
-            CXX=${TOOLCHAIN_PATH}g++
-            AR=${TOOLCHAIN_PATH}ar
-            LD=${TOOLCHAIN_PATH}gcc
-            AS=${TOOLCHAIN_PATH}as
-            STRIP=${TOOLCHAIN_PATH}strip
-            NM=${TOOLCHAIN_PATH}nm
-
-            if [ -z "${alt_libc}" ]; then
-              alt_libc=`find "${SDK_PATH}" -name arch-arm -print | \
-                awk '{n = split($0,a,"/"); \
-                split(a[n-1],b,"-"); \
-                print $0 " " b[2]}' | \
-                sort -g -k 2 | \
-                awk '{ print $1 }' | tail -1`
-            fi
-
-            if [ -d "${alt_libc}" ]; then
-              add_cflags "--sysroot=${alt_libc}"
-              add_ldflags "--sysroot=${alt_libc}"
-            fi
-
-            # linker flag that routes around a CPU bug in some
-            # Cortex-A8 implementations (NDK Dev Guide)
-            add_ldflags "-Wl,--fix-cortex-a8"
-
-            enable_feature pic
-            soft_enable realtime_only
-            if [ ${tgt_isa} = "armv7" ]; then
-              soft_enable runtime_cpu_detect
-            fi
-            if enabled runtime_cpu_detect; then
-              add_cflags "-I${SDK_PATH}/sources/android/cpufeatures"
-            fi
-          else
-            echo "Assuming standalone build with NDK toolchain."
-            echo "See build/make/Android.mk for details."
-            check_add_ldflags -static
-            soft_enable unit_tests
-          fi
+          echo "Assuming standalone build with NDK toolchain."
+          echo "See build/make/Android.mk for details."
+          check_add_ldflags -static
+          soft_enable unit_tests
           ;;
 
         darwin*)
diff --git a/configure b/configure
index f1f5995be..5e7c50ad3 100755
--- a/configure
+++ b/configure
@@ -31,7 +31,6 @@ Advanced options:
   --libc=PATH                     path to alternate libc
   --size-limit=WxH                max size to allow in the decoder
   --as={yasm|nasm|auto}           use specified assembler [auto, yasm preferred]
-  --sdk-path=PATH                 path to root of sdk (android builds only)
   ${toggle_codec_srcs}            in/exclude codec library source code
   ${toggle_debug_libs}            in/exclude debug version of libraries
   ${toggle_static_msvcrt}         use static MSVCRT (VS builds only)
@@ -329,6 +328,7 @@ CONFIG_LIST="
     size_limit
     always_adjust_bpm
     bitstream_debug
+    mismatch_debug
     ${EXPERIMENT_LIST}
 "
 CMDLINE_SELECT="
@@ -391,6 +391,7 @@ CMDLINE_SELECT="
     experimental
     always_adjust_bpm
     bitstream_debug
+    mismatch_debug
 "
 
 process_cmdline() {
diff --git a/test/android/README b/test/android/README
index 4a1adcf7f..ee21f9b65 100644
--- a/test/android/README
+++ b/test/android/README
@@ -3,12 +3,12 @@ Android.mk will build vpx unittests on android.
 ./libvpx/configure --target=armv7-android-gcc --enable-external-build \
   --enable-postproc --disable-install-srcs --enable-multi-res-encoding \
   --enable-temporal-denoising --disable-unit-tests --disable-install-docs \
-  --disable-examples --disable-runtime-cpu-detect --sdk-path=$NDK
+  --disable-examples --disable-runtime-cpu-detect
 
 2) From the parent directory, invoke ndk-build:
 NDK_PROJECT_PATH=. ndk-build APP_BUILD_SCRIPT=./libvpx/test/android/Android.mk \
   APP_ABI=armeabi-v7a APP_PLATFORM=android-18 APP_OPTIM=release \
-  APP_STL=gnustl_static
+  APP_STL=c++_static
 
 Note: Both adb and ndk-build are available prebuilt at:
   https://chromium.googlesource.com/android_tools
diff --git a/test/svc_datarate_test.cc b/test/svc_datarate_test.cc
index 6936b5649..bb9b0755f 100644
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -937,7 +937,7 @@ TEST_P(DatarateOnePassCbrSvcFrameDropMultiBR, OnePassCbrSvc2SL3TL4Threads) {
   layer_framedrop_ = GET_PARAM(2);
   AssignLayerBitrates();
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.64,
+  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.65,
                           1.45);
 #if CONFIG_VP9_DECODER
   // The non-reference frames are expected to be mismatched frames as the
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 504342fdf..2ddc0f121 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -286,6 +286,28 @@ void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
                       BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob,
                       int aoff, int loff);
 
+#if CONFIG_MISMATCH_DEBUG
+#define TX_UNIT_SIZE_LOG2 2
+static INLINE void mi_to_pixel_loc(int *pixel_c, int *pixel_r, int mi_col,
+                                   int mi_row, int tx_blk_col, int tx_blk_row,
+                                   int subsampling_x, int subsampling_y) {
+  const int tx_off_c = tx_blk_col << TX_UNIT_SIZE_LOG2;  // tx offset, pixels
+  const int tx_off_r = tx_blk_row << TX_UNIT_SIZE_LOG2;
+  *pixel_c = ((mi_col << MI_SIZE_LOG2) >> subsampling_x) + tx_off_c;
+  *pixel_r = ((mi_row << MI_SIZE_LOG2) >> subsampling_y) + tx_off_r;
+}
+
+// Pixel width of |bsize|: 4 pixels per 4x4 block counted by the lookup.
+static INLINE int get_block_width(BLOCK_SIZE bsize) {
+  return 4 * num_4x4_blocks_wide_lookup[bsize];
+}
+
+// Pixel height of |bsize|: 4 pixels per 4x4 block counted by the lookup.
+static INLINE int get_block_height(BLOCK_SIZE bsize) {
+  return 4 * num_4x4_blocks_high_lookup[bsize];
+}
+#endif
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 22ae0fac9..7d66cb2b2 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -23,9 +23,9 @@
 #include "vpx_ports/mem_ops.h"
 #include "vpx_scale/vpx_scale.h"
 #include "vpx_util/vpx_thread.h"
-#if CONFIG_BITSTREAM_DEBUG
+#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
 #include "vpx_util/vpx_debug_util.h"
-#endif  // CONFIG_BITSTREAM_DEBUG
+#endif  // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
 
 #include "vp9/common/vp9_alloccommon.h"
 #include "vp9/common/vp9_common.h"
@@ -389,19 +389,32 @@ static void predict_and_reconstruct_intra_block_row_mt(TileWorkerData *twd,
 }
 
 static int reconstruct_inter_block(TileWorkerData *twd, MODE_INFO *const mi,
-                                   int plane, int row, int col,
-                                   TX_SIZE tx_size) {
+                                   int plane, int row, int col, TX_SIZE tx_size,
+                                   int mi_row, int mi_col) {
   MACROBLOCKD *const xd = &twd->xd;
   struct macroblockd_plane *const pd = &xd->plane[plane];
   const scan_order *sc = &vp9_default_scan_orders[tx_size];
   const int eob = vp9_decode_block_tokens(twd, plane, sc, col, row, tx_size,
                                           mi->segment_id);
+  uint8_t *dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
 
   if (eob > 0) {
-    inverse_transform_block_inter(
-        xd, plane, tx_size, &pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
-        pd->dst.stride, eob);
+    inverse_transform_block_inter(xd, plane, tx_size, dst, pd->dst.stride, eob);
   }
+#if CONFIG_MISMATCH_DEBUG
+  {
+    int pixel_c, pixel_r;
+    int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
+    int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
+    mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row,
+                    pd->subsampling_x, pd->subsampling_y);
+    mismatch_check_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r, blk_w,
+                            blk_h, xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
+  }
+#else
+  (void)mi_row;
+  (void)mi_col;
+#endif
   return eob;
 }
 
@@ -952,6 +965,24 @@ static void decode_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row,
   } else {
     // Prediction
     dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col);
+#if CONFIG_MISMATCH_DEBUG
+    {
+      int plane;
+      for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+        const struct macroblockd_plane *pd = &xd->plane[plane];
+        int pixel_c, pixel_r;
+        const BLOCK_SIZE plane_bsize =
+            get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), &xd->plane[plane]);
+        const int bw = get_block_width(plane_bsize);
+        const int bh = get_block_height(plane_bsize);
+        mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0,
+                        pd->subsampling_x, pd->subsampling_y);
+        mismatch_check_block_pre(pd->dst.buf, pd->dst.stride, plane, pixel_c,
+                                 pixel_r, bw, bh,
+                                 xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
+      }
+    }
+#endif
 
     // Reconstruction
     if (!mi->skip) {
@@ -980,8 +1011,8 @@ static void decode_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row,
 
         for (row = 0; row < max_blocks_high; row += step)
           for (col = 0; col < max_blocks_wide; col += step)
-            eobtotal +=
-                reconstruct_inter_block(twd, mi, plane, row, col, tx_size);
+            eobtotal += reconstruct_inter_block(twd, mi, plane, row, col,
+                                                tx_size, mi_row, mi_col);
       }
 
       if (!less8x8 && eobtotal == 0) mi->skip = 1;  // skip loopfilter
@@ -2923,10 +2954,12 @@ void vp9_decode_frame(VP9Decoder *pbi, const uint8_t *data,
   const int tile_rows = 1 << cm->log2_tile_rows;
   const int tile_cols = 1 << cm->log2_tile_cols;
   YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
-#if CONFIG_BITSTREAM_DEBUG
+#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
   bitstream_queue_set_frame_read(cm->current_video_frame * 2 + cm->show_frame);
 #endif
-
+#if CONFIG_MISMATCH_DEBUG
+  mismatch_move_frame_idx_r();
+#endif
   xd->cur_buf = new_fb;
 
   if (!first_partition_size) {
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 28a900514..e510ee1fd 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -22,6 +22,10 @@
 #include "vpx_ports/vpx_timer.h"
 #include "vpx_ports/system_state.h"
 
+#if CONFIG_MISMATCH_DEBUG
+#include "vpx_util/vpx_debug_util.h"
+#endif  // CONFIG_MISMATCH_DEBUG
+
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vp9/common/vp9_entropymode.h"
@@ -6105,6 +6109,10 @@ void vp9_encode_frame(VP9_COMP *cpi) {
   restore_encode_params(cpi);
 #endif
 
+#if CONFIG_MISMATCH_DEBUG
+  mismatch_reset_frame(MAX_MB_PLANE);
+#endif
+
   // In the longer term the encoder should be generalized to match the
   // decoder such that we allow compound where one of the 3 buffers has a
   // different sign bias and that buffer is then the fixed ref. However, this
@@ -6348,7 +6356,27 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
     vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col,
                                     VPXMAX(bsize, BLOCK_8X8));
 
-    vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8));
+#if CONFIG_MISMATCH_DEBUG
+    if (output_enabled) {
+      int plane;
+      for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+        const struct macroblockd_plane *pd = &xd->plane[plane];
+        int pixel_c, pixel_r;
+        const BLOCK_SIZE plane_bsize =
+            get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), &xd->plane[plane]);
+        const int bw = get_block_width(plane_bsize);
+        const int bh = get_block_height(plane_bsize);
+        mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0,
+                        pd->subsampling_x, pd->subsampling_y);
+
+        mismatch_record_block_pre(pd->dst.buf, pd->dst.stride, plane, pixel_c,
+                                  pixel_r, bw, bh,
+                                  xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
+      }
+    }
+#endif
+
+    vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8), mi_row, mi_col, output_enabled);
     vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip,
                     VPXMAX(bsize, BLOCK_8X8));
   }
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 83cb37c2d..7630a8110 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -16,6 +16,10 @@
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
 
+#if CONFIG_MISMATCH_DEBUG
+#include "vpx_util/vpx_debug_util.h"
+#endif
+
 #include "vp9/common/vp9_idct.h"
 #include "vp9/common/vp9_reconinter.h"
 #include "vp9/common/vp9_reconintra.h"
@@ -579,6 +583,11 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
 static void encode_block(int plane, int block, int row, int col,
                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
   struct encode_b_args *const args = arg;
+#if CONFIG_MISMATCH_DEBUG
+  int mi_row = args->mi_row;
+  int mi_col = args->mi_col;
+  int output_enabled = args->output_enabled;
+#endif
   MACROBLOCK *const x = args->x;
   MACROBLOCKD *const xd = &x->e_mbd;
   struct macroblock_plane *const p = &x->plane[plane];
@@ -595,7 +604,11 @@ static void encode_block(int plane, int block, int row, int col,
   if (x->zcoeff_blk[tx_size][block] && plane == 0) {
     p->eobs[block] = 0;
     *a = *l = 0;
+#if CONFIG_MISMATCH_DEBUG
+    goto encode_block_end;
+#else
     return;
+#endif
   }
 
   if (!x->skip_recode) {
@@ -605,7 +618,11 @@ static void encode_block(int plane, int block, int row, int col,
         // skip forward transform
         p->eobs[block] = 0;
         *a = *l = 0;
+#if CONFIG_MISMATCH_DEBUG
+        goto encode_block_end;
+#else
         return;
+#endif
       } else {
         vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size);
       }
@@ -622,7 +639,11 @@ static void encode_block(int plane, int block, int row, int col,
           // skip forward transform
           p->eobs[block] = 0;
           *a = *l = 0;
+#if CONFIG_MISMATCH_DEBUG
+          goto encode_block_end;
+#else
           return;
+#endif
         }
       } else {
         vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
@@ -639,7 +660,13 @@ static void encode_block(int plane, int block, int row, int col,
 
   if (p->eobs[block]) *(args->skip) = 0;
 
-  if (x->skip_encode || p->eobs[block] == 0) return;
+  if (x->skip_encode || p->eobs[block] == 0) {
+#if CONFIG_MISMATCH_DEBUG
+    goto encode_block_end;
+#else
+    return;
+#endif
+  }
 #if CONFIG_VP9_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
@@ -665,7 +692,11 @@ static void encode_block(int plane, int block, int row, int col,
                                xd->bd);
         break;
     }
+#if CONFIG_MISMATCH_DEBUG
+    goto encode_block_end;
+#else
     return;
+#endif
   }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
@@ -687,6 +718,19 @@ static void encode_block(int plane, int block, int row, int col,
       x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
       break;
   }
+#if CONFIG_MISMATCH_DEBUG
+encode_block_end:
+  if (output_enabled) {
+    int pixel_c, pixel_r;
+    int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
+    int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
+    mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row,
+                    pd->subsampling_x, pd->subsampling_y);
+    mismatch_record_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r,
+                             blk_w, blk_h,
+                             xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
+  }
+#endif
 }
 
 static void encode_block_pass1(int plane, int block, int row, int col,
@@ -720,12 +764,21 @@ void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
                                          encode_block_pass1, x);
 }
 
-void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
+void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col,
+                   int output_enabled) {
   MACROBLOCKD *const xd = &x->e_mbd;
   struct optimize_ctx ctx;
   MODE_INFO *mi = xd->mi[0];
-  struct encode_b_args arg = { x, 1, NULL, NULL, &mi->skip };
   int plane;
+#if CONFIG_MISMATCH_DEBUG
+  struct encode_b_args arg = { x,         1,      NULL,   NULL,
+                               &mi->skip, mi_row, mi_col, output_enabled };
+#else
+  struct encode_b_args arg = { x, 1, NULL, NULL, &mi->skip };
+  (void)mi_row;
+  (void)mi_col;
+  (void)output_enabled;
+#endif
 
   mi->skip = 1;
 
@@ -986,8 +1039,16 @@ void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
                                   int enable_optimize_b) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   struct optimize_ctx ctx;
+#if CONFIG_MISMATCH_DEBUG
+  // TODO(angiebird): make mismatch_debug support intra mode
+  struct encode_b_args arg = {
+    x, enable_optimize_b, ctx.ta[plane], ctx.tl[plane], &xd->mi[0]->skip, 0, 0,
+    0
+  };
+#else
   struct encode_b_args arg = { x, enable_optimize_b, ctx.ta[plane],
                                ctx.tl[plane], &xd->mi[0]->skip };
+#endif
 
   if (enable_optimize_b && x->optimize &&
       (!x->skip_recode || !x->skip_optimize)) {
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index fa41f70ef..1975ee73a 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -24,10 +24,16 @@ struct encode_b_args {
   ENTROPY_CONTEXT *ta;
   ENTROPY_CONTEXT *tl;
   int8_t *skip;
+#if CONFIG_MISMATCH_DEBUG
+  int mi_row;
+  int mi_col;
+  int output_enabled;
+#endif
 };
 int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
                    int ctx);
-void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize);
+void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col,
+                   int output_enabled);
 void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize);
 void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index a5b89ee96..fde91fadc 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -26,9 +26,9 @@
 #include "vpx_ports/mem.h"
 #include "vpx_ports/system_state.h"
 #include "vpx_ports/vpx_timer.h"
-#if CONFIG_BITSTREAM_DEBUG
+#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
 #include "vpx_util/vpx_debug_util.h"
-#endif  // CONFIG_BITSTREAM_DEBUG
+#endif  // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
 
 #include "vp9/common/vp9_alloccommon.h"
 #include "vp9/common/vp9_filter.h"
@@ -5255,6 +5255,9 @@ static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
 static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
                         unsigned int *frame_flags) {
   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
+#if CONFIG_MISMATCH_DEBUG
+  mismatch_move_frame_idx_w();
+#endif
   encode_frame_to_data_rate(cpi, size, dest, frame_flags);
 
   vp9_twopass_postencode_update(cpi);
@@ -7374,6 +7377,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
   assert(cpi->oxcf.max_threads == 0 &&
          "bitstream debug tool does not support multithreading");
   bitstream_queue_record_write();
+#endif
+#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
   bitstream_queue_set_frame_write(cm->current_video_frame * 2 + cm->show_frame);
 #endif
 
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index b483489d3..a431e4ca6 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -376,50 +376,8 @@ static TX_SIZE calculate_tx_size(VP9_COMP *const cpi, BLOCK_SIZE bsize,
     tx_size = VPXMIN(max_txsize_lookup[bsize],
                      tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
   }
-  return tx_size;
-}
 
-static void compute_intra_yprediction(PREDICTION_MODE mode, BLOCK_SIZE bsize,
-                                      MACROBLOCK *x, MACROBLOCKD *xd) {
-  struct macroblockd_plane *const pd = &xd->plane[0];
-  struct macroblock_plane *const p = &x->plane[0];
-  uint8_t *const src_buf_base = p->src.buf;
-  uint8_t *const dst_buf_base = pd->dst.buf;
-  const int src_stride = p->src.stride;
-  const int dst_stride = pd->dst.stride;
-  // block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
-  // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
-  const TX_SIZE tx_size = max_txsize_lookup[bsize];
-  const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
-  const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
-  int row, col;
-  // If mb_to_right_edge is < 0 we are in a situation in which
-  // the current block size extends into the UMV and we won't
-  // visit the sub blocks that are wholly within the UMV.
-  const int max_blocks_wide =
-      num_4x4_w + (xd->mb_to_right_edge >= 0
-                       ? 0
-                       : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
-  const int max_blocks_high =
-      num_4x4_h + (xd->mb_to_bottom_edge >= 0
-                       ? 0
-                       : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
-
-  // Keep track of the row and column of the blocks we use so that we know
-  // if we are in the unrestricted motion border.
-  for (row = 0; row < max_blocks_high; row += (1 << tx_size)) {
-    // Skip visiting the sub blocks that are wholly within the UMV.
-    for (col = 0; col < max_blocks_wide; col += (1 << tx_size)) {
-      p->src.buf = &src_buf_base[4 * (row * src_stride + col)];
-      pd->dst.buf = &dst_buf_base[4 * (row * dst_stride + col)];
-      vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode,
-                              x->skip_encode ? p->src.buf : pd->dst.buf,
-                              x->skip_encode ? src_stride : dst_stride,
-                              pd->dst.buf, dst_stride, col, row, 0);
-    }
-  }
-  p->src.buf = src_buf_base;
-  pd->dst.buf = dst_buf_base;
+  return tx_size;
 }
 
 static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
@@ -1065,6 +1023,7 @@ static void estimate_block_intra(int plane, int block, int row, int col,
 
   if (plane == 0) {
     int64_t this_sse = INT64_MAX;
+    // TODO(jingning): This needs further refactoring.
     block_yrd(cpi, x, &this_rdc, &args->skippable, &this_sse, bsize_tx,
               VPXMIN(tx_size, TX_16X16), 0, 1);
   } else {
@@ -2513,12 +2472,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
        bsize <= cpi->sf.max_intra_bsize && !x->skip_low_source_sad &&
        !x->lowvar_highsumdiff)) {
     struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 };
-    int64_t this_sse = INT64_MAX;
     int i;
     PRED_BUFFER *const best_pred = best_pickmode.best_pred;
     TX_SIZE intra_tx_size =
         VPXMIN(max_txsize_lookup[bsize],
                tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
+    if (cpi->oxcf.content != VP9E_CONTENT_SCREEN && intra_tx_size > TX_16X16)
+      intra_tx_size = TX_16X16;
 
     if (reuse_inter_pred && best_pred != NULL) {
       if (best_pred->data == orig_dst.buf) {
@@ -2579,13 +2539,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
       args.skippable = 1;
       args.rdc = &this_rdc;
       mi->tx_size = intra_tx_size;
-
-      compute_intra_yprediction(this_mode, bsize, x, xd);
-      model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
-                        &var_y, &sse_y, 1);
-      block_yrd(cpi, x, &this_rdc, &args.skippable, &this_sse, bsize,
-                VPXMIN(mi->tx_size, TX_16X16), 1, 1);
-
+      vp9_foreach_transformed_block_in_plane(xd, bsize, 0, estimate_block_intra,
+                                             &args);
       // Check skip cost here since skippable is not set for for uv, this
       // mirrors the behavior used by inter
       if (args.skippable) {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index ec7d2fac4..6c7c4e0f8 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -751,8 +751,14 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
   if (args->exit_early) return;
 
   if (!is_inter_block(mi)) {
+#if CONFIG_MISMATCH_DEBUG
+    struct encode_b_args intra_arg = {
+      x, x->block_qcoeff_opt, args->t_above, args->t_left, &mi->skip, 0, 0, 0
+    };
+#else
     struct encode_b_args intra_arg = { x, x->block_qcoeff_opt, args->t_above,
                                        args->t_left, &mi->skip };
+#endif
     vp9_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size,
                            &intra_arg);
     if (recon) {
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk
index 91ce96bb6..343250702 100644
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -116,6 +116,7 @@ DSP_SRCS-$(HAVE_NEON)  += arm/vpx_scaled_convolve8_neon.c
 
 ifeq ($(HAVE_NEON_ASM),yes)
 DSP_SRCS-yes += arm/vpx_convolve_copy_neon_asm$(ASM)
+ifeq ($(CONFIG_VP9),yes)
 DSP_SRCS-yes += arm/vpx_convolve8_horiz_filter_type2_neon$(ASM)
 DSP_SRCS-yes += arm/vpx_convolve8_vert_filter_type2_neon$(ASM)
 DSP_SRCS-yes += arm/vpx_convolve8_horiz_filter_type1_neon$(ASM)
@@ -128,6 +129,7 @@ DSP_SRCS-yes += arm/vpx_convolve_avg_neon_asm$(ASM)
 DSP_SRCS-yes += arm/vpx_convolve8_neon_asm.c
 DSP_SRCS-yes += arm/vpx_convolve8_neon_asm.h
 DSP_SRCS-yes += arm/vpx_convolve_neon.c
+endif # CONFIG_VP9
 else
 ifeq ($(HAVE_NEON),yes)
 DSP_SRCS-yes += arm/vpx_convolve_copy_neon.c
diff --git a/vpx_util/vpx_debug_util.c b/vpx_util/vpx_debug_util.c
index ea8646fba..3ce4065ba 100644
--- a/vpx_util/vpx_debug_util.c
+++ b/vpx_util/vpx_debug_util.c
@@ -13,16 +13,7 @@
 #include <string.h>
 #include "vpx_util/vpx_debug_util.h"
 
-#if CONFIG_BITSTREAM_DEBUG
-#define QUEUE_MAX_SIZE 2000000
-static int result_queue[QUEUE_MAX_SIZE];
-static int prob_queue[QUEUE_MAX_SIZE];
-
-static int queue_r = 0;
-static int queue_w = 0;
-static int queue_prev_w = -1;
-static int skip_r = 0;
-static int skip_w = 0;
+#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
 static int frame_idx_w = 0;
 static int frame_idx_r = 0;
 
@@ -33,7 +24,18 @@ int bitstream_queue_get_frame_write(void) { return frame_idx_w; }
 void bitstream_queue_set_frame_read(int frame_idx) { frame_idx_r = frame_idx; }
 
 int bitstream_queue_get_frame_read(void) { return frame_idx_r; }
+#endif
+
+#if CONFIG_BITSTREAM_DEBUG
+#define QUEUE_MAX_SIZE 2000000
+static int result_queue[QUEUE_MAX_SIZE];
+static int prob_queue[QUEUE_MAX_SIZE];
 
+static int queue_r = 0;
+static int queue_w = 0;
+static int queue_prev_w = -1;
+static int skip_r = 0;
+static int skip_w = 0;
 void bitstream_queue_set_skip_write(int skip) { skip_w = skip; }
 
 void bitstream_queue_set_skip_read(int skip) { skip_r = skip; }
@@ -70,3 +72,211 @@ void bitstream_queue_push(int result, const int prob) {
   }
 }
 #endif  // CONFIG_BITSTREAM_DEBUG
+
+#if CONFIG_MISMATCH_DEBUG
+static int frame_buf_idx_r = 0;  // slot read by mismatch_check_block_*()
+static int frame_buf_idx_w = 0;  // slot written by mismatch_record_block_*()
+#define MAX_FRAME_BUF_NUM 20  // number of frame slots; indices wrap modulo it
+#define MAX_FRAME_STRIDE 1920  // samples per row in each stored plane
+#define MAX_FRAME_HEIGHT 1080  // rows in each stored plane
+static uint16_t  // 20 slots x 3 planes x 1920x1080 samples, about 237 MiB
+    frame_pre[MAX_FRAME_BUF_NUM][3]
+             [MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT];  // prediction only
+static uint16_t  // same dimensions (and size) as frame_pre
+    frame_tx[MAX_FRAME_BUF_NUM][3]
+            [MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT];  // prediction + txfm
+static int frame_stride = MAX_FRAME_STRIDE;  // row pitch used for indexing
+static int frame_height = MAX_FRAME_HEIGHT;  // row limit used by bounds checks
+static int frame_size = MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT;  // samples/plane
+void mismatch_move_frame_idx_w(void) {
+  // Step the write slot forward, wrapping at MAX_FRAME_BUF_NUM.
+  frame_buf_idx_w = (frame_buf_idx_w + 1) % MAX_FRAME_BUF_NUM;
+  if (frame_buf_idx_w != frame_buf_idx_r) return;
+  printf("frame_buf overflow\n");  // write slot landed on the read slot
+  assert(0);
+}
+
+void mismatch_reset_frame(int num_planes) {
+  // Zero the first num_planes planes of both buffers at the write slot.
+  const size_t plane_bytes = sizeof(frame_pre[0][0][0]) * frame_size;
+  int p;
+  for (p = 0; p < num_planes; ++p) {
+    memset(frame_pre[frame_buf_idx_w][p], 0, plane_bytes);
+    memset(frame_tx[frame_buf_idx_w][p], 0, plane_bytes);
+  }
+}
+
+void mismatch_move_frame_idx_r(void) {
+  // Read slot == write slot is reported as an underflow before advancing.
+  const int no_frame_pending = (frame_buf_idx_r == frame_buf_idx_w);
+  if (no_frame_pending) printf("frame_buf underflow\n");
+  if (no_frame_pending) assert(0);
+  frame_buf_idx_r = (frame_buf_idx_r + 1) % MAX_FRAME_BUF_NUM;
+}
+
+void mismatch_record_block_pre(const uint8_t *src, int src_stride, int plane,
+                               int pixel_c, int pixel_r, int blk_w, int blk_h,
+                               int highbd) {
+  // Copy src's blk_w x blk_h block into frame_pre at the current write slot.
+  const uint16_t *src16 = highbd ? CONVERT_TO_SHORTPTR(src) : NULL;
+  int r, c;
+  if (pixel_c + blk_w > frame_stride || pixel_r + blk_h > frame_height) {
+    printf("frame_buf undersized\n");
+    assert(0);
+    return;  // with NDEBUG, never write outside the frame buffer
+  }
+  for (r = 0; r < blk_h; ++r) {
+    for (c = 0; c < blk_w; ++c) {
+      frame_pre[frame_buf_idx_w][plane]
+               [(r + pixel_r) * frame_stride + c + pixel_c] =
+                   src16 ? src16[r * src_stride + c] : src[r * src_stride + c];
+    }
+  }
+#if 0
+  {
+    int ref_frame_idx = 3;
+    int ref_plane = 1;
+    int ref_pixel_c = 162;
+    int ref_pixel_r = 16;
+    if (frame_idx_w == ref_frame_idx && plane == ref_plane &&
+        ref_pixel_c >= pixel_c && ref_pixel_c < pixel_c + blk_w &&
+        ref_pixel_r >= pixel_r && ref_pixel_r < pixel_r + blk_h) {
+      printf(
+          "\nrecord_block_pre frame_idx %d plane %d pixel_c %d pixel_r %d blk_w"
+          " %d blk_h %d\n",
+          frame_idx_w, plane, pixel_c, pixel_r, blk_w, blk_h);
+    }
+  }
+#endif
+}
+void mismatch_record_block_tx(const uint8_t *src, int src_stride, int plane,
+                              int pixel_c, int pixel_r, int blk_w, int blk_h,
+                              int highbd) {  // records into frame_tx
+  const uint16_t *src16 = highbd ? CONVERT_TO_SHORTPTR(src) : NULL;
+  int r, c;  // row/column offsets inside the block
+  if (pixel_c + blk_w > frame_stride || pixel_r + blk_h > frame_height) {
+    printf("frame_buf undersized\n");
+    assert(0);
+    return;  // with NDEBUG, never write outside the frame buffer
+  }
+  for (r = 0; r < blk_h; ++r) {
+    for (c = 0; c < blk_w; ++c) {
+      frame_tx[frame_buf_idx_w][plane]
+              [(r + pixel_r) * frame_stride + c + pixel_c] =
+                  src16 ? src16[r * src_stride + c] : src[r * src_stride + c];
+    }
+  }
+#if 0
+  {
+    int ref_frame_idx = 3;
+    int ref_plane = 1;
+    int ref_pixel_c = 162;
+    int ref_pixel_r = 16;
+    if (frame_idx_w == ref_frame_idx && plane == ref_plane &&
+        ref_pixel_c >= pixel_c && ref_pixel_c < pixel_c + blk_w &&
+        ref_pixel_r >= pixel_r && ref_pixel_r < pixel_r + blk_h) {
+      printf(
+          "\nrecord_block_tx frame_idx %d plane %d pixel_c %d pixel_r %d blk_w "
+          "%d blk_h %d\n",
+          frame_idx_w, plane, pixel_c, pixel_r, blk_w, blk_h);
+    }
+  }
+#endif
+}
+void mismatch_check_block_pre(const uint8_t *src, int src_stride, int plane,
+                              int pixel_c, int pixel_r, int blk_w, int blk_h,
+                              int highbd) {
+  // Compare src's block with the one recorded in frame_pre's read slot.
+  const uint16_t *src16 = highbd ? CONVERT_TO_SHORTPTR(src) : NULL;
+  int r, c, mismatch = 0;
+  if (pixel_c + blk_w > frame_stride || pixel_r + blk_h > frame_height) {
+    printf("frame_buf undersized\n");
+    assert(0);
+    return;  // with NDEBUG, never read outside the frame buffer
+  }
+  for (r = 0; r < blk_h; ++r) {
+    for (c = 0; c < blk_w; ++c) {
+      if (frame_pre[frame_buf_idx_r][plane]
+                   [(r + pixel_r) * frame_stride + c + pixel_c] !=
+          (uint16_t)(src16 ? src16[r * src_stride + c]
+                           : src[r * src_stride + c])) {
+        mismatch = 1;
+      }
+    }
+  }
+  if (mismatch) {
+    int rr, cc;
+    printf(
+        "\ncheck_block_pre failed frame_idx %d plane %d "
+        "pixel_c %d pixel_r "
+        "%d blk_w %d blk_h %d\n",
+        frame_idx_r, plane, pixel_c, pixel_r, blk_w, blk_h);
+    printf("enc\n");
+    for (rr = 0; rr < blk_h; ++rr) {
+      for (cc = 0; cc < blk_w; ++cc) {
+        printf("%d ", frame_pre[frame_buf_idx_r][plane]
+                               [(rr + pixel_r) * frame_stride + cc + pixel_c]);
+      }
+      printf("\n");
+    }
+
+    printf("dec\n");
+    for (rr = 0; rr < blk_h; ++rr) {
+      for (cc = 0; cc < blk_w; ++cc) {
+        printf("%d ",
+               src16 ? src16[rr * src_stride + cc] : src[rr * src_stride + cc]);
+      }
+      printf("\n");
+    }
+    assert(0);
+  }
+}
+void mismatch_check_block_tx(const uint8_t *src, int src_stride, int plane,
+                             int pixel_c, int pixel_r, int blk_w, int blk_h,
+                             int highbd) {
+  // Compare src's block with the one recorded in frame_tx's read slot.
+  const uint16_t *src16 = highbd ? CONVERT_TO_SHORTPTR(src) : NULL;
+  int r, c, mismatch = 0;
+  if (pixel_c + blk_w > frame_stride || pixel_r + blk_h > frame_height) {
+    printf("frame_buf undersized\n");
+    assert(0);
+    return;  // with NDEBUG, never read outside the frame buffer
+  }
+  for (r = 0; r < blk_h; ++r) {
+    for (c = 0; c < blk_w; ++c) {
+      if (frame_tx[frame_buf_idx_r][plane]
+                  [(r + pixel_r) * frame_stride + c + pixel_c] !=
+          (uint16_t)(src16 ? src16[r * src_stride + c]
+                           : src[r * src_stride + c])) {
+        mismatch = 1;
+      }
+    }
+  }
+  if (mismatch) {
+    int rr, cc;
+    printf(
+        "\ncheck_block_tx failed frame_idx %d plane %d pixel_c "
+        "%d pixel_r "
+        "%d blk_w %d blk_h %d\n",
+        frame_idx_r, plane, pixel_c, pixel_r, blk_w, blk_h);
+    printf("enc\n");
+    for (rr = 0; rr < blk_h; ++rr) {
+      for (cc = 0; cc < blk_w; ++cc) {
+        printf("%d ", frame_tx[frame_buf_idx_r][plane]
+                              [(rr + pixel_r) * frame_stride + cc + pixel_c]);
+      }
+      printf("\n");
+    }
+
+    printf("dec\n");
+    for (rr = 0; rr < blk_h; ++rr) {
+      for (cc = 0; cc < blk_w; ++cc) {
+        printf("%d ",
+               src16 ? src16[rr * src_stride + cc] : src[rr * src_stride + cc]);
+      }
+      printf("\n");
+    }
+    assert(0);
+  }
+}
+#endif  // CONFIG_MISMATCH_DEBUG
diff --git a/vpx_util/vpx_debug_util.h b/vpx_util/vpx_debug_util.h
index e628f4305..df1a1aab2 100644
--- a/vpx_util/vpx_debug_util.h
+++ b/vpx_util/vpx_debug_util.h
@@ -19,6 +19,13 @@
 extern "C" {
 #endif
 
+#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
+void bitstream_queue_set_frame_write(int frame_idx);
+int bitstream_queue_get_frame_write(void);
+void bitstream_queue_set_frame_read(int frame_idx);
+int bitstream_queue_get_frame_read(void);
+#endif
+
 #if CONFIG_BITSTREAM_DEBUG
 /* This is a debug tool used to detect bitstream error. On encoder side, it
  * pushes each bit and probability into a queue before the bit is written into
@@ -28,10 +35,6 @@ extern "C" {
  * an error.  This tool can be used to pin down the bitstream error precisely.
  * By combining gdb's backtrace method, we can detect which module causes the
  * bitstream error. */
-void bitstream_queue_set_frame_write(int frame_idx);
-int bitstream_queue_get_frame_write(void);
-void bitstream_queue_set_frame_read(int frame_idx);
-int bitstream_queue_get_frame_read(void);
 int bitstream_queue_get_write(void);
 int bitstream_queue_get_read(void);
 void bitstream_queue_record_write(void);
@@ -42,6 +45,24 @@ void bitstream_queue_set_skip_write(int skip);
 void bitstream_queue_set_skip_read(int skip);
 #endif  // CONFIG_BITSTREAM_DEBUG
 
+#if CONFIG_MISMATCH_DEBUG
+void mismatch_move_frame_idx_w(void);  // advance the slot being recorded
+void mismatch_move_frame_idx_r(void);  // advance the slot being checked
+void mismatch_reset_frame(int num_planes);  // zero the write slot's planes
+void mismatch_record_block_pre(const uint8_t *src, int src_stride, int plane,
+                               int pixel_c, int pixel_r, int blk_w, int blk_h,
+                               int highbd);  // record block (prediction only)
+void mismatch_record_block_tx(const uint8_t *src, int src_stride, int plane,
+                              int pixel_c, int pixel_r, int blk_w, int blk_h,
+                              int highbd);  // record block (prediction + txfm)
+void mismatch_check_block_pre(const uint8_t *src, int src_stride, int plane,
+                              int pixel_c, int pixel_r, int blk_w, int blk_h,
+                              int highbd);  // compare with recorded pre block
+void mismatch_check_block_tx(const uint8_t *src, int src_stride, int plane,
+                             int pixel_c, int pixel_r, int blk_w, int blk_h,
+                             int highbd);  // compare with recorded tx block
+#endif  // CONFIG_MISMATCH_DEBUG
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vpx_util/vpx_util.mk b/vpx_util/vpx_util.mk
index e83a4c4ec..c97f3f322 100644
--- a/vpx_util/vpx_util.mk
+++ b/vpx_util/vpx_util.mk
@@ -15,5 +15,5 @@ UTIL_SRCS-yes += vpx_thread.h
 UTIL_SRCS-yes += endian_inl.h
 UTIL_SRCS-yes += vpx_write_yuv_frame.h
 UTIL_SRCS-yes += vpx_write_yuv_frame.c
-UTIL_SRCS-$(CONFIG_BITSTREAM_DEBUG) += vpx_debug_util.h
-UTIL_SRCS-$(CONFIG_BITSTREAM_DEBUG) += vpx_debug_util.c
+UTIL_SRCS-$(or $(CONFIG_BITSTREAM_DEBUG),$(CONFIG_MISMATCH_DEBUG)) += vpx_debug_util.h
+UTIL_SRCS-$(or $(CONFIG_BITSTREAM_DEBUG),$(CONFIG_MISMATCH_DEBUG)) += vpx_debug_util.c