9 files changed, 273 insertions, 243 deletions
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index e23ca9773..7644930c1 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -236,11 +236,11 @@ static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *const xd,
   }
 }
 
-static void pack_inter_mode_mvs(
-    VP9_COMP *cpi, const MACROBLOCKD *const xd,
-    const MB_MODE_INFO_EXT *const mbmi_ext, vpx_writer *w,
-    unsigned int *const max_mv_magnitude,
-    int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) {
+static void pack_inter_mode_mvs(VP9_COMP *cpi, const MACROBLOCKD *const xd,
+                                const MB_MODE_INFO_EXT *const mbmi_ext,
+                                vpx_writer *w,
+                                unsigned int *const max_mv_magnitude,
+                                int interp_filter_selected[][SWITCHABLE]) {
   VP9_COMMON *const cm = &cpi->common;
   const nmv_context *nmvc = &cm->fc->nmvc;
   const struct segmentation *const seg = &cm->seg;
@@ -373,11 +373,12 @@ static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd,
   write_intra_mode(w, mi->uv_mode, vp9_kf_uv_mode_prob[mi->mode]);
 }
 
-static void write_modes_b(
-    VP9_COMP *cpi, MACROBLOCKD *const xd, const TileInfo *const tile,
-    vpx_writer *w, TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
-    int mi_row, int mi_col, unsigned int *const max_mv_magnitude,
-    int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) {
+static void write_modes_b(VP9_COMP *cpi, MACROBLOCKD *const xd,
+                          const TileInfo *const tile, vpx_writer *w,
+                          TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
+                          int mi_row, int mi_col,
+                          unsigned int *const max_mv_magnitude,
+                          int interp_filter_selected[][SWITCHABLE]) {
   const VP9_COMMON *const cm = &cpi->common;
   const MB_MODE_INFO_EXT *const mbmi_ext =
       cpi->td.mb.mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
@@ -422,12 +423,12 @@ static void write_partition(const VP9_COMMON *const cm,
   }
 }
 
-static void write_modes_sb(
-    VP9_COMP *cpi, MACROBLOCKD *const xd, const TileInfo *const tile,
-    vpx_writer *w, TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
-    int mi_row, int mi_col, BLOCK_SIZE bsize,
-    unsigned int *const max_mv_magnitude,
-    int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) {
+static void write_modes_sb(VP9_COMP *cpi, MACROBLOCKD *const xd,
+                           const TileInfo *const tile, vpx_writer *w,
+                           TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
+                           int mi_row, int mi_col, BLOCK_SIZE bsize,
+                           unsigned int *const max_mv_magnitude,
+                           int interp_filter_selected[][SWITCHABLE]) {
   const VP9_COMMON *const cm = &cpi->common;
   const int bsl = b_width_log2_lookup[bsize];
   const int bs = (1 << bsl) / 4;
@@ -485,11 +486,10 @@ static void write_modes_sb(
     update_partition_context(xd, mi_row, mi_col, subsize, bsize);
 }
 
-static void write_modes(
-    VP9_COMP *cpi, MACROBLOCKD *const xd, const TileInfo *const tile,
-    vpx_writer *w, int tile_row, int tile_col,
-    unsigned int *const max_mv_magnitude,
-    int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) {
+static void write_modes(VP9_COMP *cpi, MACROBLOCKD *const xd,
+                        const TileInfo *const tile, vpx_writer *w, int tile_row,
+                        int tile_col, unsigned int *const max_mv_magnitude,
+                        int interp_filter_selected[][SWITCHABLE]) {
   const VP9_COMMON *const cm = &cpi->common;
   int mi_row, mi_col, tile_sb_row;
   TOKENEXTRA *tok = NULL;
@@ -1241,12 +1241,21 @@ static void write_uncompressed_header(VP9_COMP *cpi,
       vpx_wb_write_literal(wb, vp9_get_refresh_mask(cpi), REF_FRAMES);
       write_frame_size(cm, wb);
     } else {
+      static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
+                                        VP9_ALT_FLAG };
+      const MV_REFERENCE_FRAME first_ref = get_first_ref_frame(cpi);
+      const int first_ref_map_idx = get_ref_frame_map_idx(cpi, first_ref);
       MV_REFERENCE_FRAME ref_frame;
       vpx_wb_write_literal(wb, vp9_get_refresh_mask(cpi), REF_FRAMES);
-      for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
-        assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX);
-        vpx_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame),
-                             REF_FRAMES_LOG2);
+
+      // If a reference frame is not referenced, then set the index for that
+      // reference to the first one used/referenced.
+      for (ref_frame = LAST_FRAME; ref_frame < MAX_REF_FRAMES; ++ref_frame) {
+        const int referenced = cpi->ref_frame_flags & flag_list[ref_frame];
+        const int map_idx = referenced ? get_ref_frame_map_idx(cpi, ref_frame)
+                                       : first_ref_map_idx;
+        assert(map_idx != INVALID_IDX);
+        vpx_wb_write_literal(wb, map_idx, REF_FRAMES_LOG2);
         vpx_wb_write_bit(wb, cm->ref_frame_sign_bias[ref_frame]);
       }
 
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 7e80835f6..8d5ec5a36 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -25,6 +25,7 @@
 #endif
 #include "vpx_ports/mem.h"
 #include "vpx_ports/system_state.h"
+#include "vpx_ports/vpx_once.h"
 #include "vpx_ports/vpx_timer.h"
 #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
 #include "vpx_util/vpx_debug_util.h"
@@ -929,24 +930,21 @@ static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
   cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
 }
 
-void vp9_initialize_enc(void) {
-  static volatile int init_done = 0;
-
-  if (!init_done) {
-    vp9_rtcd();
-    vpx_dsp_rtcd();
-    vpx_scale_rtcd();
-    vp9_init_intra_predictors();
-    vp9_init_me_luts();
-    vp9_rc_init_minq_luts();
-    vp9_entropy_mv_init();
+static void initialize_enc(void) {  // run via once() from vp9_initialize_enc()
+  vp9_rtcd();
+  vpx_dsp_rtcd();
+  vpx_scale_rtcd();
+  vp9_init_intra_predictors();
+  vp9_init_me_luts();
+  vp9_rc_init_minq_luts();
+  vp9_entropy_mv_init();
 #if !CONFIG_REALTIME_ONLY
-    vp9_temporal_filter_init();
+  vp9_temporal_filter_init();
 #endif
-    init_done = 1;
-  }
 }
 
+void vp9_initialize_enc(void) { once(initialize_enc); }  // see vpx_once.h
+
 static void dealloc_compressor_data(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   int i;
@@ -2676,7 +2674,6 @@ static void free_tpl_buffer(VP9_COMP *cpi);
 void vp9_remove_compressor(VP9_COMP *cpi) {
   VP9_COMMON *cm;
   unsigned int i;
-  int t;
 
   if (!cpi) return;
 
@@ -2789,28 +2786,10 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
 
   free_tpl_buffer(cpi);
 
-  for (t = 0; t < cpi->num_workers; ++t) {
-    VPxWorker *const worker = &cpi->workers[t];
-    EncWorkerData *const thread_data = &cpi->tile_thr_data[t];
-
-    // Deallocate allocated threads.
-    vpx_get_worker_interface()->end(worker);
-
-    // Deallocate allocated thread data.
-    if (t < cpi->num_workers - 1) {
-      vpx_free(thread_data->td->counts);
-      vp9_free_pc_tree(thread_data->td);
-      vpx_free(thread_data->td);
-    }
-  }
-  vpx_free(cpi->tile_thr_data);
-  vpx_free(cpi->workers);
+  vp9_loop_filter_dealloc(&cpi->lf_row_sync);
+  vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
   vp9_row_mt_mem_dealloc(cpi);
-
-  if (cpi->num_workers > 1) {
-    vp9_loop_filter_dealloc(&cpi->lf_row_sync);
-    vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
-  }
+  vp9_encode_free_mt_data(cpi);
 
 #if !CONFIG_REALTIME_ONLY
   vp9_alt_ref_aq_destroy(cpi->alt_ref_aq);
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 9774a64cc..1bca7ded7 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -1196,14 +1196,24 @@ static INLINE int frame_is_kf_gf_arf(const VP9_COMP *cpi) {
          (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref);
 }
 
+// Returns the first reference flagged in ref_frame_flags, else MAX_REF_FRAMES.
+static INLINE MV_REFERENCE_FRAME get_first_ref_frame(const VP9_COMP *cpi) {
+  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
+                                    VP9_ALT_FLAG };
+  MV_REFERENCE_FRAME ref_frame;
+  for (ref_frame = LAST_FRAME; ref_frame < MAX_REF_FRAMES; ++ref_frame) {
+    if (cpi->ref_frame_flags & flag_list[ref_frame]) break;
+  }
+  return ref_frame;
+}
+
 static INLINE int get_ref_frame_map_idx(const VP9_COMP *cpi,
                                         MV_REFERENCE_FRAME ref_frame) {
-  if (ref_frame == LAST_FRAME) {
-    return cpi->lst_fb_idx;
-  } else if (ref_frame == GOLDEN_FRAME) {
-    return cpi->gld_fb_idx;
-  } else {
-    return cpi->alt_fb_idx;
+  switch (ref_frame) {
+    case LAST_FRAME: return cpi->lst_fb_idx;
+    case GOLDEN_FRAME: return cpi->gld_fb_idx;
+    case ALTREF_FRAME: return cpi->alt_fb_idx;
+    default: return INVALID_IDX;  // any frame other than LAST/GOLDEN/ALTREF
   }
 }
 
diff --git a/vp9/encoder/vp9_ethread.c b/vp9/encoder/vp9_ethread.c
index e7f8a537d..453fe2e0d 100644
--- a/vp9/encoder/vp9_ethread.c
+++ b/vp9/encoder/vp9_ethread.c
@@ -8,6 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include "vp9/common/vp9_thread_common.h"
+#include "vp9/encoder/vp9_bitstream.h"
 #include "vp9/encoder/vp9_encodeframe.h"
 #include "vp9/encoder/vp9_encoder.h"
 #include "vp9/encoder/vp9_ethread.h"
@@ -79,60 +81,59 @@ static void create_enc_workers(VP9_COMP *cpi, int num_workers) {
   VP9_COMMON *const cm = &cpi->common;
   const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
   int i;
+  // While using SVC, we need to allocate threads according to the highest
+  // resolution. When row based multithreading is enabled, it is OK to
+  // allocate more threads than the number of max tile columns.
+  if (cpi->use_svc && !cpi->row_mt) {
+    int max_tile_cols = get_max_tile_cols(cpi);
+    num_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
+  }
+  assert(num_workers > 0);
+  if (num_workers == cpi->num_workers) return;
+  vp9_loop_filter_dealloc(&cpi->lf_row_sync);
+  vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
+  vp9_encode_free_mt_data(cpi);
 
-  // Only run once to create threads and allocate thread data.
-  if (cpi->num_workers == 0) {
-    int allocated_workers = num_workers;
-
-    // While using SVC, we need to allocate threads according to the highest
-    // resolution. When row based multithreading is enabled, it is OK to
-    // allocate more threads than the number of max tile columns.
-    if (cpi->use_svc && !cpi->row_mt) {
-      int max_tile_cols = get_max_tile_cols(cpi);
-      allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
-    }
-
-    CHECK_MEM_ERROR(cm, cpi->workers,
-                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
+  CHECK_MEM_ERROR(cm, cpi->workers,
+                  vpx_malloc(num_workers * sizeof(*cpi->workers)));
 
-    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
-                    vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data)));
+  CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
+                  vpx_calloc(num_workers, sizeof(*cpi->tile_thr_data)));
 
-    for (i = 0; i < allocated_workers; i++) {
-      VPxWorker *const worker = &cpi->workers[i];
-      EncWorkerData *thread_data = &cpi->tile_thr_data[i];
+  for (i = 0; i < num_workers; i++) {
+    VPxWorker *const worker = &cpi->workers[i];
+    EncWorkerData *thread_data = &cpi->tile_thr_data[i];
 
-      ++cpi->num_workers;
-      winterface->init(worker);
+    ++cpi->num_workers;
+    winterface->init(worker);
 
-      if (i < allocated_workers - 1) {
-        thread_data->cpi = cpi;
+    if (i < num_workers - 1) {
+      thread_data->cpi = cpi;
 
-        // Allocate thread data.
-        CHECK_MEM_ERROR(cm, thread_data->td,
-                        vpx_memalign(32, sizeof(*thread_data->td)));
-        vp9_zero(*thread_data->td);
+      // Allocate thread data.
+      CHECK_MEM_ERROR(cm, thread_data->td,
+                      vpx_memalign(32, sizeof(*thread_data->td)));
+      vp9_zero(*thread_data->td);
 
-        // Set up pc_tree.
-        thread_data->td->leaf_tree = NULL;
-        thread_data->td->pc_tree = NULL;
-        vp9_setup_pc_tree(cm, thread_data->td);
+      // Set up pc_tree.
+      thread_data->td->leaf_tree = NULL;
+      thread_data->td->pc_tree = NULL;
+      vp9_setup_pc_tree(cm, thread_data->td);
 
-        // Allocate frame counters in thread data.
-        CHECK_MEM_ERROR(cm, thread_data->td->counts,
-                        vpx_calloc(1, sizeof(*thread_data->td->counts)));
+      // Allocate frame counters in thread data.
+      CHECK_MEM_ERROR(cm, thread_data->td->counts,
+                      vpx_calloc(1, sizeof(*thread_data->td->counts)));
 
-        // Create threads
-        if (!winterface->reset(worker))
-          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
-                             "Tile encoder thread creation failed");
-      } else {
-        // Main thread acts as a worker and uses the thread data in cpi.
-        thread_data->cpi = cpi;
-        thread_data->td = &cpi->td;
-      }
-      winterface->sync(worker);
+      // Create threads
+      if (!winterface->reset(worker))
+        vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
+                           "Tile encoder thread creation failed");
+    } else {
+      // Main thread acts as a worker and uses the thread data in cpi.
+      thread_data->cpi = cpi;
+      thread_data->td = &cpi->td;
     }
+    winterface->sync(worker);
   }
 }
 
@@ -169,6 +170,27 @@ static void launch_enc_workers(VP9_COMP *cpi, VPxWorkerHook hook, void *data2,
   }
 }
 
+// Ends every worker, frees per-thread data and resets cpi's worker state.
+void vp9_encode_free_mt_data(struct VP9_COMP *cpi) {
+  int t;
+  for (t = 0; t < cpi->num_workers; ++t) {
+    EncWorkerData *const thread_data = &cpi->tile_thr_data[t];
+    vpx_get_worker_interface()->end(&cpi->workers[t]);
+    // The last entry borrows cpi->td, so only the others own their td.
+    if (t < cpi->num_workers - 1) {
+      vpx_free(thread_data->td->counts);
+      vp9_free_pc_tree(thread_data->td);
+      vpx_free(thread_data->td);
+    }
+  }
+  // NULL the pointers so a repeated call cannot double free them.
+  vpx_free(cpi->tile_thr_data);
+  cpi->tile_thr_data = NULL;
+  vpx_free(cpi->workers);
+  cpi->workers = NULL;
+  cpi->num_workers = 0;
+}
+
 void vp9_encode_tiles_mt(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   const int tile_cols = 1 << cm->log2_tile_cols;
diff --git a/vp9/encoder/vp9_ethread.h b/vp9/encoder/vp9_ethread.h
index cda0293bc..4c192da51 100644
--- a/vp9/encoder/vp9_ethread.h
+++ b/vp9/encoder/vp9_ethread.h
@@ -42,6 +42,11 @@ typedef struct VP9RowMTSyncData {
   int rows;
 } VP9RowMTSync;
 
+// Frees EncWorkerData related allocations made by vp9_encode_*_mt().
+// row_mt specific data is freed with vp9_row_mt_mem_dealloc(), which is not
+// called by this function.
+void vp9_encode_free_mt_data(struct VP9_COMP *cpi);
+
 void vp9_encode_tiles_mt(struct VP9_COMP *cpi);
 
 void vp9_encode_tiles_row_mt(struct VP9_COMP *cpi);
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index ac29f36ec..cd6706420 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -159,59 +159,63 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
 
 #if CONFIG_VP9_HIGHBITDEPTH
 /* checks if (r, c) has better score than previous best */
-#define CHECK_BETTER(v, r, c)                                                \
-  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                    \
-    int64_t tmpmse;                                                          \
-    const MV mv = { r, c };                                                  \
-    const MV ref_mv = { rr, rc };                                            \
-    if (second_pred == NULL) {                                               \
-      thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z,  \
-                         src_stride, &sse);                                  \
-    } else {                                                                 \
-      thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
-                          src_stride, &sse, second_pred);                    \
-    }                                                                        \
-    tmpmse = thismse;                                                        \
-    tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit);     \
-    if (tmpmse >= INT_MAX) {                                                 \
-      v = INT_MAX;                                                           \
-    } else if ((v = (uint32_t)tmpmse) < besterr) {                           \
-      besterr = v;                                                           \
-      br = r;                                                                \
-      bc = c;                                                                \
-      *distortion = thismse;                                                 \
-      *sse1 = sse;                                                           \
-    }                                                                        \
-  } else {                                                                   \
-    v = INT_MAX;                                                             \
-  }
+#define CHECK_BETTER(v, r, c)                                                  \
+  do {                                                                         \
+    if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                    \
+      int64_t tmpmse;                                                          \
+      const MV mv = { r, c };                                                  \
+      const MV ref_mv = { rr, rc };                                            \
+      if (second_pred == NULL) {                                               \
+        thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z,  \
+                           src_stride, &sse);                                  \
+      } else {                                                                 \
+        thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
+                            src_stride, &sse, second_pred);                    \
+      }                                                                        \
+      tmpmse = thismse;                                                        \
+      tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit);     \
+      if (tmpmse >= INT_MAX) {                                                 \
+        v = INT_MAX;                                                           \
+      } else if ((v = (uint32_t)tmpmse) < besterr) {                           \
+        besterr = v;                                                           \
+        br = r;                                                                \
+        bc = c;                                                                \
+        *distortion = thismse;                                                 \
+        *sse1 = sse;                                                           \
+      }                                                                        \
+    } else {                                                                   \
+      v = INT_MAX;                                                             \
+    }                                                                          \
+  } while (0)
 #else
 /* checks if (r, c) has better score than previous best */
-#define CHECK_BETTER(v, r, c)                                                \
-  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                    \
-    const MV mv = { r, c };                                                  \
-    const MV ref_mv = { rr, rc };                                            \
-    if (second_pred == NULL)                                                 \
-      thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z,  \
-                         src_stride, &sse);                                  \
-    else                                                                     \
-      thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
-                          src_stride, &sse, second_pred);                    \
-    if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) +     \
-             thismse) < besterr) {                                           \
-      besterr = v;                                                           \
-      br = r;                                                                \
-      bc = c;                                                                \
-      *distortion = thismse;                                                 \
-      *sse1 = sse;                                                           \
-    }                                                                        \
-  } else {                                                                   \
-    v = INT_MAX;                                                             \
-  }
+#define CHECK_BETTER(v, r, c)                                                  \
+  do {                                                                         \
+    if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                    \
+      const MV mv = { r, c };                                                  \
+      const MV ref_mv = { rr, rc };                                            \
+      if (second_pred == NULL)                                                 \
+        thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z,  \
+                           src_stride, &sse);                                  \
+      else                                                                     \
+        thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
+                            src_stride, &sse, second_pred);                    \
+      if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) +     \
+               thismse) < besterr) {                                           \
+        besterr = v;                                                           \
+        br = r;                                                                \
+        bc = c;                                                                \
+        *distortion = thismse;                                                 \
+        *sse1 = sse;                                                           \
+      }                                                                        \
+    } else {                                                                   \
+      v = INT_MAX;                                                             \
+    }                                                                          \
+  } while (0)
 
 #endif
 #define FIRST_LEVEL_CHECKS                                       \
-  {                                                              \
+  do {                                                           \
     unsigned int left, right, up, down, diag;                    \
     CHECK_BETTER(left, tr, tc - hstep);                          \
     CHECK_BETTER(right, tr, tc + hstep);                         \
@@ -224,10 +228,10 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
       case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \
       case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \
     }                                                            \
-  }
+  } while (0)
 
 #define SECOND_LEVEL_CHECKS                                       \
-  {                                                               \
+  do {                                                            \
     int kr, kc;                                                   \
     unsigned int second;                                          \
     if (tr != br && tc != bc) {                                   \
@@ -256,7 +260,7 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
         case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \
       }                                                           \
     }                                                             \
-  }
+  } while (0)
 
 #define SETUP_SUBPEL_SEARCH                                                 \
   const uint8_t *const z = x->plane[0].src.buf;                             \
@@ -290,7 +294,7 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
   maxr = subpel_mv_limits.row_max;                                          \
                                                                             \
   bestmv->row *= 8;                                                         \
-  bestmv->col *= 8;
+  bestmv->col *= 8
 
 static unsigned int setup_center_error(
     const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
@@ -678,48 +682,52 @@ static int accurate_sub_pel_search(
 // TODO(yunqing): this part can be further refactored.
 #if CONFIG_VP9_HIGHBITDEPTH
 /* checks if (r, c) has better score than previous best */
-#define CHECK_BETTER1(v, r, c)                                                 \
-  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                      \
-    int64_t tmpmse;                                                            \
-    const MV mv = { r, c };                                                    \
-    const MV ref_mv = { rr, rc };                                              \
-    thismse =                                                                  \
-        accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \
-                                y, y_stride, second_pred, w, h, &sse);         \
-    tmpmse = thismse;                                                          \
-    tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit);       \
-    if (tmpmse >= INT_MAX) {                                                   \
-      v = INT_MAX;                                                             \
-    } else if ((v = (uint32_t)tmpmse) < besterr) {                             \
-      besterr = v;                                                             \
-      br = r;                                                                  \
-      bc = c;                                                                  \
-      *distortion = thismse;                                                   \
-      *sse1 = sse;                                                             \
-    }                                                                          \
-  } else {                                                                     \
-    v = INT_MAX;                                                               \
-  }
+#define CHECK_BETTER1(v, r, c)                                                \
+  do {                                                                        \
+    if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                   \
+      int64_t tmpmse;                                                         \
+      const MV mv = { r, c };                                                 \
+      const MV ref_mv = { rr, rc };                                           \
+      thismse = accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z,    \
+                                        src_stride, y, y_stride, second_pred, \
+                                        w, h, &sse);                          \
+      tmpmse = thismse;                                                       \
+      tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit);    \
+      if (tmpmse >= INT_MAX) {                                                \
+        v = INT_MAX;                                                          \
+      } else if ((v = (uint32_t)tmpmse) < besterr) {                          \
+        besterr = v;                                                          \
+        br = r;                                                               \
+        bc = c;                                                               \
+        *distortion = thismse;                                                \
+        *sse1 = sse;                                                          \
+      }                                                                       \
+    } else {                                                                  \
+      v = INT_MAX;                                                            \
+    }                                                                         \
+  } while (0)
 #else
 /* checks if (r, c) has better score than previous best */
-#define CHECK_BETTER1(v, r, c)                                                 \
-  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                      \
-    const MV mv = { r, c };                                                    \
-    const MV ref_mv = { rr, rc };                                              \
-    thismse =                                                                  \
-        accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \
-                                y, y_stride, second_pred, w, h, &sse);         \
-    if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) +       \
-             thismse) < besterr) {                                             \
-      besterr = v;                                                             \
-      br = r;                                                                  \
-      bc = c;                                                                  \
-      *distortion = thismse;                                                   \
-      *sse1 = sse;                                                             \
-    }                                                                          \
-  } else {                                                                     \
-    v = INT_MAX;                                                               \
-  }
+#define CHECK_BETTER1(v, r, c)                                                \
+  do {                                                                        \
+    if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                   \
+      const MV mv = { r, c };                                                 \
+      const MV ref_mv = { rr, rc };                                           \
+      thismse = accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z,    \
+                                        src_stride, y, y_stride, second_pred, \
+                                        w, h, &sse);                          \
+      if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) +    \
+               thismse) < besterr) {                                          \
+        besterr = v;                                                          \
+        br = r;                                                               \
+        bc = c;                                                               \
+        *distortion = thismse;                                                \
+        *sse1 = sse;                                                          \
+      }                                                                       \
+    } else {                                                                  \
+      v = INT_MAX;                                                            \
+    }                                                                         \
+  } while (0)
 
 #endif
 
@@ -2962,7 +2970,7 @@ int vp9_full_pixel_search(const VP9_COMP *const cpi, const MACROBLOCK *const x,
   (void)sse;           \
   (void)thismse;       \
   (void)cost_list;     \
-  (void)use_accurate_subpel_search;
+  (void)use_accurate_subpel_search
 
 // Return the maximum MV.
 uint32_t vp9_return_max_sub_pixel_mv(
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index e38464c72..ac346115f 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -223,9 +223,10 @@ int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) {
   if (target > rc->max_frame_bandwidth) target = rc->max_frame_bandwidth;
 
   if (oxcf->rc_max_inter_bitrate_pct) {
-    const int max_rate =
-        rc->avg_frame_bandwidth * oxcf->rc_max_inter_bitrate_pct / 100;
-    target = VPXMIN(target, max_rate);
+    const int64_t max_rate =
+        (int64_t)rc->avg_frame_bandwidth * oxcf->rc_max_inter_bitrate_pct / 100;
+    // The cast is safe: target is an int and VPXMIN never exceeds target.
+    target = (int)VPXMIN(target, max_rate);
   }
   return target;
 }
@@ -234,9 +235,9 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) {
   const RATE_CONTROL *rc = &cpi->rc;
   const VP9EncoderConfig *oxcf = &cpi->oxcf;
   if (oxcf->rc_max_intra_bitrate_pct) {
-    const int max_rate =
-        rc->avg_frame_bandwidth * oxcf->rc_max_intra_bitrate_pct / 100;
-    target = VPXMIN(target, max_rate);
+    const int64_t max_rate =
+        (int64_t)rc->avg_frame_bandwidth * oxcf->rc_max_intra_bitrate_pct / 100;
+    target = (int)VPXMIN(target, max_rate);
   }
   if (target > rc->max_frame_bandwidth) target = rc->max_frame_bandwidth;
   return target;
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index ad3a8f7af..f01cb17a2 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -73,7 +73,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
     svc->downsample_filter_type[sl] = BILINEAR;
     svc->downsample_filter_phase[sl] = 8;  // Set to 8 for averaging filter.
     svc->framedrop_thresh[sl] = oxcf->drop_frames_water_mark;
-    svc->fb_idx_upd_tl0[sl] = -1;
+    svc->fb_idx_upd_tl0[sl] = INVALID_IDX;
     svc->drop_count[sl] = 0;
     svc->spatial_layer_sync[sl] = 0;
     svc->force_drop_constrained_from_above[sl] = 0;
@@ -462,32 +462,21 @@ static void reset_fb_idx_unused(VP9_COMP *const cpi) {
   // fb_idx for that reference to the first one used/referenced.
   // This is to avoid setting fb_idx for a reference to a slot that is not
   // used/needed (i.e., since that reference is not referenced or refreshed).
-  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
-                                    VP9_ALT_FLAG };
-  MV_REFERENCE_FRAME ref_frame;
-  MV_REFERENCE_FRAME first_ref = 0;
-  int first_fb_idx = 0;
-  int fb_idx[3] = { cpi->lst_fb_idx, cpi->gld_fb_idx, cpi->alt_fb_idx };
-  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
-    if (cpi->ref_frame_flags & flag_list[ref_frame]) {
-      first_ref = ref_frame;
-      first_fb_idx = fb_idx[ref_frame - 1];
-      break;
+  const MV_REFERENCE_FRAME first_ref = get_first_ref_frame(cpi);
+  const int map_idx = get_ref_frame_map_idx(cpi, first_ref);
+  if (map_idx != INVALID_IDX) {
+    if (!(cpi->ref_frame_flags & VP9_LAST_FLAG ||
+          cpi->ext_refresh_last_frame)) {
+      cpi->lst_fb_idx = map_idx;
+    }
+    if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG ||
+          cpi->ext_refresh_golden_frame)) {
+      cpi->gld_fb_idx = map_idx;
+    }
+    if (!(cpi->ref_frame_flags & VP9_ALT_FLAG ||
+          cpi->ext_refresh_alt_ref_frame)) {
+      cpi->alt_fb_idx = map_idx;
     }
-  }
-  if (first_ref > 0) {
-    if (first_ref != LAST_FRAME &&
-        !(cpi->ref_frame_flags & flag_list[LAST_FRAME]) &&
-        !cpi->ext_refresh_last_frame)
-      cpi->lst_fb_idx = first_fb_idx;
-    else if (first_ref != GOLDEN_FRAME &&
-             !(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) &&
-             !cpi->ext_refresh_golden_frame)
-      cpi->gld_fb_idx = first_fb_idx;
-    else if (first_ref != ALTREF_FRAME &&
-             !(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]) &&
-             !cpi->ext_refresh_alt_ref_frame)
-      cpi->alt_fb_idx = first_fb_idx;
   }
 }
 
@@ -716,9 +705,9 @@ static void set_flags_and_fb_idx_bypass_via_set_ref_frame_config(
   int sl = svc->spatial_layer_id = svc->spatial_layer_to_encode;
   cpi->svc.temporal_layer_id = cpi->svc.temporal_layer_id_per_spatial[sl];
   cpi->ext_refresh_frame_flags_pending = 1;
-  cpi->lst_fb_idx = svc->lst_fb_idx[sl];
-  cpi->gld_fb_idx = svc->gld_fb_idx[sl];
-  cpi->alt_fb_idx = svc->alt_fb_idx[sl];
+  if (svc->reference_last[sl]) cpi->lst_fb_idx = svc->lst_fb_idx[sl];
+  if (svc->reference_golden[sl]) cpi->gld_fb_idx = svc->gld_fb_idx[sl];
+  if (svc->reference_altref[sl]) cpi->alt_fb_idx = svc->alt_fb_idx[sl];
   cpi->ext_refresh_last_frame = 0;
   cpi->ext_refresh_golden_frame = 0;
   cpi->ext_refresh_alt_ref_frame = 0;
@@ -875,9 +864,9 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
     // flags are passed via the encode call (bypass mode). Issue is that we're
     // resetting ext_refresh_frame_flags_pending to 0 on frame drops.
     if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
-      memset(&svc->lst_fb_idx, -1, sizeof(svc->lst_fb_idx));
-      memset(&svc->gld_fb_idx, -1, sizeof(svc->lst_fb_idx));
-      memset(&svc->alt_fb_idx, -1, sizeof(svc->lst_fb_idx));
+      memset(&svc->lst_fb_idx, INVALID_IDX, sizeof(svc->lst_fb_idx));
+      memset(&svc->gld_fb_idx, INVALID_IDX, sizeof(svc->gld_fb_idx));
+      memset(&svc->alt_fb_idx, INVALID_IDX, sizeof(svc->alt_fb_idx));
       // These are set by API before the superframe is encoded and they are
       // passed to encoder layer by layer. Don't reset them on layer 0 in bypass
       // mode.
@@ -970,7 +959,7 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
 
   if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
       svc->last_layer_dropped[svc->spatial_layer_id] &&
-      svc->fb_idx_upd_tl0[svc->spatial_layer_id] != -1 &&
+      svc->fb_idx_upd_tl0[svc->spatial_layer_id] != INVALID_IDX &&
       !svc->layer_context[svc->temporal_layer_id].is_key_frame) {
     // For fixed/non-flexible mode, if the previous frame (same spatial layer
     // from previous superframe) was dropped, make sure the lst_fb_idx
diff --git a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
index 4be6a5ea0..fcf50eb2a 100644
--- a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
+++ b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
@@ -282,7 +282,14 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
 
         // Update the global minimum if the local minimum is smaller
         if (LIKELY(local_best_sad < best_sad)) {
+#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
           new_bmv = ((const int_mv *)&v_these_mv_w)[local_best_idx];
+#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
           new_best_address = ((const uint8_t **)v_blocka)[local_best_idx];
 
           best_sad = local_best_sad;