16 files changed, 237 insertions, 351 deletions
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 8179a6915..a4120c28a 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -70,7 +70,7 @@ void vp9_free_frame_buffers(VP9_COMMON *oci) {
 }
 
 int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
-  int i, mb_cols;
+  int i, mi_cols;
 
   // Our internal buffers are always multiples of 16
   const int aligned_width = multiple16(width);
@@ -140,19 +140,19 @@ int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
 
   // FIXME(jkoleszar): allocate subsampled arrays for U/V once subsampling
   // information is exposed at this level
-  mb_cols = mb_cols_aligned_to_sb(oci);
-  oci->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * 12 * mb_cols, 1);
+  mi_cols = mi_cols_aligned_to_sb(oci);
+  oci->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * 6 * mi_cols, 1);
   if (!oci->above_context[0]) {
     vp9_free_frame_buffers(oci);
     return 1;
   }
   oci->above_context[1] =
-    oci->above_context[0] + sizeof(ENTROPY_CONTEXT) * 4 * mb_cols;
+    oci->above_context[0] + sizeof(ENTROPY_CONTEXT) * 2 * mi_cols;
   oci->above_context[2] =
-    oci->above_context[1] + sizeof(ENTROPY_CONTEXT) * 4 * mb_cols;
+    oci->above_context[1] + sizeof(ENTROPY_CONTEXT) * 2 * mi_cols;
 
   oci->above_seg_context =
-    vpx_calloc(sizeof(PARTITION_CONTEXT) * mb_cols_aligned_to_sb(oci), 1);
+    vpx_calloc(sizeof(PARTITION_CONTEXT) * mi_cols, 1);
 
   if (!oci->above_seg_context) {
     vp9_free_frame_buffers(oci);
@@ -177,18 +177,15 @@ void vp9_setup_version(VP9_COMMON *cm) {
   switch (cm->version & 0x3) {
     case 0:
       cm->no_lpf = 0;
-      cm->filter_type = NORMAL_LOOPFILTER;
       cm->use_bilinear_mc_filter = 0;
       break;
     case 1:
       cm->no_lpf = 0;
-      cm->filter_type = SIMPLE_LOOPFILTER;
       cm->use_bilinear_mc_filter = 1;
       break;
     case 2:
     case 3:
       cm->no_lpf = 1;
-      cm->filter_type = NORMAL_LOOPFILTER;
       cm->use_bilinear_mc_filter = 1;
       break;
   }
@@ -203,7 +200,6 @@ void vp9_create_common(VP9_COMMON *oci) {
   oci->txfm_mode = ONLY_4X4;
   oci->comp_pred_mode = HYBRID_PREDICTION;
   oci->no_lpf = 0;
-  oci->filter_type = NORMAL_LOOPFILTER;
   oci->use_bilinear_mc_filter = 0;
   oci->clr_type = REG_YUV;
   oci->clamp_type = RECON_CLAMP_REQUIRED;
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 6b104a129..d111e7fa8 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -70,17 +70,17 @@ typedef enum {
 } INTERPOLATIONFILTERTYPE;
 
 typedef enum {
-  DC_PRED,            /* average of above and left pixels */
-  V_PRED,             /* vertical prediction */
-  H_PRED,             /* horizontal prediction */
-  D45_PRED,           /* Directional 45 deg prediction  [anti-clockwise from 0 deg hor] */
-  D135_PRED,          /* Directional 135 deg prediction [anti-clockwise from 0 deg hor] */
-  D117_PRED,          /* Directional 112 deg prediction [anti-clockwise from 0 deg hor] */
-  D153_PRED,          /* Directional 157 deg prediction [anti-clockwise from 0 deg hor] */
-  D27_PRED,           /* Directional 22 deg prediction  [anti-clockwise from 0 deg hor] */
-  D63_PRED,           /* Directional 67 deg prediction  [anti-clockwise from 0 deg hor] */
-  TM_PRED,            /* Truemotion prediction */
-  I4X4_PRED,          /* 4x4 based prediction, each 4x4 has its own mode */
+  DC_PRED,         // Average of above and left pixels
+  V_PRED,          // Vertical
+  H_PRED,          // Horizontal
+  D45_PRED,        // Directional 45  deg = round(arctan(1/1) * 180/pi)
+  D135_PRED,       // Directional 135 deg = 180 - 45
+  D117_PRED,       // Directional 117 deg = 180 - 63
+  D153_PRED,       // Directional 153 deg = 180 - 27
+  D27_PRED,        // Directional 27  deg = round(arctan(1/2) * 180/pi)
+  D63_PRED,        // Directional 63  deg = round(arctan(2/1) * 180/pi)
+  TM_PRED,         // True-motion
+  I4X4_PRED,       // Each 4x4 subblock has its own mode
   NEARESTMV,
   NEARMV,
   ZEROMV,
@@ -408,7 +408,7 @@ typedef struct macroblockd {
 static INLINE void update_partition_context(MACROBLOCKD *xd,
                                             BLOCK_SIZE_TYPE sb_type,
                                             BLOCK_SIZE_TYPE sb_size) {
-  int bsl = mi_width_log2(sb_size), bs;
+  int bsl = mi_width_log2(sb_size), bs = 1 << bsl;
   int bwl = mi_width_log2(sb_type);
   int bhl = mi_height_log2(sb_type);
   int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl;
@@ -417,8 +417,6 @@ static INLINE void update_partition_context(MACROBLOCKD *xd,
   if (bsl == 0)
     return;
 
-  bs = 1 << (bsl - 1);
-
   // update the partition context at the end notes. set partition bits
   // of block sizes larger than the current one to be one, and partition
   // bits of smaller block sizes to be zero.
@@ -449,18 +447,14 @@ static INLINE void update_partition_context(MACROBLOCKD *xd,
 
 static INLINE int partition_plane_context(MACROBLOCKD *xd,
                                           BLOCK_SIZE_TYPE sb_type) {
-  int bsl = mi_width_log2(sb_type), bs;
+  int bsl = mi_width_log2(sb_type), bs = 1 << bsl;
   int above = 0, left = 0, i;
   int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl;
 
-  bs = 1 << (bsl - 1);
-
   assert(mi_width_log2(sb_type) == mi_height_log2(sb_type));
   assert(bsl >= 0);
   assert(boffset >= 0);
 
-  bs = 1 << (bsl - 1);
-
   for (i = 0; i < bs; i++)
     above |= (xd->above_seg_context[i] & (1 << boffset));
   for (i = 0; i < bs; i++)
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
index b00d89204..166319565 100644
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -18,6 +18,8 @@
 #define MI_SIZE (1 << LOG2_MI_SIZE)
 #define MI_UV_SIZE (1 << (LOG2_MI_SIZE - 1))
 
+#define MI_MASK ((64 >> LOG2_MI_SIZE) - 1)
+
 typedef enum BLOCK_SIZE_TYPE {
   BLOCK_SIZE_AB4X4,
 #if CONFIG_AB4X4
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 022abb8aa..b668212fc 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -199,80 +199,75 @@ static void lpf_mb(VP9_COMMON *cm, const MODE_INFO *mi,
   if (filter_level) {
     const int skip_lf = mb_lf_skip(&mi->mbmi);
     const int tx_size = mi->mbmi.txfm_size;
-    if (cm->filter_type == NORMAL_LOOPFILTER) {
-      const int hev_index = filter_level >> 4;
-      lfi.mblim = lfi_n->mblim[filter_level];
-      lfi.blim = lfi_n->blim[filter_level];
-      lfi.lim = lfi_n->lim[filter_level];
-      lfi.hev_thr = lfi_n->hev_thr[hev_index];
-
-      if (do_above_mb_h) {
-        if (tx_size >= TX_16X16)
-          vp9_lpf_mbh_w(y_ptr,
-                        do_above_mbuv_h ? u_ptr : NULL,
-                        do_above_mbuv_h ? v_ptr : NULL,
-                        y_stride, uv_stride, &lfi);
+    const int hev_index = filter_level >> 4;
+    lfi.mblim = lfi_n->mblim[filter_level];
+    lfi.blim = lfi_n->blim[filter_level];
+    lfi.lim = lfi_n->lim[filter_level];
+    lfi.hev_thr = lfi_n->hev_thr[hev_index];
+
+    if (do_above_mb_h) {
+      if (tx_size >= TX_16X16)
+        vp9_lpf_mbh_w(y_ptr,
+                      do_above_mbuv_h ? u_ptr : NULL,
+                      do_above_mbuv_h ? v_ptr : NULL,
+                      y_stride, uv_stride, &lfi);
+      else
+        vp9_loop_filter_mbh(y_ptr, u_ptr, v_ptr, y_stride, uv_stride, &lfi);
+    }
+
+    if (!skip_lf) {
+      if (tx_size >= TX_8X8) {
+        if (tx_size == TX_8X8 &&
+            mi->mbmi.sb_type < BLOCK_SIZE_MB16X16)
+          vp9_loop_filter_bh8x8(y_ptr, u_ptr, v_ptr,
+                                y_stride, uv_stride, &lfi);
         else
-          vp9_loop_filter_mbh(y_ptr, u_ptr, v_ptr, y_stride, uv_stride, &lfi);
+          vp9_loop_filter_bh8x8(y_ptr, NULL, NULL,
+                                y_stride, uv_stride, &lfi);
+      } else {
+        vp9_loop_filter_bh(y_ptr, u_ptr, v_ptr,
+                           y_stride, uv_stride, &lfi);
       }
+    }
 
-      if (!skip_lf) {
-        if (tx_size >= TX_8X8) {
-          if (tx_size == TX_8X8 &&
-              (mi->mbmi.sb_type < BLOCK_SIZE_MB16X16)
-              )
-            vp9_loop_filter_bh8x8(y_ptr, u_ptr, v_ptr,
-                                  y_stride, uv_stride, &lfi);
-          else
-            vp9_loop_filter_bh8x8(y_ptr, NULL, NULL,
-                                  y_stride, uv_stride, &lfi);
-        } else {
-          vp9_loop_filter_bh(y_ptr, u_ptr, v_ptr,
-                             y_stride, uv_stride, &lfi);
-        }
-      }
+    if (do_left_mb_v) {
+      if (tx_size >= TX_16X16)
+        vp9_lpf_mbv_w(y_ptr,
+                      do_left_mbuv_v ? u_ptr : NULL,
+                      do_left_mbuv_v ? v_ptr : NULL,
+                      y_stride, uv_stride, &lfi);
+      else
+        vp9_loop_filter_mbv(y_ptr, u_ptr, v_ptr, y_stride, uv_stride, &lfi);
+    }
 
-      if (do_left_mb_v) {
-        if (tx_size >= TX_16X16)
-          vp9_lpf_mbv_w(y_ptr,
-                        do_left_mbuv_v ? u_ptr : NULL,
-                        do_left_mbuv_v ? v_ptr : NULL,
-                        y_stride, uv_stride, &lfi);
+    if (!skip_lf) {
+      if (tx_size >= TX_8X8) {
+        if (tx_size == TX_8X8 &&
+            mi->mbmi.sb_type < BLOCK_SIZE_MB16X16)
+          vp9_loop_filter_bv8x8(y_ptr, u_ptr, v_ptr,
+                                y_stride, uv_stride, &lfi);
         else
-          vp9_loop_filter_mbv(y_ptr, u_ptr, v_ptr, y_stride, uv_stride, &lfi);
-      }
-
-      if (!skip_lf) {
-        if (tx_size >= TX_8X8) {
-          if (tx_size == TX_8X8 &&
-              (mi->mbmi.sb_type < BLOCK_SIZE_MB16X16))
-            vp9_loop_filter_bv8x8(y_ptr, u_ptr, v_ptr,
-                                  y_stride, uv_stride, &lfi);
-          else
-            vp9_loop_filter_bv8x8(y_ptr, NULL, NULL,
-                                  y_stride, uv_stride, &lfi);
-        } else {
-          vp9_loop_filter_bv(y_ptr, u_ptr, v_ptr,
-                             y_stride, uv_stride, &lfi);
-        }
+          vp9_loop_filter_bv8x8(y_ptr, NULL, NULL,
+                                y_stride, uv_stride, &lfi);
+      } else {
+        vp9_loop_filter_bv(y_ptr, u_ptr, v_ptr,
+                           y_stride, uv_stride, &lfi);
       }
-      if (dering) {
+    }
+    if (dering) {
 #if CONFIG_LOOP_DERING
-        vp9_post_proc_down_and_across(y_ptr, y_ptr,
-          y_stride, y_stride,
-          16, 16, dering);
-        if (u_ptr && v_ptr) {
-          vp9_post_proc_down_and_across(u_ptr, u_ptr,
-            uv_stride, uv_stride,
-            8, 8, dering);
-          vp9_post_proc_down_and_across(v_ptr, v_ptr,
-            uv_stride, uv_stride,
-            8, 8, dering);
-        }
-#endif
+      vp9_post_proc_down_and_across(y_ptr, y_ptr,
+        y_stride, y_stride,
+        16, 16, dering);
+      if (u_ptr && v_ptr) {
+        vp9_post_proc_down_and_across(u_ptr, u_ptr,
+          uv_stride, uv_stride,
+          8, 8, dering);
+        vp9_post_proc_down_and_across(v_ptr, v_ptr,
+          uv_stride, uv_stride,
+          8, 8, dering);
       }
-    } else {
-      // TODO(yaowu): simple loop filter
+#endif
     }
   }
 }
@@ -462,54 +457,21 @@ void vp9_loop_filter_frame(VP9_COMMON *cm,
     }
     if (extra_mb_col) {
       // process 4 MB in the extra MB col
-      // process 1st MB
-      mi = mode_info_context;
-      do_left_v = (mb_col > 0);
-      do_above_h = (mb_row > 0);
-      do_left_v_mbuv =  1;
-      do_above_h_mbuv = 1;
-      lpf_mb(cm, mi, do_left_v, do_above_h,
-             do_left_v_mbuv, do_above_h_mbuv,
-             y_ptr,
-             y_only? 0 : u_ptr,
-             y_only? 0 : v_ptr,
-             y_stride, uv_stride, dering);
-      // process 2nd MB
-      mi = mode_info_context + (mis << 1);
-      do_left_v = (mb_col > 0);
-      do_above_h = 1;
-      do_left_v_mbuv =  1;
-      do_above_h_mbuv = 1;
-      lpf_mb(cm, mi, do_left_v, do_above_h,
-             do_left_v_mbuv, do_above_h_mbuv,
-             y_ptr + 16 * y_stride,
-             y_only ? 0 : (u_ptr + 8 * uv_stride),
-             y_only ? 0 : (v_ptr + 8 * uv_stride),
-             y_stride, uv_stride, dering);
-      // process 3nd MB
-      mi = mode_info_context + (mis << 1) * 2;
-      do_left_v = (mb_col > 0);
-      do_above_h = 1;
-      do_left_v_mbuv =  1;
-      do_above_h_mbuv = 1;
-      lpf_mb(cm, mi, do_left_v, do_above_h,
-             do_left_v_mbuv, do_above_h_mbuv,
-             y_ptr + 32 * y_stride,
-             y_only ? 0 : (u_ptr + 16 * uv_stride),
-             y_only ? 0 : (v_ptr + 16 * uv_stride),
-             y_stride, uv_stride, dering);
-      // process 4th MB
-      mi = mode_info_context + (mis << 1) * 3;
-      do_left_v = (mb_col > 0);
-      do_above_h = 1;
-      do_left_v_mbuv =  1;
-      do_above_h_mbuv = 1;
-      lpf_mb(cm, mi, do_left_v, do_above_h,
-             do_left_v_mbuv, do_above_h_mbuv,
-             y_ptr + 48 * y_stride,
-             y_only ? 0 : (u_ptr + 24 * uv_stride),
-             y_only ? 0 : (v_ptr + 24 * uv_stride),
-             y_stride, uv_stride, dering);
+      int k;
+      for (k = 0; k < 4; ++k) {
+        mi = mode_info_context + (mis << 1) * k;
+        do_left_v = (mb_col > 0);
+        do_above_h = k == 0 ? mb_row > 0 : 1;
+        do_left_v_mbuv =  1;
+        do_above_h_mbuv = 1;
+        lpf_mb(cm, mi, do_left_v, do_above_h,
+               do_left_v_mbuv, do_above_h_mbuv,
+               y_ptr + (k * 16) * y_stride,
+               y_only ? 0 : (u_ptr + (k * 8) * uv_stride),
+               y_only ? 0 : (v_ptr + (k * 8) * uv_stride),
+               y_stride, uv_stride, dering);
+      }
+
       y_ptr += 16;
       u_ptr = y_only? 0 : u_ptr + 8;
       v_ptr = y_only? 0 : v_ptr + 8;
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index 81745e48a..80fccd576 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -16,12 +16,6 @@
 #include "vp9/common/vp9_blockd.h"
 
 #define MAX_LOOP_FILTER 63
-
-typedef enum {
-  NORMAL_LOOPFILTER = 0,
-  SIMPLE_LOOPFILTER = 1
-} LOOPFILTER_TYPE;
-
 #define SIMD_WIDTH 16
 
 /* Need to align this structure so when it is declared and
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index d9d298903..9f0c712df 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -198,7 +198,6 @@ typedef struct VP9Common {
   unsigned char *last_frame_seg_map;
 
   INTERPOLATIONFILTERTYPE mcomp_filter_type;
-  LOOPFILTER_TYPE filter_type;
 
   loop_filter_info_n lf_info;
 
@@ -217,7 +216,7 @@ typedef struct VP9Common {
 
   // partition contexts
   PARTITION_CONTEXT *above_seg_context;
-  PARTITION_CONTEXT left_seg_context[4];
+  PARTITION_CONTEXT left_seg_context[8];
 
   /* keyframe block modes are predicted by their above, left neighbors */
 
@@ -297,8 +296,8 @@ static void ref_cnt_fb(int *buf, int *idx, int new_idx) {
   buf[new_idx]++;
 }
 
-static int mb_cols_aligned_to_sb(VP9_COMMON *cm) {
-  return (cm->mb_cols + 3) & ~3;
+static int mi_cols_aligned_to_sb(VP9_COMMON *cm) {
+  return 2 * ((cm->mb_cols + 3) & ~3);
 }
 
 static void set_mi_row_col(VP9_COMMON *cm, MACROBLOCKD *xd,
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index 65a909335..0f5cbf4ac 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -18,11 +18,8 @@
 #include "vp9/common/vp9_reconintra.h"
 
 void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
-                                       YV12_BUFFER_CONFIG *other,
+                                       int other_w, int other_h,
                                        int this_w, int this_h) {
-  int other_h = other->y_crop_height;
-  int other_w = other->y_crop_width;
-
   scale->x_num = other_w;
   scale->x_den = this_w;
   scale->x_offset_q4 = 0;  // calculated per-mb
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h
index 3f9a7ab08..faf018c0d 100644
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -35,7 +35,7 @@ void vp9_setup_interp_filters(MACROBLOCKD *xd,
                               VP9_COMMON *cm);
 
 void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
-                                       YV12_BUFFER_CONFIG *other,
+                                       int other_w, int other_h,
                                        int this_w, int this_h);
 
 void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
@@ -67,14 +67,11 @@ static int unscaled_value(int val, const struct scale_factors *scale) {
   return val;
 }
 
-static int scaled_buffer_offset(int x_offset,
-                                int y_offset,
-                                int stride,
+static int scaled_buffer_offset(int x_offset, int y_offset, int stride,
                                 const struct scale_factors *scale) {
-  if (scale)
-    return scale->scale_value_y(y_offset, scale) * stride +
-        scale->scale_value_x(x_offset, scale);
-  return y_offset * stride + x_offset;
+  const int x = scale ? scale->scale_value_x(x_offset, scale) : x_offset;
+  const int y = scale ? scale->scale_value_y(y_offset, scale) : y_offset;
+  return y * stride + x;
 }
 
 static void setup_pred_plane(struct buf_2d *dst,
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index f244358b0..54e06f506 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -371,8 +371,8 @@ static void set_offsets(VP9D_COMP *pbi, BLOCK_SIZE_TYPE bsize,
     xd->plane[i].left_context = cm->left_context[i] +
         (((mi_row * 2) & 15) >> xd->plane[i].subsampling_y);
   }
-  xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
-  xd->left_seg_context  = cm->left_seg_context + ((mi_row >> 1) & 3);
+  xd->above_seg_context = cm->above_seg_context + mi_col;
+  xd->left_seg_context  = cm->left_seg_context + (mi_row & MI_MASK);
 
   // Distance of Mb to the various image edges. These are specified to 8th pel
   // as they are always compared to values that are in 1/8th pel units
@@ -442,9 +442,8 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col,
   if (bsize > BLOCK_SIZE_SB8X8) {
     int pl;
     // read the partition information
-    xd->left_seg_context =
-        pc->left_seg_context + ((mi_row >> 1) & 3);
-    xd->above_seg_context = pc->above_seg_context + (mi_col >> 1);
+    xd->left_seg_context = pc->left_seg_context + (mi_row & MI_MASK);
+    xd->above_seg_context = pc->above_seg_context + mi_col;
     pl = partition_plane_context(xd, bsize);
     partition = treed_read(r, vp9_partition_tree,
                            pc->fc.partition_prob[pl]);
@@ -485,8 +484,8 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col,
   if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_MB16X16))
     return;
 
-  xd->left_seg_context = pc->left_seg_context + ((mi_row >> 1) & 3);
-  xd->above_seg_context = pc->above_seg_context + (mi_col >> 1);
+  xd->left_seg_context = pc->left_seg_context + (mi_row & MI_MASK);
+  xd->above_seg_context = pc->above_seg_context + mi_col;
   update_partition_context(xd, subsize, bsize);
 }
 
@@ -657,7 +656,6 @@ static void setup_pred_probs(VP9_COMMON *pc, vp9_reader *r) {
 }
 
 static void setup_loopfilter(VP9_COMMON *pc, MACROBLOCKD *xd, vp9_reader *r) {
-  pc->filter_type = (LOOPFILTER_TYPE) vp9_read_bit(r);
   pc->filter_level = vp9_read_literal(r, 6);
   pc->sharpness_level = vp9_read_literal(r, 3);
 
@@ -847,11 +845,11 @@ static void decode_tiles(VP9D_COMP *pbi,
 
   // Note: this memset assumes above_context[0], [1] and [2]
   // are allocated as part of the same buffer.
-  vpx_memset(pc->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 4 *
-                                      MAX_MB_PLANE * mb_cols_aligned_to_sb(pc));
+  vpx_memset(pc->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 2 *
+                                      MAX_MB_PLANE * mi_cols_aligned_to_sb(pc));
 
   vpx_memset(pc->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
-                                       mb_cols_aligned_to_sb(pc));
+                                       mi_cols_aligned_to_sb(pc));
 
   if (pbi->oxcf.inv_tile_order) {
     const int n_cols = pc->tile_columns;
@@ -1014,7 +1012,9 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
       if (mapped_ref >= NUM_YV12_BUFFERS)
         memset(sf, 0, sizeof(*sf));
       else
-        vp9_setup_scale_factors_for_frame(sf, fb, pc->width, pc->height);
+        vp9_setup_scale_factors_for_frame(sf,
+                                          fb->y_crop_width, fb->y_crop_height,
+                                          pc->width, pc->height);
     }
 
     // Read the sign bias for each reference frame buffer.
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index a7c26a45b..20154d813 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -878,9 +878,8 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
 
   if (bsize > BLOCK_SIZE_SB8X8) {
     int pl;
-    xd->left_seg_context =
-        cm->left_seg_context + ((mi_row >> 1) & 3);
-    xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
+    xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK);
+    xd->above_seg_context = cm->above_seg_context + mi_col;
     pl = partition_plane_context(xd, bsize);
     // encode the partition information
     write_token(bc, vp9_partition_tree, cm->fc.partition_prob[pl],
@@ -918,8 +917,8 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
   if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_MB16X16))
     return;
 
-  xd->left_seg_context = cm->left_seg_context + ((mi_row >> 1) & 3);
-  xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
+  xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK);
+  xd->above_seg_context = cm->above_seg_context + mi_col;
   update_partition_context(xd, subsize, bsize);
 }
 
@@ -932,7 +931,7 @@ static void write_modes(VP9_COMP *cpi, vp9_writer* const bc,
 
   m_ptr += c->cur_tile_mi_col_start + c->cur_tile_mi_row_start * mis;
   vpx_memset(c->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
-             mb_cols_aligned_to_sb(c));
+             mi_cols_aligned_to_sb(c));
 
   for (mi_row = c->cur_tile_mi_row_start;
        mi_row < c->cur_tile_mi_row_end;
@@ -1497,7 +1496,6 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, unsigned long *size) {
   vp9_write_bit(&header_bc, cpi->mb.e_mbd.lossless);
 
   // Encode the loop filter level and type
-  vp9_write_bit(&header_bc, pc->filter_type);
   vp9_write_literal(&header_bc, pc->filter_level, 6);
   vp9_write_literal(&header_bc, pc->sharpness_level, 3);
 #if CONFIG_LOOP_DERING
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 8ddad26b7..49e8ccefa 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -544,8 +544,8 @@ static INLINE void set_partition_seg_context(VP9_COMP *cpi,
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
 
-  xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
-  xd->left_seg_context  = cm->left_seg_context + ((mi_row >> 1) & 3);
+  xd->above_seg_context = cm->above_seg_context + mi_col;
+  xd->left_seg_context  = cm->left_seg_context + (mi_row & MI_MASK);
 }
 
 static void set_offsets(VP9_COMP *cpi,
@@ -878,7 +878,7 @@ static void encode_sb_row(VP9_COMP *cpi,
     int sb64_rate = 0, sb64_dist = 0;
     int sb64_skip = 0;
     ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
-    PARTITION_CONTEXT seg_l[4], seg_a[4];
+    PARTITION_CONTEXT seg_l[64 / MI_SIZE], seg_a[64 / MI_SIZE];
     TOKENEXTRA *tp_orig = *tp;
 
     for (p = 0; p < MAX_MB_PLANE; p++) {
@@ -888,9 +888,8 @@ static void encode_sb_row(VP9_COMP *cpi,
       memcpy(l + 16 * p, cm->left_context[p],
              sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y);
     }
-    memcpy(&seg_a, cm->above_seg_context + (mi_col >> 1),
-           sizeof(seg_a));
-    memcpy(&seg_l, cm->left_seg_context, sizeof(seg_l));
+    vpx_memcpy(&seg_a, cm->above_seg_context + mi_col, sizeof(seg_a));
+    vpx_memcpy(&seg_l, cm->left_seg_context, sizeof(seg_l));
 
     // FIXME(rbultje): this function should probably be rewritten to be
     // recursive at some point in the future.
@@ -902,6 +901,7 @@ static void encode_sb_row(VP9_COMP *cpi,
       int sb32_skip = 0;
       int j;
       ENTROPY_CONTEXT l2[8 * MAX_MB_PLANE], a2[8 * MAX_MB_PLANE];
+      PARTITION_CONTEXT sl32[32 / MI_SIZE], sa32[32 / MI_SIZE];
 
       sb_partitioning[i] = BLOCK_SIZE_MB16X16;
       if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
@@ -920,6 +920,8 @@ static void encode_sb_row(VP9_COMP *cpi,
                        ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x),
                    sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x);
       }
+      vpx_memcpy(&sa32, cm->above_seg_context + mi_col + x_idx, sizeof(sa32));
+      vpx_memcpy(&sl32, cm->left_seg_context + y_idx, sizeof(sl32));
 
       /* Encode MBs in raster order within the SB */
       for (j = 0; j < 4; j++) {
@@ -928,6 +930,7 @@ static void encode_sb_row(VP9_COMP *cpi,
         int r, d;
         int r2, d2, mb16_rate = 0, mb16_dist = 0, k;
         ENTROPY_CONTEXT l3[4 * MAX_MB_PLANE], a3[4 * MAX_MB_PLANE];
+        PARTITION_CONTEXT sl16[16 / MI_SIZE], sa16[16 / MI_SIZE];
 
         mb_partitioning[i][j] = BLOCK_SIZE_SB8X8;
 
@@ -950,6 +953,9 @@ static void encode_sb_row(VP9_COMP *cpi,
                          ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x),
                      sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
         }
+        vpx_memcpy(&sa16, cm->above_seg_context + mi_col + x_idx_m,
+                   sizeof(sa16));
+        vpx_memcpy(&sl16, cm->left_seg_context + y_idx_m, sizeof(sl16));
 
         for (k = 0; k < 4; k++) {
           xd->b_index = k;
@@ -983,6 +989,9 @@ static void encode_sb_row(VP9_COMP *cpi,
                      a3 + 4 * p,
                      sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
         }
+        vpx_memcpy(cm->above_seg_context + mi_col + x_idx_m,
+                   sa16, sizeof(sa16));
+        vpx_memcpy(cm->left_seg_context + y_idx_m, sl16, sizeof(sl16));
 
         // try 8x16 coding
         r2 = 0;
@@ -1102,6 +1111,9 @@ static void encode_sb_row(VP9_COMP *cpi,
                    a2 + 8 * p,
                    sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x);
       }
+      // restore partition information context
+      vpx_memcpy(cm->above_seg_context + mi_col + x_idx, sa32, sizeof(sa32));
+      vpx_memcpy(cm->left_seg_context + y_idx, sl32, sizeof(sl32));
 
       set_partition_seg_context(cpi, mi_row + y_idx, mi_col + x_idx);
       pl = partition_plane_context(xd, BLOCK_SIZE_SB32X32);
@@ -1258,8 +1270,7 @@ static void encode_sb_row(VP9_COMP *cpi,
       memcpy(cm->left_context[p], l + 16 * p,
              sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y);
     }
-    memcpy(cm->above_seg_context + (mi_col >> 1), &seg_a,
-           sizeof(seg_a));
+    memcpy(cm->above_seg_context + mi_col, &seg_a, sizeof(seg_a));
     memcpy(cm->left_seg_context, &seg_l, sizeof(seg_l));
 
     set_partition_seg_context(cpi, mi_row, mi_col);
@@ -1428,10 +1439,10 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
 
   // Note: this memset assumes above_context[0], [1] and [2]
   // are allocated as part of the same buffer.
-  vpx_memset(cm->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 4 *
-                                      MAX_MB_PLANE * mb_cols_aligned_to_sb(cm));
+  vpx_memset(cm->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 2 *
+                                      MAX_MB_PLANE * mi_cols_aligned_to_sb(cm));
   vpx_memset(cm->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
-                                       mb_cols_aligned_to_sb(cm));
+                                       mi_cols_aligned_to_sb(cm));
 }
 
 static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 018c86cb9..7d4906cf7 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -9,12 +9,13 @@
  */
 
 #include <limits.h>
+
+#include <vpx_mem/vpx_mem.h>
 #include <vp9/encoder/vp9_encodeintra.h>
 #include <vp9/encoder/vp9_rdopt.h>
 #include <vp9/common/vp9_blockd.h>
 #include <vp9/common/vp9_reconinter.h>
 #include <vp9/common/vp9_systemdependent.h>
-#include <vpx_mem/vpx_mem.h>
 #include <vp9/encoder/vp9_segmentation.h>
 
 static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
@@ -27,15 +28,15 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
   vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
   unsigned int best_err;
 
-  int tmp_col_min = x->mv_col_min;
-  int tmp_col_max = x->mv_col_max;
-  int tmp_row_min = x->mv_row_min;
-  int tmp_row_max = x->mv_row_max;
+  const int tmp_col_min = x->mv_col_min;
+  const int tmp_col_max = x->mv_col_max;
+  const int tmp_row_min = x->mv_row_min;
+  const int tmp_row_max = x->mv_row_max;
   int_mv ref_full;
 
   // Further step/diamond searches as necessary
   int step_param = cpi->sf.first_step +
-      (cpi->Speed < 8 ? (cpi->Speed > 5 ? 1 : 0) : 2);
+      (cpi->speed < 8 ? (cpi->speed > 5 ? 1 : 0) : 2);
 
   vp9_clamp_mv_min_max(x, ref_mv);
 
@@ -43,15 +44,8 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
   ref_full.as_mv.row = ref_mv->as_mv.row >> 3;
 
   /*cpi->sf.search_method == HEX*/
-  best_err = vp9_hex_search(
-      x,
-      &ref_full, dst_mv,
-      step_param,
-      x->errorperbit,
-      &v_fn_ptr,
-      NULL, NULL,
-      NULL, NULL,
-      ref_mv);
+  best_err = vp9_hex_search(x, &ref_full, dst_mv, step_param, x->errorperbit,
+                            &v_fn_ptr, NULL, NULL, NULL, NULL, ref_mv);
 
   // Try sub-pixel MC
   // if (bestsme > error_thresh && bestsme < INT_MAX)
@@ -81,18 +75,11 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
   return best_err;
 }
 
-static int do_16x16_motion_search
-(
-  VP9_COMP *cpi,
-  int_mv *ref_mv,
-  int_mv *dst_mv,
-  YV12_BUFFER_CONFIG *buf,
-  int buf_mb_y_offset,
-  YV12_BUFFER_CONFIG *ref,
-  int mb_y_offset,
-  int mb_row,
-  int mb_col) {
-  MACROBLOCK   *const x  = &cpi->mb;
+static int do_16x16_motion_search(VP9_COMP *cpi,
+                                  int_mv *ref_mv, int_mv *dst_mv,
+                                  int buf_mb_y_offset, int mb_y_offset,
+                                  int mb_row, int mb_col) {
+  MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   unsigned int err, tmp_err;
   int_mv tmp_mv;
@@ -108,7 +95,7 @@ static int do_16x16_motion_search
   // starting point (best reference) for the search
   tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv, mb_row, mb_col);
   if (tmp_err < err) {
-    err            = tmp_err;
+    err = tmp_err;
     dst_mv->as_int = tmp_mv.as_int;
   }
 
@@ -129,16 +116,10 @@ static int do_16x16_motion_search
   return err;
 }
 
-static int do_16x16_zerozero_search
-(
-  VP9_COMP *cpi,
-  int_mv *dst_mv,
-  YV12_BUFFER_CONFIG *buf,
-  int buf_mb_y_offset,
-  YV12_BUFFER_CONFIG *ref,
-  int mb_y_offset
-) {
-  MACROBLOCK   *const x  = &cpi->mb;
+static int do_16x16_zerozero_search(VP9_COMP *cpi,
+                                    int_mv *dst_mv,
+                                    int buf_mb_y_offset, int mb_y_offset) {
+  MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   unsigned int err;
 
@@ -152,13 +133,9 @@ static int do_16x16_zerozero_search
 
   return err;
 }
-static int find_best_16x16_intra
-(
-  VP9_COMP *cpi,
-  YV12_BUFFER_CONFIG *buf,
-  int mb_y_offset,
-  MB_PREDICTION_MODE *pbest_mode
-) {
+static int find_best_16x16_intra(VP9_COMP *cpi,
+                                 int mb_y_offset,
+                                 MB_PREDICTION_MODE *pbest_mode) {
   MACROBLOCK   *const x  = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_PREDICTION_MODE best_mode = -1, mode;
@@ -209,7 +186,7 @@ static void update_mbgraph_mb_stats
   int mb_row,
   int mb_col
 ) {
-  MACROBLOCK   *const x  = &cpi->mb;
+  MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   int intra_error;
   VP9_COMMON *cm = &cpi->common;
@@ -222,7 +199,7 @@ static void update_mbgraph_mb_stats
   xd->plane[0].dst.stride = cm->yv12_fb[cm->new_fb_idx].y_stride;
 
   // do intra 16x16 prediction
-  intra_error = find_best_16x16_intra(cpi, buf, mb_y_offset,
+  intra_error = find_best_16x16_intra(cpi, mb_y_offset,
                                       &stats->ref[INTRA_FRAME].m.mode);
   if (intra_error <= 0)
     intra_error = 1;
@@ -233,10 +210,10 @@ static void update_mbgraph_mb_stats
     int g_motion_error;
     xd->plane[0].pre[0].buf = golden_ref->y_buffer + mb_y_offset;
     xd->plane[0].pre[0].stride = golden_ref->y_stride;
-    g_motion_error = do_16x16_motion_search(cpi, prev_golden_ref_mv,
+    g_motion_error = do_16x16_motion_search(cpi,
+                                            prev_golden_ref_mv,
                                             &stats->ref[GOLDEN_FRAME].m.mv,
-                                            buf, mb_y_offset,
-                                            golden_ref, gld_y_offset,
+                                            mb_y_offset, gld_y_offset,
                                             mb_row, mb_col);
     stats->ref[GOLDEN_FRAME].err = g_motion_error;
   } else {
@@ -251,8 +228,7 @@ static void update_mbgraph_mb_stats
     xd->plane[0].pre[0].stride = alt_ref->y_stride;
     a_motion_error = do_16x16_zerozero_search(cpi,
                                               &stats->ref[ALTREF_FRAME].m.mv,
-                                              buf, mb_y_offset,
-                                              alt_ref, arf_y_offset);
+                                              mb_y_offset, arf_y_offset);
 
     stats->ref[ALTREF_FRAME].err = a_motion_error;
   } else {
@@ -261,17 +237,15 @@ static void update_mbgraph_mb_stats
   }
 }
 
-static void update_mbgraph_frame_stats
-(
-  VP9_COMP *cpi,
-  MBGRAPH_FRAME_STATS *stats,
-  YV12_BUFFER_CONFIG *buf,
-  YV12_BUFFER_CONFIG *golden_ref,
-  YV12_BUFFER_CONFIG *alt_ref
-) {
-  MACROBLOCK   *const x  = &cpi->mb;
-  VP9_COMMON   *const cm = &cpi->common;
+static void update_mbgraph_frame_stats(VP9_COMP *cpi,
+                                       MBGRAPH_FRAME_STATS *stats,
+                                       YV12_BUFFER_CONFIG *buf,
+                                       YV12_BUFFER_CONFIG *golden_ref,
+                                       YV12_BUFFER_CONFIG *alt_ref) {
+  MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
+  VP9_COMMON *const cm = &cpi->common;
+
   int mb_col, mb_row, offset = 0;
   int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0;
   int_mv arf_top_mv, gld_top_mv;
@@ -360,17 +334,16 @@ static void separate_arf_mbs(VP9_COMP *cpi) {
     for (offset = 0, mb_row = 0; mb_row < cm->mb_rows;
          offset += cm->mb_cols, mb_row++) {
       for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
-        MBGRAPH_MB_STATS *mb_stats =
-          &frame_stats->mb_stats[offset + mb_col];
+        MBGRAPH_MB_STATS *mb_stats = &frame_stats->mb_stats[offset + mb_col];
 
         int altref_err = mb_stats->ref[ALTREF_FRAME].err;
         int intra_err  = mb_stats->ref[INTRA_FRAME ].err;
         int golden_err = mb_stats->ref[GOLDEN_FRAME].err;
 
         // Test for altref vs intra and gf and that its mv was 0,0.
-        if ((altref_err > 1000) ||
-            (altref_err > intra_err) ||
-            (altref_err > golden_err)) {
+        if (altref_err > 1000 ||
+            altref_err > intra_err ||
+            altref_err > golden_err) {
           arf_not_zz[offset + mb_col]++;
         }
       }
@@ -450,8 +423,7 @@ void vp9_update_mbgraph_stats(VP9_COMP *cpi) {
   // the ARF MC search backwards, to get optimal results for MV caching
   for (i = 0; i < n_frames; i++) {
     MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i];
-    struct lookahead_entry *q_cur =
-      vp9_lookahead_peek(cpi->lookahead, i);
+    struct lookahead_entry *q_cur = vp9_lookahead_peek(cpi->lookahead, i);
 
     assert(q_cur != NULL);
 
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 5fdbbefe9..782816f92 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -695,7 +695,7 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
 void vp9_set_speed_features(VP9_COMP *cpi) {
   SPEED_FEATURES *sf = &cpi->sf;
   int mode = cpi->compressor_speed;
-  int speed = cpi->Speed;
+  int speed = cpi->speed;
   int i;
 
   // Only modes 0 and 1 supported for now in experimental code basae
@@ -830,7 +830,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1;
 
 #ifdef SPEEDSTATS
-  frames_at_speed[cpi->Speed]++;
+  frames_at_speed[cpi->speed]++;
 #endif
 }
 
@@ -1215,7 +1215,7 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
     cpi->last_boosted_qindex = cpi->oxcf.fixed_q;
   }
 
-  cpi->Speed = cpi->oxcf.cpu_used;
+  cpi->speed = cpi->oxcf.cpu_used;
 
   if (cpi->oxcf.lag_in_frames == 0) {
     // force to allowlag to 0 if lag_in_frames is 0;
@@ -3851,12 +3851,12 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
   for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) {
     if (cm->active_ref_idx[i] >= NUM_YV12_BUFFERS) {
       memset(&cm->active_ref_scale[i], 0, sizeof(cm->active_ref_scale[i]));
-      continue;
+    } else {
+      YV12_BUFFER_CONFIG *fb = &cm->yv12_fb[cm->active_ref_idx[i]];
+      vp9_setup_scale_factors_for_frame(&cm->active_ref_scale[i],
+                                        fb->y_crop_width, fb->y_crop_height,
+                                        cm->width, cm->height);
     }
-
-    vp9_setup_scale_factors_for_frame(&cm->active_ref_scale[i],
-                                      &cm->yv12_fb[cm->active_ref_idx[i]],
-                                      cm->width, cm->height);
   }
 
   vp9_setup_interp_filters(&cpi->mb.e_mbd, DEFAULT_INTERP_FILTER, cm);
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index d3ae2498f..aba4c0e64 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -247,8 +247,7 @@ enum BlockSize {
   BLOCK_8X16,
   BLOCK_16X8,
   BLOCK_16X16,
-  BLOCK_MAX_SEGMENTS,
-  BLOCK_32X32 = BLOCK_MAX_SEGMENTS,
+  BLOCK_32X32,
   BLOCK_32X16,
   BLOCK_16X32,
   BLOCK_64X32,
@@ -467,7 +466,7 @@ typedef struct VP9_COMP {
   // for real time encoding
   int avg_encode_time;              // microsecond
   int avg_pick_mode_time;            // microsecond
-  int Speed;
+  int speed;
   unsigned int cpu_freq;           // Mhz
   int compressor_speed;
 
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 92e58f155..1b143f5e0 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1748,6 +1748,13 @@ static void model_rd_from_var_lapndz(int var, int n, int qstep,
   vp9_clear_system_state();
 }
 
+static enum BlockSize get_plane_block_size(BLOCK_SIZE_TYPE bsize,
+                                           struct macroblockd_plane *pd) {
+  const int bwl = b_width_log2(bsize) - pd->subsampling_x;  // x-subsampled
+  const int bhl = b_height_log2(bsize) - pd->subsampling_y;  // y-subsampled
+  return get_block_size(4 << bwl, 4 << bhl);  // dims passed as 4 << log2
+}
+
 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                             MACROBLOCK *x, MACROBLOCKD *xd,
                             int *out_rate_sum, int *out_dist_sum) {
@@ -1761,6 +1768,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
     struct macroblock_plane *const p = &x->plane[i];
     struct macroblockd_plane *const pd = &xd->plane[i];
 
+    // TODO(dkovalev) the same code in get_plane_block_size
     const int bwl = b_width_log2(bsize) - pd->subsampling_x;
     const int bhl = b_height_log2(bsize) - pd->subsampling_y;
     const enum BlockSize bs = get_block_size(4 << bwl, 4 << bhl);
@@ -1778,42 +1786,6 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
   *out_dist_sum = dist_sum;
 }
 
-static enum BlockSize y_to_uv_block_size(enum BlockSize bs) {
-  switch (bs) {
-    case BLOCK_64X64: return BLOCK_32X32;
-    case BLOCK_64X32: return BLOCK_32X16;
-    case BLOCK_32X64: return BLOCK_16X32;
-    case BLOCK_32X32: return BLOCK_16X16;
-    case BLOCK_32X16: return BLOCK_16X8;
-    case BLOCK_16X32: return BLOCK_8X16;
-    case BLOCK_16X16: return BLOCK_8X8;
-    case BLOCK_16X8:  return BLOCK_8X4;
-    case BLOCK_8X16:  return BLOCK_4X8;
-    case BLOCK_8X8:   return BLOCK_4X4;
-    default:
-      assert(0);
-      return -1;
-  }
-}
-
-static enum BlockSize y_bsizet_to_block_size(BLOCK_SIZE_TYPE bs) {
-  switch (bs) {
-    case BLOCK_SIZE_SB64X64: return BLOCK_64X64;
-    case BLOCK_SIZE_SB64X32: return BLOCK_64X32;
-    case BLOCK_SIZE_SB32X64: return BLOCK_32X64;
-    case BLOCK_SIZE_SB32X32: return BLOCK_32X32;
-    case BLOCK_SIZE_SB32X16: return BLOCK_32X16;
-    case BLOCK_SIZE_SB16X32: return BLOCK_16X32;
-    case BLOCK_SIZE_MB16X16: return BLOCK_16X16;
-    case BLOCK_SIZE_SB16X8:  return BLOCK_16X8;
-    case BLOCK_SIZE_SB8X16:  return BLOCK_8X16;
-    case BLOCK_SIZE_SB8X8:   return BLOCK_8X8;
-    default:
-      assert(0);
-      return -1;
-  }
-}
-
 static INLINE int get_switchable_rate(VP9_COMMON *cm, MACROBLOCK *x) {
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
@@ -1838,10 +1810,12 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                  YV12_BUFFER_CONFIG *scaled_ref_frame,
                                  int mi_row, int mi_col) {
   const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize);
-  const enum BlockSize block_size = y_bsizet_to_block_size(bsize);
-  const enum BlockSize uv_block_size = y_to_uv_block_size(block_size);
+
   VP9_COMMON *cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
+  const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
+  const enum BlockSize uv_block_size = get_plane_block_size(bsize,
+                                                            &xd->plane[1]);
   MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
   const int is_comp_pred = (mbmi->second_ref_frame > 0);
   const int num_refs = is_comp_pred ? 2 : 1;
@@ -2219,10 +2193,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
                                   int *returndistortion,
                                   BLOCK_SIZE_TYPE bsize,
                                   PICK_MODE_CONTEXT *ctx) {
-  const enum BlockSize block_size = y_bsizet_to_block_size(bsize);
   VP9_COMMON *cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+  const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
   MB_PREDICTION_MODE this_mode;
   MB_PREDICTION_MODE best_mode = DC_PRED;
   MV_REFERENCE_FRAME ref_frame, second_ref = INTRA_FRAME;
@@ -2284,7 +2258,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     best_txfm_rd[i] = INT64_MAX;
 
   // Create a mask set to 1 for each frame used by a smaller resolution.
-  if (cpi->Speed > 0) {
+  if (cpi->speed > 0) {
     switch (block_size) {
       case BLOCK_64X64:
         for (i = 0; i < 4; i++) {
@@ -2324,8 +2298,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
     frame_mv[ZEROMV][ref_frame].as_int = 0;
   }
-  if (cpi->Speed == 0
-      || (cpi->Speed > 0 && (ref_frame_mask & (1 << INTRA_FRAME)))) {
+  if (cpi->speed == 0
+      || (cpi->speed > 0 && (ref_frame_mask & (1 << INTRA_FRAME)))) {
     mbmi->mode = DC_PRED;
     for (i = 0; i <= (bsize < BLOCK_SIZE_MB16X16 ? TX_4X4 :
                       (bsize < BLOCK_SIZE_SB32X32 ? TX_8X8 :
@@ -2363,7 +2337,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
         || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
       continue;
     }
-    if (cpi->Speed > 0) {
+    if (cpi->speed > 0) {
       if (!(ref_frame_mask & (1 << ref_frame))) {
         continue;
       }
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index d272cbb8d..30143d77d 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -118,9 +118,9 @@ void vp9_temporal_filter_apply_c(uint8_t *frame1,
 #if ALT_REF_MC_ENABLED
 
 static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
-                                              YV12_BUFFER_CONFIG *arf_frame,
-                                              YV12_BUFFER_CONFIG *frame_ptr,
-                                              int mb_offset,
+                                              uint8_t *arf_frame_buf,
+                                              uint8_t *frame_ptr_buf,
+                                              int stride,
                                               int error_thresh) {
   MACROBLOCK *x = &cpi->mb;
   MACROBLOCKD* const xd = &x->e_mbd;
@@ -141,18 +141,16 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
   best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >> 3;
 
   // Setup frame pointers
-  x->plane[0].src.buf = arf_frame->y_buffer + mb_offset;
-  x->plane[0].src.stride = arf_frame->y_stride;
-  xd->plane[0].pre[0].buf = frame_ptr->y_buffer + mb_offset;
-  xd->plane[0].pre[0].stride = arf_frame->y_stride;
+  x->plane[0].src.buf = arf_frame_buf;
+  x->plane[0].src.stride = stride;
+  xd->plane[0].pre[0].buf = frame_ptr_buf;
+  xd->plane[0].pre[0].stride = stride;
 
   // Further step/diamond searches as necessary
-  if (cpi->Speed < 8) {
-    step_param = cpi->sf.first_step +
-                 ((cpi->Speed > 5) ? 1 : 0);
-  } else {
+  if (cpi->speed < 8)
+    step_param = cpi->sf.first_step + ((cpi->speed > 5) ? 1 : 0);
+  else
     step_param = cpi->sf.first_step + 2;
-  }
 
   /*cpi->sf.search_method == HEX*/
   // TODO Check that the 16x16 vf & sdf are selected here
@@ -260,9 +258,9 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
           // Find best match in this frame by MC
           err = temporal_filter_find_matching_mb_c
                 (cpi,
-                 cpi->frames[alt_ref_index],
-                 cpi->frames[frame],
-                 mb_y_offset,
+                 cpi->frames[alt_ref_index]->y_buffer + mb_y_offset,
+                 cpi->frames[frame]->y_buffer + mb_y_offset,
+                 cpi->frames[frame]->y_stride,
                  THRESH_LOW);
 #endif
           // Assign higher weight to matching MB if it's error
@@ -360,10 +358,10 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
 }
 
 void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
+  VP9_COMMON *const cm = &cpi->common;
+
   int frame = 0;
 
-  int num_frames_backward = 0;
-  int num_frames_forward = 0;
   int frames_to_blur_backward = 0;
   int frames_to_blur_forward = 0;
   int frames_to_blur = 0;
@@ -373,15 +371,13 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
   int blur_type = cpi->oxcf.arnr_type;
   int max_frames = cpi->active_arnr_frames;
 
-  num_frames_backward = distance;
-  num_frames_forward = vp9_lookahead_depth(cpi->lookahead)
-                       - (num_frames_backward + 1);
+  const int num_frames_backward = distance;
+  const int num_frames_forward = vp9_lookahead_depth(cpi->lookahead)
+                               - (num_frames_backward + 1);
 
   switch (blur_type) {
     case 1:
-      /////////////////////////////////////////
       // Backward Blur
-
       frames_to_blur_backward = num_frames_backward;
 
       if (frames_to_blur_backward >= max_frames)
@@ -391,7 +387,6 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
       break;
 
     case 2:
-      /////////////////////////////////////////
       // Forward Blur
 
       frames_to_blur_forward = num_frames_forward;
@@ -404,7 +399,6 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
 
     case 3:
     default:
-      /////////////////////////////////////////
       // Center Blur
       frames_to_blur_forward = num_frames_forward;
       frames_to_blur_backward = num_frames_backward;
@@ -444,25 +438,22 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
 
   // Setup scaling factors. Scaling on each of the arnr frames is not supported
   vp9_setup_scale_factors_for_frame(&cpi->mb.e_mbd.scale_factor[0],
-      &cpi->common.yv12_fb[cpi->common.new_fb_idx],
-      cpi->common.width,
-      cpi->common.height);
+      cm->yv12_fb[cm->new_fb_idx].y_crop_width,
+      cm->yv12_fb[cm->new_fb_idx].y_crop_height,
+      cm->width, cm->height);
   cpi->mb.e_mbd.scale_factor_uv[0] = cpi->mb.e_mbd.scale_factor[0];
 
   // Setup frame pointers, NULL indicates frame not included in filter
   vpx_memset(cpi->frames, 0, max_frames * sizeof(YV12_BUFFER_CONFIG *));
   for (frame = 0; frame < frames_to_blur; frame++) {
-    int which_buffer =  start_frame - frame;
+    int which_buffer = start_frame - frame;
     struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead,
                                                      which_buffer);
     cpi->frames[frames_to_blur - 1 - frame] = &buf->img;
   }
 
-  temporal_filter_iterate_c(
-    cpi,
-    frames_to_blur,
-    frames_to_blur_backward,
-    strength);
+  temporal_filter_iterate_c(cpi, frames_to_blur, frames_to_blur_backward,
+                            strength);
 }
 
 void configure_arnr_filter(VP9_COMP *cpi, const unsigned int this_frame,