5 files changed, 40 insertions, 28 deletions
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index b8c86ea43..362077a6c 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -6146,7 +6146,7 @@ static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd,
     // TODO(angiebird): Consider subpixel when computing the sse.
     cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv),
                           pre.stride, &sse);
-    return (double)sse;
+    return (double)(sse << VP9_DIST_SCALE_LOG2);
   } else {
     assert(0);
     return 0;
diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h
index fa85f2176..062ca3277 100644
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -42,6 +42,9 @@ extern "C" {
 #define RD_THRESH_MAX_FACT 64
 #define RD_THRESH_INC 1
 
+#define VP9_DIST_SCALE_LOG2 4
+#define VP9_DIST_SCALE (1 << VP9_DIST_SCALE_LOG2)
+
 // This enumerator type needs to be kept aligned with the mode order in
 // const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code.
 typedef enum {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index c73b0ed87..6f07269d4 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -273,9 +273,9 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
   }
 
   *skip_txfm_sb = skip_flag;
-  *skip_sse_sb = total_sse << 4;
+  *skip_sse_sb = total_sse << VP9_DIST_SCALE_LOG2;
   *out_rate_sum = (int)rate_sum;
-  *out_dist_sum = dist_sum << 4;
+  *out_dist_sum = dist_sum << VP9_DIST_SCALE_LOG2;
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index d02603615..0b636b85c 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -225,24 +225,24 @@ static INLINE int get_filter_weight(unsigned int i, unsigned int j,
                                     unsigned int block_height,
                                     unsigned int block_width,
                                     const int *const blk_fw, int use_32x32) {
-  int filter_weight = 0;
-
-  if (use_32x32)
-    // blk_fw[0] ~ blk_fw[3] are the same.
+  // blk_fw[0] ~ blk_fw[3] are the same.
+  if (use_32x32) {
     return blk_fw[0];
+  }
 
   if (i < block_height / 2) {
-    if (j < block_width / 2)
-      filter_weight = blk_fw[0];
-    else
-      filter_weight = blk_fw[1];
-  } else {
-    if (j < block_width / 2)
-      filter_weight = blk_fw[2];
-    else
-      filter_weight = blk_fw[3];
+    if (j < block_width / 2) {
+      return blk_fw[0];
+    }
+
+    return blk_fw[1];
+  }
+
+  if (j < block_width / 2) {
+    return blk_fw[2];
   }
-  return filter_weight;
+
+  return blk_fw[3];
 }
 
 void vp9_apply_temporal_filter_c(
@@ -296,7 +296,7 @@ void vp9_apply_temporal_filter_c(
   for (i = 0, k = 0, m = 0; i < block_height; i++) {
     for (j = 0; j < block_width; j++) {
       const int pixel_value = y_pred[i * y_buf_stride + j];
-      int filter_weight =
+      const int filter_weight =
           get_filter_weight(i, j, block_height, block_width, blk_fw, use_32x32);
 
       // non-local mean approach
diff --git a/vp9/encoder/x86/temporal_filter_sse4.c b/vp9/encoder/x86/temporal_filter_sse4.c
index 9f9483a9b..a97c96dee 100644
--- a/vp9/encoder/x86/temporal_filter_sse4.c
+++ b/vp9/encoder/x86/temporal_filter_sse4.c
@@ -254,7 +254,7 @@ static INLINE void read_chroma_dist_row_16(int ss_x, const uint16_t *u_dist,
                                            __m128i *v_first,
                                            __m128i *v_second) {
   if (!ss_x) {
-    // If there is no chroma subsampling in the horizaontal direction, then we
+    // If there is no chroma subsampling in the horizontal direction, then we
     // need to load 16 entries from chroma.
     read_dist_16(u_dist, u_first, u_second);
     read_dist_16(v_dist, v_first, v_second);
@@ -274,8 +274,8 @@ static INLINE void read_chroma_dist_row_16(int ss_x, const uint16_t *u_dist,
   }
 }
 
-// Horizonta add unsigned 16-bit ints in src and store them as signed 32-bit int
-// in dst.
+// Horizontal add unsigned 16-bit ints in src and store them as signed 32-bit
+// int in dst.
 static INLINE void hadd_epu16(__m128i *src, __m128i *dst) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i shift_right = _mm_srli_si128(*src, 2);
@@ -323,7 +323,7 @@ static INLINE void add_luma_dist_to_8_chroma_mod(const uint16_t *y_dist,
 
 // Apply temporal filter to the luma components. This performs temporal
 // filtering on a luma block of 16 X block_height. Use blk_fw as an array of
-// size 4for the weights for each of the 4 subblocks if blk_fw is not NULL,
+// size 4 for the weights for each of the 4 subblocks if blk_fw is not NULL,
 // else use top_weight for top half, and bottom weight for bottom half.
 static void vp9_apply_temporal_filter_luma_16(
     const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre,
@@ -350,6 +350,9 @@ static void vp9_apply_temporal_filter_luma_16(
   __m128i sum_row_first;
   __m128i sum_row_second;
 
+  // Loop variable
+  unsigned int h;
+
   assert(strength >= 0);
   assert(strength <= 6);
 
@@ -408,7 +411,7 @@ static void vp9_apply_temporal_filter_luma_16(
   mul_first = _mm_loadu_si128((const __m128i *)neighbors_first[1]);
   mul_second = _mm_loadu_si128((const __m128i *)neighbors_second[1]);
 
-  for (unsigned int h = 1; h < block_height - 1; ++h) {
+  for (h = 1; h < block_height - 1; ++h) {
     // Move the weight to bottom half
     if (!use_whole_blk && h == block_height / 2) {
       if (blk_fw) {
@@ -640,6 +643,9 @@ static void vp9_apply_temporal_filter_chroma_8(
 
   __m128i u_sum_row, v_sum_row;
 
+  // Loop variable
+  unsigned int h;
+
   (void)uv_block_width;
 
   // First row
@@ -690,7 +696,7 @@ static void vp9_apply_temporal_filter_chroma_8(
   // Then all the rows except the last one
   mul = _mm_loadu_si128((const __m128i *)neighbors[1]);
 
-  for (unsigned int h = 1; h < uv_block_height - 1; ++h) {
+  for (h = 1; h < uv_block_height - 1; ++h) {
     // Move the weight pointer to the bottom half of the blocks
     if (h == uv_block_height / 2) {
       if (blk_fw) {
@@ -936,6 +942,9 @@ void vp9_apply_temporal_filter_sse4_1(
   const uint8_t *y_src_ptr = y_src, *u_src_ptr = u_src, *v_src_ptr = v_src;
   const uint8_t *y_pre_ptr = y_pre, *u_pre_ptr = u_pre, *v_pre_ptr = v_pre;
 
+  // Loop variables
+  unsigned int row, blk_col;
+
   assert(block_width <= BW && "block width too large");
   assert(block_height <= BH && "block height too large");
   assert(block_width % 16 == 0 && "block width must be multiple of 16");
@@ -953,8 +962,8 @@ void vp9_apply_temporal_filter_sse4_1(
       "subblock filter weight must be less than 2");
 
   // Precompute the difference sqaured
-  for (unsigned int row = 0; row < block_height; row++) {
-    for (unsigned int blk_col = 0; blk_col < block_width; blk_col += 16) {
+  for (row = 0; row < block_height; row++) {
+    for (blk_col = 0; blk_col < block_width; blk_col += 16) {
       store_dist_16(y_src_ptr + blk_col, y_pre_ptr + blk_col,
                     y_dist_ptr + blk_col);
     }
@@ -963,8 +972,8 @@ void vp9_apply_temporal_filter_sse4_1(
     y_dist_ptr += DIST_STRIDE;
   }
 
-  for (unsigned int row = 0; row < chroma_height; row++) {
-    for (unsigned int blk_col = 0; blk_col < chroma_width; blk_col += 8) {
+  for (row = 0; row < chroma_height; row++) {
+    for (blk_col = 0; blk_col < chroma_width; blk_col += 8) {
       store_dist_8(u_src_ptr + blk_col, u_pre_ptr + blk_col,
                    u_dist_ptr + blk_col);
       store_dist_8(v_src_ptr + blk_col, v_pre_ptr + blk_col,