-rw-r--r--  tools_common.c                              2
-rw-r--r--  vp9/common/vp9_idct.h                       6
-rw-r--r--  vp9/encoder/x86/vp9_dct32x32_sse2_impl.h    5
-rw-r--r--  vp9/encoder/x86/vp9_dct_sse2_impl.h        20
-rw-r--r--  vpx_dsp/x86/highbd_variance_sse2.c         12
5 files changed, 25 insertions, 20 deletions
diff --git a/tools_common.c b/tools_common.c
index 901734e0f..8d356af3f 100644
--- a/tools_common.c
+++ b/tools_common.c
@@ -392,7 +392,7 @@ void vpx_img_truncate_16_to_8(vpx_image_t *dst, vpx_image_t *src) {
(uint16_t *)(src->planes[plane] + y * src->stride[plane]);
uint8_t *p_dst = dst->planes[plane] + y * dst->stride[plane];
for (x = 0; x < w; x++) {
- *p_dst++ = *p_src++;
+ *p_dst++ = (uint8_t)(*p_src++);
}
}
}
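The tools_common.c hunk only adds an explicit (uint8_t) cast: the 16-bit sample was already being truncated to its low 8 bits, the conversion is now just spelled out so implicit-conversion warnings stay quiet. A minimal self-contained sketch of the same pattern (hypothetical names, not libvpx code):

#include <stddef.h>
#include <stdint.h>

/* Copy 16-bit samples into an 8-bit buffer, keeping only the low byte. */
static void truncate_16_to_8(uint8_t *dst, const uint16_t *src, size_t n) {
  size_t i;
  for (i = 0; i < n; ++i) {
    /* Explicit cast: the stored value is src[i] & 0xff, same as before,
     * but the narrowing no longer triggers conversion warnings. */
    dst[i] = (uint8_t)src[i];
  }
}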
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index cee1682a6..cbce2dd89 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -37,6 +37,10 @@ extern "C" {
_mm_set_epi16((int16_t)(b), (int16_t)(b), (int16_t)(b), (int16_t)(b), \
(int16_t)(a), (int16_t)(a), (int16_t)(a), (int16_t)(a))
+#define octa_set_epi16(a, b, c, d, e, f, g, h) \
+ _mm_setr_epi16((int16_t)(a), (int16_t)(b), (int16_t)(c), (int16_t)(d), \
+ (int16_t)(e), (int16_t)(f), (int16_t)(g), (int16_t)(h))
+
// Constants:
// for (int i = 1; i< 32; ++i)
// printf("static const int cospi_%d_64 = %.0f;\n", i,
@@ -158,7 +162,7 @@ typedef struct {
// bd of x uses trans_low with 8+x bits, need to remove 24-x bits
#define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd))
#else
-#define WRAPLOW(x, bd) (x)
+#define WRAPLOW(x, bd) ((int32_t)(x))
#endif // CONFIG_EMULATE_HARDWARE
void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
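The vp9_idct.h hunks add a new octa_set_epi16 helper that wraps _mm_setr_epi16 with an explicit (int16_t) cast on each of the eight arguments, and make the non-CONFIG_EMULATE_HARDWARE WRAPLOW yield an int32_t instead of leaving the expression's original type. The point of the macro is that expressions such as -cospi_16_64 are wider than int16_t, so passing them straight to the intrinsic narrows implicitly. A small sketch of the intended use (the helper function name is hypothetical):

#include <emmintrin.h>
#include <stdint.h>

#define octa_set_epi16(a, b, c, d, e, f, g, h)                           \
  _mm_setr_epi16((int16_t)(a), (int16_t)(b), (int16_t)(c), (int16_t)(d), \
                 (int16_t)(e), (int16_t)(f), (int16_t)(g), (int16_t)(h))

/* Build a coefficient vector from two int-typed constants and their
 * negations; each argument is narrowed explicitly inside the macro. */
static __m128i make_coeff_vector(int c0, int c1) {
  return octa_set_epi16(c0, c1, c0, c1, -c0, -c1, -c0, -c1);
}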
diff --git a/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h b/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h
index 003ebd13f..5074d31a7 100644
--- a/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h
+++ b/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h
@@ -28,7 +28,8 @@ void vp9_fdct32x32_rows_c(const int16_t *intermediate, tran_low_t *out) {
temp_in[j] = intermediate[j * 32 + i];
vp9_fdct32(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
- out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
+ out[j + i * 32] =
+ (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
}
}
#define HIGH_FDCT32x32_2D_C vp9_highbd_fdct32x32_c
@@ -42,7 +43,7 @@ void vp9_fdct32x32_rd_rows_c(const int16_t *intermediate, tran_low_t *out) {
temp_in[j] = intermediate[j * 32 + i];
vp9_fdct32(temp_in, temp_out, 1);
for (j = 0; j < 32; ++j)
- out[j + i * 32] = temp_out[j];
+ out[j + i * 32] = (tran_low_t)temp_out[j];
}
}
#define HIGH_FDCT32x32_2D_C vp9_highbd_fdct32x32_rd_c
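Both 32x32 row transforms already computed the value being stored; the change only casts it to tran_low_t so the narrowing store is explicit. The first hunk also shows the rounding idiom used here: (x + 1 + (x < 0)) >> 2 divides by 4 and rounds to nearest, with ties broken toward zero for both signs. A sketch of that idiom in isolation (the typedef and function name are stand-ins, not libvpx code):

#include <stdint.h>

typedef int16_t tran_low_t_example;  /* stand-in for the narrower tran_low_t */

/* Divide by 4, rounding to nearest with ties toward zero:
 *   x =  6 ->  1,  x = -6 -> -1,  x =  2 -> 0,  x = -2 -> 0. */
static tran_low_t_example round_shift_2(int x) {
  return (tran_low_t_example)((x + 1 + (x < 0)) >> 2);
}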
diff --git a/vp9/encoder/x86/vp9_dct_sse2_impl.h b/vp9/encoder/x86/vp9_dct_sse2_impl.h
index 11bf5a25e..86e9ecf73 100644
--- a/vp9/encoder/x86/vp9_dct_sse2_impl.h
+++ b/vp9/encoder/x86/vp9_dct_sse2_impl.h
@@ -40,35 +40,35 @@ void FDCT4x4_2D(const int16_t *input, tran_low_t *output, int stride) {
// These are the coefficients used for the multiplies.
// In the comments, pN means cos(N pi /64) and mN is -cos(N pi /64),
// where cospi_N_64 = cos(N pi /64)
- const __m128i k__cospi_A = _mm_setr_epi16(cospi_16_64, cospi_16_64,
+ const __m128i k__cospi_A = octa_set_epi16(cospi_16_64, cospi_16_64,
cospi_16_64, cospi_16_64,
cospi_16_64, -cospi_16_64,
cospi_16_64, -cospi_16_64);
- const __m128i k__cospi_B = _mm_setr_epi16(cospi_16_64, -cospi_16_64,
+ const __m128i k__cospi_B = octa_set_epi16(cospi_16_64, -cospi_16_64,
cospi_16_64, -cospi_16_64,
cospi_16_64, cospi_16_64,
cospi_16_64, cospi_16_64);
- const __m128i k__cospi_C = _mm_setr_epi16(cospi_8_64, cospi_24_64,
+ const __m128i k__cospi_C = octa_set_epi16(cospi_8_64, cospi_24_64,
cospi_8_64, cospi_24_64,
cospi_24_64, -cospi_8_64,
cospi_24_64, -cospi_8_64);
- const __m128i k__cospi_D = _mm_setr_epi16(cospi_24_64, -cospi_8_64,
+ const __m128i k__cospi_D = octa_set_epi16(cospi_24_64, -cospi_8_64,
cospi_24_64, -cospi_8_64,
cospi_8_64, cospi_24_64,
cospi_8_64, cospi_24_64);
- const __m128i k__cospi_E = _mm_setr_epi16(cospi_16_64, cospi_16_64,
+ const __m128i k__cospi_E = octa_set_epi16(cospi_16_64, cospi_16_64,
cospi_16_64, cospi_16_64,
cospi_16_64, cospi_16_64,
cospi_16_64, cospi_16_64);
- const __m128i k__cospi_F = _mm_setr_epi16(cospi_16_64, -cospi_16_64,
+ const __m128i k__cospi_F = octa_set_epi16(cospi_16_64, -cospi_16_64,
cospi_16_64, -cospi_16_64,
cospi_16_64, -cospi_16_64,
cospi_16_64, -cospi_16_64);
- const __m128i k__cospi_G = _mm_setr_epi16(cospi_8_64, cospi_24_64,
+ const __m128i k__cospi_G = octa_set_epi16(cospi_8_64, cospi_24_64,
cospi_8_64, cospi_24_64,
-cospi_8_64, -cospi_24_64,
-cospi_8_64, -cospi_24_64);
- const __m128i k__cospi_H = _mm_setr_epi16(cospi_24_64, -cospi_8_64,
+ const __m128i k__cospi_H = octa_set_epi16(cospi_24_64, -cospi_8_64,
cospi_24_64, -cospi_8_64,
-cospi_24_64, cospi_8_64,
-cospi_24_64, cospi_8_64);
@@ -267,7 +267,7 @@ void FDCT8x8_2D(const int16_t *input, tran_low_t *output, int stride) {
// When we use them, in one case, they are all the same. In all others
// it's a pair of them that we need to repeat four times. This is done
// by constructing the 32 bit constant corresponding to that pair.
- const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
@@ -588,7 +588,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
// When we use them, in one case, they are all the same. In all others
// it's a pair of them that we need to repeat four times. This is done
// by constructing the 32 bit constant corresponding to that pair.
- const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
const __m128i k__cospi_p08_m24 = pair_set_epi16(cospi_8_64, -cospi_24_64);
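In vp9_dct_sse2_impl.h the mixed-sign coefficient vectors switch from raw _mm_setr_epi16 calls to the new octa_set_epi16 wrapper, and the two splatted constants gain an (int16_t) cast for _mm_set1_epi16, since the cospi_*_64 constants are wider than int16_t. The splat pattern in isolation looks like this (illustrative helper, not the actual encoder code):

#include <emmintrin.h>
#include <stdint.h>

/* Broadcast an int-typed cosine constant into all eight int16_t lanes;
 * the cast makes the narrowing explicit. */
static __m128i splat_cospi(int cospi_value) {
  return _mm_set1_epi16((int16_t)cospi_value);
}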
diff --git a/vpx_dsp/x86/highbd_variance_sse2.c b/vpx_dsp/x86/highbd_variance_sse2.c
index fe35c1e86..b45331caa 100644
--- a/vpx_dsp/x86/highbd_variance_sse2.c
+++ b/vpx_dsp/x86/highbd_variance_sse2.c
@@ -50,7 +50,7 @@ static void highbd_10_variance_sse2(const uint16_t *src, int src_stride,
high_variance_fn_t var_fn, int block_size) {
int i, j;
uint64_t sse_long = 0;
- int64_t sum_long = 0;
+ int32_t sum_long = 0;
for (i = 0; i < h; i += block_size) {
for (j = 0; j < w; j += block_size) {
@@ -63,7 +63,7 @@ static void highbd_10_variance_sse2(const uint16_t *src, int src_stride,
}
}
*sum = ROUND_POWER_OF_TWO(sum_long, 2);
- *sse = ROUND_POWER_OF_TWO(sse_long, 4);
+ *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
}
static void highbd_12_variance_sse2(const uint16_t *src, int src_stride,
@@ -72,7 +72,7 @@ static void highbd_12_variance_sse2(const uint16_t *src, int src_stride,
high_variance_fn_t var_fn, int block_size) {
int i, j;
uint64_t sse_long = 0;
- int64_t sum_long = 0;
+ int32_t sum_long = 0;
for (i = 0; i < h; i += block_size) {
for (j = 0; j < w; j += block_size) {
@@ -85,7 +85,7 @@ static void highbd_12_variance_sse2(const uint16_t *src, int src_stride,
}
}
*sum = ROUND_POWER_OF_TWO(sum_long, 4);
- *sse = ROUND_POWER_OF_TWO(sse_long, 8);
+ *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
}
@@ -386,7 +386,7 @@ uint32_t vpx_highbd_12_sub_pixel_variance##w##x##h##_##opt( \
} \
} \
se = ROUND_POWER_OF_TWO(se, 4); \
- sse = ROUND_POWER_OF_TWO(long_sse, 8); \
+ sse = (uint32_t)ROUND_POWER_OF_TWO(long_sse, 8); \
*sse_ptr = sse; \
return sse - ((cast se * se) >> (wlog2 + hlog2)); \
}
@@ -555,7 +555,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
} \
} \
se = ROUND_POWER_OF_TWO(se, 4); \
- sse = ROUND_POWER_OF_TWO(long_sse, 8); \
+ sse = (uint32_t)ROUND_POWER_OF_TWO(long_sse, 8); \
*sse_ptr = sse; \
return sse - ((cast se * se) >> (wlog2 + hlog2)); \
}
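The highbd variance changes narrow sum_long to int32_t (presumably because the 10/12-bit block sums fit comfortably in 32 bits) and cast the rounded SSE accumulator down to the 32-bit *sse output, resolving 64-to-32 narrowing warnings. A sketch of the rounding/cast step, assuming ROUND_POWER_OF_TWO is the usual add-half-then-shift macro (the function name is hypothetical):

#include <stdint.h>

/* Assumed definition: divide value by 2^n, rounding to nearest. */
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

/* Scale a 64-bit SSE accumulator back down and store it through the
 * caller's 32-bit output; the cast documents the narrowing. */
static uint32_t scale_sse(uint64_t sse_long, int shift) {
  return (uint32_t)ROUND_POWER_OF_TWO(sse_long, shift);
}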