vp9/encoder: apply clang-format

Change-Id: I45d9fb4013f50766b24363a86365e8063e8954c2
author: clang-format <noreply@google.com> 2016-07-26 20:43:23 -0700
committer: James Zern <jzern@google.com> 2016-08-02 16:47:11 -0700
commit: e0cc52db3fc9b09c99d7bbee35153cf82964a860 (patch)
tree: 4988f1d3a21056339e2ffbd7a3b3d52fab54cb6b /vp9/encoder/x86
parent: 3a04c9c9c4c4935925f4c00dcc70610100c5e9dd (diff)
download: libvpx-e0cc52db3fc9b09c99d7bbee35153cf82964a860.tar
libvpx-e0cc52db3fc9b09c99d7bbee35153cf82964a860.tar.gz
libvpx-e0cc52db3fc9b09c99d7bbee35153cf82964a860.tar.bz2
libvpx-e0cc52db3fc9b09c99d7bbee35153cf82964a860.zip
8 files changed, 269 insertions, 308 deletions
diff --git a/vp9/encoder/x86/vp9_dct_intrin_sse2.c b/vp9/encoder/x86/vp9_dct_intrin_sse2.c
index fa37b6fed..0712779b7 100644
--- a/vp9/encoder/x86/vp9_dct_intrin_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_intrin_sse2.c
@@ -78,8 +78,8 @@ static void fdct4_sse2(__m128i *in) {
   const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
 
   __m128i u[4], v[4];
-  u[0]=_mm_unpacklo_epi16(in[0], in[1]);
-  u[1]=_mm_unpacklo_epi16(in[3], in[2]);
+  u[0] = _mm_unpacklo_epi16(in[0], in[1]);
+  u[1] = _mm_unpacklo_epi16(in[3], in[2]);
 
   v[0] = _mm_add_epi16(u[0], u[1]);
   v[1] = _mm_sub_epi16(u[0], u[1]);
@@ -151,14 +151,12 @@ static void fadst4_sse2(__m128i *in) {
   transpose_4x4(in);
 }
 
-void vp9_fht4x4_sse2(const int16_t *input, tran_low_t *output,
-                     int stride, int tx_type) {
+void vp9_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride,
+                     int tx_type) {
   __m128i in[4];
 
   switch (tx_type) {
-    case DCT_DCT:
-      vpx_fdct4x4_sse2(input, output, stride);
-      break;
+    case DCT_DCT: vpx_fdct4x4_sse2(input, output, stride); break;
     case ADST_DCT:
       load_buffer_4x4(input, in, stride);
       fadst4_sse2(in);
@@ -177,21 +175,18 @@ void vp9_fht4x4_sse2(const int16_t *input, tran_low_t *output,
       fadst4_sse2(in);
       write_buffer_4x4(output, in);
       break;
-   default:
-     assert(0);
-     break;
+    default: assert(0); break;
   }
 }
 
 void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
-                            int16_t* coeff_ptr, intptr_t n_coeffs,
-                            int skip_block, const int16_t* zbin_ptr,
-                            const int16_t* round_ptr, const int16_t* quant_ptr,
-                            const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
-                            int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
-                            uint16_t* eob_ptr,
-                            const int16_t* scan_ptr,
-                            const int16_t* iscan_ptr) {
+                            int16_t *coeff_ptr, intptr_t n_coeffs,
+                            int skip_block, const int16_t *zbin_ptr,
+                            const int16_t *round_ptr, const int16_t *quant_ptr,
+                            const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
+                            int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+                            uint16_t *eob_ptr, const int16_t *scan_ptr,
+                            const int16_t *iscan_ptr) {
   __m128i zero;
   int pass;
   // Constants
@@ -208,14 +203,14 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
   const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
   const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
   // Load input
-  __m128i in0  = _mm_load_si128((const __m128i *)(input + 0 * stride));
-  __m128i in1  = _mm_load_si128((const __m128i *)(input + 1 * stride));
-  __m128i in2  = _mm_load_si128((const __m128i *)(input + 2 * stride));
-  __m128i in3  = _mm_load_si128((const __m128i *)(input + 3 * stride));
-  __m128i in4  = _mm_load_si128((const __m128i *)(input + 4 * stride));
-  __m128i in5  = _mm_load_si128((const __m128i *)(input + 5 * stride));
-  __m128i in6  = _mm_load_si128((const __m128i *)(input + 6 * stride));
-  __m128i in7  = _mm_load_si128((const __m128i *)(input + 7 * stride));
+  __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
+  __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
+  __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
+  __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
+  __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
+  __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
+  __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
+  __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));
   __m128i *in[8];
   int index = 0;
 
@@ -469,9 +464,9 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
 
       // Setup global values
       {
-        round = _mm_load_si128((const __m128i*)round_ptr);
-        quant = _mm_load_si128((const __m128i*)quant_ptr);
-        dequant = _mm_load_si128((const __m128i*)dequant_ptr);
+        round = _mm_load_si128((const __m128i *)round_ptr);
+        quant = _mm_load_si128((const __m128i *)quant_ptr);
+        dequant = _mm_load_si128((const __m128i *)dequant_ptr);
       }
 
       {
@@ -503,15 +498,15 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
         qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
         qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
 
-        _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
-        _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+        _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
+        _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
 
         coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
         dequant = _mm_unpackhi_epi64(dequant, dequant);
         coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
 
-        _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
-        _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+        _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
+        _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
       }
 
       {
@@ -524,8 +519,8 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
         zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
         nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
         nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
-        iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
-        iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
+        iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
+        iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
         // Add one to convert from indices to counts
         iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
         iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -568,14 +563,14 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
         qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
         qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
 
-        _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
-        _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+        _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
+        _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
 
         coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
         coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
 
-        _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
-        _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+        _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
+        _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
       }
 
       {
@@ -588,8 +583,8 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
         zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
         nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
         nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
-        iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
-        iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
+        iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
+        iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
         // Add one to convert from indices to counts
         iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
         iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -615,10 +610,10 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
     }
   } else {
     do {
-      _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
-      _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
-      _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
-      _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
+      _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
+      _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
+      _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
+      _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
       n_coeffs += 8 * 2;
     } while (n_coeffs < 0);
     *eob_ptr = 0;
@@ -628,14 +623,14 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
 // load 8x8 array
 static INLINE void load_buffer_8x8(const int16_t *input, __m128i *in,
                                    int stride) {
-  in[0]  = _mm_load_si128((const __m128i *)(input + 0 * stride));
-  in[1]  = _mm_load_si128((const __m128i *)(input + 1 * stride));
-  in[2]  = _mm_load_si128((const __m128i *)(input + 2 * stride));
-  in[3]  = _mm_load_si128((const __m128i *)(input + 3 * stride));
-  in[4]  = _mm_load_si128((const __m128i *)(input + 4 * stride));
-  in[5]  = _mm_load_si128((const __m128i *)(input + 5 * stride));
-  in[6]  = _mm_load_si128((const __m128i *)(input + 6 * stride));
-  in[7]  = _mm_load_si128((const __m128i *)(input + 7 * stride));
+  in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride));
+  in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride));
+  in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride));
+  in[3] = _mm_load_si128((const __m128i *)(input + 3 * stride));
+  in[4] = _mm_load_si128((const __m128i *)(input + 4 * stride));
+  in[5] = _mm_load_si128((const __m128i *)(input + 5 * stride));
+  in[6] = _mm_load_si128((const __m128i *)(input + 6 * stride));
+  in[7] = _mm_load_si128((const __m128i *)(input + 7 * stride));
 
   in[0] = _mm_slli_epi16(in[0], 2);
   in[1] = _mm_slli_epi16(in[1], 2);
@@ -930,14 +925,14 @@ static void fadst8_sse2(__m128i *in) {
   __m128i in0, in1, in2, in3, in4, in5, in6, in7;
 
   // properly aligned for butterfly input
-  in0  = in[7];
-  in1  = in[0];
-  in2  = in[5];
-  in3  = in[2];
-  in4  = in[3];
-  in5  = in[4];
-  in6  = in[1];
-  in7  = in[6];
+  in0 = in[7];
+  in1 = in[0];
+  in2 = in[5];
+  in3 = in[2];
+  in4 = in[3];
+  in5 = in[4];
+  in6 = in[1];
+  in7 = in[6];
 
   // column transformation
   // stage 1
@@ -1135,14 +1130,12 @@ static void fadst8_sse2(__m128i *in) {
   array_transpose_8x8(in, in);
 }
 
-void vp9_fht8x8_sse2(const int16_t *input, tran_low_t *output,
-                     int stride, int tx_type) {
+void vp9_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride,
+                     int tx_type) {
   __m128i in[8];
 
   switch (tx_type) {
-    case DCT_DCT:
-      vpx_fdct8x8_sse2(input, output, stride);
-      break;
+    case DCT_DCT: vpx_fdct8x8_sse2(input, output, stride); break;
     case ADST_DCT:
       load_buffer_8x8(input, in, stride);
       fadst8_sse2(in);
@@ -1164,13 +1157,11 @@ void vp9_fht8x8_sse2(const int16_t *input, tran_low_t *output,
       right_shift_8x8(in, 1);
       write_buffer_8x8(output, in, 8);
       break;
-    default:
-      assert(0);
-      break;
+    default: assert(0); break;
   }
 }
 
-static INLINE void load_buffer_16x16(const int16_t* input, __m128i *in0,
+static INLINE void load_buffer_16x16(const int16_t *input, __m128i *in0,
                                      __m128i *in1, int stride) {
   // load first 8 columns
   load_buffer_8x8(input, in0, stride);
@@ -1530,13 +1521,13 @@ static void fdct16_8col(__m128i *in) {
   v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS);
   v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS);
 
-  in[1]  = _mm_packs_epi32(v[0], v[1]);
-  in[9]  = _mm_packs_epi32(v[2], v[3]);
-  in[5]  = _mm_packs_epi32(v[4], v[5]);
+  in[1] = _mm_packs_epi32(v[0], v[1]);
+  in[9] = _mm_packs_epi32(v[2], v[3]);
+  in[5] = _mm_packs_epi32(v[4], v[5]);
   in[13] = _mm_packs_epi32(v[6], v[7]);
-  in[3]  = _mm_packs_epi32(v[8], v[9]);
+  in[3] = _mm_packs_epi32(v[8], v[9]);
   in[11] = _mm_packs_epi32(v[10], v[11]);
-  in[7]  = _mm_packs_epi32(v[12], v[13]);
+  in[7] = _mm_packs_epi32(v[12], v[13]);
   in[15] = _mm_packs_epi32(v[14], v[15]);
 }
 
@@ -2022,14 +2013,12 @@ static void fadst16_sse2(__m128i *in0, __m128i *in1) {
   array_transpose_16x16(in0, in1);
 }
 
-void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output,
-                       int stride, int tx_type) {
+void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride,
+                       int tx_type) {
   __m128i in0[16], in1[16];
 
   switch (tx_type) {
-    case DCT_DCT:
-      vpx_fdct16x16_sse2(input, output, stride);
-      break;
+    case DCT_DCT: vpx_fdct16x16_sse2(input, output, stride); break;
     case ADST_DCT:
       load_buffer_16x16(input, in0, in1, stride);
       fadst16_sse2(in0, in1);
@@ -2051,8 +2040,6 @@ void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output,
       fadst16_sse2(in0, in1);
       write_buffer_16x16(output, in0, in1, 16);
       break;
-    default:
-      assert(0);
-      break;
+    default: assert(0); break;
   }
 }
diff --git a/vp9/encoder/x86/vp9_dct_ssse3.c b/vp9/encoder/x86/vp9_dct_ssse3.c
index 1a1d4eabc..fb2a92541 100644
--- a/vp9/encoder/x86/vp9_dct_ssse3.c
+++ b/vp9/encoder/x86/vp9_dct_ssse3.c
@@ -15,16 +15,12 @@
 #include "vpx_dsp/x86/inv_txfm_sse2.h"
 #include "vpx_dsp/x86/txfm_common_sse2.h"
 
-void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
-                             int16_t* coeff_ptr, intptr_t n_coeffs,
-                             int skip_block, const int16_t* zbin_ptr,
-                             const int16_t* round_ptr, const int16_t* quant_ptr,
-                             const int16_t* quant_shift_ptr,
-                             int16_t* qcoeff_ptr,
-                             int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
-                             uint16_t* eob_ptr,
-                             const int16_t* scan_ptr,
-                             const int16_t* iscan_ptr) {
+void vp9_fdct8x8_quant_ssse3(
+    const int16_t *input, int stride, int16_t *coeff_ptr, intptr_t n_coeffs,
+    int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr,
+    const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+    int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+    uint16_t *eob_ptr, const int16_t *scan_ptr, const int16_t *iscan_ptr) {
   __m128i zero;
   int pass;
   // Constants
@@ -42,14 +38,14 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
   const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
   const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
   // Load input
-  __m128i in0  = _mm_load_si128((const __m128i *)(input + 0 * stride));
-  __m128i in1  = _mm_load_si128((const __m128i *)(input + 1 * stride));
-  __m128i in2  = _mm_load_si128((const __m128i *)(input + 2 * stride));
-  __m128i in3  = _mm_load_si128((const __m128i *)(input + 3 * stride));
-  __m128i in4  = _mm_load_si128((const __m128i *)(input + 4 * stride));
-  __m128i in5  = _mm_load_si128((const __m128i *)(input + 5 * stride));
-  __m128i in6  = _mm_load_si128((const __m128i *)(input + 6 * stride));
-  __m128i in7  = _mm_load_si128((const __m128i *)(input + 7 * stride));
+  __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
+  __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
+  __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
+  __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
+  __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
+  __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
+  __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
+  __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));
   __m128i *in[8];
   int index = 0;
 
@@ -298,9 +294,9 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
 
       // Setup global values
       {
-        round = _mm_load_si128((const __m128i*)round_ptr);
-        quant = _mm_load_si128((const __m128i*)quant_ptr);
-        dequant = _mm_load_si128((const __m128i*)dequant_ptr);
+        round = _mm_load_si128((const __m128i *)round_ptr);
+        quant = _mm_load_si128((const __m128i *)quant_ptr);
+        dequant = _mm_load_si128((const __m128i *)dequant_ptr);
       }
 
       {
@@ -332,15 +328,15 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
         qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
         qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
 
-        _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
-        _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+        _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
+        _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
 
         coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
         dequant = _mm_unpackhi_epi64(dequant, dequant);
         coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
 
-        _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
-        _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+        _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
+        _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
       }
 
       {
@@ -353,8 +349,8 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
         zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
         nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
         nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
-        iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
-        iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
+        iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
+        iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
         // Add one to convert from indices to counts
         iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
         iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -388,7 +384,7 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
         qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
 
         nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) |
-            _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
+                 _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
 
         if (nzflag) {
           qcoeff0 = _mm_adds_epi16(qcoeff0, round);
@@ -402,20 +398,20 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
           qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
           qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
 
-          _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
-          _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+          _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
+          _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
 
           coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
           coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
 
-          _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
-          _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+          _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
+          _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
         } else {
-          _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
-          _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
+          _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
+          _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
 
-          _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
-          _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
+          _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
+          _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
         }
       }
 
@@ -429,8 +425,8 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
         zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
         nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
         nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
-        iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
-        iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
+        iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
+        iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
         // Add one to convert from indices to counts
         iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
         iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -456,10 +452,10 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
     }
   } else {
     do {
-      _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
-      _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
-      _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
-      _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
+      _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
+      _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
+      _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
+      _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
       n_coeffs += 8 * 2;
     } while (n_coeffs < 0);
     *eob_ptr = 0;
diff --git a/vp9/encoder/x86/vp9_denoiser_sse2.c b/vp9/encoder/x86/vp9_denoiser_sse2.c
index 883507af3..91d0602f9 100644
--- a/vp9/encoder/x86/vp9_denoiser_sse2.c
+++ b/vp9/encoder/x86/vp9_denoiser_sse2.c
@@ -37,17 +37,11 @@ static INLINE int sum_diff_16x1(__m128i acc_diff) {
 }
 
 // Denoise a 16x1 vector.
-static INLINE __m128i vp9_denoiser_16x1_sse2(const uint8_t *sig,
-                                             const uint8_t *mc_running_avg_y,
-                                             uint8_t *running_avg_y,
-                                             const __m128i *k_0,
-                                             const __m128i *k_4,
-                                             const __m128i *k_8,
-                                             const __m128i *k_16,
-                                             const __m128i *l3,
-                                             const __m128i *l32,
-                                             const __m128i *l21,
-                                             __m128i acc_diff) {
+static INLINE __m128i vp9_denoiser_16x1_sse2(
+    const uint8_t *sig, const uint8_t *mc_running_avg_y, uint8_t *running_avg_y,
+    const __m128i *k_0, const __m128i *k_4, const __m128i *k_8,
+    const __m128i *k_16, const __m128i *l3, const __m128i *l32,
+    const __m128i *l21, __m128i acc_diff) {
   // Calculate differences
   const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0]));
   const __m128i v_mc_running_avg_y =
@@ -69,7 +63,7 @@ static INLINE __m128i vp9_denoiser_16x1_sse2(const uint8_t *sig,
   __m128i adj2 = _mm_and_si128(mask2, *l32);
   const __m128i adj1 = _mm_and_si128(mask1, *l21);
   const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff);
-  __m128i adj,  padj, nadj;
+  __m128i adj, padj, nadj;
 
   // Combine the adjustments and get absolute adjustments.
   adj2 = _mm_add_epi8(adj2, adj1);
@@ -95,9 +89,8 @@ static INLINE __m128i vp9_denoiser_16x1_sse2(const uint8_t *sig,
 
 // Denoise a 16x1 vector with a weaker filter.
 static INLINE __m128i vp9_denoiser_adj_16x1_sse2(
-    const uint8_t *sig, const uint8_t *mc_running_avg_y,
-    uint8_t *running_avg_y, const __m128i k_0,
-    const __m128i k_delta, __m128i acc_diff) {
+    const uint8_t *sig, const uint8_t *mc_running_avg_y, uint8_t *running_avg_y,
+    const __m128i k_0, const __m128i k_delta, __m128i acc_diff) {
   __m128i v_running_avg_y = _mm_loadu_si128((__m128i *)(&running_avg_y[0]));
   // Calculate differences.
   const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0]));
@@ -108,8 +101,7 @@ static INLINE __m128i vp9_denoiser_adj_16x1_sse2(
   // Obtain the sign. FF if diff is negative.
   const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
   // Clamp absolute difference to delta to get the adjustment.
-  const __m128i adj =
-      _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta);
+  const __m128i adj = _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta);
   // Restore the sign and get positive and negative adjustments.
   __m128i padj, nadj;
   padj = _mm_andnot_si128(diff_sign, adj);
@@ -126,14 +118,17 @@ static INLINE __m128i vp9_denoiser_adj_16x1_sse2(
 }
 
 // Denoise 8x8 and 8x16 blocks.
-static int vp9_denoiser_NxM_sse2_small(
-    const uint8_t *sig, int sig_stride, const uint8_t *mc_running_avg_y,
-    int mc_avg_y_stride, uint8_t *running_avg_y, int avg_y_stride,
-    int increase_denoising, BLOCK_SIZE bs, int motion_magnitude, int width) {
+static int vp9_denoiser_NxM_sse2_small(const uint8_t *sig, int sig_stride,
+                                       const uint8_t *mc_running_avg_y,
+                                       int mc_avg_y_stride,
+                                       uint8_t *running_avg_y, int avg_y_stride,
+                                       int increase_denoising, BLOCK_SIZE bs,
+                                       int motion_magnitude, int width) {
   int sum_diff_thresh, r, sum_diff = 0;
-  const int shift_inc  = (increase_denoising &&
-                          motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
-                         1 : 0;
+  const int shift_inc =
+      (increase_denoising && motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD)
+          ? 1
+          : 0;
   uint8_t sig_buffer[8][16], mc_running_buffer[8][16], running_buffer[8][16];
   __m128i acc_diff = _mm_setzero_si128();
   const __m128i k_0 = _mm_setzero_si128();
@@ -153,15 +148,13 @@ static int vp9_denoiser_NxM_sse2_small(
     memcpy(sig_buffer[r], sig, width);
     memcpy(sig_buffer[r] + width, sig + sig_stride, width);
     memcpy(mc_running_buffer[r], mc_running_avg_y, width);
-    memcpy(mc_running_buffer[r] + width,
-           mc_running_avg_y + mc_avg_y_stride, width);
+    memcpy(mc_running_buffer[r] + width, mc_running_avg_y + mc_avg_y_stride,
+           width);
     memcpy(running_buffer[r], running_avg_y, width);
     memcpy(running_buffer[r] + width, running_avg_y + avg_y_stride, width);
-    acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r],
-                                      mc_running_buffer[r],
-                                      running_buffer[r],
-                                      &k_0, &k_4, &k_8, &k_16,
-                                      &l3, &l32, &l21, acc_diff);
+    acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r], mc_running_buffer[r],
+                                      running_buffer[r], &k_0, &k_4, &k_8,
+                                      &k_16, &l3, &l32, &l21, acc_diff);
     memcpy(running_avg_y, running_buffer[r], width);
     memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width, width);
     // Update pointers for next iteration.
@@ -184,19 +177,19 @@ static int vp9_denoiser_NxM_sse2_small(
 
       // The delta is set by the excess of absolute pixel diff over the
       // threshold.
-      const int delta = ((abs(sum_diff) - sum_diff_thresh) >>
-                         num_pels_log2_lookup[bs]) + 1;
+      const int delta =
+          ((abs(sum_diff) - sum_diff_thresh) >> num_pels_log2_lookup[bs]) + 1;
       // Only apply the adjustment for max delta up to 3.
       if (delta < 4) {
         const __m128i k_delta = _mm_set1_epi8(delta);
         running_avg_y -= avg_y_stride * (b_height << 1);
         for (r = 0; r < b_height; ++r) {
           acc_diff = vp9_denoiser_adj_16x1_sse2(
-              sig_buffer[r], mc_running_buffer[r], running_buffer[r],
-              k_0, k_delta, acc_diff);
+              sig_buffer[r], mc_running_buffer[r], running_buffer[r], k_0,
+              k_delta, acc_diff);
           memcpy(running_avg_y, running_buffer[r], width);
-          memcpy(running_avg_y + avg_y_stride,
-                 running_buffer[r] + width, width);
+          memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width,
+                 width);
           // Update pointers for next iteration.
           running_avg_y += (avg_y_stride << 1);
         }
@@ -216,14 +209,14 @@ static int vp9_denoiser_NxM_sse2_small(
 static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
                                      const uint8_t *mc_running_avg_y,
                                      int mc_avg_y_stride,
-                                     uint8_t *running_avg_y,
-                                     int avg_y_stride,
+                                     uint8_t *running_avg_y, int avg_y_stride,
                                      int increase_denoising, BLOCK_SIZE bs,
                                      int motion_magnitude) {
   int sum_diff_thresh, r, c, sum_diff = 0;
-  const int shift_inc  = (increase_denoising &&
-                          motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
-                         1 : 0;
+  const int shift_inc =
+      (increase_denoising && motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD)
+          ? 1
+          : 0;
   __m128i acc_diff[4][4];
   const __m128i k_0 = _mm_setzero_si128();
   const __m128i k_4 = _mm_set1_epi8(4 + shift_inc);
@@ -248,9 +241,9 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
 
   for (r = 0; r < b_height; ++r) {
     for (c = 0; c < b_width_shift4; ++c) {
-      acc_diff[c][r>>4] = vp9_denoiser_16x1_sse2(
-          sig, mc_running_avg_y, running_avg_y, &k_0, &k_4,
-          &k_8, &k_16, &l3, &l32, &l21, acc_diff[c][r>>4]);
+      acc_diff[c][r >> 4] = vp9_denoiser_16x1_sse2(
+          sig, mc_running_avg_y, running_avg_y, &k_0, &k_4, &k_8, &k_16, &l3,
+          &l32, &l21, acc_diff[c][r >> 4]);
       // Update pointers for next iteration.
       sig += 16;
       mc_running_avg_y += 16;
@@ -259,7 +252,7 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
 
     if ((r & 0xf) == 0xf || (bs == BLOCK_16X8 && r == 7)) {
       for (c = 0; c < b_width_shift4; ++c) {
-        sum_diff += sum_diff_16x1(acc_diff[c][r>>4]);
+        sum_diff += sum_diff_16x1(acc_diff[c][r >> 4]);
       }
     }
 
@@ -272,8 +265,8 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
   {
     sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising);
     if (abs(sum_diff) > sum_diff_thresh) {
-      const int delta = ((abs(sum_diff) - sum_diff_thresh) >>
-                         num_pels_log2_lookup[bs]) + 1;
+      const int delta =
+          ((abs(sum_diff) - sum_diff_thresh) >> num_pels_log2_lookup[bs]) + 1;
 
       // Only apply the adjustment for max delta up to 3.
       if (delta < 4) {
@@ -284,9 +277,9 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
         sum_diff = 0;
         for (r = 0; r < b_height; ++r) {
           for (c = 0; c < b_width_shift4; ++c) {
-            acc_diff[c][r>>4] = vp9_denoiser_adj_16x1_sse2(
-                sig, mc_running_avg_y, running_avg_y, k_0,
-                k_delta, acc_diff[c][r>>4]);
+            acc_diff[c][r >> 4] =
+                vp9_denoiser_adj_16x1_sse2(sig, mc_running_avg_y, running_avg_y,
+                                           k_0, k_delta, acc_diff[c][r >> 4]);
             // Update pointers for next iteration.
             sig += 16;
             mc_running_avg_y += 16;
@@ -295,7 +288,7 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
 
           if ((r & 0xf) == 0xf || (bs == BLOCK_16X8 && r == 7)) {
             for (c = 0; c < b_width_shift4; ++c) {
-              sum_diff += sum_diff_16x1(acc_diff[c][r>>4]);
+              sum_diff += sum_diff_16x1(acc_diff[c][r >> 4]);
             }
           }
           sig = sig - b_width + sig_stride;
@@ -314,27 +307,21 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
 }
 
 int vp9_denoiser_filter_sse2(const uint8_t *sig, int sig_stride,
-                             const uint8_t *mc_avg,
-                             int mc_avg_stride,
+                             const uint8_t *mc_avg, int mc_avg_stride,
                              uint8_t *avg, int avg_stride,
-                             int increase_denoising,
-                             BLOCK_SIZE bs,
+                             int increase_denoising, BLOCK_SIZE bs,
                              int motion_magnitude) {
   // Rank by frequency of the block type to have an early termination.
   if (bs == BLOCK_16X16 || bs == BLOCK_32X32 || bs == BLOCK_64X64 ||
       bs == BLOCK_16X32 || bs == BLOCK_16X8 || bs == BLOCK_32X16 ||
       bs == BLOCK_32X64 || bs == BLOCK_64X32) {
-    return vp9_denoiser_NxM_sse2_big(sig, sig_stride,
-                                     mc_avg, mc_avg_stride,
-                                     avg, avg_stride,
-                                     increase_denoising,
-                                     bs, motion_magnitude);
+    return vp9_denoiser_NxM_sse2_big(sig, sig_stride, mc_avg, mc_avg_stride,
+                                     avg, avg_stride, increase_denoising, bs,
+                                     motion_magnitude);
   } else if (bs == BLOCK_8X8 || bs == BLOCK_8X16) {
-    return vp9_denoiser_NxM_sse2_small(sig, sig_stride,
-                                       mc_avg, mc_avg_stride,
-                                       avg, avg_stride,
-                                       increase_denoising,
-                                       bs, motion_magnitude, 8);
+    return vp9_denoiser_NxM_sse2_small(sig, sig_stride, mc_avg, mc_avg_stride,
+                                       avg, avg_stride, increase_denoising, bs,
+                                       motion_magnitude, 8);
   } else {
     return COPY_BLOCK;
   }
diff --git a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
index cd3e87ec8..36bcf1536 100644
--- a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
+++ b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
@@ -9,7 +9,7 @@
  */
 
 #if defined(_MSC_VER)
-# include <intrin.h>
+#include <intrin.h>
 #endif
 #include <emmintrin.h>
 #include <smmintrin.h>
@@ -19,11 +19,11 @@
 #include "vpx_ports/mem.h"
 
 #ifdef __GNUC__
-# define LIKELY(v)    __builtin_expect(v, 1)
-# define UNLIKELY(v)  __builtin_expect(v, 0)
+#define LIKELY(v) __builtin_expect(v, 1)
+#define UNLIKELY(v) __builtin_expect(v, 0)
 #else
-# define LIKELY(v)    (v)
-# define UNLIKELY(v)  (v)
+#define LIKELY(v) (v)
+#define UNLIKELY(v) (v)
 #endif
 
 static INLINE int_mv pack_int_mv(int16_t row, int16_t col) {
@@ -40,19 +40,19 @@ static INLINE MV_JOINT_TYPE get_mv_joint(const int_mv mv) {
   return mv.as_int == 0 ? 0 : 1;
 }
 
-static INLINE int mv_cost(const int_mv mv,
-                          const int *joint_cost, int *const comp_cost[2]) {
-  return joint_cost[get_mv_joint(mv)] +
-         comp_cost[0][mv.as_mv.row] + comp_cost[1][mv.as_mv.col];
+static INLINE int mv_cost(const int_mv mv, const int *joint_cost,
+                          int *const comp_cost[2]) {
+  return joint_cost[get_mv_joint(mv)] + comp_cost[0][mv.as_mv.row] +
+         comp_cost[1][mv.as_mv.col];
 }
 
 static int mvsad_err_cost(const MACROBLOCK *x, const int_mv mv, const MV *ref,
                           int sad_per_bit) {
-  const int_mv diff = pack_int_mv(mv.as_mv.row - ref->row,
-                                  mv.as_mv.col - ref->col);
-  return ROUND_POWER_OF_TWO((unsigned)mv_cost(diff, x->nmvjointsadcost,
-                                              x->nmvsadcost) *
-                                              sad_per_bit, VP9_PROB_COST_SHIFT);
+  const int_mv diff =
+      pack_int_mv(mv.as_mv.row - ref->row, mv.as_mv.col - ref->col);
+  return ROUND_POWER_OF_TWO(
+      (unsigned)mv_cost(diff, x->nmvjointsadcost, x->nmvsadcost) * sad_per_bit,
+      VP9_PROB_COST_SHIFT);
 }
 
 /*****************************************************************************
@@ -71,10 +71,9 @@ static int mvsad_err_cost(const MACROBLOCK *x, const int_mv mv, const MV *ref,
  * which does not rely on these properties.                                  *
  *****************************************************************************/
 int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
-                               const search_site_config *cfg,
-                               MV *ref_mv, MV *best_mv, int search_param,
-                               int sad_per_bit, int *num00,
-                               const vp9_variance_fn_ptr_t *fn_ptr,
+                               const search_site_config *cfg, MV *ref_mv,
+                               MV *best_mv, int search_param, int sad_per_bit,
+                               int *num00, const vp9_variance_fn_ptr_t *fn_ptr,
                                const MV *center_mv) {
   const int_mv maxmv = pack_int_mv(x->mv_row_max, x->mv_col_max);
   const __m128i v_max_mv_w = _mm_set1_epi32(maxmv.as_int);
@@ -91,12 +90,12 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
   // 0 = initial step (MAX_FIRST_STEP) pel
   // 1 = (MAX_FIRST_STEP/2) pel,
   // 2 = (MAX_FIRST_STEP/4) pel...
-  const       MV *ss_mv = &cfg->ss_mv[cfg->searches_per_step * search_param];
+  const MV *ss_mv = &cfg->ss_mv[cfg->searches_per_step * search_param];
   const intptr_t *ss_os = &cfg->ss_os[cfg->searches_per_step * search_param];
   const int tot_steps = cfg->total_steps - search_param;
 
-  const int_mv fcenter_mv = pack_int_mv(center_mv->row >> 3,
-                                        center_mv->col >> 3);
+  const int_mv fcenter_mv =
+      pack_int_mv(center_mv->row >> 3, center_mv->col >> 3);
   const __m128i vfcmv = _mm_set1_epi32(fcenter_mv.as_int);
 
   const int ref_row = clamp(ref_mv->row, minmv.as_mv.row, maxmv.as_mv.row);
@@ -109,8 +108,8 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
   const int what_stride = x->plane[0].src.stride;
   const int in_what_stride = x->e_mbd.plane[0].pre[0].stride;
   const uint8_t *const what = x->plane[0].src.buf;
-  const uint8_t *const in_what = x->e_mbd.plane[0].pre[0].buf +
-                                 ref_row * in_what_stride + ref_col;
+  const uint8_t *const in_what =
+      x->e_mbd.plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
 
   // Work out the start point for the search
   const uint8_t *best_address = in_what;
@@ -181,10 +180,9 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
         __m128i v_bo10_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 0]);
         __m128i v_bo32_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 2]);
         // Set the ones falling outside to zero
-        v_bo10_q = _mm_and_si128(v_bo10_q,
-                                 _mm_cvtepi32_epi64(v_inside_d));
-        v_bo32_q = _mm_and_si128(v_bo32_q,
-                                 _mm_unpackhi_epi32(v_inside_d, v_inside_d));
+        v_bo10_q = _mm_and_si128(v_bo10_q, _mm_cvtepi32_epi64(v_inside_d));
+        v_bo32_q =
+            _mm_and_si128(v_bo32_q, _mm_unpackhi_epi32(v_inside_d, v_inside_d));
         // Compute the candidate addresses
         v_blocka[0] = _mm_add_epi64(v_ba_q, v_bo10_q);
         v_blocka[1] = _mm_add_epi64(v_ba_q, v_bo32_q);
@@ -195,9 +193,8 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
 #endif
       }
 
-      fn_ptr->sdx4df(what, what_stride,
-                     (const uint8_t **)&v_blocka[0], in_what_stride,
-                     (uint32_t*)&v_sad_d);
+      fn_ptr->sdx4df(what, what_stride, (const uint8_t **)&v_blocka[0],
+                     in_what_stride, (uint32_t *)&v_sad_d);
 
       // Look up the component cost of the residual motion vector
       {
@@ -226,11 +223,10 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
 
       // Now add in the joint cost
       {
-        const __m128i v_sel_d = _mm_cmpeq_epi32(v_diff_mv_w,
-                                                _mm_setzero_si128());
-        const __m128i v_joint_cost_d = _mm_blendv_epi8(v_joint_cost_1_d,
-                                                       v_joint_cost_0_d,
-                                                       v_sel_d);
+        const __m128i v_sel_d =
+            _mm_cmpeq_epi32(v_diff_mv_w, _mm_setzero_si128());
+        const __m128i v_joint_cost_d =
+            _mm_blendv_epi8(v_joint_cost_1_d, v_joint_cost_0_d, v_sel_d);
         v_cost_d = _mm_add_epi32(v_cost_d, v_joint_cost_d);
       }
 
diff --git a/vp9/encoder/x86/vp9_error_intrin_avx2.c b/vp9/encoder/x86/vp9_error_intrin_avx2.c
index dfebaab0a..453af2a40 100644
--- a/vp9/encoder/x86/vp9_error_intrin_avx2.c
+++ b/vp9/encoder/x86/vp9_error_intrin_avx2.c
@@ -13,10 +13,8 @@
 #include "./vp9_rtcd.h"
 #include "vpx/vpx_integer.h"
 
-int64_t vp9_block_error_avx2(const int16_t *coeff,
-                             const int16_t *dqcoeff,
-                             intptr_t block_size,
-                             int64_t *ssz) {
+int64_t vp9_block_error_avx2(const int16_t *coeff, const int16_t *dqcoeff,
+                             intptr_t block_size, int64_t *ssz) {
   __m256i sse_reg, ssz_reg, coeff_reg, dqcoeff_reg;
   __m256i exp_dqcoeff_lo, exp_dqcoeff_hi, exp_coeff_lo, exp_coeff_hi;
   __m256i sse_reg_64hi, ssz_reg_64hi;
@@ -29,7 +27,7 @@ int64_t vp9_block_error_avx2(const int16_t *coeff,
   sse_reg = _mm256_set1_epi16(0);
   ssz_reg = _mm256_set1_epi16(0);
 
-  for (i = 0 ; i < block_size ; i+= 16) {
+  for (i = 0; i < block_size; i += 16) {
     // load 32 bytes from coeff and dqcoeff
     coeff_reg = _mm256_loadu_si256((const __m256i *)(coeff + i));
     dqcoeff_reg = _mm256_loadu_si256((const __m256i *)(dqcoeff + i));
@@ -66,8 +64,8 @@ int64_t vp9_block_error_avx2(const int16_t *coeff,
                              _mm256_extractf128_si256(ssz_reg, 1));
 
   // store the results
-  _mm_storel_epi64((__m128i*)(&sse), sse_reg128);
+  _mm_storel_epi64((__m128i *)(&sse), sse_reg128);
 
-  _mm_storel_epi64((__m128i*)(ssz), ssz_reg128);
+  _mm_storel_epi64((__m128i *)(ssz), ssz_reg128);
   return sse;
 }
diff --git a/vp9/encoder/x86/vp9_frame_scale_ssse3.c b/vp9/encoder/x86/vp9_frame_scale_ssse3.c
index 23325d63b..fa2a6449b 100644
--- a/vp9/encoder/x86/vp9_frame_scale_ssse3.c
+++ b/vp9/encoder/x86/vp9_frame_scale_ssse3.c
@@ -19,23 +19,22 @@ extern void vp9_scale_and_extend_frame_c(const YV12_BUFFER_CONFIG *src,
                                          YV12_BUFFER_CONFIG *dst);
 
 static void downsample_2_to_1_ssse3(const uint8_t *src, ptrdiff_t src_stride,
-                                    uint8_t *dst, ptrdiff_t dst_stride,
-                                    int w, int h) {
+                                    uint8_t *dst, ptrdiff_t dst_stride, int w,
+                                    int h) {
   const __m128i mask = _mm_set1_epi16(0x00FF);
   const int max_width = w & ~15;
   int y;
   for (y = 0; y < h; ++y) {
     int x;
     for (x = 0; x < max_width; x += 16) {
-      const __m128i a = _mm_loadu_si128((const __m128i *)(src + x * 2 +  0));
+      const __m128i a = _mm_loadu_si128((const __m128i *)(src + x * 2 + 0));
       const __m128i b = _mm_loadu_si128((const __m128i *)(src + x * 2 + 16));
       const __m128i a_and = _mm_and_si128(a, mask);
       const __m128i b_and = _mm_and_si128(b, mask);
       const __m128i c = _mm_packus_epi16(a_and, b_and);
       _mm_storeu_si128((__m128i *)(dst + x), c);
     }
-    for (; x < w; ++x)
-      dst[x] = src[x * 2];
+    for (; x < w; ++x) dst[x] = src[x * 2];
     src += src_stride * 2;
     dst += dst_stride;
   }
@@ -47,9 +46,8 @@ static INLINE __m128i filter(const __m128i *const a, const __m128i *const b,
                              const __m128i *const g, const __m128i *const h) {
   const __m128i coeffs_ab =
       _mm_set_epi8(6, -1, 6, -1, 6, -1, 6, -1, 6, -1, 6, -1, 6, -1, 6, -1);
-  const __m128i coeffs_cd =
-      _mm_set_epi8(78, -19, 78, -19, 78, -19, 78, -19, 78, -19, 78, -19,
-                   78, -19, 78, -19);
+  const __m128i coeffs_cd = _mm_set_epi8(78, -19, 78, -19, 78, -19, 78, -19, 78,
+                                         -19, 78, -19, 78, -19, 78, -19);
   const __m128i const64_x16 = _mm_set1_epi16(64);
   const __m128i ab = _mm_unpacklo_epi8(*a, *b);
   const __m128i cd = _mm_unpacklo_epi8(*c, *d);
@@ -88,8 +86,8 @@ static void eight_tap_row_ssse3(const uint8_t *src, uint8_t *dst, int w) {
 }
 
 static void upsample_1_to_2_ssse3(const uint8_t *src, ptrdiff_t src_stride,
-                                  uint8_t *dst, ptrdiff_t dst_stride,
-                                  int dst_w, int dst_h) {
+                                  uint8_t *dst, ptrdiff_t dst_stride, int dst_w,
+                                  int dst_h) {
   dst_w /= 2;
   dst_h /= 2;
   {
@@ -116,7 +114,7 @@ static void upsample_1_to_2_ssse3(const uint8_t *src, ptrdiff_t src_stride,
       int x;
       eight_tap_row_ssse3(src + src_stride * 4 - 3, tmp7, dst_w);
       for (x = 0; x < max_width; x += 8) {
-        const __m128i A = _mm_loadl_epi64((const __m128i *)(src  + x));
+        const __m128i A = _mm_loadl_epi64((const __m128i *)(src + x));
         const __m128i B = _mm_loadl_epi64((const __m128i *)(tmp3 + x));
         const __m128i AB = _mm_unpacklo_epi8(A, B);
         __m128i C, D, CD;
@@ -179,23 +177,23 @@ void vp9_scale_and_extend_frame_ssse3(const YV12_BUFFER_CONFIG *src,
   const int dst_uv_h = dst_h / 2;
 
   if (dst_w * 2 == src_w && dst_h * 2 == src_h) {
-    downsample_2_to_1_ssse3(src->y_buffer, src->y_stride,
-                            dst->y_buffer, dst->y_stride, dst_w, dst_h);
-    downsample_2_to_1_ssse3(src->u_buffer, src->uv_stride,
-                            dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h);
-    downsample_2_to_1_ssse3(src->v_buffer, src->uv_stride,
-                            dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h);
+    downsample_2_to_1_ssse3(src->y_buffer, src->y_stride, dst->y_buffer,
+                            dst->y_stride, dst_w, dst_h);
+    downsample_2_to_1_ssse3(src->u_buffer, src->uv_stride, dst->u_buffer,
+                            dst->uv_stride, dst_uv_w, dst_uv_h);
+    downsample_2_to_1_ssse3(src->v_buffer, src->uv_stride, dst->v_buffer,
+                            dst->uv_stride, dst_uv_w, dst_uv_h);
     vpx_extend_frame_borders(dst);
   } else if (dst_w == src_w * 2 && dst_h == src_h * 2) {
     // The upsample() supports widths up to 1920 * 2.  If greater, fall back
     // to vp9_scale_and_extend_frame_c().
-    if (dst_w/2 <= 1920) {
-      upsample_1_to_2_ssse3(src->y_buffer, src->y_stride,
-                            dst->y_buffer, dst->y_stride, dst_w, dst_h);
-      upsample_1_to_2_ssse3(src->u_buffer, src->uv_stride,
-                            dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h);
-      upsample_1_to_2_ssse3(src->v_buffer, src->uv_stride,
-                            dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h);
+    if (dst_w / 2 <= 1920) {
+      upsample_1_to_2_ssse3(src->y_buffer, src->y_stride, dst->y_buffer,
+                            dst->y_stride, dst_w, dst_h);
+      upsample_1_to_2_ssse3(src->u_buffer, src->uv_stride, dst->u_buffer,
+                            dst->uv_stride, dst_uv_w, dst_uv_h);
+      upsample_1_to_2_ssse3(src->v_buffer, src->uv_stride, dst->v_buffer,
+                            dst->uv_stride, dst_uv_w, dst_uv_h);
       vpx_extend_frame_borders(dst);
     } else {
       vp9_scale_and_extend_frame_c(src, dst);
diff --git a/vp9/encoder/x86/vp9_highbd_block_error_intrin_sse2.c b/vp9/encoder/x86/vp9_highbd_block_error_intrin_sse2.c
index c245ccafa..91f627c34 100644
--- a/vp9/encoder/x86/vp9_highbd_block_error_intrin_sse2.c
+++ b/vp9/encoder/x86/vp9_highbd_block_error_intrin_sse2.c
@@ -23,41 +23,41 @@ int64_t vp9_highbd_block_error_sse2(tran_low_t *coeff, tran_low_t *dqcoeff,
   const int shift = 2 * (bps - 8);
   const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
 
-  for (i = 0; i < block_size; i+=8) {
+  for (i = 0; i < block_size; i += 8) {
     // Load the data into xmm registers
-    __m128i mm_coeff = _mm_load_si128((__m128i*) (coeff + i));
-    __m128i mm_coeff2 = _mm_load_si128((__m128i*) (coeff + i + 4));
-    __m128i mm_dqcoeff = _mm_load_si128((__m128i*) (dqcoeff + i));
-    __m128i mm_dqcoeff2 = _mm_load_si128((__m128i*) (dqcoeff + i + 4));
+    __m128i mm_coeff = _mm_load_si128((__m128i *)(coeff + i));
+    __m128i mm_coeff2 = _mm_load_si128((__m128i *)(coeff + i + 4));
+    __m128i mm_dqcoeff = _mm_load_si128((__m128i *)(dqcoeff + i));
+    __m128i mm_dqcoeff2 = _mm_load_si128((__m128i *)(dqcoeff + i + 4));
     // Check if any values require more than 15 bit
     max = _mm_set1_epi32(0x3fff);
     min = _mm_set1_epi32(0xffffc000);
     cmp0 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff, max),
-            _mm_cmplt_epi32(mm_coeff, min));
+                         _mm_cmplt_epi32(mm_coeff, min));
     cmp1 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff2, max),
-            _mm_cmplt_epi32(mm_coeff2, min));
+                         _mm_cmplt_epi32(mm_coeff2, min));
     cmp2 = _mm_xor_si128(_mm_cmpgt_epi32(mm_dqcoeff, max),
-            _mm_cmplt_epi32(mm_dqcoeff, min));
+                         _mm_cmplt_epi32(mm_dqcoeff, min));
     cmp3 = _mm_xor_si128(_mm_cmpgt_epi32(mm_dqcoeff2, max),
-            _mm_cmplt_epi32(mm_dqcoeff2, min));
-    test = _mm_movemask_epi8(_mm_or_si128(_mm_or_si128(cmp0, cmp1),
-            _mm_or_si128(cmp2, cmp3)));
+                         _mm_cmplt_epi32(mm_dqcoeff2, min));
+    test = _mm_movemask_epi8(
+        _mm_or_si128(_mm_or_si128(cmp0, cmp1), _mm_or_si128(cmp2, cmp3)));
 
     if (!test) {
-      __m128i mm_diff, error_sse2, sqcoeff_sse2;;
+      __m128i mm_diff, error_sse2, sqcoeff_sse2;
       mm_coeff = _mm_packs_epi32(mm_coeff, mm_coeff2);
       mm_dqcoeff = _mm_packs_epi32(mm_dqcoeff, mm_dqcoeff2);
       mm_diff = _mm_sub_epi16(mm_coeff, mm_dqcoeff);
       error_sse2 = _mm_madd_epi16(mm_diff, mm_diff);
       sqcoeff_sse2 = _mm_madd_epi16(mm_coeff, mm_coeff);
-      _mm_storeu_si128((__m128i*)temp, error_sse2);
+      _mm_storeu_si128((__m128i *)temp, error_sse2);
       error = error + temp[0] + temp[1] + temp[2] + temp[3];
-      _mm_storeu_si128((__m128i*)temp, sqcoeff_sse2);
+      _mm_storeu_si128((__m128i *)temp, sqcoeff_sse2);
       sqcoeff += temp[0] + temp[1] + temp[2] + temp[3];
     } else {
       for (j = 0; j < 8; j++) {
         const int64_t diff = coeff[i + j] - dqcoeff[i + j];
-        error +=  diff * diff;
+        error += diff * diff;
         sqcoeff += (int64_t)coeff[i + j] * (int64_t)coeff[i + j];
       }
     }
diff --git a/vp9/encoder/x86/vp9_quantize_sse2.c b/vp9/encoder/x86/vp9_quantize_sse2.c
index 2071dfe3c..3f8ee5f24 100644
--- a/vp9/encoder/x86/vp9_quantize_sse2.c
+++ b/vp9/encoder/x86/vp9_quantize_sse2.c
@@ -14,14 +14,13 @@
 #include "./vp9_rtcd.h"
 #include "vpx/vpx_integer.h"
 
-void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
-                          int skip_block, const int16_t* zbin_ptr,
-                          const int16_t* round_ptr, const int16_t* quant_ptr,
-                          const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
-                          int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
-                          uint16_t* eob_ptr,
-                          const int16_t* scan_ptr,
-                          const int16_t* iscan_ptr) {
+void vp9_quantize_fp_sse2(const int16_t *coeff_ptr, intptr_t n_coeffs,
+                          int skip_block, const int16_t *zbin_ptr,
+                          const int16_t *round_ptr, const int16_t *quant_ptr,
+                          const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
+                          int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+                          uint16_t *eob_ptr, const int16_t *scan_ptr,
+                          const int16_t *iscan_ptr) {
   __m128i zero;
   __m128i thr;
   int16_t nzflag;
@@ -44,9 +43,9 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
 
       // Setup global values
       {
-        round = _mm_load_si128((const __m128i*)round_ptr);
-        quant = _mm_load_si128((const __m128i*)quant_ptr);
-        dequant = _mm_load_si128((const __m128i*)dequant_ptr);
+        round = _mm_load_si128((const __m128i *)round_ptr);
+        quant = _mm_load_si128((const __m128i *)quant_ptr);
+        dequant = _mm_load_si128((const __m128i *)dequant_ptr);
       }
 
       {
@@ -54,8 +53,8 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
         __m128i qcoeff0, qcoeff1;
         __m128i qtmp0, qtmp1;
         // Do DC and first 15 AC
-        coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
-        coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
+        coeff0 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs));
+        coeff1 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs) + 1);
 
         // Poor man's sign extract
         coeff0_sign = _mm_srai_epi16(coeff0, 15);
@@ -78,15 +77,15 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
         qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
         qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
 
-        _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
-        _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+        _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
+        _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
 
         coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
         dequant = _mm_unpackhi_epi64(dequant, dequant);
         coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
 
-        _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
-        _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+        _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
+        _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
       }
 
       {
@@ -99,8 +98,8 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
         zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
         nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
         nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
-        iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
-        iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
+        iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
+        iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
         // Add one to convert from indices to counts
         iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
         iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -121,8 +120,8 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
         __m128i qcoeff0, qcoeff1;
         __m128i qtmp0, qtmp1;
 
-        coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
-        coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
+        coeff0 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs));
+        coeff1 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs) + 1);
 
         // Poor man's sign extract
         coeff0_sign = _mm_srai_epi16(coeff0, 15);
@@ -133,7 +132,7 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
         qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
 
         nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) |
-            _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
+                 _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
 
         if (nzflag) {
           qcoeff0 = _mm_adds_epi16(qcoeff0, round);
@@ -147,20 +146,20 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
           qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
           qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
 
-          _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
-          _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+          _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
+          _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
 
           coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
           coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
 
-          _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
-          _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+          _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
+          _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
         } else {
-          _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
-          _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
+          _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
+          _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
 
-          _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
-          _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
+          _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
+          _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
         }
       }
 
@@ -174,8 +173,8 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
         zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
         nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
         nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
-        iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
-        iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
+        iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
+        iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
         // Add one to convert from indices to counts
         iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
         iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -200,10 +199,10 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
     }
   } else {
     do {
-      _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
-      _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
-      _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
-      _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
+      _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
+      _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
+      _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
+      _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
       n_coeffs += 8 * 2;
     } while (n_coeffs < 0);
     *eob_ptr = 0;
author	clang-format <noreply@google.com>	2016-07-26 20:43:23 -0700
committer	James Zern <jzern@google.com>	2016-08-02 16:47:11 -0700
commit	e0cc52db3fc9b09c99d7bbee35153cf82964a860 (patch)
tree	4988f1d3a21056339e2ffbd7a3b3d52fab54cb6b /vp9/encoder/x86
parent	3a04c9c9c4c4935925f4c00dcc70610100c5e9dd (diff)
download	libvpx-e0cc52db3fc9b09c99d7bbee35153cf82964a860.tar libvpx-e0cc52db3fc9b09c99d7bbee35153cf82964a860.tar.gz libvpx-e0cc52db3fc9b09c99d7bbee35153cf82964a860.tar.bz2 libvpx-e0cc52db3fc9b09c99d7bbee35153cf82964a860.zip