18 files changed, 349 insertions, 273 deletions
diff --git a/vp8/common/arm/armv6/intra4x4_predict_v6.asm b/vp8/common/arm/armv6/intra4x4_predict_v6.asm
index 75d52dbeb..c5ec824b3 100644
--- a/vp8/common/arm/armv6/intra4x4_predict_v6.asm
+++ b/vp8/common/arm/armv6/intra4x4_predict_v6.asm
@@ -18,15 +18,23 @@
     AREA ||.text||, CODE, READONLY, ALIGN=2
 
 
-;void vp8_intra4x4_predict_armv6(unsigned char *src, int src_stride, int b_mode,
-;                                unsigned char *dst, int dst_stride)
-
+;void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft,
+;                                int left_stride, B_PREDICTION_MODE b_mode,
+;                                unsigned char *dst, int dst_stride,
+;                                unsigned char top_left)
+
+; r0: *Above
+; r1: *yleft
+; r2: left_stride
+; r3: b_mode
+; sp + #40: dst         (stack offsets are as seen after push {r4-r12, lr})
+; sp + #44: dst_stride
+; sp + #48: top_left
 |vp8_intra4x4_predict_armv6| PROC
     push        {r4-r12, lr}
 
-
-    cmp         r2, #10
-    addlt       pc, pc, r2, lsl #2       ; position independent switch
+    cmp         r3, #10                  ; valid b_mode values index the table: 0..9
+    addlo       pc, pc, r3, lsl #2       ; position independent switch; unsigned test so negative modes take the default
     pop         {r4-r12, pc}             ; default
     b           b_dc_pred
     b           b_tm_pred
@@ -41,13 +49,13 @@
 
 b_dc_pred
     ; load values
-    ldr         r8, [r0, -r1]            ; Above
-    ldrb        r4, [r0, #-1]!           ; Left[0]
+    ldr         r8, [r0]                 ; Above
+    ldrb        r4, [r1], r2             ; Left[0]
     mov         r9, #0
-    ldrb        r5, [r0, r1]             ; Left[1]
-    ldrb        r6, [r0, r1, lsl #1]!    ; Left[2]
+    ldrb        r5, [r1], r2             ; Left[1]
+    ldrb        r6, [r1], r2             ; Left[2]
     usad8       r12, r8, r9
-    ldrb        r7, [r0, r1]             ; Left[3]
+    ldrb        r7, [r1]                 ; Left[3]
 
     ; calculate dc
     add         r4, r4, r5
@@ -55,31 +63,30 @@ b_dc_pred
     add         r4, r4, r7
     add         r4, r4, r12
     add         r4, r4, #4
-    ldr         r0, [sp, #40]           ; load stride
+    ldr         r0, [sp, #44]           ; dst_stride
     mov         r12, r4, asr #3         ; (expected_dc + 4) >> 3
 
     add         r12, r12, r12, lsl #8
-    add         r3, r3, r0
+    ldr         r3, [sp, #40]           ; dst
     add         r12, r12, r12, lsl #16
 
     ; store values
-    str         r12, [r3, -r0]
+    str         r12, [r3], r0
+    str         r12, [r3], r0
+    str         r12, [r3], r0
     str         r12, [r3]
-    str         r12, [r3, r0]
-    str         r12, [r3, r0, lsl #1]
 
     pop        {r4-r12, pc}
 
 b_tm_pred
-    sub         r10, r0, #1             ; Left
-    ldr         r8, [r0, -r1]           ; Above
-    ldrb        r9, [r10, -r1]          ; top_left
-    ldrb        r4, [r0, #-1]!          ; Left[0]
-    ldrb        r5, [r10, r1]!          ; Left[1]
-    ldrb        r6, [r0, r1, lsl #1]    ; Left[2]
-    ldrb        r7, [r10, r1, lsl #1]   ; Left[3]
-    ldr         r0, [sp, #40]           ; load stride
-
+    ldr         r8, [r0]                ; Above
+    ldrb        r9, [sp, #48]           ; top_left
+    ldrb        r4, [r1], r2            ; Left[0]
+    ldrb        r5, [r1], r2            ; Left[1]
+    ldrb        r6, [r1], r2            ; Left[2]
+    ldrb        r7, [r1]                ; Left[3]
+    ldr         r0, [sp, #44]           ; dst_stride
+    ldr         r3, [sp, #40]           ; dst
 
     add         r9, r9, r9, lsl #16     ; [tl|tl]
     uxtb16      r10, r8                 ; a[2|0]
@@ -126,25 +133,26 @@ b_tm_pred
     str         r12, [r3], r0
 
     add         r12, r4, r5, lsl #8     ; [3|2|1|0]
-    str         r12, [r3], r0
+    str         r12, [r3]
 
     pop        {r4-r12, pc}
 
 b_ve_pred
-    ldr         r8, [r0, -r1]!          ; a[3|2|1|0]
+    ldr         r8, [r0]                ; a[3|2|1|0]
     ldr         r11, c00FF00FF
-    ldrb        r9, [r0, #-1]           ; top_left
+    ldrb        r9, [sp, #48]           ; top_left
     ldrb        r10, [r0, #4]           ; a[4]
 
     ldr         r0, c00020002
 
     uxtb16      r4, r8                  ; a[2|0]
     uxtb16      r5, r8, ror #8          ; a[3|1]
-    ldr         r2, [sp, #40]           ; stride
+    ldr         r2, [sp, #44]           ; dst_stride
     pkhbt       r9, r9, r5, lsl #16     ; a[1|-1]
 
     add         r9, r9, r4, lsl #1      ;[a[1]+2*a[2]       | tl+2*a[0]       ]
     uxtab16     r9, r9, r5              ;[a[1]+2*a[2]+a[3]  | tl+2*a[0]+a[1]  ]
+    ldr         r3, [sp, #40]           ; dst
     uxtab16     r9, r9, r0              ;[a[1]+2*a[2]+a[3]+2| tl+2*a[0]+a[1]+2]
 
     add         r0, r0, r10, lsl #16    ;[a[4]+2            |                 2]
@@ -154,25 +162,23 @@ b_ve_pred
 
     and         r9, r11, r9, asr #2
     and         r4, r11, r4, asr #2
-    add         r3, r3, r2              ; dst + dst_stride
     add         r9, r9, r4, lsl #8
 
     ; store values
-    str         r9, [r3, -r2]
+    str         r9, [r3], r2
+    str         r9, [r3], r2
+    str         r9, [r3], r2
     str         r9, [r3]
-    str         r9, [r3, r2]
-    str         r9, [r3, r2, lsl #1]
 
     pop        {r4-r12, pc}
 
 
 b_he_pred
-    sub         r10, r0, #1             ; Left
-    ldrb        r4, [r0, #-1]!          ; Left[0]
-    ldrb        r8, [r10, -r1]          ; top_left
-    ldrb        r5, [r10, r1]!          ; Left[1]
-    ldrb        r6, [r0, r1, lsl #1]    ; Left[2]
-    ldrb        r7, [r10, r1, lsl #1]   ; Left[3]
+    ldrb        r4, [r1], r2            ; Left[0]
+    ldrb        r8, [sp, #48]           ; top_left
+    ldrb        r5, [r1], r2            ; Left[1]
+    ldrb        r6, [r1], r2            ; Left[2]
+    ldrb        r7, [r1]                ; Left[3]
 
     add         r8, r8, r4              ; tl   + l[0]
     add         r9, r4, r5              ; l[0] + l[1]
@@ -197,7 +203,8 @@ b_he_pred
     pkhtb       r10, r10, r10, asr #16  ; l[-|2|-|2]
     pkhtb       r11, r11, r11, asr #16  ; l[-|3|-|3]
 
-    ldr         r0, [sp, #40]           ; stride
+    ldr         r0, [sp, #44]           ; dst_stride
+    ldr         r3, [sp, #40]           ; dst
 
     add         r8, r8, r8, lsl #8      ; l[0|0|0|0]
     add         r9, r9, r9, lsl #8      ; l[1|1|1|1]
@@ -206,16 +213,16 @@ b_he_pred
 
     ; store values
     str         r8, [r3], r0
-    str         r9, [r3]
-    str         r10, [r3, r0]
-    str         r11, [r3, r0, lsl #1]
+    str         r9, [r3], r0
+    str         r10, [r3], r0
+    str         r11, [r3]
 
     pop        {r4-r12, pc}
 
 b_ld_pred
-    ldr         r4, [r0, -r1]!          ; Above
+    ldr         r4, [r0]                ; Above[0-3]
     ldr         r12, c00020002
-    ldr         r5, [r0, #4]
+    ldr         r5, [r0, #4]            ; Above[4-7]
     ldr         lr,  c00FF00FF
 
     uxtb16      r6, r4                  ; a[2|0]
@@ -225,7 +232,6 @@ b_ld_pred
     pkhtb       r10, r6, r8             ; a[2|4]
     pkhtb       r11, r7, r9             ; a[3|5]
 
-
     add         r4, r6, r7, lsl #1      ; [a2+2*a3      |      a0+2*a1]
     add         r4, r4, r10, ror #16    ; [a2+2*a3+a4   |   a0+2*a1+a2]
     uxtab16     r4, r4, r12             ; [a2+2*a3+a4+2 | a0+2*a1+a2+2]
@@ -244,7 +250,8 @@ b_ld_pred
     add         r7, r7, r9, asr #16     ; [                 a5+2*a6+a7]
     uxtah       r7, r7, r12             ; [               a5+2*a6+a7+2]
 
-    ldr         r0, [sp, #40]           ; stride
+    ldr         r0, [sp, #44]           ; dst_stride
+    ldr         r3, [sp, #40]           ; dst
 
     ; scale down
     and         r4, lr, r4, asr #2
@@ -266,18 +273,17 @@ b_ld_pred
     mov         r6, r6, lsr #16
     mov         r11, r10, lsr #8
     add         r11, r11, r6, lsl #24   ; [6|5|4|3]
-    str         r11, [r3], r0
+    str         r11, [r3]
 
     pop        {r4-r12, pc}
 
 b_rd_pred
-    sub         r12, r0, r1             ; Above = src - src_stride
-    ldrb        r7, [r0, #-1]!          ; l[0] = pp[3]
-    ldr         lr, [r12]               ; Above = pp[8|7|6|5]
-    ldrb        r8, [r12, #-1]!         ; tl   = pp[4]
-    ldrb        r6, [r12, r1, lsl #1]   ; l[1] = pp[2]
-    ldrb        r5, [r0, r1, lsl #1]    ; l[2] = pp[1]
-    ldrb        r4, [r12, r1, lsl #2]   ; l[3] = pp[0]
+    ldrb        r7, [r1], r2            ; l[0] = pp[3]
+    ldr         lr, [r0]                ; Above = pp[8|7|6|5]
+    ldrb        r8, [sp, #48]           ; tl   = pp[4]
+    ldrb        r6, [r1], r2            ; l[1] = pp[2]
+    ldrb        r5, [r1], r2            ; l[2] = pp[1]
+    ldrb        r4, [r1]                ; l[3] = pp[0] (last row: no write-back needed)
 
 
     uxtb16      r9, lr                  ; p[7|5]
@@ -307,7 +313,8 @@ b_rd_pred
     add         r7, r7, r10             ; [p6+2*p7+p8   |   p4+2*p5+p6]
     uxtab16     r7, r7, r12             ; [p6+2*p7+p8+2 | p4+2*p5+p6+2]
 
-    ldr         r0, [sp, #40]           ; stride
+    ldr         r0, [sp, #44]           ; dst_stride
+    ldr         r3, [sp, #40]           ; dst
 
     ; scale down
     and         r7, lr, r7, asr #2
@@ -328,18 +335,17 @@ b_rd_pred
 
     mov         r11, r10, lsl #8        ; [3|2|1|-]
     uxtab       r11, r11, r4            ; [3|2|1|0]
-    str         r11, [r3], r0
+    str         r11, [r3]
 
     pop        {r4-r12, pc}
 
 b_vr_pred
-    sub         r12, r0, r1             ; Above = src - src_stride
-    ldrb        r7, [r0, #-1]!          ; l[0] = pp[3]
-    ldr         lr, [r12]               ; Above = pp[8|7|6|5]
-    ldrb        r8, [r12, #-1]!         ; tl   = pp[4]
-    ldrb        r6, [r12, r1, lsl #1]   ; l[1] = pp[2]
-    ldrb        r5, [r0, r1, lsl #1]    ; l[2] = pp[1]
-    ldrb        r4, [r12, r1, lsl #2]   ; l[3] = pp[0]
+    ldrb        r7, [r1], r2            ; l[0] = pp[3]
+    ldr         lr, [r0]                ; Above = pp[8|7|6|5]
+    ldrb        r8, [sp, #48]           ; tl   = pp[4]
+    ldrb        r6, [r1], r2            ; l[1] = pp[2]
+    ldrb        r5, [r1], r2            ; l[2] = pp[1]
+    ldrb        r4, [r1]                ; l[3] = pp[0]
 
     add         r5, r5, r7, lsl #16     ; p[3|1]
     add         r6, r6, r8, lsl #16     ; p[4|2]
@@ -376,7 +382,8 @@ b_vr_pred
     add         r8, r8, r10             ; [p6+2*p7+p8   |   p4+2*p5+p6]
     uxtab16     r8, r8, r12             ; [p6+2*p7+p8+2 | p4+2*p5+p6+2]
 
-    ldr         r0, [sp, #40]           ; stride
+    ldr         r0, [sp, #44]           ; dst_stride
+    ldr         r3, [sp, #40]           ; dst
 
     ; scale down
     and         r5, lr, r5, asr #2      ; [B|A]
@@ -397,14 +404,14 @@ b_vr_pred
     pkhtb       r10, r7, r5, asr #16    ; [-|H|-|B]
     str         r2, [r3], r0
     add         r12, r12, r10, lsl #8   ; [H|D|B|A]
-    str         r12, [r3], r0
+    str         r12, [r3]
 
     pop        {r4-r12, pc}
 
 b_vl_pred
-    ldr         r4, [r0, -r1]!          ; [3|2|1|0]
+    ldr         r4, [r0]                ; [3|2|1|0] = Above[0-3]
     ldr         r12, c00020002
-    ldr         r5, [r0, #4]            ; [7|6|5|4]
+    ldr         r5, [r0, #4]            ; [7|6|5|4] = Above[4-7]
     ldr         lr,  c00FF00FF
     ldr         r2,  c00010001
 
@@ -441,7 +448,8 @@ b_vl_pred
     add         r9, r9, r11             ; [p5+2*p6+p7   |   p3+2*p4+p5]
     uxtab16     r9, r9, r12             ; [p5+2*p6+p7+2 | p3+2*p4+p5+2]
 
-    ldr         r0, [sp, #40]           ; stride
+    ldr         r0, [sp, #44]           ; dst_stride
+    ldr         r3, [sp, #40]           ; dst
 
     ; scale down
     and         r5, lr, r5, asr #2      ; [D|C]
@@ -449,7 +457,6 @@ b_vl_pred
     and         r8, lr, r8, asr #2      ; [I|D]
     and         r9, lr, r9, asr #2      ; [J|H]
 
-
     add         r10, r4, r6, lsl #8     ; [F|B|E|A]
     str         r10, [r3], r0
 
@@ -463,18 +470,17 @@ b_vl_pred
     str         r12, [r3], r0
 
     add         r10, r7, r10, lsl #8    ; [J|H|D|G]
-    str         r10, [r3], r0
+    str         r10, [r3]
 
     pop        {r4-r12, pc}
 
 b_hd_pred
-    sub         r12, r0, r1             ; Above = src - src_stride
-    ldrb        r7, [r0, #-1]!          ; l[0] = pp[3]
-    ldr         lr, [r12]               ; Above = pp[8|7|6|5]
-    ldrb        r8, [r12, #-1]!         ; tl   = pp[4]
-    ldrb        r6, [r0, r1]            ; l[1] = pp[2]
-    ldrb        r5, [r0, r1, lsl #1]    ; l[2] = pp[1]
-    ldrb        r4, [r12, r1, lsl #2]   ; l[3] = pp[0]
+    ldrb        r7, [r1], r2            ; l[0] = pp[3]
+    ldr         lr, [r0]                ; Above = pp[8|7|6|5]
+    ldrb        r8, [sp, #48]           ; tl   = pp[4]
+    ldrb        r6, [r1], r2            ; l[1] = pp[2]
+    ldrb        r5, [r1], r2            ; l[2] = pp[1]
+    ldrb        r4, [r1]                ; l[3] = pp[0]
 
     uxtb16      r9, lr                  ; p[7|5]
     uxtb16      r10, lr, ror #8         ; p[8|6]
@@ -492,7 +498,6 @@ b_hd_pred
     pkhtb       r1, r9, r10             ; p[7|6]
     pkhbt       r10, r8, r10, lsl #16   ; p[6|5]
 
-
     uadd16      r11, r4, r5             ; [p1+p2        |        p0+p1]
     uhadd16     r11, r11, r2            ; [(p1+p2+1)>>1 | (p0+p1+1)>>1]
                                         ; [B|A]
@@ -518,7 +523,8 @@ b_hd_pred
     and         r5, lr, r5, asr #2      ; [H|G]
     and         r6, lr, r6, asr #2      ; [J|I]
 
-    ldr         lr, [sp, #40]           ; stride
+    ldr         lr, [sp, #44]           ; dst_stride
+    ldr         r3, [sp, #40]           ; dst
 
     pkhtb       r2, r0, r6              ; [-|F|-|I]
     pkhtb       r12, r6, r5, asr #16    ; [-|J|-|H]
@@ -527,7 +533,6 @@ b_hd_pred
     mov         r12, r12, ror #24       ; [J|I|H|F]
     str         r12, [r3], lr
 
-
     mov         r7, r11, asr #16        ; [-|-|-|B]
     str         r2, [r3], lr
     add         r7, r7, r0, lsl #16     ; [-|E|-|B]
@@ -536,21 +541,20 @@ b_hd_pred
     str         r7, [r3], lr
 
     add         r5, r11, r4, lsl #8     ; [D|B|C|A]
-    str         r5, [r3], lr
+    str         r5, [r3]
 
     pop        {r4-r12, pc}
 
 
 
 b_hu_pred
-    ldrb        r4, [r0, #-1]!          ; Left[0]
+    ldrb        r4, [r1], r2            ; Left[0]
     ldr         r12, c00020002
-    ldrb        r5, [r0, r1]!           ; Left[1]
+    ldrb        r5, [r1], r2            ; Left[1]
     ldr         lr,  c00FF00FF
-    ldrb        r6, [r0, r1]!           ; Left[2]
+    ldrb        r6, [r1], r2            ; Left[2]
     ldr         r2,  c00010001
-    ldrb        r7, [r0, r1]            ; Left[3]
-
+    ldrb        r7, [r1]                ; Left[3]
 
     add         r4, r4, r5, lsl #16     ; [1|0]
     add         r5, r5, r6, lsl #16     ; [2|1]
@@ -563,7 +567,8 @@ b_hu_pred
     add         r4, r4, r5, lsl #1      ; [p1+2*p2      |      p0+2*p1]
     add         r4, r4, r9              ; [p1+2*p2+p3   |   p0+2*p1+p2]
     uxtab16     r4, r4, r12             ; [p1+2*p2+p3+2 | p0+2*p1+p2+2]
-    ldr         r2, [sp, #40]           ; stride
+    ldr         r2, [sp, #44]           ; dst_stride
+    ldr         r3, [sp, #40]           ; dst
     and         r4, lr, r4, asr #2      ; [D|C]
 
     add         r10, r6, r7             ; [p2+p3]
@@ -587,9 +592,9 @@ b_hu_pred
 
     add         r10, r11, lsl #8        ; [-|-|F|E]
     add         r10, r10, r9, lsl #16   ; [G|G|F|E]
-    str         r10, [r3]
+    str         r10, [r3], r2
 
-    str         r7, [r3, r2]
+    str         r7, [r3]
 
     pop        {r4-r12, pc}
 
diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c
index 72375fd36..7bb8d0ac1 100644
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -15,7 +15,7 @@
 
 void vp8_intra4x4_predict_c(unsigned char *Above,
                             unsigned char *yleft, int left_stride,
-                            int b_mode,
+                            B_PREDICTION_MODE b_mode,
                             unsigned char *dst, int dst_stride,
                             unsigned char top_left)
 {
@@ -290,6 +290,8 @@ void vp8_intra4x4_predict_c(unsigned char *Above,
     }
     break;
 
+    default:
+    break;
 
     }
 }
diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh
index e933376e0..f0bdf29be 100644
--- a/vp8/common/rtcd_defs.sh
+++ b/vp8/common/rtcd_defs.sh
@@ -1,5 +1,7 @@
 common_forward_decls() {
 cat <<EOF
+#include "vp8/common/blockd.h"
+
 struct blockd;
 struct macroblockd;
 struct loop_filter_info;
@@ -144,8 +146,9 @@ specialize vp8_build_intra_predictors_mby_s sse2 ssse3
 prototype void vp8_build_intra_predictors_mbuv_s "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row,  unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride"
 specialize vp8_build_intra_predictors_mbuv_s sse2 ssse3
 
-prototype void vp8_intra4x4_predict "unsigned char *above, unsigned char *left, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left"
-# No existing specializations
+prototype void vp8_intra4x4_predict "unsigned char *Above, unsigned char *yleft, int left_stride, B_PREDICTION_MODE b_mode, unsigned char *dst, int dst_stride, unsigned char top_left"
+specialize vp8_intra4x4_predict media
+vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6
 
 #
 # Postproc
@@ -293,23 +296,23 @@ vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6
 #
 # Single block SAD
 #
-prototype unsigned int vp8_sad4x4 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+prototype unsigned int vp8_sad4x4 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
 specialize vp8_sad4x4 mmx sse2 neon
 vp8_sad4x4_sse2=vp8_sad4x4_wmt
 
-prototype unsigned int vp8_sad8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+prototype unsigned int vp8_sad8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
 specialize vp8_sad8x8 mmx sse2 neon
 vp8_sad8x8_sse2=vp8_sad8x8_wmt
 
-prototype unsigned int vp8_sad8x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+prototype unsigned int vp8_sad8x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
 specialize vp8_sad8x16 mmx sse2 neon
 vp8_sad8x16_sse2=vp8_sad8x16_wmt
 
-prototype unsigned int vp8_sad16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+prototype unsigned int vp8_sad16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
 specialize vp8_sad16x8 mmx sse2 neon
 vp8_sad16x8_sse2=vp8_sad16x8_wmt
 
-prototype unsigned int vp8_sad16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+prototype unsigned int vp8_sad16x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
 specialize vp8_sad16x16 mmx sse2 sse3 media neon
 vp8_sad16x16_sse2=vp8_sad16x16_wmt
 vp8_sad16x16_media=vp8_sad16x16_armv6
@@ -317,59 +320,59 @@ vp8_sad16x16_media=vp8_sad16x16_armv6
 #
 # Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
 #
-prototype void vp8_sad4x4x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array"
+prototype void vp8_sad4x4x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array"
 specialize vp8_sad4x4x3 sse3
 
-prototype void vp8_sad8x8x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array"
+prototype void vp8_sad8x8x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array"
 specialize vp8_sad8x8x3 sse3
 
-prototype void vp8_sad8x16x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array"
+prototype void vp8_sad8x16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array"
 specialize vp8_sad8x16x3 sse3
 
-prototype void vp8_sad16x8x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array"
+prototype void vp8_sad16x8x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array"
 specialize vp8_sad16x8x3 sse3 ssse3
 
-prototype void vp8_sad16x16x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array"
+prototype void vp8_sad16x16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array"
 specialize vp8_sad16x16x3 sse3 ssse3
 
 # Note the only difference in the following prototypes is that they return into
 # an array of short
-prototype void vp8_sad4x4x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array"
+prototype void vp8_sad4x4x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array"
 specialize vp8_sad4x4x8 sse4_1
 vp8_sad4x4x8_sse4_1=vp8_sad4x4x8_sse4
 
-prototype void vp8_sad8x8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array"
+prototype void vp8_sad8x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array"
 specialize vp8_sad8x8x8 sse4_1
 vp8_sad8x8x8_sse4_1=vp8_sad8x8x8_sse4
 
-prototype void vp8_sad8x16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array"
+prototype void vp8_sad8x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array"
 specialize vp8_sad8x16x8 sse4_1
 vp8_sad8x16x8_sse4_1=vp8_sad8x16x8_sse4
 
-prototype void vp8_sad16x8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array"
+prototype void vp8_sad16x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array"
 specialize vp8_sad16x8x8 sse4_1
 vp8_sad16x8x8_sse4_1=vp8_sad16x8x8_sse4
 
-prototype void vp8_sad16x16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array"
+prototype void vp8_sad16x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array"
 specialize vp8_sad16x16x8 sse4_1
 vp8_sad16x16x8_sse4_1=vp8_sad16x16x8_sse4
 
 #
 # Multi-block SAD, comparing a reference to N independent blocks
 #
-prototype void vp8_sad4x4x4d "const unsigned char *src_ptr, int source_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array"
+prototype void vp8_sad4x4x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array"
 specialize vp8_sad4x4x4d sse3
 
-prototype void vp8_sad8x8x4d "const unsigned char *src_ptr, int source_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array"
+prototype void vp8_sad8x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array"
 specialize vp8_sad8x8x4d sse3
 
-prototype void vp8_sad8x16x4d "const unsigned char *src_ptr, int source_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array"
+prototype void vp8_sad8x16x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array"
 specialize vp8_sad8x16x4d sse3
 
-prototype void vp8_sad16x8x4d "const unsigned char *src_ptr, int source_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array"
+prototype void vp8_sad16x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array"
 specialize vp8_sad16x8x4d sse3
 
-prototype void vp8_sad16x16x4d "const unsigned char *src_ptr, int source_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array"
+prototype void vp8_sad16x16x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array"
 specialize vp8_sad16x16x4d sse3
 
 #
diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h
index 880c18522..1a08c057b 100644
--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@@ -55,7 +55,7 @@ void vp8dx_bool_decoder_fill(BOOL_DECODER *br);
         int loop_end, x; \
         size_t bits_left = ((_bufend)-(_bufptr))*CHAR_BIT; \
         \
-        x = shift + CHAR_BIT - bits_left; \
+        x = (int)(shift + CHAR_BIT - bits_left); \
         loop_end = 0; \
         if(x >= 0) \
         { \
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 2d497b940..ed2d34574 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -188,7 +188,8 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
             {
                 BLOCKD *b = &xd->block[i];
                 unsigned char *dst = xd->dst.y_buffer + b->offset;
-                int b_mode = xd->mode_info_context->bmi[i].as_mode;
+                B_PREDICTION_MODE b_mode =
+                    xd->mode_info_context->bmi[i].as_mode;
                 unsigned char *Above = dst - dst_stride;
                 unsigned char *yleft = dst - 1;
                 int left_stride = dst_stride;
@@ -545,13 +546,13 @@ static unsigned int read_available_partition_size(
         if (read_is_valid(partition_size_ptr, 3, first_fragment_end))
             partition_size = read_partition_size(partition_size_ptr);
         else if (pbi->ec_active)
-            partition_size = bytes_left;
+            partition_size = (unsigned int)bytes_left;
         else
             vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
                                "Truncated partition size data");
     }
     else
-        partition_size = bytes_left;
+        partition_size = (unsigned int)bytes_left;
 
     /* Validate the calculated partition length. If the buffer
      * described by the partition can't be fully read, then restrict
@@ -560,7 +561,7 @@ static unsigned int read_available_partition_size(
     if (!read_is_valid(fragment_start, partition_size, fragment_end))
     {
         if (pbi->ec_active)
-            partition_size = bytes_left;
+            partition_size = (unsigned int)bytes_left;
         else
             vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
                                "Truncated packet or corrupt partition "
@@ -606,10 +607,10 @@ static void setup_token_decoder(VP8D_COMP *pbi,
             /* Size of first partition + token partition sizes element */
             ptrdiff_t ext_first_part_size = token_part_sizes -
                 pbi->fragments[0] + 3 * (num_token_partitions - 1);
-            fragment_size -= ext_first_part_size;
+            fragment_size -= (unsigned int)ext_first_part_size;
             if (fragment_size > 0)
             {
-                pbi->fragment_sizes[0] = ext_first_part_size;
+                pbi->fragment_sizes[0] = (unsigned int)ext_first_part_size;
                 /* The fragment contains an additional partition. Move to
                  * next. */
                 fragment_idx++;
@@ -628,8 +629,8 @@ static void setup_token_decoder(VP8D_COMP *pbi,
                                                  fragment_end,
                                                  fragment_idx - 1,
                                                  num_token_partitions);
-            pbi->fragment_sizes[fragment_idx] = partition_size;
-            fragment_size -= partition_size;
+            pbi->fragment_sizes[fragment_idx] = (unsigned int)partition_size;
+            fragment_size -= (unsigned int)partition_size;
             assert(fragment_idx <= num_token_partitions);
             if (fragment_size > 0)
             {
@@ -895,7 +896,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 
     init_frame(pbi);
 
-    if (vp8dx_start_decode(bc, data, data_end - data))
+    if (vp8dx_start_decode(bc, data, (unsigned int)(data_end - data)))
         vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
                            "Failed to allocate bool decoder 0");
     if (pc->frame_type == KEY_FRAME) {
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index cbc6a4328..dce1e4cd6 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -177,7 +177,8 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
             {
                 BLOCKD *b = &xd->block[i];
                 unsigned char *dst = xd->dst.y_buffer + b->offset;
-                int b_mode = xd->mode_info_context->bmi[i].as_mode;
+                B_PREDICTION_MODE b_mode =
+                    xd->mode_info_context->bmi[i].as_mode;
                 unsigned char *Above;
                 unsigned char *yleft;
                 int left_stride;
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 92a7e067b..27991433b 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -397,7 +397,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
         {
             const TOKENEXTRA *p    = cpi->tplist[mb_row].start;
             const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
-            int tokens = stop - p;
+            int tokens = (int)(stop - p);
 
             vp8_pack_tokens_c(w, p, tokens);
         }
@@ -416,7 +416,7 @@ static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w)
     {
         const TOKENEXTRA *p    = cpi->tplist[mb_row].start;
         const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
-        int tokens = stop - p;
+        int tokens = (int)(stop - p);
 
         vp8_pack_tokens_c(w, p, tokens);
     }
diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
index 6bdd5c26e..d6b03e63a 100644
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -256,8 +256,9 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
         mv_row = x->best_sse_mv.as_mv.row;
 
         if (frame == INTRA_FRAME ||
-            (mv_row *mv_row + mv_col *mv_col <= NOISE_MOTION_THRESHOLD &&
-             sse_diff < SSE_DIFF_THRESHOLD))
+            ((unsigned int)(mv_row *mv_row + mv_col *mv_col)
+              <= NOISE_MOTION_THRESHOLD &&
+             sse_diff < (int)SSE_DIFF_THRESHOLD))
         {
             /*
              * Handle intra blocks as referring to last frame with zero motion
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 7ff693cd6..4d73d470f 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -823,7 +823,8 @@ void vp8_encode_frame(VP8_COMP *cpi)
 
             for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
             {
-                cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
+                cpi->tok_count += (unsigned int)
+                  (cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start);
             }
 
             if (xd->segmentation_enabled)
@@ -867,7 +868,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
                 x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
             }
 
-            cpi->tok_count = tp - cpi->tok;
+            cpi->tok_count = (unsigned int)(tp - cpi->tok);
         }
 
 #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index 919bc70a0..e0bb1b09f 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -59,8 +59,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
     MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2);
     ENTROPY_CONTEXT_PLANES mb_row_left_context;
 
-    const int nsync = cpi->mt_sync_range;
-
     while (1)
     {
         if (cpi->b_multi_threaded == 0)
@@ -68,6 +66,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
 
         if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0)
         {
+            const int nsync = cpi->mt_sync_range;
             VP8_COMMON *cm = &cpi->common;
             int mb_row;
             MACROBLOCK *x = &mbri->mb;
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index c98544f71..b668c8f3b 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -798,8 +798,8 @@ skip_motion_search:
         FIRSTPASS_STATS fps;
 
         fps.frame      = cm->current_video_frame ;
-        fps.intra_error = intra_error >> 8;
-        fps.coded_error = coded_error >> 8;
+        fps.intra_error = (double)(intra_error >> 8);
+        fps.coded_error = (double)(coded_error >> 8);
         weight = simple_weight(cpi->Source);
 
 
@@ -841,8 +841,8 @@ skip_motion_search:
         /* TODO:  handle the case when duration is set to 0, or something less
          * than the full time between subsequent cpi->source_time_stamps
          */
-        fps.duration = cpi->source->ts_end
-                       - cpi->source->ts_start;
+        fps.duration = (double)(cpi->source->ts_end
+                       - cpi->source->ts_start);
 
         /* don't want to do output stats with a stack variable! */
         memcpy(&cpi->twopass.this_frame_stats,
@@ -1030,7 +1030,8 @@ static int estimate_max_q(VP8_COMP *cpi,
     /* Estimate of overhead bits per mb */
     /* Correction to overhead bits for min allowed Q. */
     overhead_bits_per_mb = overhead_bits / num_mbs;
-    overhead_bits_per_mb *= pow( 0.98, (double)cpi->twopass.maxq_min_limit );
+    overhead_bits_per_mb = (int)(overhead_bits_per_mb *
+                            pow( 0.98, (double)cpi->twopass.maxq_min_limit ));
 
     /* Try and pick a max Q that will be high enough to encode the
      * content at the given rate.
@@ -1073,7 +1074,7 @@ static int estimate_max_q(VP8_COMP *cpi,
      * Give average a chance to settle though.
      */
     if ( (cpi->ni_frames >
-                  ((unsigned int)cpi->twopass.total_stats.count >> 8)) &&
+                  ((int)cpi->twopass.total_stats.count >> 8)) &&
          (cpi->ni_frames > 150) )
     {
         cpi->twopass.maxq_max_limit = ((cpi->ni_av_qi + 32) < cpi->worst_quality)
@@ -1880,7 +1881,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         /* For cbr apply buffer related limits */
         if (cpi->drop_frames_allowed)
         {
-            int df_buffer_level = cpi->oxcf.drop_frames_water_mark *
+            int64_t df_buffer_level = cpi->oxcf.drop_frames_water_mark *
                                   (cpi->oxcf.optimal_buffer_level / 100);
 
             if (cpi->buffer_level > df_buffer_level)
@@ -2043,8 +2044,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
              * so it now points at the ARF frame.
              */
             half_gf_int = cpi->baseline_gf_interval >> 1;
-            frames_after_arf = cpi->twopass.total_stats.count -
-                               this_frame->frame - 1;
+            frames_after_arf = (int)(cpi->twopass.total_stats.count -
+                               this_frame->frame - 1);
 
             switch (cpi->oxcf.arnr_type)
             {
@@ -2120,11 +2121,11 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     else
         cpi->twopass.gf_group_bits = 0;
 
-    cpi->twopass.gf_group_bits =
+    cpi->twopass.gf_group_bits = (int)(
         (cpi->twopass.gf_group_bits < 0)
             ? 0
             : (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits)
-                ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits;
+                ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits);
 
     /* Clip cpi->twopass.gf_group_bits based on user supplied data rate
      * variability limit (cpi->oxcf.two_pass_vbrmax_section)
@@ -2236,8 +2237,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         /* Apply an additional limit for CBR */
         if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
         {
             if (cpi->twopass.gf_bits > (cpi->buffer_level >> 1))
-                cpi->twopass.gf_bits = cpi->buffer_level >> 1;
+                cpi->twopass.gf_bits = (int)(cpi->buffer_level >> 1);
         }
 
         /* Dont allow a negative value for gf_bits */
@@ -2260,7 +2261,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 
     {
         /* Adjust KF group bits and error remainin */
-        cpi->twopass.kf_group_error_left -= gf_group_err;
+        cpi->twopass.kf_group_error_left -= (int64_t)gf_group_err;
         cpi->twopass.kf_group_bits -= cpi->twopass.gf_group_bits;
 
         if (cpi->twopass.kf_group_bits < 0)
@@ -2272,9 +2273,10 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
          * already happened)
          */
         if (!cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME)
-            cpi->twopass.gf_group_error_left = gf_group_err - gf_first_frame_err;
+            cpi->twopass.gf_group_error_left = (int)(gf_group_err -
+                                                     gf_first_frame_err);
         else
-            cpi->twopass.gf_group_error_left = gf_group_err;
+            cpi->twopass.gf_group_error_left = (int) gf_group_err;
 
         cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits - cpi->min_frame_bandwidth;
 
@@ -2330,9 +2332,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 
         avg_stats(&sectionstats);
 
-        cpi->twopass.section_intra_rating =
-            sectionstats.intra_error /
-            DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
+        cpi->twopass.section_intra_rating = (unsigned int)
+            (sectionstats.intra_error /
+            DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));
 
         Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
         cpi->twopass.section_max_qfactor = 1.0 - ((Ratio - 10.0) * 0.025);
@@ -2381,7 +2383,7 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     }
 
     /* Adjust error and bits remaining */
-    cpi->twopass.gf_group_error_left -= modified_err;
+    cpi->twopass.gf_group_error_left -= (int)modified_err;
     cpi->twopass.gf_group_bits -= target_frame_size;
 
     if (cpi->twopass.gf_group_bits < 0)
@@ -2443,8 +2445,9 @@ void vp8_second_pass(VP8_COMP *cpi)
          */
         if (cpi->oxcf.error_resilient_mode)
         {
-            cpi->twopass.gf_group_bits = cpi->twopass.kf_group_bits;
-            cpi->twopass.gf_group_error_left = cpi->twopass.kf_group_error_left;
+            cpi->twopass.gf_group_bits = (int)cpi->twopass.kf_group_bits;
+            cpi->twopass.gf_group_error_left =
+                                  (int)cpi->twopass.kf_group_error_left;
             cpi->baseline_gf_interval = cpi->twopass.frames_to_key;
             cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
             cpi->source_alt_ref_pending = 0;
@@ -2508,25 +2511,26 @@ void vp8_second_pass(VP8_COMP *cpi)
     }
 
     /* Keep a globally available copy of this and the next frame's iiratio. */
-    cpi->twopass.this_iiratio = this_frame_intra_error /
-                        DOUBLE_DIVIDE_CHECK(this_frame_coded_error);
+    cpi->twopass.this_iiratio = (unsigned int)(this_frame_intra_error /
+                        DOUBLE_DIVIDE_CHECK(this_frame_coded_error));
     {
         FIRSTPASS_STATS next_frame;
         if ( lookup_next_frame_stats(cpi, &next_frame) != EOF )
         {
-            cpi->twopass.next_iiratio = next_frame.intra_error /
-                                DOUBLE_DIVIDE_CHECK(next_frame.coded_error);
+            cpi->twopass.next_iiratio = (unsigned int)(next_frame.intra_error /
+                                DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
         }
     }
 
     /* Set nominal per second bandwidth for this frame */
-    cpi->target_bandwidth = cpi->per_frame_bandwidth * cpi->output_frame_rate;
+    cpi->target_bandwidth = (int)
+    (cpi->per_frame_bandwidth * cpi->output_frame_rate);
     if (cpi->target_bandwidth < 0)
         cpi->target_bandwidth = 0;
 
 
     /* Account for mv, mode and other overheads. */
-    overhead_bits = estimate_modemvcost(
+    overhead_bits = (int)estimate_modemvcost(
                         cpi, &cpi->twopass.total_left_stats );
 
     /* Special case code for first frame. */
@@ -2899,15 +2903,15 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         /* Additional special case for CBR if buffer is getting full. */
         if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
         {
-            int opt_buffer_lvl = cpi->oxcf.optimal_buffer_level;
-            int buffer_lvl = cpi->buffer_level;
+            int64_t opt_buffer_lvl = cpi->oxcf.optimal_buffer_level;
+            int64_t buffer_lvl = cpi->buffer_level;
 
             /* If the buffer is near or above the optimal and this kf group is
              * not being allocated much then increase the allocation a bit.
              */
             if (buffer_lvl >= opt_buffer_lvl)
             {
-                int high_water_mark = (opt_buffer_lvl +
+                int64_t high_water_mark = (opt_buffer_lvl +
                                        cpi->oxcf.maximum_buffer_size) >> 1;
 
                 int64_t av_group_bits;
@@ -3005,9 +3009,9 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 
         avg_stats(&sectionstats);
 
-        cpi->twopass.section_intra_rating =
-            sectionstats.intra_error
-            / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
+        cpi->twopass.section_intra_rating = (unsigned int)
+            (sectionstats.intra_error
+            / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));
 
         Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
         cpi->twopass.section_max_qfactor = 1.0 - ((Ratio - 10.0) * 0.025);
@@ -3023,7 +3027,8 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 
         if (cpi->drop_frames_allowed)
         {
-            int df_buffer_level = cpi->oxcf.drop_frames_water_mark * (cpi->oxcf.optimal_buffer_level / 100);
+            int64_t df_buffer_level = cpi->oxcf.drop_frames_water_mark
+                                  * (cpi->oxcf.optimal_buffer_level / 100);
 
             if (cpi->buffer_level > df_buffer_level)
                 max_boost = ((double)((cpi->buffer_level - df_buffer_level) * 2 / 3) * 16.0) / DOUBLE_DIVIDE_CHECK((double)cpi->av_per_frame_bandwidth);
@@ -3049,7 +3054,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     /* Work out how many bits to allocate for the key frame itself */
     if (1)
     {
-        int kf_boost = boost_score;
+        int kf_boost = (int)boost_score;
         int allocation_chunks;
         int Counter = cpi->twopass.frames_to_key;
         int alt_kf_bits;
@@ -3125,8 +3130,8 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         /* Apply an additional limit for CBR */
         if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
         {
             if (cpi->twopass.kf_bits > ((3 * cpi->buffer_level) >> 2))
-                cpi->twopass.kf_bits = (3 * cpi->buffer_level) >> 2;
+                cpi->twopass.kf_bits = (int)((3 * cpi->buffer_level) >> 2);
         }
 
         /* If the key frame is actually easier than the average for the
@@ -3174,7 +3179,8 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         cpi->per_frame_bandwidth = cpi->twopass.kf_bits;
 
         /* Convert to a per second bitrate */
-        cpi->target_bandwidth = cpi->twopass.kf_bits * cpi->output_frame_rate;
+        cpi->target_bandwidth = (int)(cpi->twopass.kf_bits *
+                                      cpi->output_frame_rate);
     }
 
     /* Note the total error score of the kf group minus the key frame itself */
@@ -3195,7 +3201,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         int new_width = cpi->oxcf.Width;
         int new_height = cpi->oxcf.Height;
 
-        int projected_buffer_level = cpi->buffer_level;
+        int projected_buffer_level = (int)cpi->buffer_level;
         int tmp_q;
 
         double projected_bits_perframe;
@@ -3228,7 +3234,8 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         else
         {
             /* This accounts for how hard the section is... */
-            bits_per_frame = cpi->twopass.kf_group_bits / cpi->twopass.frames_to_key;
+            bits_per_frame = (double)
+                (cpi->twopass.kf_group_bits / cpi->twopass.frames_to_key);
 
             /* Dont turn to resampling in easy sections just because they
              * have been assigned a small number of bits
@@ -3242,7 +3249,8 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
             bits_per_frame = (cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
 
         /* Work out if spatial resampling is necessary */
-        kf_q = estimate_kf_group_q(cpi, err_per_frame, bits_per_frame, group_iiratio);
+        kf_q = estimate_kf_group_q(cpi, err_per_frame,
+                                  (int)bits_per_frame, group_iiratio);
 
         /* If we project a required Q higher than the maximum allowed Q then
          * make a guess at the actual size of frames in this section
@@ -3257,7 +3265,10 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         }
 
         /* Guess at buffer level at the end of the section */
-        projected_buffer_level = cpi->buffer_level - (int)((projected_bits_perframe - av_bits_per_frame) * cpi->twopass.frames_to_key);
+        projected_buffer_level = (int)
+                    (cpi->buffer_level - (int)
+                    ((projected_bits_perframe - av_bits_per_frame) *
+                    cpi->twopass.frames_to_key));
 
         if (0)
         {
@@ -3326,7 +3337,9 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
                 /* Now try again and see what Q we get with the smaller
                  * image size
                  */
-                kf_q = estimate_kf_group_q(cpi, err_per_frame * effective_size_ratio, bits_per_frame, group_iiratio);
+                kf_q = estimate_kf_group_q(cpi,
+                                          err_per_frame * effective_size_ratio,
+                                          (int)bits_per_frame, group_iiratio);
 
                 if (0)
                 {
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 56142c81b..22da51a7c 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -555,7 +555,12 @@ static void set_default_lf_deltas(VP8_COMP *cpi)
     cpi->mb.e_mbd.ref_lf_deltas[ALTREF_FRAME] = -2;
 
     cpi->mb.e_mbd.mode_lf_deltas[0] = 4;               /* BPRED */
-    cpi->mb.e_mbd.mode_lf_deltas[1] = -2;              /* Zero */
+
+    if(cpi->oxcf.Mode == MODE_REALTIME)
+      cpi->mb.e_mbd.mode_lf_deltas[1] = -12;              /* Zero */
+    else
+      cpi->mb.e_mbd.mode_lf_deltas[1] = -2;              /* Zero */
+
     cpi->mb.e_mbd.mode_lf_deltas[2] = 2;               /* New mv */
     cpi->mb.e_mbd.mode_lf_deltas[3] = 4;               /* Split mv */
 }
@@ -1103,22 +1108,47 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
     cpi->gf_update_recommended = 0;
 
 
-    /* Structures used to minitor GF usage */
+    /* Structures used to monitor GF usage */
     vpx_free(cpi->gf_active_flags);
     CHECK_MEM_ERROR(cpi->gf_active_flags,
-                    vpx_calloc(1, cm->mb_rows * cm->mb_cols));
+                    vpx_calloc(sizeof(*cpi->gf_active_flags),
+                    cm->mb_rows * cm->mb_cols));
     cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
 
     vpx_free(cpi->mb_activity_map);
     CHECK_MEM_ERROR(cpi->mb_activity_map,
-                    vpx_calloc(sizeof(unsigned int),
+                    vpx_calloc(sizeof(*cpi->mb_activity_map),
                     cm->mb_rows * cm->mb_cols));
 
     vpx_free(cpi->mb_norm_activity_map);
     CHECK_MEM_ERROR(cpi->mb_norm_activity_map,
-                    vpx_calloc(sizeof(unsigned int),
+                    vpx_calloc(sizeof(*cpi->mb_norm_activity_map),
                     cm->mb_rows * cm->mb_cols));
 
+    /* allocate memory for storing last frame's MVs for MV prediction. */
+    vpx_free(cpi->lfmv);
+    CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2),
+                    sizeof(*cpi->lfmv)));
+    vpx_free(cpi->lf_ref_frame_sign_bias);
+    CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias,
+                    vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2),
+                    sizeof(*cpi->lf_ref_frame_sign_bias)));
+    vpx_free(cpi->lf_ref_frame);
+    CHECK_MEM_ERROR(cpi->lf_ref_frame,
+                    vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2),
+                    sizeof(*cpi->lf_ref_frame)));
+
+    /* Create the encoder segmentation map and set all entries to 0 */
+    vpx_free(cpi->segmentation_map);
+    CHECK_MEM_ERROR(cpi->segmentation_map,
+                    vpx_calloc(cm->mb_rows * cm->mb_cols,
+                    sizeof(*cpi->segmentation_map)));
+    vpx_free(cpi->active_map);
+    CHECK_MEM_ERROR(cpi->active_map,
+                    vpx_calloc(cm->mb_rows * cm->mb_cols,
+                    sizeof(*cpi->active_map)));
+    vpx_memset(cpi->active_map , 1, (cm->mb_rows * cm->mb_cols));
+
 #if CONFIG_MULTITHREAD
     if (width < 640)
         cpi->mt_sync_range = 1;
@@ -1133,14 +1163,13 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
     {
         vpx_free(cpi->mt_current_mb_col);
         CHECK_MEM_ERROR(cpi->mt_current_mb_col,
-                        vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows));
+                    vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows));
     }
 
 #endif
 
     vpx_free(cpi->tplist);
-    CHECK_MEM_ERROR(cpi->tplist,
-                    vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows));
+    CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cm->mb_rows));
 }
 
 
@@ -1211,7 +1240,7 @@ rescale(int val, int num, int denom)
     int64_t llden = denom;
     int64_t llval = val;
 
-    return llval * llnum / llden;
+    return (int)(llval * llnum / llden);
 }
 
 
@@ -1281,28 +1310,29 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
             lc->maximum_buffer_size_in_ms   = oxcf->maximum_buffer_size;
 
             lc->starting_buffer_level =
-              rescale(oxcf->starting_buffer_level,
+              rescale((int)(oxcf->starting_buffer_level),
                           lc->target_bandwidth, 1000);
 
             if (oxcf->optimal_buffer_level == 0)
                 lc->optimal_buffer_level = lc->target_bandwidth / 8;
             else
                 lc->optimal_buffer_level =
-                  rescale(oxcf->optimal_buffer_level,
+                  rescale((int)(oxcf->optimal_buffer_level),
                           lc->target_bandwidth, 1000);
 
             if (oxcf->maximum_buffer_size == 0)
                 lc->maximum_buffer_size = lc->target_bandwidth / 8;
             else
                 lc->maximum_buffer_size =
-                  rescale(oxcf->maximum_buffer_size,
+                  rescale((int)oxcf->maximum_buffer_size,
                           lc->target_bandwidth, 1000);
 
             /* Work out the average size of a frame within this layer */
             if (i > 0)
-                lc->avg_frame_size_for_layer = (cpi->oxcf.target_bitrate[i] -
-                    cpi->oxcf.target_bitrate[i-1]) * 1000 /
-                    (lc->frame_rate - prev_layer_frame_rate);
+                lc->avg_frame_size_for_layer =
+                  (int)((cpi->oxcf.target_bitrate[i] -
+                         cpi->oxcf.target_bitrate[i-1]) * 1000 /
+                        (lc->frame_rate - prev_layer_frame_rate));
 
             lc->active_worst_quality         = cpi->oxcf.worst_allowed_q;
             lc->active_best_quality          = cpi->oxcf.best_allowed_q;
@@ -1318,7 +1348,7 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
             lc->rate_correction_factor            = 1.0;
             lc->key_frame_rate_correction_factor  = 1.0;
             lc->gf_rate_correction_factor         = 1.0;
-            lc->inter_frame_target                = 0.0;
+            lc->inter_frame_target                = 0;
 
             prev_layer_frame_rate = lc->frame_rate;
         }
@@ -1355,28 +1385,29 @@ static void update_layer_contexts (VP8_COMP *cpi)
             lc->target_bandwidth = oxcf->target_bitrate[i] * 1000;
 
             lc->starting_buffer_level = rescale(
-                          oxcf->starting_buffer_level_in_ms,
+                          (int)oxcf->starting_buffer_level_in_ms,
                           lc->target_bandwidth, 1000);
 
             if (oxcf->optimal_buffer_level == 0)
                 lc->optimal_buffer_level = lc->target_bandwidth / 8;
             else
                 lc->optimal_buffer_level = rescale(
-                          oxcf->optimal_buffer_level_in_ms,
+                          (int)oxcf->optimal_buffer_level_in_ms,
                           lc->target_bandwidth, 1000);
 
             if (oxcf->maximum_buffer_size == 0)
                 lc->maximum_buffer_size = lc->target_bandwidth / 8;
             else
                 lc->maximum_buffer_size = rescale(
-                          oxcf->maximum_buffer_size_in_ms,
+                          (int)oxcf->maximum_buffer_size_in_ms,
                           lc->target_bandwidth, 1000);
 
             /* Work out the average size of a frame within this layer */
             if (i > 0)
-                lc->avg_frame_size_for_layer = (oxcf->target_bitrate[i] -
-                    oxcf->target_bitrate[i-1]) * 1000 /
-                    (lc->frame_rate - prev_layer_frame_rate);
+                lc->avg_frame_size_for_layer =
+                   (int)((oxcf->target_bitrate[i] -
+                          oxcf->target_bitrate[i-1]) * 1000 /
+                          (lc->frame_rate - prev_layer_frame_rate));
 
             prev_layer_frame_rate = lc->frame_rate;
         }
@@ -1549,7 +1580,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
     cpi->oxcf.target_bandwidth       *= 1000;
 
     cpi->oxcf.starting_buffer_level =
-        rescale(cpi->oxcf.starting_buffer_level,
+        rescale((int)cpi->oxcf.starting_buffer_level,
                 cpi->oxcf.target_bandwidth, 1000);
 
     /* Set or reset optimal and maximum buffer levels. */
@@ -1557,14 +1588,14 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
         cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
     else
         cpi->oxcf.optimal_buffer_level =
-            rescale(cpi->oxcf.optimal_buffer_level,
+            rescale((int)cpi->oxcf.optimal_buffer_level,
                     cpi->oxcf.target_bandwidth, 1000);
 
     if (cpi->oxcf.maximum_buffer_size == 0)
         cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8;
     else
         cpi->oxcf.maximum_buffer_size =
-            rescale(cpi->oxcf.maximum_buffer_size,
+            rescale((int)cpi->oxcf.maximum_buffer_size,
                     cpi->oxcf.target_bandwidth, 1000);
 
     /* Set up frame rate and related parameters rate control values. */
@@ -1767,16 +1798,6 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
     cpi->alt_is_last  = 0 ;
     cpi->gold_is_alt  = 0 ;
 
-    /* allocate memory for storing last frame's MVs for MV prediction. */
-    CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int_mv)));
-    CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int)));
-    CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int)));
-
-    /* Create the encoder segmentation map and set all entries to 0 */
-    CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
-
-    CHECK_MEM_ERROR(cpi->active_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
-    vpx_memset(cpi->active_map , 1, (cpi->common.mb_rows * cpi->common.mb_cols));
     cpi->active_map_enabled = 0;
 
 #if 0
@@ -1910,7 +1931,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
     else if (cpi->pass == 2)
     {
         size_t packet_sz = sizeof(FIRSTPASS_STATS);
-        int packets = oxcf->two_pass_stats_in.sz / packet_sz;
+        int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);
 
         cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
         cpi->twopass.stats_in = cpi->twopass.stats_in_start;
@@ -2096,7 +2117,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
 
                     fprintf(f, "Layer\tBitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\t"
                                "GLPsnrP\tVPXSSIM\t\n");
-                    for (i=0; i<cpi->oxcf.number_of_layers; i++)
+                    for (i=0; i<(int)cpi->oxcf.number_of_layers; i++)
                     {
                         double dr = (double)cpi->bytes_in_layer[i] *
                                               8.0 / 1000.0  / time_encoded;
@@ -2147,7 +2168,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
 
                     fprintf(f, "Layer\tBitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t"
                                "Time(us)\n");
-                    for (i=0; i<cpi->oxcf.number_of_layers; i++)
+                    for (i=0; i<(int)cpi->oxcf.number_of_layers; i++)
                     {
                         double dr = (double)cpi->bytes_in_layer[i] *
                                     8.0 / 1000.0  / time_encoded;
@@ -2455,7 +2476,7 @@ static void generate_psnr_packet(VP8_COMP *cpi)
 
     for (i = 0; i < 4; i++)
         pkt.data.psnr.psnr[i] = vp8_mse2psnr(pkt.data.psnr.samples[i], 255.0,
-                                             pkt.data.psnr.sse[i]);
+                                             (double)(pkt.data.psnr.sse[i]));
 
     vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt);
 }
@@ -3277,7 +3298,8 @@ static void encode_frame_to_data_rate
     int undershoot_seen = 0;
 #endif
 
-    int drop_mark = cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100;
+    int drop_mark = (int)(cpi->oxcf.drop_frames_water_mark *
+                          cpi->oxcf.optimal_buffer_level / 100);
     int drop_mark75 = drop_mark * 2 / 3;
     int drop_mark50 = drop_mark / 4;
     int drop_mark25 = drop_mark / 8;
@@ -3313,7 +3335,8 @@ static void encode_frame_to_data_rate
             /* Per frame bit target for the alt ref frame */
             cpi->per_frame_bandwidth = cpi->twopass.gf_bits;
             /* per second target bitrate */
-            cpi->target_bandwidth = cpi->twopass.gf_bits * cpi->output_frame_rate;
+            cpi->target_bandwidth = (int)(cpi->twopass.gf_bits *
+                                          cpi->output_frame_rate);
         }
     }
     else
@@ -3562,10 +3585,16 @@ static void encode_frame_to_data_rate
 
             if (cpi->buffer_level < cpi->oxcf.maximum_buffer_size)
             {
-                buff_lvl_step = (cpi->oxcf.maximum_buffer_size - cpi->oxcf.optimal_buffer_level) / Adjustment;
+                buff_lvl_step = (int)
+                                ((cpi->oxcf.maximum_buffer_size -
+                                  cpi->oxcf.optimal_buffer_level) /
+                                  Adjustment);
 
                 if (buff_lvl_step)
-                    Adjustment = (cpi->buffer_level - cpi->oxcf.optimal_buffer_level) / buff_lvl_step;
+                    Adjustment = (int)
+                                 ((cpi->buffer_level -
+                                 cpi->oxcf.optimal_buffer_level) /
+                                 buff_lvl_step);
                 else
                     Adjustment = 0;
             }
@@ -3686,8 +3715,12 @@ static void encode_frame_to_data_rate
 
             else if (cpi->buffer_level > cpi->oxcf.optimal_buffer_level)
             {
-                int Fraction = ((cpi->buffer_level - cpi->oxcf.optimal_buffer_level) * 128) / (cpi->oxcf.maximum_buffer_size - cpi->oxcf.optimal_buffer_level);
-                int min_qadjustment = ((cpi->active_best_quality - cpi->best_quality) * Fraction) / 128;
+                int Fraction = (int)
+                  (((cpi->buffer_level - cpi->oxcf.optimal_buffer_level) * 128)
+                  / (cpi->oxcf.maximum_buffer_size -
+                  cpi->oxcf.optimal_buffer_level));
+                int min_qadjustment = ((cpi->active_best_quality -
+                                        cpi->best_quality) * Fraction) / 128;
 
                 cpi->active_best_quality -= min_qadjustment;
             }
@@ -4448,8 +4481,9 @@ static void encode_frame_to_data_rate
         for (i=cpi->current_layer+1; i<cpi->oxcf.number_of_layers; i++)
         {
             LAYER_CONTEXT *lc = &cpi->layer_context[i];
-            int bits_off_for_this_layer = lc->target_bandwidth / lc->frame_rate
-                                                - cpi->projected_frame_size;
+            int bits_off_for_this_layer =
+               (int)(lc->target_bandwidth / lc->frame_rate -
+                     cpi->projected_frame_size);
 
             lc->bits_off_target += bits_off_for_this_layer;
 
@@ -4989,7 +5023,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
                             - cpi->last_time_stamp_seen;
             /* do a step update if the duration changes by 10% */
             if (last_duration)
-                step = ((this_duration - last_duration) * 10 / last_duration);
+                step = (int)(((this_duration - last_duration) *
+                            10 / last_duration));
         }
 
         if (this_duration)
@@ -5004,7 +5039,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
                  * frame rate. If we haven't seen 1 second yet, then average
                  * over the whole interval seen.
                  */
-                interval = cpi->source->ts_end - cpi->first_time_stamp_ever;
+                interval = (double)(cpi->source->ts_end -
+                                    cpi->first_time_stamp_ever);
                 if(interval > 10000000.0)
                     interval = 10000000;
 
@@ -5136,7 +5172,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
         vpx_usec_timer_mark(&tsctimer);
         vpx_usec_timer_mark(&ticktimer);
 
-        duration = vpx_usec_timer_elapsed(&ticktimer);
+        duration = (int)(vpx_usec_timer_elapsed(&ticktimer));
         duration2 = (unsigned int)((double)duration / 2);
 
         if (cm->frame_type != KEY_FRAME)
@@ -5215,14 +5251,14 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
 
             if (cpi->b_calculate_psnr)
             {
-                double ye,ue,ve;
+                uint64_t ye,ue,ve;
                 double frame_psnr;
                 YV12_BUFFER_CONFIG      *orig = cpi->Source;
                 YV12_BUFFER_CONFIG      *recon = cpi->common.frame_to_show;
                 int y_samples = orig->y_height * orig->y_width ;
                 int uv_samples = orig->uv_height * orig->uv_width ;
                 int t_samples = y_samples + 2 * uv_samples;
-                int64_t sq_error, sq_error2;
+                double sq_error, sq_error2;
 
                 ye = calc_plane_error(orig->y_buffer, orig->y_stride,
                   recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height);
@@ -5233,13 +5269,13 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
                 ve = calc_plane_error(orig->v_buffer, orig->uv_stride,
                   recon->v_buffer, recon->uv_stride, orig->uv_width, orig->uv_height);
 
-                sq_error = ye + ue + ve;
+                sq_error = (double)(ye + ue + ve);
 
                 frame_psnr = vp8_mse2psnr(t_samples, 255.0, sq_error);
 
-                cpi->total_y += vp8_mse2psnr(y_samples, 255.0, ye);
-                cpi->total_u += vp8_mse2psnr(uv_samples, 255.0, ue);
-                cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, ve);
+                cpi->total_y += vp8_mse2psnr(y_samples, 255.0, (double)ye);
+                cpi->total_u += vp8_mse2psnr(uv_samples, 255.0, (double)ue);
+                cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, (double)ve);
                 cpi->total_sq_error += sq_error;
                 cpi->total  += frame_psnr;
 #if CONFIG_POSTPROC
@@ -5260,13 +5296,16 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
                     ve = calc_plane_error(orig->v_buffer, orig->uv_stride,
                       pp->v_buffer, pp->uv_stride, orig->uv_width, orig->uv_height);
 
-                    sq_error2 = ye + ue + ve;
+                    sq_error2 = (double)(ye + ue + ve);
 
                     frame_psnr2 = vp8_mse2psnr(t_samples, 255.0, sq_error2);
 
-                    cpi->totalp_y += vp8_mse2psnr(y_samples, 255.0, ye);
-                    cpi->totalp_u += vp8_mse2psnr(uv_samples, 255.0, ue);
-                    cpi->totalp_v += vp8_mse2psnr(uv_samples, 255.0, ve);
+                    cpi->totalp_y += vp8_mse2psnr(y_samples,
+                                                  255.0, (double)ye);
+                    cpi->totalp_u += vp8_mse2psnr(uv_samples,
+                                                  255.0, (double)ue);
+                    cpi->totalp_v += vp8_mse2psnr(uv_samples,
+                                                  255.0, (double)ve);
                     cpi->total_sq_error2 += sq_error2;
                     cpi->totalp  += frame_psnr2;
 
@@ -5278,7 +5317,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
 
                     if (cpi->oxcf.number_of_layers > 1)
                     {
-                         int i;
+                         unsigned int i;
 
                          for (i=cpi->current_layer;
                                        i<cpi->oxcf.number_of_layers; i++)
@@ -5306,7 +5345,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
 
                 if (cpi->oxcf.number_of_layers > 1)
                 {
-                    int i;
+                    unsigned int i;
 
                     for (i=cpi->current_layer;
                          i<cpi->oxcf.number_of_layers; i++)
@@ -5414,7 +5453,7 @@ int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigne
 {
     signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];
     int internal_delta_q[MAX_MB_SEGMENTS];
-    const unsigned int range = 63;
+    const int range = 63;
     int i;
 
     // This method is currently incompatible with the cyclic refresh method
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 584cadae6..caccc60ae 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -236,17 +236,17 @@ typedef struct
     int target_bandwidth;
 
     /* Layer specific coding parameters */
-    int starting_buffer_level;
-    int optimal_buffer_level;
-    int maximum_buffer_size;
-    int starting_buffer_level_in_ms;
-    int optimal_buffer_level_in_ms;
-    int maximum_buffer_size_in_ms;
+    int64_t starting_buffer_level;
+    int64_t optimal_buffer_level;
+    int64_t maximum_buffer_size;
+    int64_t starting_buffer_level_in_ms;
+    int64_t optimal_buffer_level_in_ms;
+    int64_t maximum_buffer_size_in_ms;
 
     int avg_frame_size_for_layer;
 
-    int buffer_level;
-    int bits_off_target;
+    int64_t buffer_level;
+    int64_t bits_off_target;
 
     int64_t total_actual_bits;
     int total_target_vs_actual;
@@ -431,7 +431,7 @@ typedef struct VP8_COMP
     double frame_rate;
     double ref_frame_rate;
     int64_t buffer_level;
-    int bits_off_target;
+    int64_t bits_off_target;
 
     int rolling_target_bits;
     int rolling_actual_bits;
@@ -569,10 +569,10 @@ typedef struct VP8_COMP
     vp8_refining_search_fn_t refining_search_sad;
     vp8_diamond_search_fn_t diamond_search_sad;
     vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SEGMENTS];
-    unsigned int time_receive_data;
-    unsigned int time_compress_data;
-    unsigned int time_pick_lpf;
-    unsigned int time_encode_mb_row;
+    uint64_t time_receive_data;
+    uint64_t time_compress_data;
+    uint64_t time_pick_lpf;
+    uint64_t time_encode_mb_row;
 
     int base_skip_false_prob[128];
 
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 77d9152eb..b67f04b85 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -151,7 +151,7 @@ static int pick_intra4x4block(
     unsigned char *yleft = dst - 1;
     unsigned char top_left = Above[-1];
 
-    for (mode = B_DC_PRED; mode <= B_HE_PRED /*B_HU_PRED*/; mode++)
+    for (mode = B_DC_PRED; mode <= B_HE_PRED; mode++)
     {
         int this_rd;
 
@@ -171,7 +171,7 @@ static int pick_intra4x4block(
         }
     }
 
-    b->bmi.as_mode = (B_PREDICTION_MODE)(*best_mode);
+    b->bmi.as_mode = *best_mode;
     vp8_encode_intra4x4block(x, ib);
     return best_rd;
 }
@@ -458,7 +458,15 @@ void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim,
 
 static void check_for_encode_breakout(unsigned int sse, MACROBLOCK* x)
 {
-    if (sse < x->encode_breakout)
+    MACROBLOCKD *xd = &x->e_mbd;
+
+    unsigned int threshold = (xd->block[0].dequant[1]
+        * xd->block[0].dequant[1] >>4);
+
+    if(threshold < x->encode_breakout)
+        threshold = x->encode_breakout;
+
+    if (sse < threshold )
     {
         /* Check u and v to make sure skip is ok */
         unsigned int sse2 = 0;
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index e1c8c4eb7..4dc078a1d 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -353,7 +353,7 @@ static void calc_iframe_target_size(VP8_COMP *cpi)
 {
     /* boost defaults to half second */
     int kf_boost;
-    unsigned int target;
+    uint64_t target;
 
     /* Clear down mmx registers to allow floating point in what follows */
     vp8_clear_system_state();
@@ -423,7 +423,7 @@ static void calc_iframe_target_size(VP8_COMP *cpi)
             target = max_rate;
     }
 
-    cpi->this_frame_target = target;
+    cpi->this_frame_target = (int)target;
 
     /* TODO: if we separate rate targeting from Q targetting, move this.
      * Reset the active worst quality to the baseline value for key frames.
@@ -747,7 +747,8 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
         /* Adapt target frame size with respect to any buffering constraints: */
         if (cpi->buffered_mode)
         {
-            int one_percent_bits = 1 + cpi->oxcf.optimal_buffer_level / 100;
+            int one_percent_bits = (int)
+                (1 + cpi->oxcf.optimal_buffer_level / 100);
 
             if ((cpi->buffer_level < cpi->oxcf.optimal_buffer_level) ||
                 (cpi->bits_off_target < cpi->oxcf.optimal_buffer_level))
@@ -764,9 +765,9 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
                 if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) &&
                     (cpi->buffer_level < cpi->oxcf.optimal_buffer_level))
                 {
-                    percent_low =
-                        (cpi->oxcf.optimal_buffer_level - cpi->buffer_level) /
-                        one_percent_bits;
+                    percent_low = (int)
+                        ((cpi->oxcf.optimal_buffer_level - cpi->buffer_level) /
+                        one_percent_bits);
                 }
                 /* Are we overshooting the long term clip data rate... */
                 else if (cpi->bits_off_target < 0)
@@ -790,7 +791,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
                  */
                 if (cpi->auto_worst_q && cpi->ni_frames > 150)
                 {
-                    int critical_buffer_level;
+                    int64_t critical_buffer_level;
 
                     /* For streaming applications the most important factor is
                      * cpi->buffer_level as this takes into account the
@@ -841,7 +842,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
                              */
                             cpi->active_worst_quality =
                                 cpi->worst_quality -
-                                ((qadjustment_range * above_base) /
+                                (int)((qadjustment_range * above_base) /
                                  (cpi->oxcf.optimal_buffer_level*3>>2));
                         }
                         else
@@ -866,9 +867,9 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
                 if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
                      && (cpi->buffer_level > cpi->oxcf.optimal_buffer_level))
                 {
-                    percent_high = (cpi->buffer_level
+                    percent_high = (int)((cpi->buffer_level
                                     - cpi->oxcf.optimal_buffer_level)
-                                   / one_percent_bits;
+                                   / one_percent_bits);
                 }
                 else if (cpi->bits_off_target > cpi->oxcf.optimal_buffer_level)
                 {
@@ -956,7 +957,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
             /* Update the buffer level variable. */
             cpi->bits_off_target += cpi->av_per_frame_bandwidth;
             if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size)
-              cpi->bits_off_target = cpi->oxcf.maximum_buffer_size;
+              cpi->bits_off_target = cpi->oxcf.maximum_buffer_size;
             cpi->buffer_level = cpi->bits_off_target;
         }
     }
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 005f26213..28d5c1ee8 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -694,7 +694,7 @@ static int rd_pick_intra4x4block(
             vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
         }
     }
-    b->bmi.as_mode = (B_PREDICTION_MODE)(*best_mode);
+    b->bmi.as_mode = *best_mode;
 
     vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride);
 
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index bd971fab5..a328f46c2 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -148,7 +148,7 @@ VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/idct_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/loopfilter_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/simpleloopfilter_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/sixtappredict8x4_v6$(ASM)
-#VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/intra4x4_predict_v6$(ASM)
+VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/intra4x4_predict_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/dequant_idct_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/dequantize_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/idct_blk_v6.c
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 072314f24..eeac3a8b1 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -212,7 +212,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t      *ctx,
     if (cfg->g_pass == VPX_RC_LAST_PASS)
     {
         size_t           packet_sz = sizeof(FIRSTPASS_STATS);
-        int              n_packets = cfg->rc_twopass_stats_in.sz / packet_sz;
+        int              n_packets = (int)(cfg->rc_twopass_stats_in.sz /
+                                          packet_sz);
         FIRSTPASS_STATS *stats;
 
         if (!cfg->rc_twopass_stats_in.buf)
@@ -891,15 +892,16 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t  *ctx,
                 VP8_COMP *cpi = (VP8_COMP *)ctx->cpi;
 
                 /* Add the frame packet to the list of returned packets. */
-                round = 1000000 * ctx->cfg.g_timebase.num / 2 - 1;
+                round = (vpx_codec_pts_t)1000000
+                        * ctx->cfg.g_timebase.num / 2 - 1;
                 delta = (dst_end_time_stamp - dst_time_stamp);
                 pkt.kind = VPX_CODEC_CX_FRAME_PKT;
                 pkt.data.frame.pts =
                     (dst_time_stamp * ctx->cfg.g_timebase.den + round)
                     / ctx->cfg.g_timebase.num / 10000000;
-                pkt.data.frame.duration =
-                    (delta * ctx->cfg.g_timebase.den + round)
-                    / ctx->cfg.g_timebase.num / 10000000;
+                pkt.data.frame.duration = (unsigned long)
+                    ((delta * ctx->cfg.g_timebase.den + round)
+                    / ctx->cfg.g_timebase.num / 10000000);
                 pkt.data.frame.flags = lib_flags << 16;
 
                 if (lib_flags & FRAMEFLAGS_KEY)