247 files changed, 4950 insertions, 3760 deletions
diff --git a/args.h b/args.h
index ea909cb46..04e0acdf1 100644
--- a/args.h
+++ b/args.h
@@ -13,6 +13,10 @@
 #define ARGS_H_
 #include <stdio.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct arg {
   char                 **argv;
   const char            *name;
@@ -48,4 +52,8 @@ unsigned int arg_parse_uint(const struct arg *arg);
 int arg_parse_int(const struct arg *arg);
 struct vpx_rational arg_parse_rational(const struct arg *arg);
 int arg_parse_enum_or_int(const struct arg *arg);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // ARGS_H_
diff --git a/build/make/obj_int_extract.c b/build/make/obj_int_extract.c
index 495e9d7fa..8c169577c 100644
--- a/build/make/obj_int_extract.c
+++ b/build/make/obj_int_extract.c
@@ -21,6 +21,7 @@ typedef enum {
   OUTPUT_FMT_PLAIN,
   OUTPUT_FMT_RVDS,
   OUTPUT_FMT_GAS,
+  OUTPUT_FMT_C_HEADER,
 } output_fmt_t;
 
 int log_msg(const char *fmt, ...) {
@@ -43,9 +44,12 @@ int print_macho_equ(output_fmt_t mode, uint8_t* name, int val) {
     case OUTPUT_FMT_RVDS:
       printf("%-40s EQU %5d\n", name, val);
       return 0;
-    case  OUTPUT_FMT_GAS:
+    case OUTPUT_FMT_GAS:
       printf(".set %-40s, %5d\n", name, val);
       return 0;
+    case OUTPUT_FMT_C_HEADER:
+      printf("#define %-40s %5d\n", name, val);
+      return 0;
     default:
       log_msg("Unsupported mode: %d", mode);
       return 1;
@@ -491,6 +495,13 @@ int parse_elf(uint8_t *buf, size_t sz, output_fmt_t mode) {
                                               sym.st_name),
                        val);
                 break;
+              case OUTPUT_FMT_C_HEADER:
+                printf("#define %-40s %5d\n",
+                       parse_elf_string_table(&elf,
+                                              shdr.sh_link,
+                                              sym.st_name),
+                       val);
+                break;
               default:
                 printf("%s = %d\n",
                        parse_elf_string_table(&elf,
@@ -762,6 +773,7 @@ int main(int argc, char **argv) {
     fprintf(stderr, "Output Formats:\n");
     fprintf(stderr, "  gas  - compatible with GNU assembler\n");
     fprintf(stderr, "  rvds - compatible with armasm\n");
+    fprintf(stderr, "  cheader - c/c++ header file\n");
     goto bail;
   }
 
@@ -771,6 +783,8 @@ int main(int argc, char **argv) {
     mode = OUTPUT_FMT_RVDS;
   else if (!strcmp(argv[1], "gas"))
     mode = OUTPUT_FMT_GAS;
+  else if (!strcmp(argv[1], "cheader"))
+    mode = OUTPUT_FMT_C_HEADER;
   else
     f = argv[1];
 
diff --git a/examples.mk b/examples.mk
index b29ab9c34..0b62df96a 100644
--- a/examples.mk
+++ b/examples.mk
@@ -41,6 +41,7 @@ UTILS-$(CONFIG_ENCODERS)    += vpxenc.c
 vpxenc.SRCS                 += args.c args.h y4minput.c y4minput.h vpxenc.h
 vpxenc.SRCS                 += ivfdec.c ivfdec.h
 vpxenc.SRCS                 += ivfenc.c ivfenc.h
+vpxenc.SRCS                 += rate_hist.c rate_hist.h
 vpxenc.SRCS                 += tools_common.c tools_common.h
 vpxenc.SRCS                 += warnings.c warnings.h
 vpxenc.SRCS                 += webmenc.c webmenc.h
@@ -64,21 +65,31 @@ vp9_spatial_scalable_encoder.SRCS += tools_common.c tools_common.h
 vp9_spatial_scalable_encoder.GUID   = 4A38598D-627D-4505-9C7B-D4020C84100D
 vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder
 
+ifeq ($(CONFIG_SHARED),no)
+UTILS-$(CONFIG_VP9_ENCODER)    += resize_util.c
+endif
+
 # XMA example disabled for now, not used in VP8
 #UTILS-$(CONFIG_DECODERS)    += example_xma.c
 #example_xma.GUID             = A955FC4A-73F1-44F7-135E-30D84D32F022
 #example_xma.DESCRIPTION      = External Memory Allocation mode usage
 
 GEN_EXAMPLES-$(CONFIG_VP8_DECODER) += simple_decoder.c
-simple_decoder.GUID              = D3BBF1E9-2427-450D-BBFF-B2843C1D44CC
-simple_decoder.DESCRIPTION       = Simplified decoder loop
+simple_decoder.GUID                = D3BBF1E9-2427-450D-BBFF-B2843C1D44CC
+simple_decoder.SRCS                += ivfdec.h ivfdec.c
+simple_decoder.SRCS                += tools_common.h tools_common.c
+simple_decoder.DESCRIPTION         = Simplified decoder loop
 GEN_EXAMPLES-$(CONFIG_VP8_DECODER) += postproc.c
+postproc.SRCS                      += ivfdec.h ivfdec.c
+postproc.SRCS                      += tools_common.h tools_common.c
 postproc.GUID                    = 65E33355-F35E-4088-884D-3FD4905881D7
 postproc.DESCRIPTION             = Decoder postprocessor control
 GEN_EXAMPLES-$(CONFIG_VP8_DECODER) += decode_to_md5.c
-decode_to_md5.SRCS              += md5_utils.h md5_utils.c
-decode_to_md5.GUID               = 59120B9B-2735-4BFE-B022-146CA340FE42
-decode_to_md5.DESCRIPTION        = Frame by frame MD5 checksum
+decode_to_md5.SRCS                 += md5_utils.h md5_utils.c
+decode_to_md5.SRCS                 += ivfdec.h ivfdec.c
+decode_to_md5.SRCS                 += tools_common.h tools_common.c
+decode_to_md5.GUID                 = 59120B9B-2735-4BFE-B022-146CA340FE42
+decode_to_md5.DESCRIPTION          = Frame by frame MD5 checksum
 
 GEN_EXAMPLES-$(CONFIG_VP8_ENCODER) += simple_encoder.c
 simple_encoder.GUID              = 4607D299-8A71-4D2C-9B1D-071899B6FBFD
@@ -91,6 +102,8 @@ force_keyframe.GUID              = 3C67CADF-029F-4C86-81F5-D6D4F51177F0
 force_keyframe.DESCRIPTION       = Force generation of keyframes
 ifeq ($(CONFIG_DECODERS),yes)
 GEN_EXAMPLES-$(CONFIG_VP8_ENCODER) += decode_with_drops.c
+decode_with_drops.SRCS             += ivfdec.h ivfdec.c
+decode_with_drops.SRCS             += tools_common.h tools_common.c
 endif
 decode_with_drops.GUID           = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26
 decode_with_drops.DESCRIPTION    = Drops frames while decoding
diff --git a/examples/decode_to_md5.c b/examples/decode_to_md5.c
index 2ceb17a35..bba218209 100644
--- a/examples/decode_to_md5.c
+++ b/examples/decode_to_md5.c
@@ -29,54 +29,19 @@
 // is processed, then U, then V. It is important to honor the image's `stride`
 // values.
 
-#include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+
 #define VPX_CODEC_DISABLE_COMPAT 1
-#include "./vpx_config.h"
+
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"
-#include "md5_utils.h"
-
-#define VP8_FOURCC 0x30385056
-#define VP9_FOURCC 0x30395056
-
-#define IVF_FILE_HDR_SZ  (32)
-#define IVF_FRAME_HDR_SZ (12)
-
-static vpx_codec_iface_t *get_codec_interface(unsigned int fourcc) {
-  switch (fourcc) {
-    case VP8_FOURCC:
-      return vpx_codec_vp8_dx();
-    case VP9_FOURCC:
-      return vpx_codec_vp9_dx();
-  }
-  return NULL;
-}
-
-static unsigned int mem_get_le32(const unsigned char *mem) {
-  return (mem[3] << 24) | (mem[2] << 16) | (mem[1] << 8) | (mem[0]);
-}
-
-static void die(const char *fmt, ...) {
-  va_list ap;
-
-  va_start(ap, fmt);
-  vprintf(fmt, ap);
-  if (fmt[strlen(fmt) - 1] != '\n')
-    printf("\n");
-  exit(EXIT_FAILURE);
-}
 
-static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
-  const char *detail = vpx_codec_error_detail(ctx);
-
-  printf("%s: %s\n", s, vpx_codec_error(ctx));
-  if(detail)
-    printf("    %s\n",detail);
-  exit(EXIT_FAILURE);
-}
+#include "./ivfdec.h"
+#include "./md5_utils.h"
+#include "./tools_common.h"
+#include "./vpx_config.h"
 
 static void get_image_md5(const vpx_image_t *img, unsigned char digest[16]) {
   int plane, y;
@@ -106,17 +71,24 @@ static void print_md5(FILE *stream, unsigned char digest[16]) {
     fprintf(stream, "%02x", digest[i]);
 }
 
+static const char *exec_name;
+
+void usage_exit() {
+  fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
+  exit(EXIT_FAILURE);
+}
+
 int main(int argc, char **argv) {
   FILE *infile, *outfile;
   vpx_codec_ctx_t codec;
   vpx_codec_iface_t *iface;
   int flags = 0, frame_cnt = 0;
-  unsigned char file_hdr[IVF_FILE_HDR_SZ];
-  unsigned char frame_hdr[IVF_FRAME_HDR_SZ];
-  unsigned char frame[256 * 1024];
+  vpx_video_t *video;
+
+  exec_name = argv[0];
 
   if (argc != 3)
-    die("Usage: %s <infile> <outfile>\n", argv[0]);
+    die("Invalid number of arguments");
 
   if (!(infile = fopen(argv[1], "rb")))
     die("Failed to open %s for reading", argv[1]);
@@ -124,32 +96,24 @@ int main(int argc, char **argv) {
   if (!(outfile = fopen(argv[2], "wb")))
     die("Failed to open %s for writing", argv[2]);
 
-  if (!(fread(file_hdr, 1, IVF_FILE_HDR_SZ, infile) == IVF_FILE_HDR_SZ &&
-     file_hdr[0] == 'D' && file_hdr[1] == 'K' &&
-     file_hdr[2] == 'I' && file_hdr[3] == 'F'))
+  video = vpx_video_open_file(infile);
+  if (!video)
     die("%s is not an IVF file.", argv[1]);
 
-  iface = get_codec_interface(mem_get_le32(file_hdr + 8));
+  iface = get_codec_interface(vpx_video_get_fourcc(video));
   if (!iface)
     die("Unknown FOURCC code.");
 
-
   printf("Using %s\n", vpx_codec_iface_name(iface));
 
   if (vpx_codec_dec_init(&codec, iface, NULL, flags))
     die_codec(&codec, "Failed to initialize decoder");
 
-  while (fread(frame_hdr, 1, IVF_FRAME_HDR_SZ, infile) == IVF_FRAME_HDR_SZ) {
-    const int frame_size = mem_get_le32(frame_hdr);
+  while (vpx_video_read_frame(video)) {
     vpx_codec_iter_t iter = NULL;
-    vpx_image_t *img;
-
-    if (frame_size > sizeof(frame))
-      die("Frame %d data too big for example code buffer", frame_size);
-
-    if (fread(frame, 1, frame_size, infile) != frame_size)
-      die("Failed to read complete frame");
-
+    vpx_image_t *img = NULL;
+    size_t frame_size = 0;
+    const unsigned char *frame = vpx_video_get_frame(video, &frame_size);
     if (vpx_codec_decode(&codec, frame, frame_size, NULL, 0))
       die_codec(&codec, "Failed to decode frame");
 
@@ -167,6 +131,8 @@ int main(int argc, char **argv) {
   if (vpx_codec_destroy(&codec))
     die_codec(&codec, "Failed to destroy codec");
 
+  vpx_video_close(video);
+
   fclose(outfile);
   fclose(infile);
   return EXIT_SUCCESS;
diff --git a/examples/decode_with_drops.c b/examples/decode_with_drops.c
index bfb6d3a55..12686dedd 100644
--- a/examples/decode_with_drops.c
+++ b/examples/decode_with_drops.c
@@ -52,126 +52,103 @@
 // The example decides whether to drop the frame based on the current
 // frame number, immediately before decoding the frame.
 
-#include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+
+#include "./ivfdec.h"
+
 #define VPX_CODEC_DISABLE_COMPAT 1
-#include "./vpx_config.h"
+
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"
-#define interface (vpx_codec_vp8_dx())
 
+#include "./tools_common.h"
+#include "./vpx_config.h"
 
-#define IVF_FILE_HDR_SZ  (32)
-#define IVF_FRAME_HDR_SZ (12)
+static const char *exec_name;
 
-static unsigned int mem_get_le32(const unsigned char *mem) {
-    return (mem[3] << 24)|(mem[2] << 16)|(mem[1] << 8)|(mem[0]);
+void usage_exit() {
+  fprintf(stderr, "Usage: %s <infile> <outfile> <N-M|N/M>\n", exec_name);
+  exit(EXIT_FAILURE);
 }
 
-static void die(const char *fmt, ...) {
-    va_list ap;
+int main(int argc, char **argv) {
+  FILE *infile, *outfile;
+  vpx_codec_ctx_t codec;
+  vpx_codec_iface_t *iface;
+  int flags = 0, frame_cnt = 0;
+  vpx_video_t *video;
+  int n, m, is_range;
+  char *nptr;
+
+  exec_name = argv[0];
 
-    va_start(ap, fmt);
-    vprintf(fmt, ap);
-    if(fmt[strlen(fmt)-1] != '\n')
-        printf("\n");
-    exit(EXIT_FAILURE);
-}
+  if (argc != 4)
+    die("Invalid number of arguments");
 
-static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
-    const char *detail = vpx_codec_error_detail(ctx);
+  if (!(infile = fopen(argv[1], "rb")))
+    die("Failed to open %s for reading", argv[1]);
 
-    printf("%s: %s\n", s, vpx_codec_error(ctx));
-    if(detail)
-        printf("    %s\n",detail);
-    exit(EXIT_FAILURE);
-}
+  if (!(outfile = fopen(argv[2], "wb")))
+    die("Failed to open %s for writing", argv[2]);
 
+  n = strtol(argv[3], &nptr, 0);
+  m = strtol(nptr + 1, NULL, 0);
+  is_range = (*nptr == '-');
+  if (!n || !m || (*nptr != '-' && *nptr != '/'))
+    die("Couldn't parse pattern %s\n", argv[3]);
 
-int main(int argc, char **argv) {
-    FILE            *infile, *outfile;
-    vpx_codec_ctx_t  codec;
-    int              flags = 0, frame_cnt = 0;
-    unsigned char    file_hdr[IVF_FILE_HDR_SZ];
-    unsigned char    frame_hdr[IVF_FRAME_HDR_SZ];
-    unsigned char    frame[256*1024];
-    vpx_codec_err_t  res;
-    int              n, m, is_range;
-
-    (void)res;
-    /* Open files */
-    if(argc!=4)
-        die("Usage: %s <infile> <outfile> <N-M|N/M>\n", argv[0]);
-    {
-        char *nptr;
-        n = strtol(argv[3], &nptr, 0);
-        m = strtol(nptr+1, NULL, 0);
-        is_range = *nptr == '-';
-        if(!n || !m || (*nptr != '-' && *nptr != '/'))
-            die("Couldn't parse pattern %s\n", argv[3]);
-    }
-    if(!(infile = fopen(argv[1], "rb")))
-        die("Failed to open %s for reading", argv[1]);
-    if(!(outfile = fopen(argv[2], "wb")))
-        die("Failed to open %s for writing", argv[2]);
-
-    /* Read file header */
-    if(!(fread(file_hdr, 1, IVF_FILE_HDR_SZ, infile) == IVF_FILE_HDR_SZ
-         && file_hdr[0]=='D' && file_hdr[1]=='K' && file_hdr[2]=='I'
-         && file_hdr[3]=='F'))
-        die("%s is not an IVF file.", argv[1]);
-
-    printf("Using %s\n",vpx_codec_iface_name(interface));
-    /* Initialize codec */
-    if(vpx_codec_dec_init(&codec, interface, NULL, flags))
-        die_codec(&codec, "Failed to initialize decoder");
-
-    /* Read each frame */
-    while(fread(frame_hdr, 1, IVF_FRAME_HDR_SZ, infile) == IVF_FRAME_HDR_SZ) {
-        int               frame_sz = mem_get_le32(frame_hdr);
-        vpx_codec_iter_t  iter = NULL;
-        vpx_image_t      *img;
-
-
-        frame_cnt++;
-        if(frame_sz > sizeof(frame))
-            die("Frame %d data too big for example code buffer", frame_sz);
-        if(fread(frame, 1, frame_sz, infile) != frame_sz)
-            die("Frame %d failed to read complete frame", frame_cnt);
-
-        if((is_range && frame_cnt >= n && frame_cnt <= m)
-           ||(!is_range && m - (frame_cnt-1)%m <= n)) {
-           putc('X', stdout);
-           continue;
-        }
-        putc('.', stdout);
-        fflush(stdout);
-        /* Decode the frame */
-        if(vpx_codec_decode(&codec, frame, frame_sz, NULL, 0))
-            die_codec(&codec, "Failed to decode frame");
-
-        /* Write decoded data to disk */
-        while((img = vpx_codec_get_frame(&codec, &iter))) {
-            unsigned int plane, y;
-
-            for(plane=0; plane < 3; plane++) {
-                unsigned char *buf =img->planes[plane];
-            
-                for(y=0; y < (plane ? (img->d_h + 1) >> 1 : img->d_h); y++) {
-                    (void) fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w),
-                                  outfile);
-                    buf += img->stride[plane];
-                }
-            }
-        }
+  video = vpx_video_open_file(infile);
+  if (!video)
+    die("%s is not a supported input file.", argv[1]);
+
+  iface = get_codec_interface(vpx_video_get_fourcc(video));
+  if (!iface)
+    die("Unknown FOURCC code.");
+
+  printf("Using %s\n", vpx_codec_iface_name(iface));
+
+  if (vpx_codec_dec_init(&codec, iface, NULL, flags))
+    die_codec(&codec, "Failed to initialize decoder");
+
+  while (vpx_video_read_frame(video)) {
+    vpx_codec_iter_t iter = NULL;
+    vpx_image_t *img = NULL;
+    size_t frame_size = 0;
+    int skip;
+    const unsigned char *frame = vpx_video_get_frame(video, &frame_size);
+    if (vpx_codec_decode(&codec, frame, frame_size, NULL, 0))
+      die_codec(&codec, "Failed to decode frame");
+
+    ++frame_cnt;
+
+    skip = (is_range && frame_cnt >= n && frame_cnt <= m) ||
+           (!is_range && m - (frame_cnt - 1) % m <= n);
+
+    if (!skip) {
+      putc('.', stdout);
+
+      while ((img = vpx_codec_get_frame(&codec, &iter)) != NULL)
+        vpx_img_write(img, outfile);
+    } else {
+      putc('X', stdout);
     }
-    printf("Processed %d frames.\n",frame_cnt);
-    if(vpx_codec_destroy(&codec))
-        die_codec(&codec, "Failed to destroy codec");
 
-    fclose(outfile);
-    fclose(infile);
-    return EXIT_SUCCESS;
+    fflush(stdout);
+  }
+
+  printf("Processed %d frames.\n", frame_cnt);
+  if (vpx_codec_destroy(&codec))
+    die_codec(&codec, "Failed to destroy codec");
+
+  printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n",
+         vpx_video_get_width(video), vpx_video_get_height(video), argv[2]);
+
+  vpx_video_close(video);
+
+  fclose(outfile);
+  fclose(infile);
+
+  return EXIT_SUCCESS;
 }
diff --git a/examples/example_xma.c b/examples/example_xma.c
index 0e33ff46f..c960c2846 100644
--- a/examples/example_xma.c
+++ b/examples/example_xma.c
@@ -27,7 +27,7 @@ static int   verbose = 0;
 
 static const struct {
   const char *name;
-  const vpx_codec_iface_t *iface;
+  vpx_codec_iface_t *iface;
 } ifaces[] = {
 #if CONFIG_VP9_DECODER
   {"vp9",  &vpx_codec_vp8_dx_algo},
diff --git a/examples/postproc.c b/examples/postproc.c
index 0d0d6b59a..4ec2d1f1c 100644
--- a/examples/postproc.c
+++ b/examples/postproc.c
@@ -39,130 +39,105 @@
 // postprocessors. VP8 is one example. The following sample code toggles
 // postprocessing on and off every 15 frames.
 
-#include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+
+#include "./ivfdec.h"
+
 #define VPX_CODEC_DISABLE_COMPAT 1
-#include "./vpx_config.h"
+
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"
-#define interface (vpx_codec_vp8_dx())
 
+#include "./tools_common.h"
+#include "./vpx_config.h"
 
-#define IVF_FILE_HDR_SZ  (32)
-#define IVF_FRAME_HDR_SZ (12)
+static const char *exec_name;
 
-static unsigned int mem_get_le32(const unsigned char *mem) {
-    return (mem[3] << 24)|(mem[2] << 16)|(mem[1] << 8)|(mem[0]);
+void usage_exit() {
+  fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
+  exit(EXIT_FAILURE);
 }
 
-static void die(const char *fmt, ...) {
-    va_list ap;
+int main(int argc, char **argv) {
+  FILE *infile, *outfile;
+  vpx_codec_ctx_t codec;
+  vpx_codec_iface_t *iface;
+  int frame_cnt = 0;
+  vpx_video_t *video;
+  vpx_codec_err_t res;
+
+  exec_name = argv[0];
 
-    va_start(ap, fmt);
-    vprintf(fmt, ap);
-    if(fmt[strlen(fmt)-1] != '\n')
-        printf("\n");
-    exit(EXIT_FAILURE);
-}
+  if (argc != 3)
+    die("Invalid number of arguments");
 
-static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
-    const char *detail = vpx_codec_error_detail(ctx);
+  if (!(infile = fopen(argv[1], "rb")))
+    die("Failed to open %s for reading", argv[1]);
 
-    printf("%s: %s\n", s, vpx_codec_error(ctx));
-    if(detail)
-        printf("    %s\n",detail);
-    exit(EXIT_FAILURE);
-}
+  if (!(outfile = fopen(argv[2], "wb")))
+    die("Failed to open %s for writing", argv[2]);
 
+  video = vpx_video_open_file(infile);
+  if (!video)
+    die("%s is not a supported input file.", argv[1]);
 
-int main(int argc, char **argv) {
-    FILE            *infile, *outfile;
-    vpx_codec_ctx_t  codec;
-    int              flags = 0, frame_cnt = 0;
-    unsigned char    file_hdr[IVF_FILE_HDR_SZ];
-    unsigned char    frame_hdr[IVF_FRAME_HDR_SZ];
-    unsigned char    frame[256*1024];
-    vpx_codec_err_t  res;
-
-    (void)res;
-    /* Open files */
-    if(argc!=3)
-        die("Usage: %s <infile> <outfile>\n", argv[0]);
-    if(!(infile = fopen(argv[1], "rb")))
-        die("Failed to open %s for reading", argv[1]);
-    if(!(outfile = fopen(argv[2], "wb")))
-        die("Failed to open %s for writing", argv[2]);
-
-    /* Read file header */
-    if(!(fread(file_hdr, 1, IVF_FILE_HDR_SZ, infile) == IVF_FILE_HDR_SZ
-         && file_hdr[0]=='D' && file_hdr[1]=='K' && file_hdr[2]=='I'
-         && file_hdr[3]=='F'))
-        die("%s is not an IVF file.", argv[1]);
-
-    printf("Using %s\n",vpx_codec_iface_name(interface));
-    /* Initialize codec */
-    res = vpx_codec_dec_init(&codec, interface, NULL,
-                             VPX_CODEC_USE_POSTPROC);
-    if(res == VPX_CODEC_INCAPABLE) {
-        printf("NOTICE: Postproc not supported by %s\n",
-               vpx_codec_iface_name(interface));
-        res = vpx_codec_dec_init(&codec, interface, NULL, flags);
-    }
-    if(res)
-        die_codec(&codec, "Failed to initialize decoder");
-
-    /* Read each frame */
-    while(fread(frame_hdr, 1, IVF_FRAME_HDR_SZ, infile) == IVF_FRAME_HDR_SZ) {
-        int               frame_sz = mem_get_le32(frame_hdr);
-        vpx_codec_iter_t  iter = NULL;
-        vpx_image_t      *img;
-
-
-        frame_cnt++;
-        if(frame_sz > sizeof(frame))
-            die("Frame %d data too big for example code buffer", frame_sz);
-        if(fread(frame, 1, frame_sz, infile) != frame_sz)
-            die("Frame %d failed to read complete frame", frame_cnt);
-
-        #if CONFIG_VP9_DECODER
-        if(frame_cnt%30 == 1) {
-            vp8_postproc_cfg_t  pp = {0, 0, 0};
-
-            if(vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp))
-                die_codec(&codec, "Failed to turn off postproc");
-        } else if(frame_cnt%30 == 16) {
-            vp8_postproc_cfg_t  pp = {VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE, 4, 0};
-
-            if(vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp))
-                die_codec(&codec, "Failed to turn on postproc");
-        };
-        #endif
-        /* Decode the frame with 15ms deadline */
-        if(vpx_codec_decode(&codec, frame, frame_sz, NULL, 15000))
-            die_codec(&codec, "Failed to decode frame");
-
-        /* Write decoded data to disk */
-        while((img = vpx_codec_get_frame(&codec, &iter))) {
-            unsigned int plane, y;
-
-            for(plane=0; plane < 3; plane++) {
-                unsigned char *buf =img->planes[plane];
-            
-                for(y=0; y < (plane ? (img->d_h + 1) >> 1 : img->d_h); y++) {
-                    (void) fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w),
-                                  outfile);
-                    buf += img->stride[plane];
-                }
-            }
-        }
+  iface = get_codec_interface(vpx_video_get_fourcc(video));
+  if (!iface)
+    die("Unknown FOURCC code.");
+
+  printf("Using %s\n", vpx_codec_iface_name(iface));
+
+
+  res = vpx_codec_dec_init(&codec, iface, NULL, VPX_CODEC_USE_POSTPROC);
+  if (res == VPX_CODEC_INCAPABLE) {
+    printf("NOTICE: Postproc not supported.\n");
+    res = vpx_codec_dec_init(&codec, iface, NULL, 0);
+  }
+
+  if (res)
+    die_codec(&codec, "Failed to initialize decoder");
+
+  while (vpx_video_read_frame(video)) {
+    vpx_codec_iter_t iter = NULL;
+    vpx_image_t *img = NULL;
+    size_t frame_size = 0;
+    const unsigned char *frame = vpx_video_get_frame(video, &frame_size);
+
+    ++frame_cnt;
+
+    if (frame_cnt % 30 == 1) {
+      vp8_postproc_cfg_t pp = {0, 0, 0};
+
+    if (vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp))
+      die_codec(&codec, "Failed to turn off postproc");
+    } else if (frame_cnt % 30 == 16) {
+      vp8_postproc_cfg_t pp = {VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE,
+                               4, 0};
+      if (vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp))
+        die_codec(&codec, "Failed to turn on postproc");
+    };
+
+    // Decode the frame with 15ms deadline
+    if (vpx_codec_decode(&codec, frame, frame_size, NULL, 15000))
+      die_codec(&codec, "Failed to decode frame");
+
+    while ((img = vpx_codec_get_frame(&codec, &iter)) != NULL) {
+      vpx_img_write(img, outfile);
     }
-    printf("Processed %d frames.\n",frame_cnt);
-    if(vpx_codec_destroy(&codec))
-        die_codec(&codec, "Failed to destroy codec");
+  }
+
+  printf("Processed %d frames.\n", frame_cnt);
+  if (vpx_codec_destroy(&codec))
+    die_codec(&codec, "Failed to destroy codec");
+
+  printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n",
+         vpx_video_get_width(video), vpx_video_get_height(video), argv[2]);
+
+  vpx_video_close(video);
 
-    fclose(outfile);
-    fclose(infile);
-    return EXIT_SUCCESS;
+  fclose(outfile);
+  fclose(infile);
+  return EXIT_SUCCESS;
 }
diff --git a/examples/simple_decoder.c b/examples/simple_decoder.c
index 17a598739..23399f44f 100644
--- a/examples/simple_decoder.c
+++ b/examples/simple_decoder.c
@@ -77,110 +77,82 @@
 // few exeptions, vpx_codec functions return an enumerated error status,
 // with the value `0` indicating success.
 
-#include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+
 #define VPX_CODEC_DISABLE_COMPAT 1
-#include "./vpx_config.h"
+
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"
-#define interface (vpx_codec_vp8_dx())
 
+#include "./ivfdec.h"
+#include "./tools_common.h"
+#include "./vpx_config.h"
 
-#define IVF_FILE_HDR_SZ  (32)
-#define IVF_FRAME_HDR_SZ (12)
+static const char *exec_name;
 
-static unsigned int mem_get_le32(const unsigned char *mem) {
-    return (mem[3] << 24)|(mem[2] << 16)|(mem[1] << 8)|(mem[0]);
+void usage_exit() {
+  fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
+  exit(EXIT_FAILURE);
 }
 
-static void die(const char *fmt, ...) {
-    va_list ap;
+int main(int argc, char **argv) {
+  FILE *infile, *outfile;
+  vpx_codec_ctx_t codec;
+  vpx_codec_iface_t *iface;
+  int flags = 0, frame_cnt = 0;
+  vpx_video_t *video;
 
-    va_start(ap, fmt);
-    vprintf(fmt, ap);
-    if(fmt[strlen(fmt)-1] != '\n')
-        printf("\n");
-    exit(EXIT_FAILURE);
-}
+  exec_name = argv[0];
 
-static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
-    const char *detail = vpx_codec_error_detail(ctx);
+  if (argc != 3)
+    die("Invalid number of arguments");
 
-    printf("%s: %s\n", s, vpx_codec_error(ctx));
-    if(detail)
-        printf("    %s\n",detail);
-    exit(EXIT_FAILURE);
-}
+  if (!(infile = fopen(argv[1], "rb")))
+    die("Failed to open %s for reading", argv[1]);
 
+  if (!(outfile = fopen(argv[2], "wb")))
+    die("Failed to open %s for writing", argv[2]);
 
-int main(int argc, char **argv) {
-    FILE            *infile, *outfile;
-    vpx_codec_ctx_t  codec;
-    int              flags = 0, frame_cnt = 0;
-    unsigned char    file_hdr[IVF_FILE_HDR_SZ];
-    unsigned char    frame_hdr[IVF_FRAME_HDR_SZ];
-    unsigned char    frame[256*1024];
-    vpx_codec_err_t  res;
-
-    (void)res;
-    /* Open files */
-    if(argc!=3)
-        die("Usage: %s <infile> <outfile>\n", argv[0]);
-    if(!(infile = fopen(argv[1], "rb")))
-        die("Failed to open %s for reading", argv[1]);
-    if(!(outfile = fopen(argv[2], "wb")))
-        die("Failed to open %s for writing", argv[2]);
-
-    /* Read file header */
-    if(!(fread(file_hdr, 1, IVF_FILE_HDR_SZ, infile) == IVF_FILE_HDR_SZ
-         && file_hdr[0]=='D' && file_hdr[1]=='K' && file_hdr[2]=='I'
-         && file_hdr[3]=='F'))
-        die("%s is not an IVF file.", argv[1]);
-
-    printf("Using %s\n",vpx_codec_iface_name(interface));
-    /* Initialize codec */
-    if(vpx_codec_dec_init(&codec, interface, NULL, flags))
-        die_codec(&codec, "Failed to initialize decoder");
-
-    /* Read each frame */
-    while(fread(frame_hdr, 1, IVF_FRAME_HDR_SZ, infile) == IVF_FRAME_HDR_SZ) {
-        int               frame_sz = mem_get_le32(frame_hdr);
-        vpx_codec_iter_t  iter = NULL;
-        vpx_image_t      *img;
-
-
-        frame_cnt++;
-        if(frame_sz > sizeof(frame))
-            die("Frame %d data too big for example code buffer", frame_sz);
-        if(fread(frame, 1, frame_sz, infile) != frame_sz)
-            die("Frame %d failed to read complete frame", frame_cnt);
-
-        /* Decode the frame */
-        if(vpx_codec_decode(&codec, frame, frame_sz, NULL, 0))
-            die_codec(&codec, "Failed to decode frame");
-
-        /* Write decoded data to disk */
-        while((img = vpx_codec_get_frame(&codec, &iter))) {
-            unsigned int plane, y;
-
-            for(plane=0; plane < 3; plane++) {
-                unsigned char *buf =img->planes[plane];
-
-                for(y=0; y < (plane ? (img->d_h + 1) >> 1 : img->d_h); y++) {
-                    (void) fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w),
-                                  outfile);
-                    buf += img->stride[plane];
-                }
-            }
-        }
+  video = vpx_video_open_file(infile);
+  if (!video)
+    die("%s is not an IVF file.", argv[1]);
+
+  iface = get_codec_interface(vpx_video_get_fourcc(video));
+  if (!iface)
+    die("Unknown FOURCC code.");
+
+  printf("Using %s\n", vpx_codec_iface_name(iface));
+
+  if (vpx_codec_dec_init(&codec, iface, NULL, flags))
+    die_codec(&codec, "Failed to initialize decoder");
+
+  while (vpx_video_read_frame(video)) {
+    vpx_codec_iter_t iter = NULL;
+    vpx_image_t *img = NULL;
+    size_t frame_size = 0;
+    const unsigned char *frame = vpx_video_get_frame(video, &frame_size);
+    if (vpx_codec_decode(&codec, frame, frame_size, NULL, 0))
+      die_codec(&codec, "Failed to decode frame");
+
+    while ((img = vpx_codec_get_frame(&codec, &iter)) != NULL) {
+      vpx_img_write(img, outfile);
+      ++frame_cnt;
     }
-    printf("Processed %d frames.\n",frame_cnt);
-    if(vpx_codec_destroy(&codec))
-        die_codec(&codec, "Failed to destroy codec");
+  }
+
+  printf("Processed %d frames.\n", frame_cnt);
+  if (vpx_codec_destroy(&codec))
+    die_codec(&codec, "Failed to destroy codec");
+
+  printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n",
+         vpx_video_get_width(video), vpx_video_get_height(video), argv[2]);
+
+  vpx_video_close(video);
+
+  fclose(outfile);
+  fclose(infile);
 
-    fclose(outfile);
-    fclose(infile);
-    return EXIT_SUCCESS;
+  return EXIT_SUCCESS;
 }
diff --git a/ivfdec.c b/ivfdec.c
index c7f4a8947..b9fec26a4 100644
--- a/ivfdec.c
+++ b/ivfdec.c
@@ -8,10 +8,13 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "./ivfdec.h"
-
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
+
+#include "./ivfdec.h"
+
+static const char *IVF_SIGNATURE = "DKIF";
 
 static void fix_framerate(int *num, int *den) {
   // Some versions of vpxenc used 1/(2*fps) for the timebase, so
@@ -37,8 +40,7 @@ int file_is_ivf(struct VpxInputContext *input_ctx) {
   int is_ivf = 0;
 
   if (fread(raw_hdr, 1, 32, input_ctx->file) == 32) {
-    if (raw_hdr[0] == 'D' && raw_hdr[1] == 'K' &&
-        raw_hdr[2] == 'I' && raw_hdr[3] == 'F') {
+    if (memcmp(IVF_SIGNATURE, raw_hdr, 4) == 0) {
       is_ivf = 1;
 
       if (mem_get_le16(raw_hdr + 4) != 0) {
@@ -106,3 +108,68 @@ int ivf_read_frame(FILE *infile, uint8_t **buffer,
 
   return 1;
 }
+
+struct vpx_video {
+  FILE *file;
+  unsigned char *buffer;
+  size_t buffer_size;
+  size_t frame_size;
+  unsigned int fourcc;
+  int width;
+  int height;
+};
+
+vpx_video_t *vpx_video_open_file(FILE *file) {
+  char raw_hdr[32];
+  vpx_video_t *video;
+
+  if (fread(raw_hdr, 1, 32, file) != 32)
+    return NULL;  // Can't read file header;
+
+  if (memcmp(IVF_SIGNATURE, raw_hdr, 4) != 0)
+    return NULL;  // Wrong IVF signature
+
+  if (mem_get_le16(raw_hdr + 4) != 0)
+    return NULL;  // Wrong IVF version
+
+  video = (vpx_video_t *)malloc(sizeof(*video));
+  video->file = file;
+  video->buffer = NULL;
+  video->buffer_size = 0;
+  video->frame_size = 0;
+  video->fourcc = mem_get_le32(raw_hdr + 8);
+  video->width = mem_get_le16(raw_hdr + 12);
+  video->height = mem_get_le16(raw_hdr + 14);
+  return video;
+}
+
+void vpx_video_close(vpx_video_t *video) {
+  if (video) {
+    free(video->buffer);
+    free(video);
+  }
+}
+
+int vpx_video_get_width(vpx_video_t *video) {
+  return video->width;
+}
+
+int vpx_video_get_height(vpx_video_t *video) {
+  return video->height;
+}
+
+unsigned int vpx_video_get_fourcc(vpx_video_t *video) {
+  return video->fourcc;
+}
+
+int vpx_video_read_frame(vpx_video_t *video) {
+  return !ivf_read_frame(video->file, &video->buffer, &video->frame_size,
+                         &video->buffer_size);
+}
+
+const unsigned char *vpx_video_get_frame(vpx_video_t *video, size_t *size) {
+  if (size)
+    *size = video->frame_size;
+
+  return video->buffer;
+}
diff --git a/ivfdec.h b/ivfdec.h
index dd29cc617..e960b4aec 100644
--- a/ivfdec.h
+++ b/ivfdec.h
@@ -21,6 +21,34 @@ int file_is_ivf(struct VpxInputContext *input);
 int ivf_read_frame(FILE *infile, uint8_t **buffer,
                    size_t *bytes_read, size_t *buffer_size);
 
+// The following code is work in progress. It is going to be in a separate file
+// and support transparent reading of IVF and Y4M formats. Right now only IVF
+// format is supported for simplicity. The main goal the API is to be
+// simple and easy to use in example code (and probably in vpxenc/vpxdec later).
+// All low-level details like memory buffer management are hidden from API
+// users.
+struct vpx_video;
+typedef struct vpx_video vpx_video_t;
+
+// Opens the input file and inspects it to determine file type. Returns an
+// opaque vpx_video_t* upon success, or NULL upon failure.
+vpx_video_t *vpx_video_open_file(FILE *file);
+
+// Frees all resources associated with vpx_video_t returned from
+// vpx_video_open_file() call
+void vpx_video_close(vpx_video_t *video);
+
+int vpx_video_get_width(vpx_video_t *video);
+int vpx_video_get_height(vpx_video_t *video);
+unsigned int vpx_video_get_fourcc(vpx_video_t *video);
+
+// Reads video frame bytes from the file and stores them into internal buffer.
+int vpx_video_read_frame(vpx_video_t *video);
+
+// Returns the pointer to internal memory buffer with frame bytes read from
+// last call to vpx_video_read_frame().
+const unsigned char *vpx_video_get_frame(vpx_video_t *video, size_t *size);
+
 #ifdef __cplusplus
 }  /* extern "C" */
 #endif
diff --git a/ivfenc.c b/ivfenc.c
index 0041ff044..4a97c4273 100644
--- a/ivfenc.c
+++ b/ivfenc.c
@@ -10,7 +10,6 @@
 
 #include "./ivfenc.h"
 
-#include "./tools_common.h"
 #include "vpx/vpx_encoder.h"
 #include "vpx_ports/mem_ops.h"
 
@@ -24,33 +23,31 @@ void ivf_write_file_header(FILE *outfile,
   header[1] = 'K';
   header[2] = 'I';
   header[3] = 'F';
-  mem_put_le16(header + 4,  0);                 /* version */
-  mem_put_le16(header + 6,  32);                /* headersize */
-  mem_put_le32(header + 8,  fourcc);            /* four CC */
-  mem_put_le16(header + 12, cfg->g_w);          /* width */
-  mem_put_le16(header + 14, cfg->g_h);          /* height */
-  mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */
-  mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */
-  mem_put_le32(header + 24, frame_cnt);         /* length */
-  mem_put_le32(header + 28, 0);                 /* unused */
-
-  (void) fwrite(header, 1, 32, outfile);
+  mem_put_le16(header + 4, 0);                     // version
+  mem_put_le16(header + 6, 32);                    // header size
+  mem_put_le32(header + 8, fourcc);                // fourcc
+  mem_put_le16(header + 12, cfg->g_w);             // width
+  mem_put_le16(header + 14, cfg->g_h);             // height
+  mem_put_le32(header + 16, cfg->g_timebase.den);  // rate
+  mem_put_le32(header + 20, cfg->g_timebase.num);  // scale
+  mem_put_le32(header + 24, frame_cnt);            // length
+  mem_put_le32(header + 28, 0);                    // unused
+
+  fwrite(header, 1, 32, outfile);
 }
 
-void ivf_write_frame_header(FILE *outfile, const struct vpx_codec_cx_pkt *pkt) {
+void ivf_write_frame_header(FILE *outfile, int64_t pts, size_t frame_size) {
   char header[12];
-  vpx_codec_pts_t pts;
 
-  pts = pkt->data.frame.pts;
-  mem_put_le32(header, (int)pkt->data.frame.sz);
-  mem_put_le32(header + 4, pts & 0xFFFFFFFF);
-  mem_put_le32(header + 8, pts >> 32);
-
-  (void) fwrite(header, 1, 12, outfile);
+  mem_put_le32(header, (int)frame_size);
+  mem_put_le32(header + 4, (int)(pts & 0xFFFFFFFF));
+  mem_put_le32(header + 8, (int)(pts >> 32));
+  fwrite(header, 1, 12, outfile);
 }
 
-void ivf_write_frame_size(FILE *outfile, size_t size) {
+void ivf_write_frame_size(FILE *outfile, size_t frame_size) {
   char header[4];
-  mem_put_le32(header, (int)size);
-  (void) fwrite(header, 1, 4, outfile);
+
+  mem_put_le32(header, (int)frame_size);
+  fwrite(header, 1, 4, outfile);
 }
diff --git a/ivfenc.h b/ivfenc.h
index b486bc809..6623687e8 100644
--- a/ivfenc.h
+++ b/ivfenc.h
@@ -23,8 +23,10 @@ void ivf_write_file_header(FILE *outfile,
                            const struct vpx_codec_enc_cfg *cfg,
                            uint32_t fourcc,
                            int frame_cnt);
-void ivf_write_frame_header(FILE *outfile, const struct vpx_codec_cx_pkt *pkt);
-void ivf_write_frame_size(FILE *outfile, size_t size);
+
+void ivf_write_frame_header(FILE *outfile, int64_t pts, size_t frame_size);
+
+void ivf_write_frame_size(FILE *outfile, size_t frame_size);
 
 #ifdef __cplusplus
 }  /* extern "C" */
diff --git a/libs.mk b/libs.mk
index 77840a456..cc40451d4 100644
--- a/libs.mk
+++ b/libs.mk
@@ -183,7 +183,6 @@ CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec
 
 INSTALL-LIBS-yes += include/vpx/vpx_codec.h
 INSTALL-LIBS-yes += include/vpx/vpx_image.h
-INSTALL-LIBS-yes += include/vpx/vpx_external_frame_buffer.h
 INSTALL-LIBS-yes += include/vpx/vpx_integer.h
 INSTALL-LIBS-$(CONFIG_DECODERS) += include/vpx/vpx_decoder.h
 INSTALL-LIBS-$(CONFIG_ENCODERS) += include/vpx/vpx_encoder.h
@@ -215,8 +214,11 @@ CLEAN-OBJS += libvpx_srcs.txt
 ifeq ($(CONFIG_EXTERNAL_BUILD),yes)
 ifeq ($(CONFIG_MSVS),yes)
 
+obj_int_extract.bat: $(SRC_PATH_BARE)/build/$(MSVS_ARCH_DIR)/obj_int_extract.bat
+	@cp $^ $@
+
+obj_int_extract.$(VCPROJ_SFX): obj_int_extract.bat
 obj_int_extract.$(VCPROJ_SFX): $(SRC_PATH_BARE)/build/make/obj_int_extract.c
-	@cp $(SRC_PATH_BARE)/build/$(MSVS_ARCH_DIR)/obj_int_extract.bat .
 	@echo "    [CREATE] $@"
 	$(qexec)$(GEN_VCPROJ) \
     --exe \
diff --git a/md5_utils.h b/md5_utils.h
index 9935eae6d..bd4991b3a 100644
--- a/md5_utils.h
+++ b/md5_utils.h
@@ -23,6 +23,10 @@
 #ifndef MD5_UTILS_H_
 #define MD5_UTILS_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define md5byte unsigned char
 #define UWORD32 unsigned int
 
@@ -38,4 +42,8 @@ void MD5Update(struct MD5Context *context, md5byte const *buf, unsigned len);
 void MD5Final(unsigned char digest[16], struct MD5Context *context);
 void MD5Transform(UWORD32 buf[4], UWORD32 const in[16]);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // MD5_UTILS_H_
diff --git a/rate_hist.c b/rate_hist.c
new file mode 100644
index 000000000..1cef19bdd
--- /dev/null
+++ b/rate_hist.c
@@ -0,0 +1,282 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdio.h>
+#include <math.h>
+
+#include "./rate_hist.h"
+
+#define RATE_BINS 100
+#define HIST_BAR_MAX 40
+
+struct hist_bucket {
+  int low;
+  int high;
+  int count;
+};
+
+struct rate_hist {
+  int64_t *pts;
+  int *sz;
+  int samples;
+  int frames;
+  struct hist_bucket bucket[RATE_BINS];
+  int total;
+};
+
+struct rate_hist *init_rate_histogram(const vpx_codec_enc_cfg_t *cfg,
+                                      const vpx_rational_t *fps) {
+  int i;
+  struct rate_hist *hist = malloc(sizeof(*hist));
+
+  // Determine the number of samples in the buffer. Use the file's framerate
+  // to determine the number of frames in rc_buf_sz milliseconds, with an
+  // adjustment (5/4) to account for alt-refs
+  hist->samples = cfg->rc_buf_sz * 5 / 4 * fps->num / fps->den / 1000;
+
+  // prevent division by zero
+  if (hist->samples == 0)
+    hist->samples = 1;
+
+  hist->frames = 0;
+  hist->total = 0;
+
+  hist->pts = calloc(hist->samples, sizeof(*hist->pts));
+  hist->sz = calloc(hist->samples, sizeof(*hist->sz));
+  for (i = 0; i < RATE_BINS; i++) {
+    hist->bucket[i].low = INT_MAX;
+    hist->bucket[i].high = 0;
+    hist->bucket[i].count = 0;
+  }
+
+  return hist;
+}
+
+void destroy_rate_histogram(struct rate_hist *hist) {
+  if (hist) {
+    free(hist->pts);
+    free(hist->sz);
+    free(hist);
+  }
+}
+
+void update_rate_histogram(struct rate_hist *hist,
+                           const vpx_codec_enc_cfg_t *cfg,
+                           const vpx_codec_cx_pkt_t *pkt) {
+  int i;
+  int64_t then = 0;
+  int64_t avg_bitrate = 0;
+  int64_t sum_sz = 0;
+  const int64_t now = pkt->data.frame.pts * 1000 *
+                          (uint64_t)cfg->g_timebase.num /
+                              (uint64_t)cfg->g_timebase.den;
+
+  int idx = hist->frames++ % hist->samples;
+  hist->pts[idx] = now;
+  hist->sz[idx] = (int)pkt->data.frame.sz;
+
+  if (now < cfg->rc_buf_initial_sz)
+    return;
+
+  then = now;
+
+  /* Sum the size over the past rc_buf_sz ms */
+  for (i = hist->frames; i > 0 && hist->frames - i < hist->samples; i--) {
+    const int i_idx = (i - 1) % hist->samples;
+
+    then = hist->pts[i_idx];
+    if (now - then > cfg->rc_buf_sz)
+      break;
+    sum_sz += hist->sz[i_idx];
+  }
+
+  if (now == then)
+    return;
+
+  avg_bitrate = sum_sz * 8 * 1000 / (now - then);
+  idx = (int)(avg_bitrate * (RATE_BINS / 2) / (cfg->rc_target_bitrate * 1000));
+  if (idx < 0)
+    idx = 0;
+  if (idx > RATE_BINS - 1)
+    idx = RATE_BINS - 1;
+  if (hist->bucket[idx].low > avg_bitrate)
+    hist->bucket[idx].low = (int)avg_bitrate;
+  if (hist->bucket[idx].high < avg_bitrate)
+    hist->bucket[idx].high = (int)avg_bitrate;
+  hist->bucket[idx].count++;
+  hist->total++;
+}
+
+static int merge_hist_buckets(struct hist_bucket *bucket,
+                              int max_buckets, int *num_buckets) {
+  int small_bucket = 0, merge_bucket = INT_MAX, big_bucket = 0;
+  int buckets = *num_buckets;
+  int i;
+
+  /* Find the extrema for this list of buckets */
+  big_bucket = small_bucket = 0;
+  for (i = 0; i < buckets; i++) {
+    if (bucket[i].count < bucket[small_bucket].count)
+      small_bucket = i;
+    if (bucket[i].count > bucket[big_bucket].count)
+      big_bucket = i;
+  }
+
+  /* If we have too many buckets, merge the smallest with an adjacent
+   * bucket.
+   */
+  while (buckets > max_buckets) {
+    int last_bucket = buckets - 1;
+
+    /* merge the small bucket with an adjacent one. */
+    if (small_bucket == 0)
+      merge_bucket = 1;
+    else if (small_bucket == last_bucket)
+      merge_bucket = last_bucket - 1;
+    else if (bucket[small_bucket - 1].count < bucket[small_bucket + 1].count)
+      merge_bucket = small_bucket - 1;
+    else
+      merge_bucket = small_bucket + 1;
+
+    assert(abs(merge_bucket - small_bucket) <= 1);
+    assert(small_bucket < buckets);
+    assert(big_bucket < buckets);
+    assert(merge_bucket < buckets);
+
+    if (merge_bucket < small_bucket) {
+      bucket[merge_bucket].high = bucket[small_bucket].high;
+      bucket[merge_bucket].count += bucket[small_bucket].count;
+    } else {
+      bucket[small_bucket].high = bucket[merge_bucket].high;
+      bucket[small_bucket].count += bucket[merge_bucket].count;
+      merge_bucket = small_bucket;
+    }
+
+    assert(bucket[merge_bucket].low != bucket[merge_bucket].high);
+
+    buckets--;
+
+    /* Remove the merge_bucket from the list, and find the new small
+     * and big buckets while we're at it
+     */
+    big_bucket = small_bucket = 0;
+    for (i = 0; i < buckets; i++) {
+      if (i > merge_bucket)
+        bucket[i] = bucket[i + 1];
+
+      if (bucket[i].count < bucket[small_bucket].count)
+        small_bucket = i;
+      if (bucket[i].count > bucket[big_bucket].count)
+        big_bucket = i;
+    }
+  }
+
+  *num_buckets = buckets;
+  return bucket[big_bucket].count;
+}
+
+static void show_histogram(const struct hist_bucket *bucket,
+                           int buckets, int total, int scale) {
+  const char *pat1, *pat2;
+  int i;
+
+  switch ((int)(log(bucket[buckets - 1].high) / log(10)) + 1) {
+    case 1:
+    case 2:
+      pat1 = "%4d %2s: ";
+      pat2 = "%4d-%2d: ";
+      break;
+    case 3:
+      pat1 = "%5d %3s: ";
+      pat2 = "%5d-%3d: ";
+      break;
+    case 4:
+      pat1 = "%6d %4s: ";
+      pat2 = "%6d-%4d: ";
+      break;
+    case 5:
+      pat1 = "%7d %5s: ";
+      pat2 = "%7d-%5d: ";
+      break;
+    case 6:
+      pat1 = "%8d %6s: ";
+      pat2 = "%8d-%6d: ";
+      break;
+    case 7:
+      pat1 = "%9d %7s: ";
+      pat2 = "%9d-%7d: ";
+      break;
+    default:
+      pat1 = "%12d %10s: ";
+      pat2 = "%12d-%10d: ";
+      break;
+  }
+
+  for (i = 0; i < buckets; i++) {
+    int len;
+    int j;
+    float pct;
+
+    pct = (float)(100.0 * bucket[i].count / total);
+    len = HIST_BAR_MAX * bucket[i].count / scale;
+    if (len < 1)
+      len = 1;
+    assert(len <= HIST_BAR_MAX);
+
+    if (bucket[i].low == bucket[i].high)
+      fprintf(stderr, pat1, bucket[i].low, "");
+    else
+      fprintf(stderr, pat2, bucket[i].low, bucket[i].high);
+
+    for (j = 0; j < HIST_BAR_MAX; j++)
+      fprintf(stderr, j < len ? "=" : " ");
+    fprintf(stderr, "\t%5d (%6.2f%%)\n", bucket[i].count, pct);
+  }
+}
+
+void show_q_histogram(const int counts[64], int max_buckets) {
+  struct hist_bucket bucket[64];
+  int buckets = 0;
+  int total = 0;
+  int scale;
+  int i;
+
+  for (i = 0; i < 64; i++) {
+    if (counts[i]) {
+      bucket[buckets].low = bucket[buckets].high = i;
+      bucket[buckets].count = counts[i];
+      buckets++;
+      total += counts[i];
+    }
+  }
+
+  fprintf(stderr, "\nQuantizer Selection:\n");
+  scale = merge_hist_buckets(bucket, max_buckets, &buckets);
+  show_histogram(bucket, buckets, total, scale);
+}
+
+void show_rate_histogram(struct rate_hist *hist,
+                         const vpx_codec_enc_cfg_t *cfg, int max_buckets) {
+  int i, scale;
+  int buckets = 0;
+
+  for (i = 0; i < RATE_BINS; i++) {
+    if (hist->bucket[i].low == INT_MAX)
+      continue;
+    hist->bucket[buckets++] = hist->bucket[i];
+  }
+
+  fprintf(stderr, "\nRate (over %dms window):\n", cfg->rc_buf_sz);
+  scale = merge_hist_buckets(hist->bucket, max_buckets, &buckets);
+  show_histogram(hist->bucket, buckets, hist->total, scale);
+}
diff --git a/rate_hist.h b/rate_hist.h
new file mode 100644
index 000000000..00a1676a6
--- /dev/null
+++ b/rate_hist.h
@@ -0,0 +1,40 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef RATE_HIST_H_
+#define RATE_HIST_H_
+
+#include "vpx/vpx_encoder.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rate_hist;
+
+struct rate_hist *init_rate_histogram(const vpx_codec_enc_cfg_t *cfg,
+                                      const vpx_rational_t *fps);
+
+void destroy_rate_histogram(struct rate_hist *hist);
+
+void update_rate_histogram(struct rate_hist *hist,
+                           const vpx_codec_enc_cfg_t *cfg,
+                           const vpx_codec_cx_pkt_t *pkt);
+
+void show_q_histogram(const int counts[64], int max_buckets);
+
+void show_rate_histogram(struct rate_hist *hist, const vpx_codec_enc_cfg_t *cfg,
+                         int max_buckets);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // RATE_HIST_H_
diff --git a/resize_util.c b/resize_util.c
new file mode 100644
index 000000000..b068f5524
--- /dev/null
+++ b/resize_util.c
@@ -0,0 +1,120 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "./vp9/encoder/vp9_resize.h"
+
+static void usage(char *progname) {
+  printf("Usage:\n");
+  printf("%s <input_yuv> <width>x<height> <target_width>x<target_height> ",
+         progname);
+  printf("<output_yuv> [<frames>]\n");
+}
+
+static int parse_dim(char *v, int *width, int *height) {
+  char *x = strchr(v, 'x');
+  if (x == NULL)
+    x = strchr(v, 'X');
+  if (x == NULL)
+    return 0;
+  *width = atoi(v);
+  *height = atoi(&x[1]);
+  if (*width <= 0 || *height <= 0)
+    return 0;
+  else
+    return 1;
+}
+
+int main(int argc, char *argv[]) {
+  char *fin, *fout;
+  FILE *fpin, *fpout;
+  uint8_t *inbuf, *outbuf;
+  uint8_t *inbuf_u, *outbuf_u;
+  uint8_t *inbuf_v, *outbuf_v;
+  int f, frames;
+  int width, height, target_width, target_height;
+
+  if (argc < 5) {
+    printf("Incorrect parameters:\n");
+    usage(argv[0]);
+    return 1;
+  }
+
+  fin = argv[1];
+  fout = argv[4];
+  if (!parse_dim(argv[2], &width, &height)) {
+    printf("Incorrect parameters: %s\n", argv[2]);
+    usage(argv[0]);
+    return 1;
+  }
+  if (!parse_dim(argv[3], &target_width, &target_height)) {
+    printf("Incorrect parameters: %s\n", argv[3]);
+    usage(argv[0]);
+    return 1;
+  }
+
+  fpin = fopen(fin, "rb");
+  if (fpin == NULL) {
+    printf("Can't open file %s to read\n", fin);
+    usage(argv[0]);
+    return 1;
+  }
+  fpout = fopen(fout, "wb");
+  if (fpout == NULL) {
+    printf("Can't open file %s to write\n", fout);
+    usage(argv[0]);
+    return 1;
+  }
+  if (argc >= 6)
+    frames = atoi(argv[5]);
+  else
+    frames = INT_MAX;
+
+  printf("Input size:  %dx%d\n",
+         width, height);
+  printf("Target size: %dx%d, Frames: ",
+         target_width, target_height);
+  if (frames == INT_MAX)
+    printf("All\n");
+  else
+    printf("%d\n", frames);
+
+  inbuf = (uint8_t*)malloc(width * height * 3 / 2);
+  outbuf = (uint8_t*)malloc(target_width * target_height * 3 / 2);
+  inbuf_u = inbuf + width * height;
+  inbuf_v = inbuf_u + width * height / 4;
+  outbuf_u = outbuf + target_width * target_height;
+  outbuf_v = outbuf_u + target_width * target_height / 4;
+  f = 0;
+  while (f < frames) {
+    if (fread(inbuf, width * height * 3 / 2, 1, fpin) != 1)
+      break;
+    vp9_resize_frame420(inbuf, width, inbuf_u, inbuf_v, width / 2,
+                        height, width,
+                        outbuf, target_width, outbuf_u, outbuf_v,
+                        target_width / 2,
+                        target_height, target_width);
+    fwrite(outbuf, target_width * target_height * 3 / 2, 1, fpout);
+    f++;
+  }
+  printf("%d frames processed\n", f);
+  fclose(fpin);
+  fclose(fpout);
+
+  free(inbuf);
+  free(outbuf);
+  return 0;
+}
diff --git a/test/clear_system_state.h b/test/clear_system_state.h
index 8f08a4cdb..5e7679744 100644
--- a/test/clear_system_state.h
+++ b/test/clear_system_state.h
@@ -11,11 +11,9 @@
 #define TEST_CLEAR_SYSTEM_STATE_H_
 
 #include "./vpx_config.h"
-extern "C" {
 #if ARCH_X86 || ARCH_X86_64
 # include "vpx_ports/x86.h"
 #endif
-}
 
 namespace libvpx_test {
 
diff --git a/test/codec_factory.h b/test/codec_factory.h
index c060e86dc..80e87c883 100644
--- a/test/codec_factory.h
+++ b/test/codec_factory.h
@@ -24,8 +24,6 @@
 #include "test/encode_test_driver.h"
 namespace libvpx_test {
 
-const int kCodecFactoryParam = 0;
-
 class CodecFactory {
  public:
   CodecFactory() {}
@@ -73,7 +71,7 @@ class VP8Decoder : public Decoder {
       : Decoder(cfg, deadline) {}
 
  protected:
-  virtual const vpx_codec_iface_t* CodecInterface() const {
+  virtual vpx_codec_iface_t* CodecInterface() const {
 #if CONFIG_VP8_DECODER
     return &vpx_codec_vp8_dx_algo;
 #else
@@ -89,7 +87,7 @@ class VP8Encoder : public Encoder {
       : Encoder(cfg, deadline, init_flags, stats) {}
 
  protected:
-  virtual const vpx_codec_iface_t* CodecInterface() const {
+  virtual vpx_codec_iface_t* CodecInterface() const {
 #if CONFIG_VP8_ENCODER
     return &vpx_codec_vp8_cx_algo;
 #else
@@ -155,7 +153,7 @@ class VP9Decoder : public Decoder {
       : Decoder(cfg, deadline) {}
 
  protected:
-  virtual const vpx_codec_iface_t* CodecInterface() const {
+  virtual vpx_codec_iface_t* CodecInterface() const {
 #if CONFIG_VP9_DECODER
     return &vpx_codec_vp9_dx_algo;
 #else
@@ -171,7 +169,7 @@ class VP9Encoder : public Encoder {
       : Encoder(cfg, deadline, init_flags, stats) {}
 
  protected:
-  virtual const vpx_codec_iface_t* CodecInterface() const {
+  virtual vpx_codec_iface_t* CodecInterface() const {
 #if CONFIG_VP9_ENCODER
     return &vpx_codec_vp9_cx_algo;
 #else
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 9ab60b1c3..e920de874 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -14,13 +14,11 @@
 #include "test/util.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
-extern "C" {
 #include "./vpx_config.h"
 #include "./vp9_rtcd.h"
 #include "vp9/common/vp9_filter.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
-}
 
 namespace {
 typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
index 5496d0b62..ce0431860 100644
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -18,12 +18,13 @@
 #include "test/register_state_check.h"
 #include "test/util.h"
 
-extern "C" {
-#include "vp9/common/vp9_entropy.h"
 #include "./vp9_rtcd.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vpx/vpx_integer.h"
+
+extern "C" {
 void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *output, int pitch);
 }
-#include "vpx/vpx_integer.h"
 
 using libvpx_test::ACMRandom;
 
diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc
index a1e472a0a..a2608acb5 100644
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -18,12 +18,9 @@
 #include "test/register_state_check.h"
 #include "test/util.h"
 
-extern "C" {
 #include "./vpx_config.h"
-#include "vp9/common/vp9_entropy.h"
 #include "./vp9_rtcd.h"
-}
-
+#include "vp9/common/vp9_entropy.h"
 #include "vpx/vpx_integer.h"
 
 using libvpx_test::ACMRandom;
diff --git a/test/decode_test_driver.h b/test/decode_test_driver.h
index 79db6e1bc..3e8ff724b 100644
--- a/test/decode_test_driver.h
+++ b/test/decode_test_driver.h
@@ -76,18 +76,8 @@ class Decoder {
     return detail ? detail : vpx_codec_error(&decoder_);
   }
 
-  // Passes the external frame buffer information to libvpx.
-  vpx_codec_err_t SetExternalFrameBuffers(
-      vpx_codec_frame_buffer_t *fb_list, int fb_count,
-      vpx_realloc_frame_buffer_cb_fn_t cb, void *user_priv) {
-    InitOnce();
-    return vpx_codec_set_frame_buffers(&decoder_,
-                                       fb_list, fb_count,
-                                       cb, user_priv);
-  }
-
  protected:
-  virtual const vpx_codec_iface_t* CodecInterface() const = 0;
+  virtual vpx_codec_iface_t* CodecInterface() const = 0;
 
   void InitOnce() {
     if (!init_done_) {
diff --git a/test/encode_test_driver.h b/test/encode_test_driver.h
index dbdc33c8e..4dabcd5b4 100644
--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@@ -128,7 +128,7 @@ class Encoder {
   }
 
  protected:
-  virtual const vpx_codec_iface_t* CodecInterface() const = 0;
+  virtual vpx_codec_iface_t* CodecInterface() const = 0;
 
   const char *EncoderError() {
     const char *detail = vpx_codec_error_detail(&encoder_);
diff --git a/test/external_frame_buffer_test.cc b/test/external_frame_buffer_test.cc
deleted file mode 100644
index 48eb8530e..000000000
--- a/test/external_frame_buffer_test.cc
+++ /dev/null
@@ -1,322 +0,0 @@
-/*
- *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <string>
-
-#include "test/codec_factory.h"
-#include "test/decode_test_driver.h"
-#include "test/ivf_video_source.h"
-#include "test/md5_helper.h"
-#include "test/test_vectors.h"
-#include "test/util.h"
-#include "test/webm_video_source.h"
-
-namespace {
-
-const int kVideoNameParam = 1;
-const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm";
-
-// Callback used by libvpx to request the application to allocate a frame
-// buffer of at least |new_size| in bytes.
-int realloc_vp9_frame_buffer(void *user_priv, size_t new_size,
-                             vpx_codec_frame_buffer_t *fb) {
-  (void)user_priv;
-  if (fb == NULL)
-    return -1;
-
-  delete [] fb->data;
-  fb->data = new uint8_t[new_size];
-  fb->size = new_size;
-  return 0;
-}
-
-// Callback will not allocate data for frame buffer.
-int zero_realloc_vp9_frame_buffer(void *user_priv, size_t new_size,
-                                  vpx_codec_frame_buffer_t *fb) {
-  (void)user_priv;
-  if (fb == NULL)
-    return -1;
-
-  delete [] fb->data;
-  fb->data = NULL;
-  fb->size = new_size;
-  return 0;
-}
-
-// Callback will allocate one less byte.
-int one_less_byte_realloc_vp9_frame_buffer(void *user_priv, size_t new_size,
-                                           vpx_codec_frame_buffer_t *fb) {
-  (void)user_priv;
-  if (fb == NULL)
-    return -1;
-
-  delete [] fb->data;
-
-  const size_t error_size = new_size - 1;
-  fb->data = new uint8_t[error_size];
-  fb->size = error_size;
-  return 0;
-}
-
-// Class for testing passing in external frame buffers to libvpx.
-class ExternalFrameBufferMD5Test
-    : public ::libvpx_test::DecoderTest,
-      public ::libvpx_test::CodecTestWithParam<const char*> {
- protected:
-  ExternalFrameBufferMD5Test()
-      : DecoderTest(GET_PARAM(::libvpx_test::kCodecFactoryParam)),
-        md5_file_(NULL),
-        num_buffers_(0),
-        frame_buffers_(NULL) {}
-
-  virtual ~ExternalFrameBufferMD5Test() {
-    for (int i = 0; i < num_buffers_; ++i) {
-      delete [] frame_buffers_[i].data;
-    }
-    delete [] frame_buffers_;
-
-    if (md5_file_ != NULL)
-      fclose(md5_file_);
-  }
-
-  virtual void PreDecodeFrameHook(
-      const libvpx_test::CompressedVideoSource &video,
-      libvpx_test::Decoder *decoder) {
-    if (num_buffers_ > 0 && video.frame_number() == 0) {
-      // Have libvpx use frame buffers we create.
-      frame_buffers_ = new vpx_codec_frame_buffer_t[num_buffers_];
-      memset(frame_buffers_, 0, sizeof(frame_buffers_[0]) * num_buffers_);
-
-      ASSERT_EQ(VPX_CODEC_OK,
-                decoder->SetExternalFrameBuffers(
-                    frame_buffers_, num_buffers_,
-                    realloc_vp9_frame_buffer, NULL));
-    }
-  }
-
-  void OpenMD5File(const std::string &md5_file_name_) {
-    md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
-    ASSERT_TRUE(md5_file_ != NULL) << "Md5 file open failed. Filename: "
-        << md5_file_name_;
-  }
-
-  virtual void DecompressedFrameHook(const vpx_image_t &img,
-                                     const unsigned int frame_number) {
-    ASSERT_TRUE(md5_file_ != NULL);
-    char expected_md5[33];
-    char junk[128];
-
-    // Read correct md5 checksums.
-    const int res = fscanf(md5_file_, "%s  %s", expected_md5, junk);
-    ASSERT_NE(EOF, res) << "Read md5 data failed";
-    expected_md5[32] = '\0';
-
-    ::libvpx_test::MD5 md5_res;
-    md5_res.Add(&img);
-    const char *const actual_md5 = md5_res.Get();
-
-    // Check md5 match.
-    ASSERT_STREQ(expected_md5, actual_md5)
-        << "Md5 checksums don't match: frame number = " << frame_number;
-  }
-
-  void set_num_buffers(int num_buffers) { num_buffers_ = num_buffers; }
-  int num_buffers() const { return num_buffers_; }
-
- private:
-  FILE *md5_file_;
-  int num_buffers_;
-  vpx_codec_frame_buffer_t *frame_buffers_;
-};
-
-class ExternalFrameBufferTest : public ::testing::Test {
- protected:
-  ExternalFrameBufferTest()
-      : video_(NULL),
-        decoder_(NULL),
-        num_buffers_(0),
-        frame_buffers_(NULL) {}
-
-  virtual void SetUp() {
-    video_ = new libvpx_test::WebMVideoSource(kVP9TestFile);
-    video_->Init();
-    video_->Begin();
-
-    vpx_codec_dec_cfg_t cfg = {0};
-    decoder_ = new libvpx_test::VP9Decoder(cfg, 0);
-  }
-
-  virtual void TearDown() {
-    for (int i = 0; i < num_buffers_; ++i) {
-      delete [] frame_buffers_[i].data;
-    }
-    delete [] frame_buffers_;
-    delete decoder_;
-    delete video_;
-  }
-
-  // Passes the external frame buffer information to libvpx.
-  vpx_codec_err_t SetExternalFrameBuffers(
-      int num_buffers,
-      vpx_realloc_frame_buffer_cb_fn_t cb) {
-    if (num_buffers > 0) {
-      num_buffers_ = num_buffers;
-
-      // Have libvpx use frame buffers we create.
-      frame_buffers_ = new vpx_codec_frame_buffer_t[num_buffers_];
-      memset(frame_buffers_, 0, sizeof(frame_buffers_[0]) * num_buffers_);
-    }
-
-    return decoder_->SetExternalFrameBuffers(frame_buffers_, num_buffers_,
-                                             cb, NULL);
-  }
-
-  // Pass Null frame buffer list to libvpx.
-  vpx_codec_err_t SetNullFrameBuffers(
-      int num_buffers,
-      vpx_realloc_frame_buffer_cb_fn_t cb) {
-    return decoder_->SetExternalFrameBuffers(NULL, num_buffers,
-                                             cb, NULL);
-  }
-
-  vpx_codec_err_t DecodeOneFrame() {
-    const vpx_codec_err_t res =
-        decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
-    if (res == VPX_CODEC_OK)
-      video_->Next();
-    return res;
-  }
-
-  vpx_codec_err_t DecodeRemainingFrames() {
-    for (; video_->cxdata(); video_->Next()) {
-      const vpx_codec_err_t res =
-          decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
-      if (res != VPX_CODEC_OK)
-        return res;
-
-      libvpx_test::DxDataIterator dec_iter = decoder_->GetDxData();
-      const vpx_image_t *img = NULL;
-
-      // Get decompressed data
-      while ((img = dec_iter.Next())) {
-      }
-    }
-    return VPX_CODEC_OK;
-  }
-
-  libvpx_test::WebMVideoSource *video_;
-  libvpx_test::VP9Decoder *decoder_;
-  int num_buffers_;
-  vpx_codec_frame_buffer_t *frame_buffers_;
-};
-
-
-// This test runs through the set of test vectors, and decodes them.
-// Libvpx will call into the application to allocate a frame buffer when
-// needed. The md5 checksums are computed for each frame in the video file.
-// If md5 checksums match the correct md5 data, then the test is passed.
-// Otherwise, the test failed.
-TEST_P(ExternalFrameBufferMD5Test, ExtFBMD5Match) {
-  const std::string filename = GET_PARAM(kVideoNameParam);
-  libvpx_test::CompressedVideoSource *video = NULL;
-
-  // Number of buffers equals number of possible reference buffers(8), plus
-  // one working buffer, plus four jitter buffers.
-  const int num_buffers = 13;
-  set_num_buffers(num_buffers);
-
-#if CONFIG_VP8_DECODER
-  // Tell compiler we are not using kVP8TestVectors.
-  (void)libvpx_test::kVP8TestVectors;
-#endif
-
-  // Open compressed video file.
-  if (filename.substr(filename.length() - 3, 3) == "ivf") {
-    video = new libvpx_test::IVFVideoSource(filename);
-  } else if (filename.substr(filename.length() - 4, 4) == "webm") {
-    video = new libvpx_test::WebMVideoSource(filename);
-  }
-  video->Init();
-
-  // Construct md5 file name.
-  const std::string md5_filename = filename + ".md5";
-  OpenMD5File(md5_filename);
-
-  // Decode frame, and check the md5 matching.
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video));
-  delete video;
-}
-
-TEST_F(ExternalFrameBufferTest, NineFrameBuffers) {
-  // Minimum number of external frame buffers for VP9 is
-  // #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS.
-  const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
-  ASSERT_EQ(VPX_CODEC_OK,
-            SetExternalFrameBuffers(num_buffers, realloc_vp9_frame_buffer));
-  ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames());
-}
-
-TEST_F(ExternalFrameBufferTest, EightJitterBuffers) {
-  // Number of buffers equals #VP9_MAXIMUM_REF_BUFFERS +
-  // #VPX_MAXIMUM_WORK_BUFFERS + eight jitter buffers.
-  const int jitter_buffers = 8;
-  const int num_buffers =
-      VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS + jitter_buffers;
-  ASSERT_EQ(VPX_CODEC_OK,
-            SetExternalFrameBuffers(num_buffers, realloc_vp9_frame_buffer));
-  ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames());
-}
-
-TEST_F(ExternalFrameBufferTest, NotEnoughBuffers) {
-  // Minimum number of external frame buffers for VP9 is
-  // #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS. Set one less.
-  const int num_buffers =
-      VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS - 1;
-  ASSERT_EQ(VPX_CODEC_INVALID_PARAM,
-            SetExternalFrameBuffers(num_buffers, realloc_vp9_frame_buffer));
-}
-
-TEST_F(ExternalFrameBufferTest, NullFrameBufferList) {
-  // Number of buffers equals #VP9_MAXIMUM_REF_BUFFERS +
-  // #VPX_MAXIMUM_WORK_BUFFERS + four jitter buffers.
-  const int jitter_buffers = 4;
-  const int num_buffers =
-      VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS + jitter_buffers;
-  ASSERT_EQ(VPX_CODEC_INVALID_PARAM,
-            SetNullFrameBuffers(num_buffers, realloc_vp9_frame_buffer));
-}
-
-TEST_F(ExternalFrameBufferTest, NullRealloc) {
-  // Number of buffers equals #VP9_MAXIMUM_REF_BUFFERS +
-  // #VPX_MAXIMUM_WORK_BUFFERS + four jitter buffers.
-  const int jitter_buffers = 4;
-  const int num_buffers =
-      VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS + jitter_buffers;
-  ASSERT_EQ(VPX_CODEC_OK,
-            SetExternalFrameBuffers(num_buffers,
-                                    zero_realloc_vp9_frame_buffer));
-  ASSERT_EQ(VPX_CODEC_MEM_ERROR, DecodeOneFrame());
-}
-
-TEST_F(ExternalFrameBufferTest, ReallocOneLessByte) {
-  // Number of buffers equals #VP9_MAXIMUM_REF_BUFFERS +
-  // #VPX_MAXIMUM_WORK_BUFFERS + four jitter buffers.
-  const int jitter_buffers = 4;
-  const int num_buffers =
-      VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS + jitter_buffers;
-  ASSERT_EQ(VPX_CODEC_OK,
-            SetExternalFrameBuffers(num_buffers,
-                                    one_less_byte_realloc_vp9_frame_buffer));
-  ASSERT_EQ(VPX_CODEC_MEM_ERROR, DecodeOneFrame());
-}
-
-VP9_INSTANTIATE_TEST_CASE(ExternalFrameBufferMD5Test,
-                          ::testing::ValuesIn(libvpx_test::kVP9TestVectors));
-}  // namespace
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index 67426eb74..5db5f5cae 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -18,12 +18,13 @@
 #include "test/register_state_check.h"
 #include "test/util.h"
 
-extern "C" {
-#include "vp9/common/vp9_entropy.h"
 #include "./vp9_rtcd.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vpx/vpx_integer.h"
+
+extern "C" {
 void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *output, int pitch);
 }
-#include "vpx/vpx_integer.h"
 
 using libvpx_test::ACMRandom;
 
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index 19ffe26aa..beef98055 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -18,12 +18,13 @@
 #include "test/register_state_check.h"
 #include "test/util.h"
 
-extern "C" {
-#include "vp9/common/vp9_entropy.h"
 #include "./vp9_rtcd.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vpx/vpx_integer.h"
+
+extern "C" {
 void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *output, int pitch);
 }
-#include "vpx/vpx_integer.h"
 
 using libvpx_test::ACMRandom;
 
diff --git a/test/i420_video_source.h b/test/i420_video_source.h
index 2bf2a0334..c3315f9ce 100644
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -52,7 +52,7 @@ class I420VideoSource : public VideoSource {
     ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
         << file_name_;
     if (start_) {
-      fseek(input_file_, raw_sz_ * start_, SEEK_SET);
+      fseek(input_file_, static_cast<unsigned>(raw_sz_) * start_, SEEK_SET);
     }
 
     frame_ = start_;
diff --git a/test/intrapred_test.cc b/test/intrapred_test.cc
index 5fba70025..b28f5fbe6 100644
--- a/test/intrapred_test.cc
+++ b/test/intrapred_test.cc
@@ -14,12 +14,11 @@
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"
-extern "C" {
+
 #include "./vpx_config.h"
 #include "./vp8_rtcd.h"
 #include "vp8/common/blockd.h"
 #include "vpx_mem/vpx_mem.h"
-}
 
 namespace {
 
diff --git a/test/lru_frame_buffer_test.cc b/test/lru_frame_buffer_test.cc
deleted file mode 100644
index cd6b432d8..000000000
--- a/test/lru_frame_buffer_test.cc
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <queue>
-#include <string>
-
-#include "test/codec_factory.h"
-#include "test/decode_test_driver.h"
-#include "test/ivf_video_source.h"
-#include "test/md5_helper.h"
-#include "test/util.h"
-#include "test/webm_video_source.h"
-
-namespace {
-
-const int kVideoNameParam = 1;
-
-const char *kLRUTestVectors[] = {
-  "vp90-2-02-size-lf-1920x1080.webm",
-  "vp90-2-05-resize.ivf",
-};
-
-// Callback used by libvpx to request the application to allocate a frame
-// buffer of at least |new_size| in bytes.
-int realloc_vp9_frame_buffer(void *user_priv, size_t new_size,
-                             vpx_codec_frame_buffer_t *fb) {
-  (void)user_priv;
-  if (fb == NULL)
-    return -1;
-
-  delete [] fb->data;
-  fb->data = new uint8_t[new_size];
-  fb->size = new_size;
-
-  return 0;
-}
-
-// Class for testing libvpx is using the least recently
-// used frame buffer when a new buffer is requested.
-class LRUFrameBufferTest
-    : public ::libvpx_test::DecoderTest,
-      public ::libvpx_test::CodecTestWithParam<const char*> {
- protected:
-  struct FrameBufferMD5Sum {
-    int frame_buffer_index;
-    vpx_image_t img;
-    std::string md5;
-  };
-
-  LRUFrameBufferTest()
-      : DecoderTest(GET_PARAM(::libvpx_test::kCodecFactoryParam)),
-        num_buffers_(0),
-        num_jitter_buffers_(0),
-        frame_buffers_(NULL) {}
-
-  virtual ~LRUFrameBufferTest() {
-    for (int i = 0; i < num_buffers_; ++i) {
-      delete [] frame_buffers_[i].data;
-    }
-    delete [] frame_buffers_;
-  }
-
-  virtual void PreDecodeFrameHook(
-      const libvpx_test::CompressedVideoSource &video,
-      libvpx_test::Decoder *decoder) {
-    // Use external buffers for testing jitter buffers.
-    if (num_jitter_buffers_ > 0 && video.frame_number() == 0) {
-      const int max_reference_buffers = 8;
-
-      // Add 1 for a work buffer.
-      num_buffers_ = max_reference_buffers + 1 + num_jitter_buffers_;
-
-      // Have libvpx use frame buffers we create.
-      frame_buffers_ = new vpx_codec_frame_buffer_t[num_buffers_];
-      memset(frame_buffers_, 0, sizeof(frame_buffers_[0]) * num_buffers_);
-
-      decoder->SetExternalFrameBuffers(frame_buffers_, num_buffers_,
-                                       realloc_vp9_frame_buffer, NULL);
-    }
-
-    // Turn on frame buffer LRU cache.
-    decoder->Control(VP9D_SET_FRAME_BUFFER_LRU_CACHE, 1);
-  }
-
-  virtual void DecompressedFrameHook(const vpx_image_t &img,
-                                     const unsigned int frame_number) {
-    const uint32_t ximg_y_plane = 0;
-    const uint8_t *const y_buffer = img.planes[ximg_y_plane];
-
-    // Find which external buffer contains the y_buffer.
-    int i = 0;
-    for (i = 0; i < num_buffers_; ++i) {
-      if (y_buffer >= frame_buffers_[i].data &&
-          y_buffer < (frame_buffers_[i].data + frame_buffers_[i].size)) {
-        break;
-      }
-    }
-
-    FrameBufferMD5Sum fb_md5;
-    fb_md5.frame_buffer_index = i;
-    fb_md5.img = img;
-
-    libvpx_test::MD5 md5;
-    md5.Add(&img);
-    fb_md5.md5 = md5.Get();
-    jitter_buffer_md5_sums_.push(fb_md5);
-
-    // Check to see if any of the reconstructed image changed.
-    if (jitter_buffer_md5_sums_.size() >
-        static_cast<size_t>(num_jitter_buffers_)) {
-      fb_md5 = jitter_buffer_md5_sums_.front();
-
-      libvpx_test::MD5 md5;
-      md5.Add(&fb_md5.img);
-      const std::string check_str = md5.Get();
-
-      ASSERT_EQ(fb_md5.md5, check_str);
-      jitter_buffer_md5_sums_.pop();
-    }
-  }
-
-  libvpx_test::CompressedVideoSource *OpenCompressedFile(
-      const std::string &filename) {
-    if (filename.substr(filename.length() - 3, 3) == "ivf") {
-      return new libvpx_test::IVFVideoSource(filename);
-    } else if (filename.substr(filename.length() - 4, 4) == "webm") {
-      return new libvpx_test::WebMVideoSource(filename);
-    }
-    return NULL;
-  }
-
-  void set_num_jitter_buffers(int num_buffers) {
-    num_jitter_buffers_ = num_buffers;
-  }
-
- private:
-  // Total number of external frame buffers.
-  int num_buffers_;
-  int num_jitter_buffers_;
-
-  // External frame buffers used by libvpx.
-  vpx_codec_frame_buffer_t *frame_buffers_;
-
-  // Save the md5 checksums for later comparison.
-  std::queue<FrameBufferMD5Sum> jitter_buffer_md5_sums_;
-};
-
-// This test runs through a set of test vectors, and decodes them.
-// Libvpx will call into the application to allocate a frame buffer when
-// needed. The md5 checksums are computed for each frame after it is
-// decoded and stored to be checked later. After a jitter frame buffer
-// has expired, the md5 checksum is computed again for the expired jitter
-// buffer frame and checked against the md5 checksum after the frame was
-// decoded. If md5 checksums match, then the test is passed. Otherwise,
-// the test failed.
-TEST_P(LRUFrameBufferTest, CheckLRUOneJitterBuffer) {
-  const std::string filename = GET_PARAM(kVideoNameParam);
-
-  set_num_jitter_buffers(1);
-
-  libvpx_test::CompressedVideoSource *const video =
-      OpenCompressedFile(filename);
-  video->Init();
-
-  // Decode frame, and check the md5 matching.
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video));
-  delete video;
-}
-
-TEST_P(LRUFrameBufferTest, CheckLRUFourJitterBuffers) {
-  const std::string filename = GET_PARAM(kVideoNameParam);
-
-  set_num_jitter_buffers(4);
-
-  libvpx_test::CompressedVideoSource *const video =
-      OpenCompressedFile(filename);
-  video->Init();
-
-  // Decode frame, and check the md5 matching.
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video));
-  delete video;
-}
-
-TEST_P(LRUFrameBufferTest, CheckLRUEightJitterBuffers) {
-  const std::string filename = GET_PARAM(kVideoNameParam);
-
-  set_num_jitter_buffers(8);
-
-  libvpx_test::CompressedVideoSource *const video =
-      OpenCompressedFile(filename);
-  video->Init();
-
-  // Decode frame, and check the md5 matching.
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video));
-  delete video;
-}
-
-VP9_INSTANTIATE_TEST_CASE(LRUFrameBufferTest,
-                          ::testing::ValuesIn(kLRUTestVectors));
-}  // namespace
diff --git a/test/md5_helper.h b/test/md5_helper.h
index 289f6089b..dd446f4f6 100644
--- a/test/md5_helper.h
+++ b/test/md5_helper.h
@@ -11,10 +11,8 @@
 #ifndef TEST_MD5_HELPER_H_
 #define TEST_MD5_HELPER_H_
 
-extern "C" {
 #include "./md5_utils.h"
 #include "vpx/vpx_decoder.h"
-}
 
 namespace libvpx_test {
 class MD5 {
diff --git a/test/partial_idct_test.cc b/test/partial_idct_test.cc
index a6569c0c3..2a32410f3 100644
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -18,12 +18,9 @@
 #include "test/register_state_check.h"
 #include "test/util.h"
 
-extern "C" {
 #include "./vp9_rtcd.h"
 #include "vp9/common/vp9_blockd.h"
 #include "vp9/common/vp9_scan.h"
-}
-
 #include "vpx/vpx_integer.h"
 
 using libvpx_test::ACMRandom;
diff --git a/test/set_roi.cc b/test/set_roi.cc
index 9d2e7715f..e28f511be 100644
--- a/test/set_roi.cc
+++ b/test/set_roi.cc
@@ -18,11 +18,9 @@
 
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "test/acm_random.h"
+#include "vp8/encoder/onyx_int.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"
-extern "C" {
-#include "vp8/encoder/onyx_int.h"
-}
 
 using libvpx_test::ACMRandom;
 
diff --git a/test/subtract_test.cc b/test/subtract_test.cc
index d1f272972..9e242a246 100644
--- a/test/subtract_test.cc
+++ b/test/subtract_test.cc
@@ -12,13 +12,11 @@
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
-extern "C" {
 #include "./vpx_config.h"
 #include "./vp8_rtcd.h"
 #include "vp8/common/blockd.h"
 #include "vp8/encoder/block.h"
 #include "vpx_mem/vpx_mem.h"
-}
 
 typedef void (*subtract_b_fn_t)(BLOCK *be, BLOCKD *bd, int pitch);
 
@@ -61,7 +59,7 @@ TEST_P(SubtractBlockTest, SimpleSubtract) {
     int16_t *src_diff = be.src_diff;
     for (int r = 0; r < kBlockHeight; ++r) {
       for (int c = 0; c < kBlockWidth; ++c) {
-        src_diff[c] = static_cast<int16_t>(0xa5a5);
+        src_diff[c] = static_cast<int16_t>(0xa5a5u);
       }
       src_diff += kDiffPredStride;
     }
diff --git a/test/svc_test.cc b/test/svc_test.cc
index 3ddd9c145..75659d50d 100644
--- a/test/svc_test.cc
+++ b/test/svc_test.cc
@@ -234,7 +234,7 @@ TEST_F(SvcTest, FirstFrameHasLayers) {
   video.Begin();
 
   res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
-                       video.duration(), VPX_DL_REALTIME);
+                       video.duration(), VPX_DL_GOOD_QUALITY);
   EXPECT_EQ(VPX_CODEC_OK, res);
 
   const vpx_codec_err_t res_dec = decoder_->DecodeFrame(
@@ -262,7 +262,7 @@ TEST_F(SvcTest, EncodeThreeFrames) {
   video.Begin();
   // This frame is a keyframe.
   res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
-                       video.duration(), VPX_DL_REALTIME);
+                       video.duration(), VPX_DL_GOOD_QUALITY);
   ASSERT_EQ(VPX_CODEC_OK, res);
   EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));
 
@@ -275,7 +275,7 @@ TEST_F(SvcTest, EncodeThreeFrames) {
   video.Next();
   // This is a P-frame.
   res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
-                       video.duration(), VPX_DL_REALTIME);
+                       video.duration(), VPX_DL_GOOD_QUALITY);
   ASSERT_EQ(VPX_CODEC_OK, res);
   EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));
 
@@ -288,7 +288,7 @@ TEST_F(SvcTest, EncodeThreeFrames) {
   video.Next();
   // This is a P-frame.
   res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
-                       video.duration(), VPX_DL_REALTIME);
+                       video.duration(), VPX_DL_GOOD_QUALITY);
   ASSERT_EQ(VPX_CODEC_OK, res);
   EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));
 
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index 6daf69e63..a28773133 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -574,3 +574,5 @@ d48c5db1b0f8e60521a7c749696b8067886033a3  vp90-2-09-aq2.webm
 84c1599298aac78f2fc05ae2274575d10569dfa0  vp90-2-09-aq2.webm.md5
 55fc55ed73d578ed60fad05692579873f8bad758  vp90-2-09-lf_deltas.webm
 54638c38009198c38c8f3b25c182b709b6c1fd2e  vp90-2-09-lf_deltas.webm.md5
+510d95f3beb3b51c572611fdaeeece12277dac30  vp90-2-10-show-existing-frame.webm
+14d631096f4bfa2d71f7f739aec1448fb3c33bad  vp90-2-10-show-existing-frame.webm.md5
diff --git a/test/test.mk b/test/test.mk
index 178b16210..a65decf73 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -36,8 +36,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ../md5_utils.h ../md5_utils.c
 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.cc
 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.h
 LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ivf_video_source.h
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += external_frame_buffer_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += lru_frame_buffer_test.cc
 
 ## WebM Parsing
 NESTEGG_SRCS                           += ../nestegg/halloc/halloc.h
@@ -678,6 +676,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-09-lf_deltas.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-09-lf_deltas.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame.webm.md5
 
 ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
 # BBB VP9 streams
diff --git a/test/test_libvpx.cc b/test/test_libvpx.cc
index bbcbcf9bc..fc035af8f 100644
--- a/test/test_libvpx.cc
+++ b/test/test_libvpx.cc
@@ -9,10 +9,10 @@
  */
 #include <string>
 #include "./vpx_config.h"
-extern "C" {
 #if ARCH_X86 || ARCH_X86_64
 #include "vpx_ports/x86.h"
 #endif
+extern "C" {
 #if CONFIG_VP8
 extern void vp8_rtcd();
 #endif
diff --git a/test/test_vector_test.cc b/test/test_vector_test.cc
index 4adf9af91..53b7636b6 100644
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@@ -35,7 +35,7 @@ class TestVectorTest : public ::libvpx_test::DecoderTest,
 
   void OpenMD5File(const std::string& md5_file_name_) {
     md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
-    ASSERT_TRUE(md5_file_) << "Md5 file open failed. Filename: "
+    ASSERT_TRUE(md5_file_ != NULL) << "Md5 file open failed. Filename: "
         << md5_file_name_;
   }
 
diff --git a/test/test_vectors.cc b/test/test_vectors.cc
index 16298d0ff..3227f5253 100644
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc
@@ -160,6 +160,7 @@ const char *kVP9TestVectors[kNumVp9TestVectors] = {
   "vp90-2-02-size-lf-1920x1080.webm",
   "vp90-2-09-aq2.webm",
   "vp90-2-09-lf_deltas.webm",
+  "vp90-2-10-show-existing-frame.webm",
 #if CONFIG_NON420
   "vp91-2-04-yv444.webm"
 #endif
diff --git a/test/test_vectors.h b/test/test_vectors.h
index 5f62e9919..eb592de64 100644
--- a/test/test_vectors.h
+++ b/test/test_vectors.h
@@ -22,9 +22,9 @@ extern const char *kVP8TestVectors[kNumVp8TestVectors];
 
 #if CONFIG_VP9_DECODER
 #if CONFIG_NON420
-const int kNumVp9TestVectors = 216;
+const int kNumVp9TestVectors = 217;
 #else
-const int kNumVp9TestVectors = 215;
+const int kNumVp9TestVectors = 216;
 #endif
 
 extern const char *kVP9TestVectors[kNumVp9TestVectors];
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 3f5560170..b9144ffab 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -17,17 +17,15 @@
 
 #include "vpx/vpx_integer.h"
 #include "./vpx_config.h"
-extern "C" {
 #include "vpx_mem/vpx_mem.h"
 #if CONFIG_VP8_ENCODER
-# include "vp8/common/variance.h"
 # include "./vp8_rtcd.h"
+# include "vp8/common/variance.h"
 #endif
 #if CONFIG_VP9_ENCODER
-# include "vp9/encoder/vp9_variance.h"
 # include "./vp9_rtcd.h"
+# include "vp9/encoder/vp9_variance.h"
 #endif
-}
 #include "test/acm_random.h"
 
 namespace {
diff --git a/test/vp8_boolcoder_test.cc b/test/vp8_boolcoder_test.cc
index 0383af207..7c6c60148 100644
--- a/test/vp8_boolcoder_test.cc
+++ b/test/vp8_boolcoder_test.cc
@@ -20,10 +20,8 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "vpx/vpx_integer.h"
 
-extern "C" {
 #include "vp8/encoder/boolhuff.h"
 #include "vp8/decoder/dboolhuff.h"
-}
 
 namespace {
 const int num_tests = 10;
@@ -45,7 +43,7 @@ void encrypt_buffer(uint8_t *buffer, int size) {
 
 void test_decrypt_cb(void *decrypt_state, const uint8_t *input,
                            uint8_t *output, int count) {
-  int offset = input - reinterpret_cast<uint8_t *>(decrypt_state);
+  const size_t offset = input - reinterpret_cast<uint8_t*>(decrypt_state);
   for (int i = 0; i < count; i++) {
     output[i] = input[i] ^ secret_key[(offset + i) & 15];
   }
diff --git a/test/vp9_boolcoder_test.cc b/test/vp9_boolcoder_test.cc
index 1593c0267..c579adeac 100644
--- a/test/vp9_boolcoder_test.cc
+++ b/test/vp9_boolcoder_test.cc
@@ -14,10 +14,8 @@
 
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
-extern "C" {
 #include "vp9/decoder/vp9_reader.h"
 #include "vp9/encoder/vp9_writer.h"
-}
 
 #include "test/acm_random.h"
 #include "vpx/vpx_integer.h"
diff --git a/test/vp9_subtract_test.cc b/test/vp9_subtract_test.cc
index e4c4cfeff..d7df2867d 100644
--- a/test/vp9_subtract_test.cc
+++ b/test/vp9_subtract_test.cc
@@ -12,12 +12,10 @@
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
-extern "C" {
 #include "./vpx_config.h"
 #include "./vp9_rtcd.h"
 #include "vp9/common/vp9_blockd.h"
 #include "vpx_mem/vpx_mem.h"
-}
 
 typedef void (*subtract_fn_t)(int rows, int cols,
                               int16_t *diff_ptr, ptrdiff_t diff_stride,
diff --git a/test/y4m_video_source.h b/test/y4m_video_source.h
index bd86c2c04..20d2be02b 100644
--- a/test/y4m_video_source.h
+++ b/test/y4m_video_source.h
@@ -12,9 +12,7 @@
 #include <string>
 
 #include "test/video_source.h"
-extern "C" {
 #include "./y4minput.h"
-}
 
 namespace libvpx_test {
 
diff --git a/tools_common.c b/tools_common.c
index 9c24983de..85bedc99d 100644
--- a/tools_common.c
+++ b/tools_common.c
@@ -15,6 +15,10 @@
 #include <stdlib.h>
 #include <string.h>
 
+#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
+#include "vpx/vp8dx.h"
+#endif
+
 #if defined(_WIN32) || defined(__OS2__)
 #include <io.h>
 #include <fcntl.h>
@@ -60,6 +64,15 @@ void warn(const char *fmt, ...) {
   LOG_ERROR("Warning");
 }
 
+void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
+  const char *detail = vpx_codec_error_detail(ctx);
+
+  printf("%s: %s\n", s, vpx_codec_error(ctx));
+  if (detail)
+    printf("    %s\n", detail);
+  exit(EXIT_FAILURE);
+}
+
 uint16_t mem_get_le16(const void *data) {
   uint16_t val;
   const uint8_t *mem = (const uint8_t*)data;
@@ -130,3 +143,34 @@ int read_yuv_frame(struct VpxInputContext *input_ctx, vpx_image_t *yuv_frame) {
 
   return shortread;
 }
+
+vpx_codec_iface_t *get_codec_interface(unsigned int fourcc) {
+  switch (fourcc) {
+#if CONFIG_VP8_DECODER
+    case VP8_FOURCC:
+      return vpx_codec_vp8_dx();
+#endif
+#if CONFIG_VP9_DECODER
+    case VP9_FOURCC:
+      return vpx_codec_vp9_dx();
+#endif
+    default:
+      return NULL;
+  }
+  return NULL;
+}
+
+void vpx_img_write(const vpx_image_t *img, FILE *file) {
+  int plane, y;
+
+  for (plane = 0; plane < 3; ++plane) {
+    const unsigned char *buf = img->planes[plane];
+    const int stride = img->stride[plane];
+    const int w = plane ? (img->d_w + 1) >> 1 : img->d_w;
+    const int h = plane ? (img->d_h + 1) >> 1 : img->d_h;
+    for (y = 0; y < h; ++y) {
+      fwrite(buf, 1, w, file);
+      buf += stride;
+    }
+  }
+}
diff --git a/tools_common.h b/tools_common.h
index 1d70ab5e8..967b7a1fb 100644
--- a/tools_common.h
+++ b/tools_common.h
@@ -13,6 +13,7 @@
 #include <stdio.h>
 
 #include "./vpx_config.h"
+#include "vpx/vpx_codec.h"
 #include "vpx/vpx_image.h"
 #include "vpx/vpx_integer.h"
 
@@ -112,6 +113,8 @@ void die(const char *fmt, ...);
 void fatal(const char *fmt, ...);
 void warn(const char *fmt, ...);
 
+void die_codec(vpx_codec_ctx_t *ctx, const char *s);
+
 /* The tool including this file must define usage_exit() */
 void usage_exit();
 
@@ -120,6 +123,12 @@ uint32_t mem_get_le32(const void *data);
 
 int read_yuv_frame(struct VpxInputContext *input_ctx, vpx_image_t *yuv_frame);
 
+vpx_codec_iface_t *get_codec_interface(unsigned int fourcc);
+
+// TODO(dkovalev): move this function to vpx_image.{c, h}, so it will be part
+// of vpx_image_t support
+void vpx_img_write(const vpx_image_t *img, FILE *file);
+
 #ifdef __cplusplus
 }  /* extern "C" */
 #endif
diff --git a/vp8/common/alloccommon.h b/vp8/common/alloccommon.h
index 38f89a0f1..93e99d76b 100644
--- a/vp8/common/alloccommon.h
+++ b/vp8/common/alloccommon.h
@@ -14,10 +14,18 @@
 
 #include "onyxc_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp8_create_common(VP8_COMMON *oci);
 void vp8_remove_common(VP8_COMMON *oci);
 void vp8_de_alloc_frame_buffers(VP8_COMMON *oci);
 int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height);
 void vp8_setup_version(VP8_COMMON *oci);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_ALLOCCOMMON_H_
diff --git a/vp8/common/arm/bilinearfilter_arm.h b/vp8/common/arm/bilinearfilter_arm.h
index dd3ff14fb..6b84e6f3b 100644
--- a/vp8/common/arm/bilinearfilter_arm.h
+++ b/vp8/common/arm/bilinearfilter_arm.h
@@ -12,6 +12,10 @@
 #ifndef VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
 #define VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern void vp8_filter_block2d_bil_first_pass_armv6
 (
     const unsigned char  *src_ptr,
@@ -32,4 +36,8 @@ extern void vp8_filter_block2d_bil_second_pass_armv6
     const short         *vp8_filter
 );
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index c3caee10e..ea1a6a4ad 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -20,6 +20,10 @@ void vpx_log(const char *format, ...);
 #include "treecoder.h"
 #include "vpx_ports/mem.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*#define DCPRED 1*/
 #define DCPREDSIMTHRESH 0
 #define DCPREDCNTTHRESH 3
@@ -297,4 +301,8 @@ typedef struct macroblockd
 extern void vp8_build_block_doffsets(MACROBLOCKD *x);
 extern void vp8_setup_block_dptrs(MACROBLOCKD *x);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_BLOCKD_H_
diff --git a/vp8/common/coefupdateprobs.h b/vp8/common/coefupdateprobs.h
index 90d290d6e..d96a19e74 100644
--- a/vp8/common/coefupdateprobs.h
+++ b/vp8/common/coefupdateprobs.h
@@ -11,6 +11,10 @@
 #ifndef VP8_COMMON_COEFUPDATEPROBS_H_
 #define VP8_COMMON_COEFUPDATEPROBS_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Update probabilities for the nodes in the token entropy tree.
    Generated file included by entropy.c */
 
@@ -186,4 +190,8 @@ const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTE
     },
 };
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_COEFUPDATEPROBS_H_
diff --git a/vp8/common/common.h b/vp8/common/common.h
index 9671da00f..ee5b58c75 100644
--- a/vp8/common/common.h
+++ b/vp8/common/common.h
@@ -18,6 +18,10 @@
 
 #include "vpx_mem/vpx_mem.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Only need this for fixed-size arrays, for structs just assign. */
 
 #define vp8_copy( Dest, Src) { \
@@ -37,4 +41,8 @@
 #define vp8_zero_array( Dest, N)  vpx_memset( Dest, 0, N * sizeof( *Dest));
 
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_COMMON_H_
diff --git a/vp8/common/default_coef_probs.h b/vp8/common/default_coef_probs.h
index 836854594..4d69e4be6 100644
--- a/vp8/common/default_coef_probs.h
+++ b/vp8/common/default_coef_probs.h
@@ -11,6 +11,10 @@
 #ifndef VP8_COMMON_DEFAULT_COEF_PROBS_H_
 #define VP8_COMMON_DEFAULT_COEF_PROBS_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*Generated file, included by entropy.c*/
 
 
@@ -189,4 +193,8 @@ static const vp8_prob default_coef_probs [BLOCK_TYPES]
     }
 };
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_DEFAULT_COEF_PROBS_H_
diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h
index 175fa9f61..a90bab4ba 100644
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -15,6 +15,10 @@
 #include "treecoder.h"
 #include "blockd.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Coefficient token alphabet */
 
 #define ZERO_TOKEN              0       /* 0         Extra Bits 0+0 */
@@ -98,4 +102,8 @@ extern DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]);
 extern const int vp8_mb_feature_data_bits[MB_LVL_MAX];
 
 void vp8_coef_tree_initialize(void);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_ENTROPY_H_
diff --git a/vp8/common/entropymode.h b/vp8/common/entropymode.h
index 18af8c0b8..81bdfc4b8 100644
--- a/vp8/common/entropymode.h
+++ b/vp8/common/entropymode.h
@@ -15,6 +15,10 @@
 #include "onyxc_int.h"
 #include "treecoder.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef enum
 {
     SUBMVREF_NORMAL,
@@ -77,4 +81,8 @@ void vp8_init_mbmode_probs(VP8_COMMON *x);
 void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]);
 void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_ENTROPYMODE_H_
diff --git a/vp8/common/entropymv.h b/vp8/common/entropymv.h
index 7d16b988f..42840d58a 100644
--- a/vp8/common/entropymv.h
+++ b/vp8/common/entropymv.h
@@ -14,6 +14,10 @@
 
 #include "treecoder.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 enum
 {
     mv_max  = 1023,              /* max absolute value of a MV component */
@@ -41,4 +45,8 @@ typedef struct mv_context
 
 extern const MV_CONTEXT vp8_mv_update_probs[2], vp8_default_mv_context[2];
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_ENTROPYMV_H_
diff --git a/vp8/common/extend.h b/vp8/common/extend.h
index b7e704079..068f4ac52 100644
--- a/vp8/common/extend.h
+++ b/vp8/common/extend.h
@@ -14,6 +14,10 @@
 
 #include "vpx_scale/yv12config.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr);
 void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
                                YV12_BUFFER_CONFIG *dst);
@@ -22,4 +26,8 @@ void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
                                          int srcy, int srcx,
                                          int srch, int srcw);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_EXTEND_H_
diff --git a/vp8/common/filter.h b/vp8/common/filter.h
index c2048ee13..cfba775fc 100644
--- a/vp8/common/filter.h
+++ b/vp8/common/filter.h
@@ -14,6 +14,10 @@
 
 #include "vpx_ports/mem.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define BLOCK_HEIGHT_WIDTH 4
 #define VP8_FILTER_WEIGHT 128
 #define VP8_FILTER_SHIFT  7
@@ -21,4 +25,8 @@
 extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]);
 extern DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_FILTER_H_
diff --git a/vp8/common/findnearmv.h b/vp8/common/findnearmv.h
index 1525db24f..3c8c0506f 100644
--- a/vp8/common/findnearmv.h
+++ b/vp8/common/findnearmv.h
@@ -17,6 +17,10 @@
 #include "modecont.h"
 #include "treecoder.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 
 static void mv_bias(int refmb_ref_frame_sign_bias, int refframe, int_mv *mvp,
                     const int *ref_frame_sign_bias)
@@ -179,4 +183,8 @@ static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, int mi
     return (cur_mb->bmi + b - 4)->as_mode;
 }
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_FINDNEARMV_H_
diff --git a/vp8/common/header.h b/vp8/common/header.h
index 61a8f49bc..e27bca16b 100644
--- a/vp8/common/header.h
+++ b/vp8/common/header.h
@@ -12,6 +12,10 @@
 #ifndef VP8_COMMON_HEADER_H_
 #define VP8_COMMON_HEADER_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* 24 bits total */
 typedef struct
 {
@@ -40,4 +44,8 @@ typedef struct
 #endif
 
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_HEADER_H_
diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h
index 0186e6b3b..affe57e3d 100644
--- a/vp8/common/invtrans.h
+++ b/vp8/common/invtrans.h
@@ -21,6 +21,10 @@
 #include "vpx_mem/vpx_mem.h"
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static void eob_adjust(char *eobs, short *diff)
 {
     /* eob adjust.... the idct can only skip if both the dc and eob are zero */
@@ -59,4 +63,8 @@ static void vp8_inverse_transform_mby(MACROBLOCKD *xd)
                      xd->dst.y_buffer,
                      xd->dst.y_stride, xd->eobs);
 }
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_INVTRANS_H_
diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h
index 51825efc1..20a6bd375 100644
--- a/vp8/common/loopfilter.h
+++ b/vp8/common/loopfilter.h
@@ -16,6 +16,10 @@
 #include "vpx_config.h"
 #include "vp8_rtcd.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define MAX_LOOP_FILTER             63
 /* fraction of total macroblock rows to be used in fast filter level picking */
 /* has to be > 2 */
@@ -102,4 +106,8 @@ void vp8_loop_filter_row_simple(struct VP8Common *cm,
                                 int mb_row, int post_ystride, int post_uvstride,
                                 unsigned char *y_ptr, unsigned char *u_ptr,
                                 unsigned char *v_ptr);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_LOOPFILTER_H_
diff --git a/vp8/common/modecont.h b/vp8/common/modecont.h
index 875bc2754..ff34c33c5 100644
--- a/vp8/common/modecont.h
+++ b/vp8/common/modecont.h
@@ -12,6 +12,14 @@
 #ifndef VP8_COMMON_MODECONT_H_
 #define VP8_COMMON_MODECONT_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern const int vp8_mode_contexts[6][4];
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_MODECONT_H_
diff --git a/vp8/common/mv.h b/vp8/common/mv.h
index 1e4b206a2..111ccd63c 100644
--- a/vp8/common/mv.h
+++ b/vp8/common/mv.h
@@ -13,6 +13,10 @@
 #define VP8_COMMON_MV_H_
 #include "vpx/vpx_integer.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef struct
 {
     short row;
@@ -25,4 +29,8 @@ typedef union int_mv
     MV        as_mv;
 } int_mv;        /* facilitates faster equality tests and copies */
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_MV_H_
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 92eb7f99a..6d89865c6 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -26,6 +26,10 @@
 #include "header.h"
 /*#endif*/
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define MINQ 0
 #define MAXQ 127
 #define QINDEX_RANGE (MAXQ + 1)
@@ -174,4 +178,8 @@ typedef struct VP8Common
     int cpu_caps;
 } VP8_COMMON;
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_ONYXC_INT_H_
diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h
index 10baf6ccf..33d0a7f02 100644
--- a/vp8/common/postproc.h
+++ b/vp8/common/postproc.h
@@ -26,6 +26,10 @@ struct postproc_state
 };
 #include "onyxc_int.h"
 #include "ppflags.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest,
                         vp8_ppflags_t *flags);
 
@@ -47,4 +51,8 @@ void vp8_deblock(struct VP8Common           *oci,
 #define MFQE_PRECISION 4
 
 void vp8_multiframe_quality_enhance(struct VP8Common *cm);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_POSTPROC_H_
diff --git a/vp8/common/ppflags.h b/vp8/common/ppflags.h
index 1fb37e17a..768224aad 100644
--- a/vp8/common/ppflags.h
+++ b/vp8/common/ppflags.h
@@ -11,6 +11,10 @@
 
 #ifndef VP8_COMMON_PPFLAGS_H_
 #define VP8_COMMON_PPFLAGS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 enum
 {
     VP8D_NOFILTERING            = 0,
@@ -38,4 +42,8 @@ typedef struct
     int display_mv_flag;
 } vp8_ppflags_t;
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_PPFLAGS_H_
diff --git a/vp8/common/pragmas.h b/vp8/common/pragmas.h
index be10452d2..329cc8275 100644
--- a/vp8/common/pragmas.h
+++ b/vp8/common/pragmas.h
@@ -11,6 +11,10 @@
 #ifndef VP8_COMMON_PRAGMAS_H_
 #define VP8_COMMON_PRAGMAS_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __INTEL_COMPILER
 #pragma warning(disable:997 1011 170)
 #endif
@@ -18,4 +22,8 @@
 #pragma warning(disable:4799)
 #endif
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_PRAGMAS_H_
diff --git a/vp8/common/quant_common.h b/vp8/common/quant_common.h
index 4c7457f8d..700b5e6d7 100644
--- a/vp8/common/quant_common.h
+++ b/vp8/common/quant_common.h
@@ -16,6 +16,10 @@
 #include "blockd.h"
 #include "onyxc_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern int vp8_ac_yquant(int QIndex);
 extern int vp8_dc_quant(int QIndex, int Delta);
 extern int vp8_dc2quant(int QIndex, int Delta);
@@ -23,4 +27,8 @@ extern int vp8_ac2quant(int QIndex, int Delta);
 extern int vp8_dc_uv_quant(int QIndex, int Delta);
 extern int vp8_ac_uv_quant(int QIndex, int Delta);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_QUANT_COMMON_H_
diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h
index 50ebedc00..ba979b966 100644
--- a/vp8/common/reconinter.h
+++ b/vp8/common/reconinter.h
@@ -12,6 +12,10 @@
 #ifndef VP8_COMMON_RECONINTER_H_
 #define VP8_COMMON_RECONINTER_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x);
 extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
                                                unsigned char *dst_y,
@@ -32,4 +36,8 @@ extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch,
 extern void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x);
 extern void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_RECONINTER_H_
diff --git a/vp8/common/reconintra4x4.h b/vp8/common/reconintra4x4.h
index cbb06ce78..ed59c9edd 100644
--- a/vp8/common/reconintra4x4.h
+++ b/vp8/common/reconintra4x4.h
@@ -13,6 +13,10 @@
 #define VP8_COMMON_RECONINTRA4X4_H_
 #include "vp8/common/blockd.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static void intra_prediction_down_copy(MACROBLOCKD *xd,
                                              unsigned char *above_right_src)
 {
@@ -29,4 +33,8 @@ static void intra_prediction_down_copy(MACROBLOCKD *xd,
     *dst_ptr2 = *src_ptr;
 }
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_RECONINTRA4X4_H_
diff --git a/vp8/common/setupintrarecon.h b/vp8/common/setupintrarecon.h
index 3db507b88..608f4a9ac 100644
--- a/vp8/common/setupintrarecon.h
+++ b/vp8/common/setupintrarecon.h
@@ -12,6 +12,10 @@
 #define VP8_COMMON_SETUPINTRARECON_H_
 
 #include "vpx_scale/yv12config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 extern void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf);
 extern void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf);
 
@@ -34,4 +38,8 @@ void setup_intra_recon_left(unsigned char *y_buffer,
         v_buffer[uv_stride *i] = (unsigned char) 129;
 }
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_SETUPINTRARECON_H_
diff --git a/vp8/common/swapyv12buffer.h b/vp8/common/swapyv12buffer.h
index ea8977b10..1d66cd3d6 100644
--- a/vp8/common/swapyv12buffer.h
+++ b/vp8/common/swapyv12buffer.h
@@ -14,6 +14,14 @@
 
 #include "vpx_scale/yv12config.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, YV12_BUFFER_CONFIG *last_frame);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_SWAPYV12BUFFER_H_
diff --git a/vp8/common/systemdependent.h b/vp8/common/systemdependent.h
index 966cc5d8b..3d44e37cf 100644
--- a/vp8/common/systemdependent.h
+++ b/vp8/common/systemdependent.h
@@ -13,7 +13,15 @@
 
 #include "vpx_config.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct VP8Common;
 void vp8_machine_specific_config(struct VP8Common *);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_SYSTEMDEPENDENT_H_
diff --git a/vp8/common/threading.h b/vp8/common/threading.h
index 8cf6d26bc..01c82dbb8 100644
--- a/vp8/common/threading.h
+++ b/vp8/common/threading.h
@@ -12,6 +12,10 @@
 #ifndef VP8_COMMON_THREADING_H_
 #define VP8_COMMON_THREADING_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD
 
 /* Thread management macros */
@@ -183,4 +187,8 @@ static inline int sem_destroy(sem_t * sem)
 
 #endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_THREADING_H_
diff --git a/vp8/common/treecoder.h b/vp8/common/treecoder.h
index edb4b5775..d22b7c570 100644
--- a/vp8/common/treecoder.h
+++ b/vp8/common/treecoder.h
@@ -12,6 +12,10 @@
 #ifndef VP8_COMMON_TREECODER_H_
 #define VP8_COMMON_TREECODER_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef unsigned char vp8bc_index_t; /* probability index */
 
 
@@ -87,4 +91,8 @@ void vp8bc_tree_probs_from_distribution(
 );
 
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_TREECODER_H_
diff --git a/vp8/common/variance.h b/vp8/common/variance.h
index 12a03d0fb..89a32a722 100644
--- a/vp8/common/variance.h
+++ b/vp8/common/variance.h
@@ -14,6 +14,10 @@
 
 #include "vpx_config.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef unsigned int(*vp8_sad_fn_t)(
     const unsigned char *src_ptr,
     int source_stride,
@@ -112,4 +116,8 @@ typedef struct variance_vtable
 #endif
 } vp8_variance_fn_ptr_t;
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_VARIANCE_H_
diff --git a/vp8/common/vp8_entropymodedata.h b/vp8/common/vp8_entropymodedata.h
index 988114804..c4aed4989 100644
--- a/vp8/common/vp8_entropymodedata.h
+++ b/vp8/common/vp8_entropymodedata.h
@@ -11,6 +11,10 @@
 #ifndef VP8_COMMON_VP8_ENTROPYMODEDATA_H_
 #define VP8_COMMON_VP8_ENTROPYMODEDATA_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*Generated file, included by entropymode.c*/
 
 
@@ -243,4 +247,8 @@ const vp8_prob vp8_kf_bmode_prob
     }
 };
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_VP8_ENTROPYMODEDATA_H_
diff --git a/vp8/common/x86/filter_x86.h b/vp8/common/x86/filter_x86.h
index 0d537d9df..d282841be 100644
--- a/vp8/common/x86/filter_x86.h
+++ b/vp8/common/x86/filter_x86.h
@@ -13,6 +13,10 @@
 
 #include "vpx_ports/mem.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* x86 assembly specific copy of vp8/common/filter.c:vp8_bilinear_filters with
  * duplicated values */
 
@@ -22,4 +26,8 @@ extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]);
 /* duplicated 8x */
 extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_COMMON_X86_FILTER_X86_H_
diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h
index 82de6b8c8..36af7eed5 100644
--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@@ -19,6 +19,10 @@
 #include "vpx_ports/mem.h"
 #include "vpx/vpx_integer.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef size_t VP8_BD_VALUE;
 
 #define VP8_BD_VALUE_SIZE ((int)sizeof(VP8_BD_VALUE)*CHAR_BIT)
@@ -135,4 +139,8 @@ static int vp8dx_bool_error(BOOL_DECODER *br)
     return 0;
 }
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_DECODER_DBOOLHUFF_H_
diff --git a/vp8/decoder/decodemv.h b/vp8/decoder/decodemv.h
index b5d750ce4..f33b07351 100644
--- a/vp8/decoder/decodemv.h
+++ b/vp8/decoder/decodemv.h
@@ -13,6 +13,14 @@
 
 #include "onyxd_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp8_decode_mode_mvs(VP8D_COMP *);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_DECODER_DECODEMV_H_
diff --git a/vp8/decoder/decoderthreading.h b/vp8/decoder/decoderthreading.h
index 3a8277f4e..c563cf6e9 100644
--- a/vp8/decoder/decoderthreading.h
+++ b/vp8/decoder/decoderthreading.h
@@ -11,6 +11,10 @@
 #ifndef VP8_DECODER_DECODERTHREADING_H_
 #define VP8_DECODER_DECODERTHREADING_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #if CONFIG_MULTITHREAD
 void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd);
 void vp8_decoder_remove_threads(VP8D_COMP *pbi);
@@ -19,4 +23,8 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows);
 void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows);
 #endif
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_DECODER_DECODERTHREADING_H_
diff --git a/vp8/decoder/detokenize.h b/vp8/decoder/detokenize.h
index f134df869..f0b125444 100644
--- a/vp8/decoder/detokenize.h
+++ b/vp8/decoder/detokenize.h
@@ -13,7 +13,15 @@
 
 #include "onyxd_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp8_reset_mb_tokens_context(MACROBLOCKD *x);
 int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_DECODER_DETOKENIZE_H_
diff --git a/vp8/decoder/ec_types.h b/vp8/decoder/ec_types.h
index 0a3123a65..3af5ca86b 100644
--- a/vp8/decoder/ec_types.h
+++ b/vp8/decoder/ec_types.h
@@ -11,6 +11,10 @@
 #ifndef VP8_DECODER_EC_TYPES_H_
 #define VP8_DECODER_EC_TYPES_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define MAX_OVERLAPS 16
 
 
@@ -47,4 +51,8 @@ typedef struct
     MV_REFERENCE_FRAME ref_frame;
 } EC_BLOCK;
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_DECODER_EC_TYPES_H_
diff --git a/vp8/decoder/error_concealment.h b/vp8/decoder/error_concealment.h
index 10bf870b7..9a1e02486 100644
--- a/vp8/decoder/error_concealment.h
+++ b/vp8/decoder/error_concealment.h
@@ -15,6 +15,10 @@
 #include "onyxd_int.h"
 #include "ec_types.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Allocate memory for the overlap lists */
 int vp8_alloc_overlap_lists(VP8D_COMP *pbi);
 
@@ -38,4 +42,8 @@ void vp8_interpolate_motion(MACROBLOCKD *mb,
  */
 void vp8_conceal_corrupt_mb(MACROBLOCKD *xd);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_DECODER_ERROR_CONCEALMENT_H_
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index 3728152b0..8ef489403 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -22,6 +22,10 @@
 #include "ec_types.h"
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef struct
 {
     int ithread;
@@ -148,4 +152,8 @@ int vp8_remove_decoder_instances(struct frame_buffers *fb);
     } while(0)
 #endif
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_DECODER_ONYXD_INT_H_
diff --git a/vp8/decoder/treereader.h b/vp8/decoder/treereader.h
index 1d3f67207..35ee69600 100644
--- a/vp8/decoder/treereader.h
+++ b/vp8/decoder/treereader.h
@@ -15,6 +15,10 @@
 #include "vp8/common/treecoder.h"
 #include "dboolhuff.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef BOOL_DECODER vp8_reader;
 
 #define vp8_read vp8dx_decode_bool
@@ -37,4 +41,8 @@ static int vp8_treed_read(
     return -i;
 }
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_DECODER_TREEREADER_H_
diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h
index 01aa808d0..eef2d79e0 100644
--- a/vp8/encoder/bitstream.h
+++ b/vp8/encoder/bitstream.h
@@ -12,6 +12,10 @@
 #ifndef VP8_ENCODER_BITSTREAM_H_
 #define VP8_ENCODER_BITSTREAM_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #if HAVE_EDSP
 void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount,
                              const vp8_token *,
@@ -43,4 +47,8 @@ void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount);
 # define pack_mb_row_tokens(a,b)               pack_mb_row_tokens_c(a,b)
 #endif
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_BITSTREAM_H_
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index 0dc0d8600..dd733e55a 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -18,6 +18,10 @@
 #include "vp8/common/entropy.h"
 #include "vpx_ports/mem.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define MAX_MODES 20
 #define MAX_ERROR_BINS 1024
 
@@ -160,4 +164,8 @@ typedef struct macroblock
 } MACROBLOCK;
 
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_BLOCK_H_
diff --git a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h
index 8f451b7f0..611421575 100644
--- a/vp8/encoder/boolhuff.h
+++ b/vp8/encoder/boolhuff.h
@@ -22,6 +22,10 @@
 #include "vpx_ports/mem.h"
 #include "vpx/internal/vpx_codec_internal.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef struct
 {
     unsigned int lowvalue;
@@ -125,4 +129,8 @@ static void vp8_encode_bool(BOOL_CODER *br, int bit, int probability)
     br->range = range;
 }
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_BOOLHUFF_H_
diff --git a/vp8/encoder/dct_value_cost.h b/vp8/encoder/dct_value_cost.h
index f754e974e..1cd3eec84 100644
--- a/vp8/encoder/dct_value_cost.h
+++ b/vp8/encoder/dct_value_cost.h
@@ -11,6 +11,10 @@
 #ifndef VP8_ENCODER_DCT_VALUE_COST_H_
 #define VP8_ENCODER_DCT_VALUE_COST_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Generated file, included by tokenize.c  */
 /* Values generated by fill_value_tokens() */
 
@@ -360,4 +364,8 @@ static const short dct_value_cost[2048*2] =
     8251, 8257, 8265, 8275
 };
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_DCT_VALUE_COST_H_
diff --git a/vp8/encoder/dct_value_tokens.h b/vp8/encoder/dct_value_tokens.h
index e4132c62b..c2aadefca 100644
--- a/vp8/encoder/dct_value_tokens.h
+++ b/vp8/encoder/dct_value_tokens.h
@@ -11,6 +11,10 @@
 #ifndef VP8_ENCODER_DCT_VALUE_TOKENS_H_
 #define VP8_ENCODER_DCT_VALUE_TOKENS_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Generated file, included by tokenize.c  */
 /* Values generated by fill_value_tokens() */
 
@@ -701,4 +705,8 @@ static const TOKENVALUE dct_value_tokens[2048*2] =
     {10, 3954}, {10, 3956}, {10, 3958}, {10, 3960}
 };
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_DCT_VALUE_TOKENS_H_
diff --git a/vp8/encoder/defaultcoefcounts.h b/vp8/encoder/defaultcoefcounts.h
index 3015a585e..1e8e80484 100644
--- a/vp8/encoder/defaultcoefcounts.h
+++ b/vp8/encoder/defaultcoefcounts.h
@@ -11,6 +11,10 @@
 #ifndef VP8_ENCODER_DEFAULTCOEFCOUNTS_H_
 #define VP8_ENCODER_DEFAULTCOEFCOUNTS_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Generated file, included by entropy.c */
 
 static const unsigned int default_coef_counts[BLOCK_TYPES]
@@ -225,4 +229,8 @@ static const unsigned int default_coef_counts[BLOCK_TYPES]
     },
 };
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_DEFAULTCOEFCOUNTS_H_
diff --git a/vp8/encoder/denoising.h b/vp8/encoder/denoising.h
index 83fb93a14..cc9913afa 100644
--- a/vp8/encoder/denoising.h
+++ b/vp8/encoder/denoising.h
@@ -13,6 +13,10 @@
 
 #include "block.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define SUM_DIFF_THRESHOLD (16 * 16 * 2)
 #define MOTION_MAGNITUDE_THRESHOLD (8*3)
 
@@ -39,4 +43,8 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
                              int recon_yoffset,
                              int recon_uvoffset);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_DENOISING_H_
diff --git a/vp8/encoder/encodeframe.h b/vp8/encoder/encodeframe.h
index 180596bf1..e185c1035 100644
--- a/vp8/encoder/encodeframe.h
+++ b/vp8/encoder/encodeframe.h
@@ -9,6 +9,10 @@
  */
 #ifndef VP8_ENCODER_ENCODEFRAME_H_
 #define VP8_ENCODER_ENCODEFRAME_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 extern void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x);
 
 extern void vp8_build_block_offsets(MACROBLOCK *x);
@@ -24,4 +28,8 @@ extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
 
 extern int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
         TOKENEXTRA **t);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_ENCODEFRAME_H_
diff --git a/vp8/encoder/encodeintra.h b/vp8/encoder/encodeintra.h
index c6da43b8f..a8d0284d2 100644
--- a/vp8/encoder/encodeintra.h
+++ b/vp8/encoder/encodeintra.h
@@ -13,9 +13,17 @@
 #define VP8_ENCODER_ENCODEINTRA_H_
 #include "onyx_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred);
 void vp8_encode_intra16x16mby(MACROBLOCK *x);
 void vp8_encode_intra16x16mbuv(MACROBLOCK *x);
 void vp8_encode_intra4x4mby(MACROBLOCK *mb);
 void vp8_encode_intra4x4block(MACROBLOCK *x, int ib);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_ENCODEINTRA_H_
diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h
index cbe62e9e5..0b3ec875e 100644
--- a/vp8/encoder/encodemb.h
+++ b/vp8/encoder/encodemb.h
@@ -13,6 +13,10 @@
 #define VP8_ENCODER_ENCODEMB_H_
 
 #include "onyx_int.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 void vp8_encode_inter16x16(MACROBLOCK *x);
 
 void vp8_build_dcblock(MACROBLOCK *b);
@@ -23,4 +27,8 @@ void vp8_transform_intra_mby(MACROBLOCK *x);
 void vp8_optimize_mby(MACROBLOCK *x);
 void vp8_optimize_mbuv(MACROBLOCK *x);
 void vp8_encode_inter16x16y(MACROBLOCK *x);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_ENCODEMB_H_
diff --git a/vp8/encoder/encodemv.h b/vp8/encoder/encodemv.h
index 65e7ac21a..722162ba2 100644
--- a/vp8/encoder/encodemv.h
+++ b/vp8/encoder/encodemv.h
@@ -14,8 +14,16 @@
 
 #include "onyx_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp8_write_mvprobs(VP8_COMP *);
 void vp8_encode_motion_vector(vp8_writer *, const MV *, const MV_CONTEXT *);
 void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int mvc_flag[2]);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_ENCODEMV_H_
diff --git a/vp8/encoder/firstpass.h b/vp8/encoder/firstpass.h
index cf686793c..c409ebca8 100644
--- a/vp8/encoder/firstpass.h
+++ b/vp8/encoder/firstpass.h
@@ -12,6 +12,10 @@
 #ifndef VP8_ENCODER_FIRSTPASS_H_
 #define VP8_ENCODER_FIRSTPASS_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern void vp8_init_first_pass(VP8_COMP *cpi);
 extern void vp8_first_pass(VP8_COMP *cpi);
 extern void vp8_end_first_pass(VP8_COMP *cpi);
@@ -21,4 +25,8 @@ extern void vp8_second_pass(VP8_COMP *cpi);
 extern void vp8_end_second_pass(VP8_COMP *cpi);
 
 extern size_t vp8_firstpass_stats_sz(unsigned int mb_count);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_FIRSTPASS_H_
diff --git a/vp8/encoder/lookahead.h b/vp8/encoder/lookahead.h
index d1904fa90..cad68e639 100644
--- a/vp8/encoder/lookahead.h
+++ b/vp8/encoder/lookahead.h
@@ -12,6 +12,10 @@
 #include "vpx_scale/yv12config.h"
 #include "vpx/vpx_integer.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct lookahead_entry
 {
     YV12_BUFFER_CONFIG  img;
@@ -106,4 +110,8 @@ unsigned int
 vp8_lookahead_depth(struct lookahead_ctx *ctx);
 
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_LOOKAHEAD_H_
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index 85bc7add8..f284f7c38 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -15,6 +15,10 @@
 #include "block.h"
 #include "vp8/common/variance.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef VP8_ENTROPY_STATS
 extern void init_mv_ref_counts();
 extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]);
@@ -104,4 +108,8 @@ typedef int (*vp8_diamond_search_fn_t)
      int_mv *center_mv
     );
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_MCOMP_H_
diff --git a/vp8/encoder/modecosts.h b/vp8/encoder/modecosts.h
index 2df944643..9281551c8 100644
--- a/vp8/encoder/modecosts.h
+++ b/vp8/encoder/modecosts.h
@@ -12,6 +12,14 @@
 #ifndef VP8_ENCODER_MODECOSTS_H_
 #define VP8_ENCODER_MODECOSTS_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp8_init_mode_costs(VP8_COMP *x);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_MODECOSTS_H_
diff --git a/vp8/encoder/mr_dissim.h b/vp8/encoder/mr_dissim.h
index 8b22566bb..5a59ce62a 100644
--- a/vp8/encoder/mr_dissim.h
+++ b/vp8/encoder/mr_dissim.h
@@ -13,8 +13,16 @@
 #define VP8_ENCODER_MR_DISSIM_H_
 #include "vpx_config.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern void vp8_cal_low_res_mb_cols(VP8_COMP *cpi);
 extern void vp8_cal_dissimilarity(VP8_COMP *cpi);
 extern void vp8_store_drop_frame_info(VP8_COMP *cpi);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_MR_DISSIM_H_
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 2fa6a9337..6b371671d 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -33,6 +33,10 @@
 #include "vp8/encoder/denoising.h"
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define MIN_GF_INTERVAL             4
 #define DEFAULT_GF_INTERVAL         7
 
@@ -721,4 +725,8 @@ void vp8_set_speed_features(VP8_COMP *cpi);
                                "Failed to allocate "#lval);\
     } while(0)
 #endif
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_ONYX_INT_H_
diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h
index f74cf3df1..cf3b1f8d4 100644
--- a/vp8/encoder/pickinter.h
+++ b/vp8/encoder/pickinter.h
@@ -14,6 +14,10 @@
 #include "vpx_config.h"
 #include "vp8/common/onyxc_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
                                 int recon_uvoffset, int *returnrate,
                                 int *returndistortion, int *returnintra,
@@ -24,4 +28,8 @@ extern int vp8_get_inter_mbpred_error(MACROBLOCK *mb,
                                       const vp8_variance_fn_ptr_t *vfp,
                                       unsigned int *sse,
                                       int_mv this_mv);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_PICKINTER_H_
diff --git a/vp8/encoder/psnr.h b/vp8/encoder/psnr.h
index b2106151b..0c6c088c4 100644
--- a/vp8/encoder/psnr.h
+++ b/vp8/encoder/psnr.h
@@ -12,6 +12,14 @@
 #ifndef VP8_ENCODER_PSNR_H_
 #define VP8_ENCODER_PSNR_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern double vp8_mse2psnr(double Samples, double Peak, double Mse);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_PSNR_H_
diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h
index bbad8c7c3..c739b2627 100644
--- a/vp8/encoder/quantize.h
+++ b/vp8/encoder/quantize.h
@@ -12,6 +12,10 @@
 #ifndef VP8_ENCODER_QUANTIZE_H_
 #define VP8_ENCODER_QUANTIZE_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct VP8_COMP;
 struct macroblock;
 extern void vp8_set_quantizer(struct VP8_COMP *cpi, int Q);
@@ -20,4 +24,8 @@ extern void vp8_update_zbin_extra(struct VP8_COMP *cpi, struct macroblock *x);
 extern void vp8cx_mb_init_quantizer(struct VP8_COMP *cpi, struct macroblock *x, int ok_to_skip);
 extern void vp8cx_init_quantizer(struct VP8_COMP *cpi);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_QUANTIZE_H_
diff --git a/vp8/encoder/ratectrl.h b/vp8/encoder/ratectrl.h
index 88fe67899..829697f39 100644
--- a/vp8/encoder/ratectrl.h
+++ b/vp8/encoder/ratectrl.h
@@ -14,6 +14,10 @@
 
 #include "onyx_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern void vp8_save_coding_context(VP8_COMP *cpi);
 extern void vp8_restore_coding_context(VP8_COMP *cpi);
 
@@ -26,4 +30,8 @@ extern void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_
 /* return of 0 means drop frame */
 extern int vp8_pick_frame_size(VP8_COMP *cpi);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_RATECTRL_H_
diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h
index c28c37369..fe21b8e28 100644
--- a/vp8/encoder/rdopt.h
+++ b/vp8/encoder/rdopt.h
@@ -12,6 +12,10 @@
 #ifndef VP8_ENCODER_RDOPT_H_
 #define VP8_ENCODER_RDOPT_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
 
 static void insertsortmv(int arr[], int len)
@@ -130,4 +134,8 @@ extern void vp8_mv_pred
 );
 void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[]);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_RDOPT_H_
diff --git a/vp8/encoder/segmentation.h b/vp8/encoder/segmentation.h
index 8811a8b62..6b5500594 100644
--- a/vp8/encoder/segmentation.h
+++ b/vp8/encoder/segmentation.h
@@ -15,6 +15,14 @@
 #include "vp8/common/blockd.h"
 #include "onyx_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_SEGMENTATION_H_
diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h
index f85f3c9b1..b73a9ee1c 100644
--- a/vp8/encoder/tokenize.h
+++ b/vp8/encoder/tokenize.h
@@ -15,6 +15,10 @@
 #include "vp8/common/entropy.h"
 #include "block.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp8_tokenize_initialize();
 
 typedef struct
@@ -47,4 +51,8 @@ extern const short *const vp8_dct_value_cost_ptr;
  */
 extern const TOKENVALUE *const vp8_dct_value_tokens_ptr;
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_TOKENIZE_H_
diff --git a/vp8/encoder/treewriter.h b/vp8/encoder/treewriter.h
index ba03f07e3..cfb2730ab 100644
--- a/vp8/encoder/treewriter.h
+++ b/vp8/encoder/treewriter.h
@@ -19,6 +19,10 @@
 
 #include "boolhuff.h"       /* for now */
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef BOOL_CODER vp8_writer;
 
 #define vp8_write vp8_encode_bool
@@ -123,4 +127,8 @@ void vp8_cost_tokens2(
     int *Costs, const vp8_prob *, vp8_tree, int
 );
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP8_ENCODER_TREEWRITER_H_
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 0b4c4cbbf..871b8d385 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -929,7 +929,6 @@ CODEC_INTERFACE(vpx_codec_vp8_dx) =
         vp8_get_si,       /* vpx_codec_get_si_fn_t     get_si; */
         vp8_decode,       /* vpx_codec_decode_fn_t     decode; */
         vp8_get_frame,    /* vpx_codec_frame_get_fn_t  frame_get; */
-        NOT_IMPLEMENTED,
     },
     { /* encoder functions */
         NOT_IMPLEMENTED,
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.asm b/vp9/common/arm/neon/vp9_reconintra_neon.asm
index 71bf24c9f..279f678b1 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon.asm
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.asm
@@ -18,6 +18,8 @@
     EXPORT  |vp9_h_predictor_32x32_neon|
     EXPORT  |vp9_tm_predictor_4x4_neon|
     EXPORT  |vp9_tm_predictor_8x8_neon|
+    EXPORT  |vp9_tm_predictor_16x16_neon|
+    EXPORT  |vp9_tm_predictor_32x32_neon|
     ARM
     REQUIRE8
     PRESERVE8
@@ -346,61 +348,289 @@ loop_h
     ldrb                r12, [r12]
     vdup.u8             d0, r12
 
+    ; preload 8 left
+    vld1.8              d30, [r3]
+
     ; Load above 8 pixels
     vld1.64             {d2}, [r2]
 
+    vmovl.u8            q10, d30
+
     ; Compute above - ytop_left
     vsubl.u8            q3, d2, d0
 
     ; Load left row by row and compute left + (above - ytop_left)
     ; 1st row and 2nd row
-    ldrb                r12, [r3], #1
-    ldrb                r2, [r3], #1
-    vdup.u16            q1, r12
-    vdup.u16            q2, r2
-    vadd.s16            q1, q1, q3
-    vadd.s16            q2, q2, q3
-    vqshrun.s16         d0, q1, #0
-    vqshrun.s16         d1, q2, #0
-    vst1.64             {d0}, [r0], r1
-    vst1.64             {d1}, [r0], r1
+    vdup.16             q0, d20[0]
+    vdup.16             q1, d20[1]
+    vadd.s16            q0, q3, q0
+    vadd.s16            q1, q3, q1
 
     ; 3rd row and 4th row
-    ldrb                r12, [r3], #1
-    ldrb                r2, [r3], #1
-    vdup.u16            q1, r12
-    vdup.u16            q2, r2
-    vadd.s16            q1, q1, q3
-    vadd.s16            q2, q2, q3
-    vqshrun.s16         d0, q1, #0
-    vqshrun.s16         d1, q2, #0
+    vdup.16             q8, d20[2]
+    vdup.16             q9, d20[3]
+    vadd.s16            q8, q3, q8
+    vadd.s16            q9, q3, q9
+
+    vqshrun.s16         d0, q0, #0
+    vqshrun.s16         d1, q1, #0
+    vqshrun.s16         d2, q8, #0
+    vqshrun.s16         d3, q9, #0
+
     vst1.64             {d0}, [r0], r1
     vst1.64             {d1}, [r0], r1
+    vst1.64             {d2}, [r0], r1
+    vst1.64             {d3}, [r0], r1
 
     ; 5th row and 6th row
-    ldrb                r12, [r3], #1
-    ldrb                r2, [r3], #1
-    vdup.u16            q1, r12
-    vdup.u16            q2, r2
-    vadd.s16            q1, q1, q3
-    vadd.s16            q2, q2, q3
-    vqshrun.s16         d0, q1, #0
-    vqshrun.s16         d1, q2, #0
-    vst1.64             {d0}, [r0], r1
-    vst1.64             {d1}, [r0], r1
+    vdup.16             q0, d21[0]
+    vdup.16             q1, d21[1]
+    vadd.s16            q0, q3, q0
+    vadd.s16            q1, q3, q1
+
+    ; 7th row and 8th row
+    vdup.16             q8, d21[2]
+    vdup.16             q9, d21[3]
+    vadd.s16            q8, q3, q8
+    vadd.s16            q9, q3, q9
+
+    vqshrun.s16         d0, q0, #0
+    vqshrun.s16         d1, q1, #0
+    vqshrun.s16         d2, q8, #0
+    vqshrun.s16         d3, q9, #0
 
-    ; 7rd row and 8th row
-    ldrb                r12, [r3], #1
-    ldrb                r2, [r3], #1
-    vdup.u16            q1, r12
-    vdup.u16            q2, r2
-    vadd.s16            q1, q1, q3
-    vadd.s16            q2, q2, q3
-    vqshrun.s16         d0, q1, #0
-    vqshrun.s16         d1, q2, #0
     vst1.64             {d0}, [r0], r1
     vst1.64             {d1}, [r0], r1
+    vst1.64             {d2}, [r0], r1
+    vst1.64             {d3}, [r0], r1
+
     bx                  lr
     ENDP                ; |vp9_tm_predictor_8x8_neon|
 
+;void vp9_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride,
+;                                const uint8_t *above,
+;                                const uint8_t *left)
+; r0  uint8_t *dst
+; r1  ptrdiff_t y_stride
+; r2  const uint8_t *above
+; r3  const uint8_t *left
+
+|vp9_tm_predictor_16x16_neon| PROC
+    ; Load ytop_left = above[-1];
+    sub                 r12, r2, #1
+    ldrb                r12, [r12]
+    vdup.u8             q0, r12
+
+    ; Load above 8 pixels
+    vld1.8              q1, [r2]
+
+    ; preload 8 left into r12
+    vld1.8              d18, [r3]!
+
+    ; Compute above - ytop_left
+    vsubl.u8            q2, d2, d0
+    vsubl.u8            q3, d3, d1
+
+    vmovl.u8            q10, d18
+
+    ; Load left row by row and compute left + (above - ytop_left)
+    ; Process 8 rows in each single loop and loop 2 times to process 16 rows.
+    mov                 r2, #2
+
+loop_16x16_neon
+    ; Process two rows.
+    vdup.16             q0, d20[0]
+    vdup.16             q8, d20[1]
+    vadd.s16            q1, q0, q2
+    vadd.s16            q0, q0, q3
+    vadd.s16            q11, q8, q2
+    vadd.s16            q8, q8, q3
+    vqshrun.s16         d2, q1, #0
+    vqshrun.s16         d3, q0, #0
+    vqshrun.s16         d22, q11, #0
+    vqshrun.s16         d23, q8, #0
+    vdup.16             q0, d20[2]                  ; proload next 2 rows data
+    vdup.16             q8, d20[3]
+    vst1.64             {d2,d3}, [r0], r1
+    vst1.64             {d22,d23}, [r0], r1
+
+    ; Process two rows.
+    vadd.s16            q1, q0, q2
+    vadd.s16            q0, q0, q3
+    vadd.s16            q11, q8, q2
+    vadd.s16            q8, q8, q3
+    vqshrun.s16         d2, q1, #0
+    vqshrun.s16         d3, q0, #0
+    vqshrun.s16         d22, q11, #0
+    vqshrun.s16         d23, q8, #0
+    vdup.16             q0, d21[0]                  ; proload next 2 rows data
+    vdup.16             q8, d21[1]
+    vst1.64             {d2,d3}, [r0], r1
+    vst1.64             {d22,d23}, [r0], r1
+
+    vadd.s16            q1, q0, q2
+    vadd.s16            q0, q0, q3
+    vadd.s16            q11, q8, q2
+    vadd.s16            q8, q8, q3
+    vqshrun.s16         d2, q1, #0
+    vqshrun.s16         d3, q0, #0
+    vqshrun.s16         d22, q11, #0
+    vqshrun.s16         d23, q8, #0
+    vdup.16             q0, d21[2]                  ; proload next 2 rows data
+    vdup.16             q8, d21[3]
+    vst1.64             {d2,d3}, [r0], r1
+    vst1.64             {d22,d23}, [r0], r1
+
+
+    vadd.s16            q1, q0, q2
+    vadd.s16            q0, q0, q3
+    vadd.s16            q11, q8, q2
+    vadd.s16            q8, q8, q3
+    vqshrun.s16         d2, q1, #0
+    vqshrun.s16         d3, q0, #0
+    vqshrun.s16         d22, q11, #0
+    vqshrun.s16         d23, q8, #0
+    vdup.16             q0, d20[2]
+    vdup.16             q8, d20[3]
+    vld1.8              d18, [r3]!                  ; preload 8 left into r12
+    vmovl.u8            q10, d18
+    vst1.64             {d2,d3}, [r0], r1
+    vst1.64             {d22,d23}, [r0], r1
+
+    subs                r2, r2, #1
+    bgt                 loop_16x16_neon
+
+    bx                  lr
+    ENDP                ; |vp9_tm_predictor_16x16_neon|
+
+;void vp9_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride,
+;                                  const uint8_t *above,
+;                                  const uint8_t *left)
+; r0  uint8_t *dst
+; r1  ptrdiff_t y_stride
+; r2  const uint8_t *above
+; r3  const uint8_t *left
+
+|vp9_tm_predictor_32x32_neon| PROC
+    ; Load ytop_left = above[-1];
+    sub                 r12, r2, #1
+    ldrb                r12, [r12]
+    vdup.u8             q0, r12
+
+    ; Load above 32 pixels
+    vld1.8              q1, [r2]!
+    vld1.8              q2, [r2]
+
+    ; preload 8 left pixels
+    vld1.8              d26, [r3]!
+
+    ; Compute above - ytop_left
+    vsubl.u8            q8, d2, d0
+    vsubl.u8            q9, d3, d1
+    vsubl.u8            q10, d4, d0
+    vsubl.u8            q11, d5, d1
+
+    vmovl.u8            q3, d26
+
+    ; Load left row by row and compute left + (above - ytop_left)
+    ; Process 8 rows in each single loop and loop 4 times to process 32 rows.
+    mov                 r2, #4
+
+loop_32x32_neon
+    ; Process two rows.
+    vdup.16             q0, d6[0]
+    vdup.16             q2, d6[1]
+    vadd.s16            q12, q0, q8
+    vadd.s16            q13, q0, q9
+    vadd.s16            q14, q0, q10
+    vadd.s16            q15, q0, q11
+    vqshrun.s16         d0, q12, #0
+    vqshrun.s16         d1, q13, #0
+    vadd.s16            q12, q2, q8
+    vadd.s16            q13, q2, q9
+    vqshrun.s16         d2, q14, #0
+    vqshrun.s16         d3, q15, #0
+    vadd.s16            q14, q2, q10
+    vadd.s16            q15, q2, q11
+    vst1.64             {d0-d3}, [r0], r1
+    vqshrun.s16         d24, q12, #0
+    vqshrun.s16         d25, q13, #0
+    vqshrun.s16         d26, q14, #0
+    vqshrun.s16         d27, q15, #0
+    vdup.16             q1, d6[2]
+    vdup.16             q2, d6[3]
+    vst1.64             {d24-d27}, [r0], r1
+
+    ; Process two rows.
+    vadd.s16            q12, q1, q8
+    vadd.s16            q13, q1, q9
+    vadd.s16            q14, q1, q10
+    vadd.s16            q15, q1, q11
+    vqshrun.s16         d0, q12, #0
+    vqshrun.s16         d1, q13, #0
+    vadd.s16            q12, q2, q8
+    vadd.s16            q13, q2, q9
+    vqshrun.s16         d2, q14, #0
+    vqshrun.s16         d3, q15, #0
+    vadd.s16            q14, q2, q10
+    vadd.s16            q15, q2, q11
+    vst1.64             {d0-d3}, [r0], r1
+    vqshrun.s16         d24, q12, #0
+    vqshrun.s16         d25, q13, #0
+    vqshrun.s16         d26, q14, #0
+    vqshrun.s16         d27, q15, #0
+    vdup.16             q0, d7[0]
+    vdup.16             q2, d7[1]
+    vst1.64             {d24-d27}, [r0], r1
+
+    ; Process two rows.
+    vadd.s16            q12, q0, q8
+    vadd.s16            q13, q0, q9
+    vadd.s16            q14, q0, q10
+    vadd.s16            q15, q0, q11
+    vqshrun.s16         d0, q12, #0
+    vqshrun.s16         d1, q13, #0
+    vadd.s16            q12, q2, q8
+    vadd.s16            q13, q2, q9
+    vqshrun.s16         d2, q14, #0
+    vqshrun.s16         d3, q15, #0
+    vadd.s16            q14, q2, q10
+    vadd.s16            q15, q2, q11
+    vst1.64             {d0-d3}, [r0], r1
+    vqshrun.s16         d24, q12, #0
+    vqshrun.s16         d25, q13, #0
+    vqshrun.s16         d26, q14, #0
+    vqshrun.s16         d27, q15, #0
+    vdup.16             q0, d7[2]
+    vdup.16             q2, d7[3]
+    vst1.64             {d24-d27}, [r0], r1
+
+    ; Process two rows.
+    vadd.s16            q12, q0, q8
+    vadd.s16            q13, q0, q9
+    vadd.s16            q14, q0, q10
+    vadd.s16            q15, q0, q11
+    vqshrun.s16         d0, q12, #0
+    vqshrun.s16         d1, q13, #0
+    vadd.s16            q12, q2, q8
+    vadd.s16            q13, q2, q9
+    vqshrun.s16         d2, q14, #0
+    vqshrun.s16         d3, q15, #0
+    vadd.s16            q14, q2, q10
+    vadd.s16            q15, q2, q11
+    vst1.64             {d0-d3}, [r0], r1
+    vqshrun.s16         d24, q12, #0
+    vqshrun.s16         d25, q13, #0
+    vld1.8              d0, [r3]!                   ; preload 8 left pixels
+    vqshrun.s16         d26, q14, #0
+    vqshrun.s16         d27, q15, #0
+    vmovl.u8            q3, d0
+    vst1.64             {d24-d27}, [r0], r1
+
+    subs                r2, r2, #1
+    bgt                 loop_32x32_neon
+
+    bx                  lr
+    ENDP                ; |vp9_tm_predictor_32x32_neon|
+
     END
diff --git a/vp9/common/mips/dspr2/vp9_common_dspr2.h b/vp9/common/mips/dspr2/vp9_common_dspr2.h
index e9c698119..991d3c2b3 100644
--- a/vp9/common/mips/dspr2/vp9_common_dspr2.h
+++ b/vp9/common/mips/dspr2/vp9_common_dspr2.h
@@ -17,6 +17,10 @@
 #include "vpx/vpx_integer.h"
 #include "vp9/common/vp9_common.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #if HAVE_DSPR2
 #define CROP_WIDTH 512
 extern uint8_t *vp9_ff_cropTbl;
@@ -114,4 +118,8 @@ void vp9_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                               int w, int h);
 
 #endif  // #if HAVE_DSPR2
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_MIPS_DSPR2_VP9_COMMON_DSPR2_H_
diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h b/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h
index 98bfcfaf2..008cf8cac 100644
--- a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h
+++ b/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h
@@ -17,6 +17,10 @@
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_onyxc_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #if HAVE_DSPR2
 /* inputs & outputs are quad-byte vectors */
 static INLINE void vp9_filter_dspr2(uint32_t mask, uint32_t hev,
@@ -752,4 +756,8 @@ static INLINE void vp9_wide_mbfilter_dspr2(uint32_t *op7, uint32_t *op6,
   *oq6 = res_oq6;
 }
 #endif  // #if HAVE_DSPR2
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_FILTERS_DSPR2_H_
diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h b/vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h
index 4cb2ebb46..ca01a6a10 100644
--- a/vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h
+++ b/vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h
@@ -17,6 +17,10 @@
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_onyxc_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #if HAVE_DSPR2
 #define STORE_F0() {                                                    \
     __asm__ __volatile__ (                                              \
@@ -467,4 +471,8 @@
 }
 
 #endif  // #if HAVE_DSPR2
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MACROS_DSPR2_H_
diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h b/vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h
index b9e0aca90..5b0d9cc9b 100644
--- a/vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h
+++ b/vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h
@@ -17,6 +17,10 @@
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_onyxc_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #if HAVE_DSPR2
 /* processing 4 pixels at the same time
  * compute hev and mask in the same function */
@@ -362,4 +366,8 @@ static INLINE void vp9_flatmask5(uint32_t p4, uint32_t p3,
   *flat2 = flat1;
 }
 #endif  // #if HAVE_DSPR2
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index ca42090c1..e033fbb99 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -33,8 +33,8 @@ void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi) {
 void vp9_free_frame_buffers(VP9_COMMON *cm) {
   int i;
 
-  for (i = 0; i < cm->fb_count; i++)
-    vp9_free_frame_buffer(&cm->yv12_fb[i]);
+  for (i = 0; i < FRAME_BUFFERS; i++)
+    vp9_free_frame_buffer(&cm->frame_bufs[i].buf);
 
   vp9_free_frame_buffer(&cm->post_proc_buffer);
 
@@ -85,7 +85,7 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) {
   int mi_size;
 
   if (vp9_realloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
-                               VP9_DEC_BORDER_IN_PIXELS, NULL, NULL, NULL) < 0)
+                               VP9_DEC_BORDER_IN_PIXELS) < 0)
     goto fail;
 
   set_mb_mi(cm, aligned_width, aligned_height);
@@ -137,33 +137,21 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
   const int ss_y = cm->subsampling_y;
   int mi_size;
 
-  if (cm->fb_count == 0) {
-    cm->fb_count = FRAME_BUFFERS;
-    CHECK_MEM_ERROR(cm, cm->yv12_fb,
-                    vpx_calloc(cm->fb_count, sizeof(*cm->yv12_fb)));
-    CHECK_MEM_ERROR(cm, cm->fb_idx_ref_cnt,
-                    vpx_calloc(cm->fb_count, sizeof(*cm->fb_idx_ref_cnt)));
-    if (cm->fb_lru) {
-      CHECK_MEM_ERROR(cm, cm->fb_idx_ref_lru,
-                      vpx_calloc(cm->fb_count, sizeof(*cm->fb_idx_ref_lru)));
-    }
-  }
-
   vp9_free_frame_buffers(cm);
 
-  for (i = 0; i < cm->fb_count; i++) {
-    cm->fb_idx_ref_cnt[i] = 0;
-    if (vp9_alloc_frame_buffer(&cm->yv12_fb[i], width, height, ss_x, ss_y,
-                               VP9_ENC_BORDER_IN_PIXELS) < 0)
+  for (i = 0; i < FRAME_BUFFERS; i++) {
+    cm->frame_bufs[i].ref_count = 0;
+    if (vp9_alloc_frame_buffer(&cm->frame_bufs[i].buf, width, height,
+                               ss_x, ss_y, VP9_ENC_BORDER_IN_PIXELS) < 0)
       goto fail;
   }
 
-  cm->new_fb_idx = cm->fb_count - 1;
-  cm->fb_idx_ref_cnt[cm->new_fb_idx] = 1;
+  cm->new_fb_idx = FRAME_BUFFERS - 1;
+  cm->frame_bufs[cm->new_fb_idx].ref_count = 1;
 
   for (i = 0; i < REF_FRAMES; i++) {
     cm->ref_frame_map[i] = i;
-    cm->fb_idx_ref_cnt[i] = 1;
+    cm->frame_bufs[i].ref_count = 1;
   }
 
   if (vp9_alloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
@@ -211,14 +199,6 @@ void vp9_create_common(VP9_COMMON *cm) {
 
 void vp9_remove_common(VP9_COMMON *cm) {
   vp9_free_frame_buffers(cm);
-
-  vpx_free(cm->yv12_fb);
-  vpx_free(cm->fb_idx_ref_cnt);
-  vpx_free(cm->fb_idx_ref_lru);
-
-  cm->yv12_fb = NULL;
-  cm->fb_idx_ref_cnt = NULL;
-  cm->fb_idx_ref_lru = NULL;
 }
 
 void vp9_initialize_common() {
diff --git a/vp9/common/vp9_alloccommon.h b/vp9/common/vp9_alloccommon.h
index cf8dca573..e3b5b95d8 100644
--- a/vp9/common/vp9_alloccommon.h
+++ b/vp9/common/vp9_alloccommon.h
@@ -14,6 +14,10 @@
 
 #include "vp9/common/vp9_onyxc_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp9_initialize_common();
 
 void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi);
@@ -28,4 +32,8 @@ void vp9_free_frame_buffers(VP9_COMMON *cm);
 
 void vp9_update_frame_size(VP9_COMMON *cm);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_ALLOCCOMMON_H_
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index ad78b0dc4..49e336aa4 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -25,8 +25,12 @@
 #include "vp9/common/vp9_scale.h"
 #include "vp9/common/vp9_seg_common.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define BLOCK_SIZE_GROUPS 4
-#define MBSKIP_CONTEXTS 3
+#define SKIP_CONTEXTS 3
 #define INTER_MODE_CONTEXTS 7
 
 /* Segment Feature Masks */
@@ -131,7 +135,7 @@ typedef struct {
   // Flags used for prediction status of various bit-stream signals
   unsigned char seg_id_predicted;
 
-  INTERPOLATION_TYPE interp_filter;
+  INTERP_FILTER interp_filter;
 
   BLOCK_SIZE sb_type;
 } MB_MODE_INFO;
@@ -248,7 +252,7 @@ typedef struct macroblockd {
   /* Inverse transform function pointers. */
   void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob);
 
-  struct subpix_fn_table  subpix;
+  const interp_kernel *interp_kernel;
 
   int corrupted;
 
@@ -463,4 +467,8 @@ static int get_tx_eob(const struct segmentation *seg, int segment_id,
   return vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
 }
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_BLOCKD_H_
diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h
index 36d1cdf14..69964dae8 100644
--- a/vp9/common/vp9_common.h
+++ b/vp9/common/vp9_common.h
@@ -19,6 +19,10 @@
 #include "vpx_mem/vpx_mem.h"
 #include "vpx/vpx_integer.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define MIN(x, y) (((x) < (y)) ? (x) : (y))
 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
 
@@ -91,4 +95,8 @@ static int get_unsigned_bits(unsigned int num_values) {
 #define VP9_FRAME_MARKER 0x2
 
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_COMMON_H_
diff --git a/vp9/common/vp9_common_data.h b/vp9/common/vp9_common_data.h
index 5222d29c1..f41962747 100644
--- a/vp9/common/vp9_common_data.h
+++ b/vp9/common/vp9_common_data.h
@@ -13,6 +13,10 @@
 
 #include "vp9/common/vp9_enums.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern const int b_width_log2_lookup[BLOCK_SIZES];
 extern const int b_height_log2_lookup[BLOCK_SIZES];
 extern const int mi_width_log2_lookup[BLOCK_SIZES];
@@ -28,4 +32,8 @@ extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES];
 extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES];
 extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2];
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_COMMON_DATA_H_
diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c
index 6edf7eaca..b105a57bc 100644
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -20,7 +20,7 @@
 
 static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
-                           const subpel_kernel *x_filters,
+                           const interp_kernel *x_filters,
                            int x0_q4, int x_step_q4, int w, int h) {
   int x, y;
   src -= SUBPEL_TAPS / 2 - 1;
@@ -42,7 +42,7 @@ static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
 
 static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
                                uint8_t *dst, ptrdiff_t dst_stride,
-                               const subpel_kernel *x_filters,
+                               const interp_kernel *x_filters,
                                int x0_q4, int x_step_q4, int w, int h) {
   int x, y;
   src -= SUBPEL_TAPS / 2 - 1;
@@ -65,7 +65,7 @@ static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
 
 static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
-                          const subpel_kernel *y_filters,
+                          const interp_kernel *y_filters,
                           int y0_q4, int y_step_q4, int w, int h) {
   int x, y;
   src -= src_stride * (SUBPEL_TAPS / 2 - 1);
@@ -88,7 +88,7 @@ static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
 
 static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
-                              const subpel_kernel *y_filters,
+                              const interp_kernel *y_filters,
                               int y0_q4, int y_step_q4, int w, int h) {
   int x, y;
   src -= src_stride * (SUBPEL_TAPS / 2 - 1);
@@ -112,9 +112,9 @@ static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
 
 static void convolve(const uint8_t *src, ptrdiff_t src_stride,
                      uint8_t *dst, ptrdiff_t dst_stride,
-                     const subpel_kernel *const x_filters,
+                     const interp_kernel *const x_filters,
                      int x0_q4, int x_step_q4,
-                     const subpel_kernel *const y_filters,
+                     const interp_kernel *const y_filters,
                      int y0_q4, int y_step_q4,
                      int w, int h) {
   // Fixed size intermediate buffer places limits on parameters.
@@ -138,14 +138,14 @@ static void convolve(const uint8_t *src, ptrdiff_t src_stride,
                 y_filters, y0_q4, y_step_q4, w, h);
 }
 
-static const subpel_kernel *get_filter_base(const int16_t *filter) {
+static const interp_kernel *get_filter_base(const int16_t *filter) {
   // NOTE: This assumes that the filter table is 256-byte aligned.
   // TODO(agrange) Modify to make independent of table alignment.
-  return (const subpel_kernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
+  return (const interp_kernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
 }
 
-static int get_filter_offset(const int16_t *f, const subpel_kernel *base) {
-  return (const subpel_kernel *)(intptr_t)f - base;
+static int get_filter_offset(const int16_t *f, const interp_kernel *base) {
+  return (const interp_kernel *)(intptr_t)f - base;
 }
 
 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
@@ -153,7 +153,7 @@ void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y, int y_step_q4,
                            int w, int h) {
-  const subpel_kernel *const filters_x = get_filter_base(filter_x);
+  const interp_kernel *const filters_x = get_filter_base(filter_x);
   const int x0_q4 = get_filter_offset(filter_x, filters_x);
 
   convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
@@ -165,7 +165,7 @@ void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                                const int16_t *filter_x, int x_step_q4,
                                const int16_t *filter_y, int y_step_q4,
                                int w, int h) {
-  const subpel_kernel *const filters_x = get_filter_base(filter_x);
+  const interp_kernel *const filters_x = get_filter_base(filter_x);
   const int x0_q4 = get_filter_offset(filter_x, filters_x);
 
   convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
@@ -177,7 +177,7 @@ void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4,
                           int w, int h) {
-  const subpel_kernel *const filters_y = get_filter_base(filter_y);
+  const interp_kernel *const filters_y = get_filter_base(filter_y);
   const int y0_q4 = get_filter_offset(filter_y, filters_y);
   convolve_vert(src, src_stride, dst, dst_stride, filters_y,
                 y0_q4, y_step_q4, w, h);
@@ -188,7 +188,7 @@ void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h) {
-  const subpel_kernel *const filters_y = get_filter_base(filter_y);
+  const interp_kernel *const filters_y = get_filter_base(filter_y);
   const int y0_q4 = get_filter_offset(filter_y, filters_y);
   convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
                     y0_q4, y_step_q4, w, h);
@@ -199,10 +199,10 @@ void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
                      const int16_t *filter_x, int x_step_q4,
                      const int16_t *filter_y, int y_step_q4,
                      int w, int h) {
-  const subpel_kernel *const filters_x = get_filter_base(filter_x);
+  const interp_kernel *const filters_x = get_filter_base(filter_x);
   const int x0_q4 = get_filter_offset(filter_x, filters_x);
 
-  const subpel_kernel *const filters_y = get_filter_base(filter_y);
+  const interp_kernel *const filters_y = get_filter_base(filter_y);
   const int y0_q4 = get_filter_offset(filter_y, filters_y);
 
   convolve(src, src_stride, dst, dst_stride,
diff --git a/vp9/common/vp9_convolve.h b/vp9/common/vp9_convolve.h
index 29d499063..6bf71fc79 100644
--- a/vp9/common/vp9_convolve.h
+++ b/vp9/common/vp9_convolve.h
@@ -13,10 +13,18 @@
 #include "./vpx_config.h"
 #include "vpx/vpx_integer.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_CONVOLVE_H_
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index ba162fd20..e030d92ec 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -18,6 +18,10 @@
 #include "vp9/common/vp9_scan.h"
 #include "vp9/common/vp9_entropymode.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define DIFF_UPDATE_PROB 252
 
 // Coefficient token alphabet
@@ -184,4 +188,8 @@ static const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
   }
 }
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_ENTROPY_H_
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index 83281b2ea..6def3c869 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -303,7 +303,7 @@ void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p,
   ct_8x8p[0][1] = tx_count_8x8p[TX_8X8];
 }
 
-static const vp9_prob default_mbskip_probs[MBSKIP_CONTEXTS] = {
+static const vp9_prob default_skip_probs[SKIP_CONTEXTS] = {
   192, 128, 64
 };
 
@@ -325,7 +325,7 @@ void vp9_init_mbmode_probs(VP9_COMMON *cm) {
   vp9_copy(cm->fc.comp_ref_prob, default_comp_ref_p);
   vp9_copy(cm->fc.single_ref_prob, default_single_ref_p);
   cm->fc.tx_probs = default_tx_probs;
-  vp9_copy(cm->fc.mbskip_probs, default_mbskip_probs);
+  vp9_copy(cm->fc.skip_probs, default_skip_probs);
   vp9_copy(cm->fc.inter_mode_probs, default_inter_mode_probs);
 }
 
@@ -385,7 +385,7 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
     adapt_probs(vp9_partition_tree, pre_fc->partition_prob[i],
                 counts->partition[i], fc->partition_prob[i]);
 
-  if (cm->mcomp_filter_type == SWITCHABLE) {
+  if (cm->interp_filter == SWITCHABLE) {
     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
       adapt_probs(vp9_switchable_interp_tree, pre_fc->switchable_interp_prob[i],
                   counts->switchable_interp[i], fc->switchable_interp_prob[i]);
@@ -415,9 +415,8 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
     }
   }
 
-  for (i = 0; i < MBSKIP_CONTEXTS; ++i)
-    fc->mbskip_probs[i] = adapt_prob(pre_fc->mbskip_probs[i],
-                                     counts->mbskip[i]);
+  for (i = 0; i < SKIP_CONTEXTS; ++i)
+    fc->skip_probs[i] = adapt_prob(pre_fc->skip_probs[i], counts->skip[i]);
 }
 
 static void set_default_lf_deltas(struct loopfilter *lf) {
diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h
index 5312553c7..deec3f652 100644
--- a/vp9/common/vp9_entropymode.h
+++ b/vp9/common/vp9_entropymode.h
@@ -13,12 +13,14 @@
 
 #include "vp9/common/vp9_blockd.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define TX_SIZE_CONTEXTS 2
 #define SWITCHABLE_FILTERS 3   // number of switchable filters
 #define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
 
-// #define MODE_STATS
-
 struct VP9Common;
 
 struct tx_probs {
@@ -57,4 +59,8 @@ void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p,
 void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p,
                                     unsigned int (*ct_8x8p)[2]);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_ENTROPYMODE_H_
diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h
index 48cb82db1..7e1f1479b 100644
--- a/vp9/common/vp9_entropymv.h
+++ b/vp9/common/vp9_entropymv.h
@@ -15,6 +15,10 @@
 #include "./vpx_config.h"
 #include "vp9/common/vp9_blockd.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct VP9Common;
 
 void vp9_init_mv_probs(struct VP9Common *cm);
@@ -121,4 +125,8 @@ typedef struct {
 
 void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_ENTROPYMV_H_
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
index 34411a34f..e96e76947 100644
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -13,6 +13,10 @@
 
 #include "./vpx_config.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define MI_SIZE_LOG2 3
 #define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2)  // 64 = 2^6
 
@@ -90,4 +94,8 @@ typedef enum {
   SRGB       = 7   // RGB
 } COLOR_SPACE;
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_ENUMS_H_
diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c
index 79ace147c..dbde6d551 100644
--- a/vp9/common/vp9_filter.c
+++ b/vp9/common/vp9_filter.c
@@ -14,7 +14,7 @@
 
 #include "vp9/common/vp9_filter.h"
 
-DECLARE_ALIGNED(256, const subpel_kernel,
+DECLARE_ALIGNED(256, const interp_kernel,
                 vp9_bilinear_filters[SUBPEL_SHIFTS]) = {
   { 0, 0, 0, 128,   0, 0, 0, 0 },
   { 0, 0, 0, 120,   8, 0, 0, 0 },
@@ -35,7 +35,7 @@ DECLARE_ALIGNED(256, const subpel_kernel,
 };
 
 // Lagrangian interpolation filter
-DECLARE_ALIGNED(256, const subpel_kernel,
+DECLARE_ALIGNED(256, const interp_kernel,
                 vp9_sub_pel_filters_8[SUBPEL_SHIFTS]) = {
   { 0,   0,   0, 128,   0,   0,   0,  0},
   { 0,   1,  -5, 126,   8,  -3,   1,  0},
@@ -56,7 +56,7 @@ DECLARE_ALIGNED(256, const subpel_kernel,
 };
 
 // DCT based filter
-DECLARE_ALIGNED(256, const subpel_kernel,
+DECLARE_ALIGNED(256, const interp_kernel,
                 vp9_sub_pel_filters_8s[SUBPEL_SHIFTS]) = {
   {0,   0,   0, 128,   0,   0,   0, 0},
   {-1,   3,  -7, 127,   8,  -3,   1, 0},
@@ -77,7 +77,7 @@ DECLARE_ALIGNED(256, const subpel_kernel,
 };
 
 // freqmultiplier = 0.5
-DECLARE_ALIGNED(256, const subpel_kernel,
+DECLARE_ALIGNED(256, const interp_kernel,
                 vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS]) = {
   { 0,  0,  0, 128,  0,  0,  0,  0},
   {-3, -1, 32,  64, 38,  1, -3,  0},
@@ -98,14 +98,15 @@ DECLARE_ALIGNED(256, const subpel_kernel,
 };
 
 
-static const subpel_kernel* vp9_filter_kernels[4] = {
+static const interp_kernel* vp9_filter_kernels[4] = {
   vp9_sub_pel_filters_8,
   vp9_sub_pel_filters_8lp,
   vp9_sub_pel_filters_8s,
   vp9_bilinear_filters
 };
 
-const subpel_kernel *vp9_get_filter_kernel(INTERPOLATION_TYPE type) {
-  return vp9_filter_kernels[type];
+const interp_kernel *vp9_get_interp_kernel(INTERP_FILTER filter) {
+  assert(filter != SWITCHABLE);
+  return vp9_filter_kernels[filter];
 }
 
diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h
index b1e7e6499..b611e304c 100644
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -14,6 +14,10 @@
 #include "./vpx_config.h"
 #include "vpx/vpx_integer.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define FILTER_BITS 7
 
 #define SUBPEL_BITS 4
@@ -27,25 +31,24 @@ typedef enum {
   EIGHTTAP_SHARP = 2,
   BILINEAR = 3,
   SWITCHABLE = 4  /* should be the last one */
-} INTERPOLATION_TYPE;
-
-typedef int16_t subpel_kernel[SUBPEL_TAPS];
+} INTERP_FILTER;
 
-struct subpix_fn_table {
-  const subpel_kernel *filter_x;
-  const subpel_kernel *filter_y;
-};
+typedef int16_t interp_kernel[SUBPEL_TAPS];
 
-const subpel_kernel *vp9_get_filter_kernel(INTERPOLATION_TYPE type);
+const interp_kernel *vp9_get_interp_kernel(INTERP_FILTER filter);
 
-extern const subpel_kernel vp9_bilinear_filters[SUBPEL_SHIFTS];
-extern const subpel_kernel vp9_sub_pel_filters_8[SUBPEL_SHIFTS];
-extern const subpel_kernel vp9_sub_pel_filters_8s[SUBPEL_SHIFTS];
-extern const subpel_kernel vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS];
+extern const interp_kernel vp9_bilinear_filters[SUBPEL_SHIFTS];
+extern const interp_kernel vp9_sub_pel_filters_8[SUBPEL_SHIFTS];
+extern const interp_kernel vp9_sub_pel_filters_8s[SUBPEL_SHIFTS];
+extern const interp_kernel vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS];
 
 // The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
 // filter kernel as a 2 tap filter.
 #define BILINEAR_FILTERS_2TAP(x) \
   (vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1)
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_FILTER_H_
diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c
index 533f7f361..20b78bfed 100644
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -96,7 +96,7 @@ void vp9_iwht4x4_1_add_c(const int16_t *in, uint8_t *dest, int dest_stride) {
   }
 }
 
-static void idct4_1d(const int16_t *input, int16_t *output) {
+static void idct4(const int16_t *input, int16_t *output) {
   int16_t step[4];
   int temp1, temp2;
   // stage 1
@@ -124,7 +124,7 @@ void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {
 
   // Rows
   for (i = 0; i < 4; ++i) {
-    idct4_1d(input, outptr);
+    idct4(input, outptr);
     input += 4;
     outptr += 4;
   }
@@ -133,7 +133,7 @@ void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {
   for (i = 0; i < 4; ++i) {
     for (j = 0; j < 4; ++j)
       temp_in[j] = out[j * 4 + i];
-    idct4_1d(temp_in, temp_out);
+    idct4(temp_in, temp_out);
     for (j = 0; j < 4; ++j)
       dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
                                   + dest[j * stride + i]);
@@ -156,7 +156,7 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride) {
   }
 }
 
-static void idct8_1d(const int16_t *input, int16_t *output) {
+static void idct8(const int16_t *input, int16_t *output) {
   int16_t step1[8], step2[8];
   int temp1, temp2;
   // stage 1
@@ -174,7 +174,7 @@ static void idct8_1d(const int16_t *input, int16_t *output) {
   step1[6] = dct_const_round_shift(temp2);
 
   // stage 2 & stage 3 - even half
-  idct4_1d(step1, step1);
+  idct4(step1, step1);
 
   // stage 2 - odd half
   step2[4] = step1[4] + step1[5];
@@ -209,7 +209,7 @@ void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) {
 
   // First transform rows
   for (i = 0; i < 8; ++i) {
-    idct8_1d(input, outptr);
+    idct8(input, outptr);
     input += 8;
     outptr += 8;
   }
@@ -218,7 +218,7 @@ void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) {
   for (i = 0; i < 8; ++i) {
     for (j = 0; j < 8; ++j)
       temp_in[j] = out[j * 8 + i];
-    idct8_1d(temp_in, temp_out);
+    idct8(temp_in, temp_out);
     for (j = 0; j < 8; ++j)
       dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
                                   + dest[j * stride + i]);
@@ -238,7 +238,7 @@ void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
   }
 }
 
-static void iadst4_1d(const int16_t *input, int16_t *output) {
+static void iadst4(const int16_t *input, int16_t *output) {
   int s0, s1, s2, s3, s4, s5, s6, s7;
 
   int x0 = input[0];
@@ -283,10 +283,10 @@ static void iadst4_1d(const int16_t *input, int16_t *output) {
 void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride,
                          int tx_type) {
   const transform_2d IHT_4[] = {
-    { idct4_1d, idct4_1d  },  // DCT_DCT  = 0
-    { iadst4_1d, idct4_1d  },   // ADST_DCT = 1
-    { idct4_1d, iadst4_1d },    // DCT_ADST = 2
-    { iadst4_1d, iadst4_1d }      // ADST_ADST = 3
+    { idct4, idct4  },  // DCT_DCT  = 0
+    { iadst4, idct4  },   // ADST_DCT = 1
+    { idct4, iadst4 },    // DCT_ADST = 2
+    { iadst4, iadst4 }      // ADST_ADST = 3
   };
 
   int i, j;
@@ -311,7 +311,7 @@ void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride,
                                   + dest[j * stride + i]);
   }
 }
-static void iadst8_1d(const int16_t *input, int16_t *output) {
+static void iadst8(const int16_t *input, int16_t *output) {
   int s0, s1, s2, s3, s4, s5, s6, s7;
 
   int x0 = input[7];
@@ -389,10 +389,10 @@ static void iadst8_1d(const int16_t *input, int16_t *output) {
 }
 
 static const transform_2d IHT_8[] = {
-  { idct8_1d,  idct8_1d  },  // DCT_DCT  = 0
-  { iadst8_1d, idct8_1d  },  // ADST_DCT = 1
-  { idct8_1d,  iadst8_1d },  // DCT_ADST = 2
-  { iadst8_1d, iadst8_1d }   // ADST_ADST = 3
+  { idct8,  idct8  },  // DCT_DCT  = 0
+  { iadst8, idct8  },  // ADST_DCT = 1
+  { idct8,  iadst8 },  // DCT_ADST = 2
+  { iadst8, iadst8 }   // ADST_ADST = 3
 };
 
 void vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride,
@@ -430,7 +430,7 @@ void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
   // First transform rows
   // only first 4 row has non-zero coefs
   for (i = 0; i < 4; ++i) {
-    idct8_1d(input, outptr);
+    idct8(input, outptr);
     input += 8;
     outptr += 8;
   }
@@ -439,14 +439,14 @@ void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
   for (i = 0; i < 8; ++i) {
     for (j = 0; j < 8; ++j)
       temp_in[j] = out[j * 8 + i];
-    idct8_1d(temp_in, temp_out);
+    idct8(temp_in, temp_out);
     for (j = 0; j < 8; ++j)
       dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
                                   + dest[j * stride + i]);
   }
 }
 
-static void idct16_1d(const int16_t *input, int16_t *output) {
+static void idct16(const int16_t *input, int16_t *output) {
   int16_t step1[16], step2[16];
   int temp1, temp2;
 
@@ -619,7 +619,7 @@ void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) {
 
   // First transform rows
   for (i = 0; i < 16; ++i) {
-    idct16_1d(input, outptr);
+    idct16(input, outptr);
     input += 16;
     outptr += 16;
   }
@@ -628,14 +628,14 @@ void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) {
   for (i = 0; i < 16; ++i) {
     for (j = 0; j < 16; ++j)
       temp_in[j] = out[j * 16 + i];
-    idct16_1d(temp_in, temp_out);
+    idct16(temp_in, temp_out);
     for (j = 0; j < 16; ++j)
       dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
                                   + dest[j * stride + i]);
   }
 }
 
-static void iadst16_1d(const int16_t *input, int16_t *output) {
+static void iadst16(const int16_t *input, int16_t *output) {
   int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
 
   int x0 = input[15];
@@ -807,10 +807,10 @@ static void iadst16_1d(const int16_t *input, int16_t *output) {
 }
 
 static const transform_2d IHT_16[] = {
-  { idct16_1d,  idct16_1d  },  // DCT_DCT  = 0
-  { iadst16_1d, idct16_1d  },  // ADST_DCT = 1
-  { idct16_1d,  iadst16_1d },  // DCT_ADST = 2
-  { iadst16_1d, iadst16_1d }   // ADST_ADST = 3
+  { idct16,  idct16  },  // DCT_DCT  = 0
+  { iadst16, idct16  },  // ADST_DCT = 1
+  { idct16,  iadst16 },  // DCT_ADST = 2
+  { iadst16, iadst16 }   // ADST_ADST = 3
 };
 
 void vp9_iht16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride,
@@ -848,7 +848,7 @@ void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
   // First transform rows. Since all non-zero dct coefficients are in
   // upper-left 4x4 area, we only need to calculate first 4 rows here.
   for (i = 0; i < 4; ++i) {
-    idct16_1d(input, outptr);
+    idct16(input, outptr);
     input += 16;
     outptr += 16;
   }
@@ -857,7 +857,7 @@ void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
   for (i = 0; i < 16; ++i) {
     for (j = 0; j < 16; ++j)
       temp_in[j] = out[j*16 + i];
-    idct16_1d(temp_in, temp_out);
+    idct16(temp_in, temp_out);
     for (j = 0; j < 16; ++j)
       dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
                                   + dest[j * stride + i]);
@@ -877,7 +877,7 @@ void vp9_idct16x16_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
   }
 }
 
-static void idct32_1d(const int16_t *input, int16_t *output) {
+static void idct32(const int16_t *input, int16_t *output) {
   int16_t step1[32], step2[32];
   int temp1, temp2;
 
@@ -1263,7 +1263,7 @@ void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) {
       zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
 
     if (zero_coeff[0] | zero_coeff[1])
-      idct32_1d(input, outptr);
+      idct32(input, outptr);
     else
       vpx_memset(outptr, 0, sizeof(int16_t) * 32);
     input += 32;
@@ -1274,7 +1274,7 @@ void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) {
   for (i = 0; i < 32; ++i) {
     for (j = 0; j < 32; ++j)
       temp_in[j] = out[j * 32 + i];
-    idct32_1d(temp_in, temp_out);
+    idct32(temp_in, temp_out);
     for (j = 0; j < 32; ++j)
       dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
                                         + dest[j * stride + i]);
@@ -1290,7 +1290,7 @@ void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) {
   // Rows
   // only upper-left 8x8 has non-zero coeff
   for (i = 0; i < 8; ++i) {
-    idct32_1d(input, outptr);
+    idct32(input, outptr);
     input += 32;
     outptr += 32;
   }
@@ -1299,7 +1299,7 @@ void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) {
   for (i = 0; i < 32; ++i) {
     for (j = 0; j < 32; ++j)
       temp_in[j] = out[j * 32 + i];
-    idct32_1d(temp_in, temp_out);
+    idct32(temp_in, temp_out);
     for (j = 0; j < 32; ++j)
       dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
                                   + dest[j * stride + i]);
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index 183c50abf..ceca7951b 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -18,6 +18,10 @@
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_enums.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 
 // Constants and Macros used by all idct/dct functions
 #define DCT_CONST_BITS 14
@@ -103,4 +107,8 @@ void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
                       int stride, int eob);
 
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_IDCT_H_
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 2266e0ec2..dd304c909 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -16,26 +16,6 @@
 
 #include "vp9/common/vp9_seg_common.h"
 
-// This structure holds bit masks for all 8x8 blocks in a 64x64 region.
-// Each 1 bit represents a position in which we want to apply the loop filter.
-// Left_ entries refer to whether we apply a filter on the border to the
-// left of the block.   Above_ entries refer to whether or not to apply a
-// filter on the above border.   Int_ entries refer to whether or not to
-// apply borders on the 4x4 edges within the 8x8 block that each bit
-// represents.
-// Since each transform is accompanied by a potentially different type of
-// loop filter there is a different entry in the array for each transform size.
-typedef struct {
-  uint64_t left_y[TX_SIZES];
-  uint64_t above_y[TX_SIZES];
-  uint64_t int_4x4_y;
-  uint16_t left_uv[TX_SIZES];
-  uint16_t above_uv[TX_SIZES];
-  uint16_t int_4x4_uv;
-  uint8_t lfl_y[64];
-  uint8_t lfl_uv[16];
-} LOOP_FILTER_MASK;
-
 // 64 bit masks for left transform size.  Each 1 represents a position where
 // we should apply a loop filter across the left border of an 8x8 block
 // boundary.
@@ -638,9 +618,9 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n,
 // This function sets up the bit masks for the entire 64x64 region represented
 // by mi_row, mi_col.
 // TODO(JBB): This function only works for yv12.
-static void setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
-                       MODE_INFO **mi_8x8, const int mode_info_stride,
-                       LOOP_FILTER_MASK *lfm) {
+void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
+                    MODE_INFO **mi_8x8, const int mode_info_stride,
+                    LOOP_FILTER_MASK *lfm) {
   int idx_32, idx_16, idx_8;
   const loop_filter_info_n *const lfi_n = &cm->lf_info;
   MODE_INFO **mip = mi_8x8;
@@ -1069,10 +1049,10 @@ static void filter_block_plane_non420(VP9_COMMON *cm,
 }
 #endif
 
-static void filter_block_plane(VP9_COMMON *const cm,
-                               struct macroblockd_plane *const plane,
-                               int mi_row,
-                               LOOP_FILTER_MASK *lfm) {
+void vp9_filter_block_plane(VP9_COMMON *const cm,
+                            struct macroblockd_plane *const plane,
+                            int mi_row,
+                            LOOP_FILTER_MASK *lfm) {
   struct buf_2d *const dst = &plane->dst;
   uint8_t* const dst0 = dst->buf;
   int r, c;
@@ -1244,14 +1224,14 @@ void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
 #if CONFIG_NON420
       if (use_420)
 #endif
-        setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mode_info_stride,
-                   &lfm);
+        vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col,
+                       cm->mode_info_stride, &lfm);
 
       for (plane = 0; plane < num_planes; ++plane) {
 #if CONFIG_NON420
         if (use_420)
 #endif
-          filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
+          vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
 #if CONFIG_NON420
         else
           filter_block_plane_non420(cm, &xd->plane[plane], mi_8x8 + mi_col,
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index 98fac96ff..668e898cf 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -17,6 +17,10 @@
 #include "vp9/common/vp9_blockd.h"
 #include "vp9/common/vp9_seg_common.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define MAX_LOOP_FILTER 63
 #define MAX_SHARPNESS 7
 
@@ -56,9 +60,42 @@ typedef struct {
   uint8_t lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS];
 } loop_filter_info_n;
 
+// This structure holds bit masks for all 8x8 blocks in a 64x64 region.
+// Each 1 bit represents a position in which we want to apply the loop filter.
+// Left_ entries refer to whether we apply a filter on the border to the
+// left of the block.   Above_ entries refer to whether or not to apply a
+// filter on the above border.   Int_ entries refer to whether or not to
+// apply borders on the 4x4 edges within the 8x8 block that each bit
+// represents.
+// Since each transform is accompanied by a potentially different type of
+// loop filter there is a different entry in the array for each transform size.
+typedef struct {
+  uint64_t left_y[TX_SIZES];
+  uint64_t above_y[TX_SIZES];
+  uint64_t int_4x4_y;
+  uint16_t left_uv[TX_SIZES];
+  uint16_t above_uv[TX_SIZES];
+  uint16_t int_4x4_uv;
+  uint8_t lfl_y[64];
+  uint8_t lfl_uv[16];
+} LOOP_FILTER_MASK;
+
 /* assorted loopfilter functions which get used elsewhere */
 struct VP9Common;
 struct macroblockd;
+struct VP9LfSyncData;
+
+// This function sets up the bit masks for the entire 64x64 region represented
+// by mi_row, mi_col.
+void vp9_setup_mask(struct VP9Common *const cm,
+                    const int mi_row, const int mi_col,
+                    MODE_INFO **mi_8x8, const int mode_info_stride,
+                    LOOP_FILTER_MASK *lfm);
+
+void vp9_filter_block_plane(struct VP9Common *const cm,
+                            struct macroblockd_plane *const plane,
+                            int mi_row,
+                            LOOP_FILTER_MASK *lfm);
 
 void vp9_loop_filter_init(struct VP9Common *cm);
 
@@ -86,8 +123,15 @@ typedef struct LoopFilterWorkerData {
   int start;
   int stop;
   int y_only;
+
+  struct VP9LfSyncData *lf_sync;
+  int num_lf_workers;
 } LFWorkerData;
 
 // Operates on the rows described by LFWorkerData passed as 'arg1'.
 int vp9_loop_filter_worker(void *arg1, void *arg2);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_LOOPFILTER_H_
diff --git a/vp9/common/vp9_mv.h b/vp9/common/vp9_mv.h
index 155c3f12e..98fd1d82f 100644
--- a/vp9/common/vp9_mv.h
+++ b/vp9/common/vp9_mv.h
@@ -15,6 +15,10 @@
 
 #include "vp9/common/vp9_common.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef struct mv {
   int16_t row;
   int16_t col;
@@ -36,4 +40,8 @@ static void clamp_mv(MV *mv, int min_col, int max_col,
   mv->row = clamp(mv->row, min_row, max_row);
 }
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_MV_H_
diff --git a/vp9/common/vp9_mvref_common.h b/vp9/common/vp9_mvref_common.h
index cd89390d5..0936abfcd 100644
--- a/vp9/common/vp9_mvref_common.h
+++ b/vp9/common/vp9_mvref_common.h
@@ -7,12 +7,16 @@
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
+#ifndef VP9_COMMON_VP9_MVREF_COMMON_H_
+#define VP9_COMMON_VP9_MVREF_COMMON_H_
 
 #include "vp9/common/vp9_onyxc_int.h"
 #include "vp9/common/vp9_blockd.h"
 
-#ifndef VP9_COMMON_VP9_MVREF_COMMON_H_
-#define VP9_COMMON_VP9_MVREF_COMMON_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 
 void vp9_find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
                           const TileInfo *const tile,
@@ -56,4 +60,8 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
                                    int block, int ref, int mi_row, int mi_col,
                                    int_mv *nearest, int_mv *near);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_MVREF_COMMON_H_
diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h
index 45d798482..564e4195f 100644
--- a/vp9/common/vp9_onyx.h
+++ b/vp9/common/vp9_onyx.h
@@ -11,17 +11,16 @@
 #ifndef VP9_COMMON_VP9_ONYX_H_
 #define VP9_COMMON_VP9_ONYX_H_
 
-#ifdef __cplusplus
-extern "C"
-{ // NOLINT
-#endif
-
 #include "./vpx_config.h"
 #include "vpx/internal/vpx_codec_internal.h"
 #include "vpx/vp8cx.h"
 #include "vpx_scale/yv12config.h"
 #include "vp9/common/vp9_ppflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define MAX_SEGMENTS 8
 
   typedef int *VP9_PTR;
@@ -56,6 +55,7 @@ extern "C"
     MODE_FIRSTPASS      = 0x3,
     MODE_SECONDPASS     = 0x4,
     MODE_SECONDPASS_BEST = 0x5,
+    MODE_REALTIME       = 0x6,
   } MODE;
 
   typedef enum {
@@ -237,7 +237,7 @@ extern "C"
   int vp9_get_quantizer(VP9_PTR c);
 
 #ifdef __cplusplus
-}
+}  // extern "C"
 #endif
 
 #endif  // VP9_COMMON_VP9_ONYX_H_
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index f6fe4d3f1..d92a25b12 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -25,6 +25,10 @@
 #include "vp9/common/vp9_postproc.h"
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define REFS_PER_FRAME 3
 
 #define REF_FRAMES_LOG2 3
@@ -56,7 +60,7 @@ typedef struct frame_contexts {
   vp9_prob single_ref_prob[REF_CONTEXTS][2];
   vp9_prob comp_ref_prob[REF_CONTEXTS];
   struct tx_probs tx_probs;
-  vp9_prob mbskip_probs[MBSKIP_CONTEXTS];
+  vp9_prob skip_probs[SKIP_CONTEXTS];
   nmv_context nmvc;
 } FRAME_CONTEXT;
 
@@ -75,7 +79,7 @@ typedef struct {
   unsigned int single_ref[REF_CONTEXTS][2][2];
   unsigned int comp_ref[REF_CONTEXTS][2];
   struct tx_counts tx;
-  unsigned int mbskip[MBSKIP_CONTEXTS][2];
+  unsigned int skip[SKIP_CONTEXTS][2];
   nmv_context_counts mv;
 } FRAME_COUNTS;
 
@@ -87,6 +91,12 @@ typedef enum {
   REFERENCE_MODES       = 3,
 } REFERENCE_MODE;
 
+
+typedef struct {
+  int ref_count;
+  YV12_BUFFER_CONFIG buf;
+} RefCntBuffer;
+
 typedef struct VP9Common {
   struct vpx_internal_error_info  error;
 
@@ -113,8 +123,8 @@ typedef struct VP9Common {
 
   YV12_BUFFER_CONFIG *frame_to_show;
 
-  YV12_BUFFER_CONFIG *yv12_fb;
-  int *fb_idx_ref_cnt; /* reference counts */
+  RefCntBuffer frame_bufs[FRAME_BUFFERS];
+
   int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */
 
   // TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and
@@ -180,7 +190,7 @@ typedef struct VP9Common {
   // Persistent mb segment id map used in prediction.
   unsigned char *last_frame_seg_map;
 
-  INTERPOLATION_TYPE mcomp_filter_type;
+  INTERP_FILTER interp_filter;
 
   loop_filter_info_n lf_info;
 
@@ -213,55 +223,32 @@ typedef struct VP9Common {
   int frame_parallel_decoding_mode;
 
   int log2_tile_cols, log2_tile_rows;
-
-  vpx_codec_frame_buffer_t *fb_list;  // External frame buffers
-  int fb_count;  // Total number of frame buffers
-  vpx_realloc_frame_buffer_cb_fn_t realloc_fb_cb;
-  void *user_priv;  // Private data associated with the external frame buffers.
-
-  int fb_lru;  // Flag telling if lru is on/off
-  uint32_t *fb_idx_ref_lru;  // Frame buffer lru cache
-  uint32_t fb_idx_ref_lru_count;
 } VP9_COMMON;
 
 static YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) {
-  return &cm->yv12_fb[cm->new_fb_idx];
+  return &cm->frame_bufs[cm->new_fb_idx].buf;
 }
 
 static int get_free_fb(VP9_COMMON *cm) {
   int i;
-  uint32_t lru_count = cm->fb_idx_ref_lru_count + 1;
-  int free_buffer_idx = cm->fb_count;
-  for (i = 0; i < cm->fb_count; i++) {
-    if (!cm->fb_lru) {
-      if (cm->fb_idx_ref_cnt[i] == 0) {
-        free_buffer_idx = i;
-        break;
-      }
-    } else {
-      if (cm->fb_idx_ref_cnt[i] == 0 && cm->fb_idx_ref_lru[i] < lru_count) {
-        free_buffer_idx = i;
-        lru_count = cm->fb_idx_ref_lru[i];
-      }
-    }
-  }
+  for (i = 0; i < FRAME_BUFFERS; i++)
+    if (cm->frame_bufs[i].ref_count == 0)
+      break;
 
-  assert(free_buffer_idx < cm->fb_count);
-  cm->fb_idx_ref_cnt[free_buffer_idx] = 1;
-  if (cm->fb_lru)
-    cm->fb_idx_ref_lru[free_buffer_idx] = ++cm->fb_idx_ref_lru_count;
-  return free_buffer_idx;
+  assert(i < FRAME_BUFFERS);
+  cm->frame_bufs[i].ref_count = 1;
+  return i;
 }
 
-static void ref_cnt_fb(int *buf, int *idx, int new_idx) {
+static void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
   const int ref_index = *idx;
 
-  if (ref_index >= 0 && buf[ref_index] > 0)
-    buf[ref_index]--;
+  if (ref_index >= 0 && bufs[ref_index].ref_count > 0)
+    bufs[ref_index].ref_count--;
 
   *idx = new_idx;
 
-  buf[new_idx]++;
+  bufs[new_idx].ref_count++;
 }
 
 static int mi_cols_aligned_to_sb(int n_mis) {
@@ -359,4 +346,8 @@ static INLINE int partition_plane_context(
   return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
 }
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_ONYXC_INT_H_
diff --git a/vp9/common/vp9_postproc.h b/vp9/common/vp9_postproc.h
index b8a456fdb..b07d5d045 100644
--- a/vp9/common/vp9_postproc.h
+++ b/vp9/common/vp9_postproc.h
@@ -15,6 +15,10 @@
 #include "vpx_ports/mem.h"
 #include "vp9/common/vp9_ppflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct postproc_state {
   int last_q;
   int last_noise;
@@ -33,4 +37,8 @@ void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q);
 
 void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_POSTPROC_H_
diff --git a/vp9/common/vp9_ppflags.h b/vp9/common/vp9_ppflags.h
index 561c93028..8168935fc 100644
--- a/vp9/common/vp9_ppflags.h
+++ b/vp9/common/vp9_ppflags.h
@@ -11,6 +11,10 @@
 #ifndef VP9_COMMON_VP9_PPFLAGS_H_
 #define VP9_COMMON_VP9_PPFLAGS_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 enum {
   VP9D_NOFILTERING            = 0,
   VP9D_DEBLOCK                = 1 << 0,
@@ -35,4 +39,8 @@ typedef struct {
   int display_mv_flag;
 } vp9_ppflags_t;
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_PPFLAGS_H_
diff --git a/vp9/common/vp9_pragmas.h b/vp9/common/vp9_pragmas.h
index f079161d6..0efc713ca 100644
--- a/vp9/common/vp9_pragmas.h
+++ b/vp9/common/vp9_pragmas.h
@@ -11,6 +11,10 @@
 #ifndef VP9_COMMON_VP9_PRAGMAS_H_
 #define VP9_COMMON_VP9_PRAGMAS_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __INTEL_COMPILER
 #pragma warning(disable:997 1011 170)
 #endif
@@ -19,4 +23,8 @@
 #pragma warning(disable:4799)
 #endif
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_PRAGMAS_H_
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
index 23722ba72..0acee32f8 100644
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -14,6 +14,10 @@
 #include "vp9/common/vp9_blockd.h"
 #include "vp9/common/vp9_onyxc_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static INLINE const MODE_INFO *get_above_mi(const MACROBLOCKD *const xd) {
   return xd->up_available ? xd->mi_8x8[-xd->mode_info_stride] : NULL;
 }
@@ -50,7 +54,7 @@ static INLINE int vp9_get_skip_context(const MACROBLOCKD *xd) {
 
 static INLINE vp9_prob vp9_get_skip_prob(const VP9_COMMON *cm,
                                          const MACROBLOCKD *xd) {
-  return cm->fc.mbskip_probs[vp9_get_skip_context(xd)];
+  return cm->fc.skip_probs[vp9_get_skip_context(xd)];
 }
 
 int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd);
@@ -129,4 +133,8 @@ static unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx,
   }
 }
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_PRED_COMMON_H_
diff --git a/vp9/common/vp9_prob.h b/vp9/common/vp9_prob.h
index 7a790c542..cc8d8ab38 100644
--- a/vp9/common/vp9_prob.h
+++ b/vp9/common/vp9_prob.h
@@ -18,6 +18,10 @@
 
 #include "vp9/common/vp9_common.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef uint8_t vp9_prob;
 
 #define MAX_PROB 255
@@ -109,4 +113,8 @@ static void tree_merge_probs(const vp9_tree_index *tree,
 
 DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_PROB_H_
diff --git a/vp9/common/vp9_quant_common.h b/vp9/common/vp9_quant_common.h
index 83f2fb655..af50e23cd 100644
--- a/vp9/common/vp9_quant_common.h
+++ b/vp9/common/vp9_quant_common.h
@@ -13,6 +13,10 @@
 
 #include "vp9/common/vp9_blockd.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define MINQ 0
 #define MAXQ 255
 #define QINDEX_RANGE (MAXQ - MINQ + 1)
@@ -25,4 +29,8 @@ int16_t vp9_ac_quant(int qindex, int delta);
 
 int vp9_get_qindex(struct segmentation *seg, int segment_id, int base_qindex);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_QUANT_COMMON_H_
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index b5a9248c3..d554cc0ed 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -69,13 +69,11 @@ static void inter_predictor(const uint8_t *src, int src_stride,
                             const int subpel_y,
                             const struct scale_factors *sf,
                             int w, int h, int ref,
-                            const struct subpix_fn_table *subpix,
+                            const interp_kernel *kernel,
                             int xs, int ys) {
   sf->predict[subpel_x != 0][subpel_y != 0][ref](
       src, src_stride, dst, dst_stride,
-      subpix->filter_x[subpel_x], xs,
-      subpix->filter_y[subpel_y], ys,
-      w, h);
+      kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
 }
 
 void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
@@ -83,7 +81,7 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
                                const MV *src_mv,
                                const struct scale_factors *sf,
                                int w, int h, int ref,
-                               const struct subpix_fn_table *subpix,
+                               const interp_kernel *kernel,
                                enum mv_precision precision,
                                int x, int y) {
   const int is_q4 = precision == MV_PRECISION_Q4;
@@ -96,7 +94,7 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
   src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
 
   inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
-                  sf, w, h, ref, subpix, sf->x_step_q4, sf->y_step_q4);
+                  sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4);
 }
 
 static INLINE int round_mv_comp_q4(int value) {
@@ -198,7 +196,8 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
            + (scaled_mv.col >> SUBPEL_BITS);
 
     inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
-                    subpel_x, subpel_y, sf, w, h, ref, &xd->subpix, xs, ys);
+                    subpel_x, subpel_y, sf, w, h, ref, xd->interp_kernel,
+                    xs, ys);
   }
 }
 
@@ -367,7 +366,7 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
     }
 
     inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
-                    subpel_y, sf, w, h, ref, &xd->subpix, xs, ys);
+                    subpel_y, sf, w, h, ref, xd->interp_kernel, xs, ys);
   }
 }
 
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h
index 3cc16d94e..3345d83e8 100644
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -14,7 +14,10 @@
 #include "vpx/vpx_integer.h"
 #include "vp9/common/vp9_onyxc_int.h"
 
-struct subpix_fn_table;
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col,
                                     BLOCK_SIZE bsize);
 
@@ -32,7 +35,7 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
                                const MV *mv_q3,
                                const struct scale_factors *sf,
                                int w, int h, int do_avg,
-                               const struct subpix_fn_table *subpix,
+                               const interp_kernel *kernel,
                                enum mv_precision precision,
                                int x, int y);
 
@@ -90,10 +93,8 @@ static void setup_pre_planes(MACROBLOCKD *xd, int idx,
   }
 }
 
-static void set_scale_factors(VP9_COMMON *cm, MACROBLOCKD *xd,
-                              int ref0, int ref1) {
-  xd->block_refs[0] = &cm->frame_refs[ref0 >= 0 ? ref0 : 0];
-  xd->block_refs[1] = &cm->frame_refs[ref1 >= 0 ? ref1 : 0];
-}
+#ifdef __cplusplus
+}  // extern "C"
+#endif
 
 #endif  // VP9_COMMON_VP9_RECONINTER_H_
diff --git a/vp9/common/vp9_reconintra.h b/vp9/common/vp9_reconintra.h
index fc916fcf3..800736d30 100644
--- a/vp9/common/vp9_reconintra.h
+++ b/vp9/common/vp9_reconintra.h
@@ -14,9 +14,17 @@
 #include "vpx/vpx_integer.h"
 #include "vp9/common/vp9_blockd.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in,
                              TX_SIZE tx_size, int mode,
                              const uint8_t *ref, int ref_stride,
                              uint8_t *dst, int dst_stride,
                              int aoff, int loff, int plane);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_RECONINTRA_H_
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index e384032f4..04a40bd58 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -135,7 +135,7 @@ prototype void vp9_v_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const ui
 specialize vp9_v_predictor_16x16 $sse2_x86inc neon
 
 prototype void vp9_tm_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_tm_predictor_16x16 $sse2_x86inc
+specialize vp9_tm_predictor_16x16 $sse2_x86inc neon
 
 prototype void vp9_dc_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
 specialize vp9_dc_predictor_16x16 $sse2_x86inc dspr2
@@ -174,7 +174,7 @@ prototype void vp9_v_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const ui
 specialize vp9_v_predictor_32x32 $sse2_x86inc neon
 
 prototype void vp9_tm_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_tm_predictor_32x32 $sse2_x86_64
+specialize vp9_tm_predictor_32x32 $sse2_x86_64 neon
 
 prototype void vp9_dc_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
 specialize vp9_dc_predictor_32x32 $sse2_x86inc
diff --git a/vp9/common/vp9_scale.h b/vp9/common/vp9_scale.h
index 55b4d8888..90b0d0bf9 100644
--- a/vp9/common/vp9_scale.h
+++ b/vp9/common/vp9_scale.h
@@ -14,6 +14,10 @@
 #include "vp9/common/vp9_mv.h"
 #include "vp9/common/vp9_convolve.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define REF_SCALE_SHIFT 14
 #define REF_NO_SCALE (1 << REF_SCALE_SHIFT)
 #define REF_INVALID_SCALE -1
@@ -46,4 +50,8 @@ static int vp9_is_scaled(const struct scale_factors *sf) {
          sf->y_scale_fp != REF_NO_SCALE;
 }
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_SCALE_H_
diff --git a/vp9/common/vp9_scan.h b/vp9/common/vp9_scan.h
index efab48bfc..9613b675c 100644
--- a/vp9/common/vp9_scan.h
+++ b/vp9/common/vp9_scan.h
@@ -17,6 +17,10 @@
 #include "vp9/common/vp9_enums.h"
 #include "vp9/common/vp9_blockd.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define MAX_NEIGHBORS 2
 
 void vp9_init_neighbors();
@@ -36,4 +40,8 @@ static INLINE int get_coef_context(const int16_t *neighbors,
           token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
 }
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_SCAN_H_
diff --git a/vp9/common/vp9_seg_common.h b/vp9/common/vp9_seg_common.h
index 8ff54fb73..ff2d66a36 100644
--- a/vp9/common/vp9_seg_common.h
+++ b/vp9/common/vp9_seg_common.h
@@ -13,6 +13,10 @@
 
 #include "vp9/common/vp9_prob.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define SEGMENT_DELTADATA   0
 #define SEGMENT_ABSDATA     1
 
@@ -70,5 +74,9 @@ int vp9_get_segdata(const struct segmentation *seg,
 
 extern const vp9_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)];
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_SEG_COMMON_H_
 
diff --git a/vp9/common/vp9_systemdependent.h b/vp9/common/vp9_systemdependent.h
index 6f955ab56..ee9a4823b 100644
--- a/vp9/common/vp9_systemdependent.h
+++ b/vp9/common/vp9_systemdependent.h
@@ -11,6 +11,10 @@
 #ifndef VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
 #define VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef _MSC_VER
 #include <math.h>
 #define snprintf _snprintf
@@ -72,4 +76,8 @@ static INLINE int get_msb(unsigned int n) {
 struct VP9Common;
 void vp9_machine_specific_config(struct VP9Common *cm);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
diff --git a/vp9/common/vp9_textblit.h b/vp9/common/vp9_textblit.h
index c968628fe..158ec1b37 100644
--- a/vp9/common/vp9_textblit.h
+++ b/vp9/common/vp9_textblit.h
@@ -11,9 +11,17 @@
 #ifndef VP9_COMMON_VP9_TEXTBLIT_H_
 #define VP9_COMMON_VP9_TEXTBLIT_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp9_blit_text(const char *msg, unsigned char *address, int pitch);
 
 void vp9_blit_line(int x0, int x1, int y0, int y1, unsigned char *image,
                    int pitch);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_TEXTBLIT_H_
diff --git a/vp9/common/vp9_tile_common.h b/vp9/common/vp9_tile_common.h
index a09876e4b..a97719e29 100644
--- a/vp9/common/vp9_tile_common.h
+++ b/vp9/common/vp9_tile_common.h
@@ -11,6 +11,10 @@
 #ifndef VP9_COMMON_VP9_TILE_COMMON_H_
 #define VP9_COMMON_VP9_TILE_COMMON_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct VP9Common;
 
 typedef struct TileInfo {
@@ -26,4 +30,8 @@ void vp9_tile_init(TileInfo *tile, const struct VP9Common *cm,
 void vp9_get_tile_n_bits(int mi_cols,
                          int *min_log2_tile_cols, int *max_log2_tile_cols);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_VP9_TILE_COMMON_H_
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
index f95423678..8a2297feb 100644
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
@@ -23,6 +23,68 @@ typedef void filter8_1dfunction (
   const short *filter
 );
 
+#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
+void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
+                                  uint8_t *dst, ptrdiff_t dst_stride, \
+                                  const int16_t *filter_x, int x_step_q4, \
+                                  const int16_t *filter_y, int y_step_q4, \
+                                  int w, int h) { \
+  if (step_q4 == 16 && filter[3] != 128) { \
+    while (w >= 16) { \
+      vp9_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, \
+                                               dst, dst_stride, \
+                                               h, filter); \
+      src += 16; \
+      dst += 16; \
+      w -= 16; \
+    } \
+    while (w >= 8) { \
+      vp9_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, \
+                                              dst, dst_stride, \
+                                              h, filter); \
+      src += 8; \
+      dst += 8; \
+      w -= 8; \
+    } \
+    while (w >= 4) { \
+      vp9_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, \
+                                              dst, dst_stride, \
+                                              h, filter); \
+      src += 4; \
+      dst += 4; \
+      w -= 4; \
+    } \
+  } \
+  if (w) { \
+    vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
+                             filter_x, x_step_q4, filter_y, y_step_q4, \
+                             w, h); \
+  } \
+}
+
+#define FUN_CONV_2D(avg, opt) \
+void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
+                              uint8_t *dst, ptrdiff_t dst_stride, \
+                              const int16_t *filter_x, int x_step_q4, \
+                              const int16_t *filter_y, int y_step_q4, \
+                              int w, int h) { \
+  DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71); \
+  \
+  assert(w <= 64); \
+  assert(h <= 64); \
+  if (x_step_q4 == 16 && y_step_q4 == 16) { \
+    vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
+                              filter_x, x_step_q4, filter_y, y_step_q4, \
+                              w, h + 7); \
+    vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
+                                    filter_x, x_step_q4, filter_y, y_step_q4, \
+                                    w, h); \
+  } else { \
+    vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
+                           filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
+  } \
+}
+
 #if HAVE_SSSE3
 filter8_1dfunction vp9_filter_block1d16_v8_ssse3;
 filter8_1dfunction vp9_filter_block1d16_h8_ssse3;
@@ -37,201 +99,44 @@ filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3;
 filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3;
 filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3;
 
-void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
-                               uint8_t *dst, ptrdiff_t dst_stride,
-                               const int16_t *filter_x, int x_step_q4,
-                               const int16_t *filter_y, int y_step_q4,
-                               int w, int h) {
-  /* Ensure the filter can be compressed to int16_t. */
-  if (x_step_q4 == 16 && filter_x[3] != 128) {
-    while (w >= 16) {
-      vp9_filter_block1d16_h8_ssse3(src, src_stride,
-                                    dst, dst_stride,
-                                    h, filter_x);
-      src += 16;
-      dst += 16;
-      w -= 16;
-    }
-    while (w >= 8) {
-      vp9_filter_block1d8_h8_ssse3(src, src_stride,
-                                   dst, dst_stride,
-                                   h, filter_x);
-      src += 8;
-      dst += 8;
-      w -= 8;
-    }
-    while (w >= 4) {
-      vp9_filter_block1d4_h8_ssse3(src, src_stride,
-                                   dst, dst_stride,
-                                   h, filter_x);
-      src += 4;
-      dst += 4;
-      w -= 4;
-    }
-  }
-  if (w) {
-    vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
-                          filter_x, x_step_q4, filter_y, y_step_q4,
-                          w, h);
-  }
-}
-
-void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
-                              uint8_t *dst, ptrdiff_t dst_stride,
-                              const int16_t *filter_x, int x_step_q4,
-                              const int16_t *filter_y, int y_step_q4,
-                              int w, int h) {
-  if (y_step_q4 == 16 && filter_y[3] != 128) {
-    while (w >= 16) {
-      vp9_filter_block1d16_v8_ssse3(src - src_stride * 3, src_stride,
-                                    dst, dst_stride,
-                                    h, filter_y);
-      src += 16;
-      dst += 16;
-      w -= 16;
-    }
-    while (w >= 8) {
-      vp9_filter_block1d8_v8_ssse3(src - src_stride * 3, src_stride,
-                                   dst, dst_stride,
-                                   h, filter_y);
-      src += 8;
-      dst += 8;
-      w -= 8;
-    }
-    while (w >= 4) {
-      vp9_filter_block1d4_v8_ssse3(src - src_stride * 3, src_stride,
-                                   dst, dst_stride,
-                                   h, filter_y);
-      src += 4;
-      dst += 4;
-      w -= 4;
-    }
-  }
-  if (w) {
-    vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
-                         filter_x, x_step_q4, filter_y, y_step_q4,
-                         w, h);
-  }
-}
-
-void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
-                               uint8_t *dst, ptrdiff_t dst_stride,
-                               const int16_t *filter_x, int x_step_q4,
-                               const int16_t *filter_y, int y_step_q4,
-                               int w, int h) {
-  if (x_step_q4 == 16 && filter_x[3] != 128) {
-    while (w >= 16) {
-      vp9_filter_block1d16_h8_avg_ssse3(src, src_stride,
-                                    dst, dst_stride,
-                                    h, filter_x);
-      src += 16;
-      dst += 16;
-      w -= 16;
-    }
-    while (w >= 8) {
-      vp9_filter_block1d8_h8_avg_ssse3(src, src_stride,
-                                   dst, dst_stride,
-                                   h, filter_x);
-      src += 8;
-      dst += 8;
-      w -= 8;
-    }
-    while (w >= 4) {
-      vp9_filter_block1d4_h8_avg_ssse3(src, src_stride,
-                                   dst, dst_stride,
-                                   h, filter_x);
-      src += 4;
-      dst += 4;
-      w -= 4;
-    }
-  }
-  if (w) {
-    vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
-                              filter_x, x_step_q4, filter_y, y_step_q4,
-                              w, h);
-  }
-}
-
-void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
-                              uint8_t *dst, ptrdiff_t dst_stride,
-                              const int16_t *filter_x, int x_step_q4,
-                              const int16_t *filter_y, int y_step_q4,
-                              int w, int h) {
-  if (y_step_q4 == 16 && filter_y[3] != 128) {
-    while (w >= 16) {
-      vp9_filter_block1d16_v8_avg_ssse3(src - src_stride * 3, src_stride,
-                                    dst, dst_stride,
-                                    h, filter_y);
-      src += 16;
-      dst += 16;
-      w -= 16;
-    }
-    while (w >= 8) {
-      vp9_filter_block1d8_v8_avg_ssse3(src - src_stride * 3, src_stride,
-                                   dst, dst_stride,
-                                   h, filter_y);
-      src += 8;
-      dst += 8;
-      w -= 8;
-    }
-    while (w >= 4) {
-      vp9_filter_block1d4_v8_avg_ssse3(src - src_stride * 3, src_stride,
-                                   dst, dst_stride,
-                                   h, filter_y);
-      src += 4;
-      dst += 4;
-      w -= 4;
-    }
-  }
-  if (w) {
-    vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
-                             filter_x, x_step_q4, filter_y, y_step_q4,
-                             w, h);
-  }
-}
-
-void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
-                         uint8_t *dst, ptrdiff_t dst_stride,
-                         const int16_t *filter_x, int x_step_q4,
-                         const int16_t *filter_y, int y_step_q4,
-                         int w, int h) {
-  DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
-
-  assert(w <= 64);
-  assert(h <= 64);
-  if (x_step_q4 == 16 && y_step_q4 == 16) {
-    vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64,
-                              filter_x, x_step_q4, filter_y, y_step_q4,
-                              w, h + 7);
-    vp9_convolve8_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride,
-                             filter_x, x_step_q4, filter_y, y_step_q4, w, h);
-  } else {
-    vp9_convolve8_c(src, src_stride, dst, dst_stride,
-                    filter_x, x_step_q4, filter_y, y_step_q4, w, h);
-  }
-}
-
-void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
-                         uint8_t *dst, ptrdiff_t dst_stride,
-                         const int16_t *filter_x, int x_step_q4,
-                         const int16_t *filter_y, int y_step_q4,
-                         int w, int h) {
-  DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
-
-  assert(w <= 64);
-  assert(h <= 64);
-  if (x_step_q4 == 16 && y_step_q4 == 16) {
-    vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64,
-                              filter_x, x_step_q4, filter_y, y_step_q4,
-                              w, h + 7);
-    vp9_convolve8_avg_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride,
-                                 filter_x, x_step_q4, filter_y, y_step_q4,
-                                 w, h);
-  } else {
-    vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
-                        filter_x, x_step_q4, filter_y, y_step_q4, w, h);
-  }
-}
+// void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+//                                uint8_t *dst, ptrdiff_t dst_stride,
+//                                const int16_t *filter_x, int x_step_q4,
+//                                const int16_t *filter_y, int y_step_q4,
+//                                int w, int h);
+// void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+//                               uint8_t *dst, ptrdiff_t dst_stride,
+//                               const int16_t *filter_x, int x_step_q4,
+//                               const int16_t *filter_y, int y_step_q4,
+//                               int w, int h);
+// void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+//                                    uint8_t *dst, ptrdiff_t dst_stride,
+//                                    const int16_t *filter_x, int x_step_q4,
+//                                    const int16_t *filter_y, int y_step_q4,
+//                                    int w, int h);
+// void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+//                                   uint8_t *dst, ptrdiff_t dst_stride,
+//                                   const int16_t *filter_x, int x_step_q4,
+//                                   const int16_t *filter_y, int y_step_q4,
+//                                   int w, int h);
+FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3);
+FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3);
+FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3);
+FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
+            ssse3);
+
+// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+//                          uint8_t *dst, ptrdiff_t dst_stride,
+//                          const int16_t *filter_x, int x_step_q4,
+//                          const int16_t *filter_y, int y_step_q4,
+//                          int w, int h);
+// void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+//                              uint8_t *dst, ptrdiff_t dst_stride,
+//                              const int16_t *filter_x, int x_step_q4,
+//                              const int16_t *filter_y, int y_step_q4,
+//                              int w, int h);
+FUN_CONV_2D(, ssse3);
+FUN_CONV_2D(avg_ , ssse3);
 #endif
 
 #if HAVE_SSE2
@@ -248,199 +153,41 @@ filter8_1dfunction vp9_filter_block1d8_h8_avg_sse2;
 filter8_1dfunction vp9_filter_block1d4_v8_avg_sse2;
 filter8_1dfunction vp9_filter_block1d4_h8_avg_sse2;
 
-void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
-                               uint8_t *dst, ptrdiff_t dst_stride,
-                               const int16_t *filter_x, int x_step_q4,
-                               const int16_t *filter_y, int y_step_q4,
-                               int w, int h) {
-  /* Ensure the filter can be compressed to int16_t. */
-  if (x_step_q4 == 16 && filter_x[3] != 128) {
-    while (w >= 16) {
-      vp9_filter_block1d16_h8_sse2(src, src_stride,
-                                    dst, dst_stride,
-                                    h, filter_x);
-      src += 16;
-      dst += 16;
-      w -= 16;
-    }
-    while (w >= 8) {
-      vp9_filter_block1d8_h8_sse2(src, src_stride,
-                                   dst, dst_stride,
-                                   h, filter_x);
-      src += 8;
-      dst += 8;
-      w -= 8;
-    }
-    while (w >= 4) {
-      vp9_filter_block1d4_h8_sse2(src, src_stride,
-                                   dst, dst_stride,
-                                   h, filter_x);
-      src += 4;
-      dst += 4;
-      w -= 4;
-    }
-  }
-  if (w) {
-    vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
-                          filter_x, x_step_q4, filter_y, y_step_q4,
-                          w, h);
-  }
-}
-
-void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
-                              uint8_t *dst, ptrdiff_t dst_stride,
-                              const int16_t *filter_x, int x_step_q4,
-                              const int16_t *filter_y, int y_step_q4,
-                              int w, int h) {
-  if (y_step_q4 == 16 && filter_y[3] != 128) {
-    while (w >= 16) {
-      vp9_filter_block1d16_v8_sse2(src - src_stride * 3, src_stride,
-                                    dst, dst_stride,
-                                    h, filter_y);
-      src += 16;
-      dst += 16;
-      w -= 16;
-    }
-    while (w >= 8) {
-      vp9_filter_block1d8_v8_sse2(src - src_stride * 3, src_stride,
-                                   dst, dst_stride,
-                                   h, filter_y);
-      src += 8;
-      dst += 8;
-      w -= 8;
-    }
-    while (w >= 4) {
-      vp9_filter_block1d4_v8_sse2(src - src_stride * 3, src_stride,
-                                   dst, dst_stride,
-                                   h, filter_y);
-      src += 4;
-      dst += 4;
-      w -= 4;
-    }
-  }
-  if (w) {
-    vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
-                         filter_x, x_step_q4, filter_y, y_step_q4,
-                         w, h);
-  }
-}
-
-void vp9_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
-                               uint8_t *dst, ptrdiff_t dst_stride,
-                               const int16_t *filter_x, int x_step_q4,
-                               const int16_t *filter_y, int y_step_q4,
-                               int w, int h) {
-  if (x_step_q4 == 16 && filter_x[3] != 128) {
-    while (w >= 16) {
-      vp9_filter_block1d16_h8_avg_sse2(src, src_stride,
-                                    dst, dst_stride,
-                                    h, filter_x);
-      src += 16;
-      dst += 16;
-      w -= 16;
-    }
-    while (w >= 8) {
-      vp9_filter_block1d8_h8_avg_sse2(src, src_stride,
-                                   dst, dst_stride,
-                                   h, filter_x);
-      src += 8;
-      dst += 8;
-      w -= 8;
-    }
-    while (w >= 4) {
-      vp9_filter_block1d4_h8_avg_sse2(src, src_stride,
-                                   dst, dst_stride,
-                                   h, filter_x);
-      src += 4;
-      dst += 4;
-      w -= 4;
-    }
-  }
-  if (w) {
-    vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
-                              filter_x, x_step_q4, filter_y, y_step_q4,
-                              w, h);
-  }
-}
-
-void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
-                              uint8_t *dst, ptrdiff_t dst_stride,
-                              const int16_t *filter_x, int x_step_q4,
-                              const int16_t *filter_y, int y_step_q4,
-                              int w, int h) {
-  if (y_step_q4 == 16 && filter_y[3] != 128) {
-    while (w >= 16) {
-      vp9_filter_block1d16_v8_avg_sse2(src - src_stride * 3, src_stride,
-                                    dst, dst_stride,
-                                    h, filter_y);
-      src += 16;
-      dst += 16;
-      w -= 16;
-    }
-    while (w >= 8) {
-      vp9_filter_block1d8_v8_avg_sse2(src - src_stride * 3, src_stride,
-                                   dst, dst_stride,
-                                   h, filter_y);
-      src += 8;
-      dst += 8;
-      w -= 8;
-    }
-    while (w >= 4) {
-      vp9_filter_block1d4_v8_avg_sse2(src - src_stride * 3, src_stride,
-                                   dst, dst_stride,
-                                   h, filter_y);
-      src += 4;
-      dst += 4;
-      w -= 4;
-    }
-  }
-  if (w) {
-    vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
-                             filter_x, x_step_q4, filter_y, y_step_q4,
-                             w, h);
-  }
-}
-
-void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
-                         uint8_t *dst, ptrdiff_t dst_stride,
-                         const int16_t *filter_x, int x_step_q4,
-                         const int16_t *filter_y, int y_step_q4,
-                         int w, int h) {
-  DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
-
-  assert(w <= 64);
-  assert(h <= 64);
-  if (x_step_q4 == 16 && y_step_q4 == 16) {
-    vp9_convolve8_horiz_sse2(src - 3 * src_stride, src_stride, fdata2, 64,
-                              filter_x, x_step_q4, filter_y, y_step_q4,
-                              w, h + 7);
-    vp9_convolve8_vert_sse2(fdata2 + 3 * 64, 64, dst, dst_stride,
-                             filter_x, x_step_q4, filter_y, y_step_q4, w, h);
-  } else {
-    vp9_convolve8_c(src, src_stride, dst, dst_stride,
-                    filter_x, x_step_q4, filter_y, y_step_q4, w, h);
-  }
-}
-
-void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
-                         uint8_t *dst, ptrdiff_t dst_stride,
-                         const int16_t *filter_x, int x_step_q4,
-                         const int16_t *filter_y, int y_step_q4,
-                         int w, int h) {
-  DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
-
-  assert(w <= 64);
-  assert(h <= 64);
-  if (x_step_q4 == 16 && y_step_q4 == 16) {
-    vp9_convolve8_horiz_sse2(src - 3 * src_stride, src_stride, fdata2, 64,
-                              filter_x, x_step_q4, filter_y, y_step_q4,
-                              w, h + 7);
-    vp9_convolve8_avg_vert_sse2(fdata2 + 3 * 64, 64, dst, dst_stride,
-                                 filter_x, x_step_q4, filter_y, y_step_q4,
-                                 w, h);
-  } else {
-    vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
-                        filter_x, x_step_q4, filter_y, y_step_q4, w, h);
-  }
-}
+// void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
+//                               uint8_t *dst, ptrdiff_t dst_stride,
+//                               const int16_t *filter_x, int x_step_q4,
+//                               const int16_t *filter_y, int y_step_q4,
+//                               int w, int h);
+// void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
+//                              uint8_t *dst, ptrdiff_t dst_stride,
+//                              const int16_t *filter_x, int x_step_q4,
+//                              const int16_t *filter_y, int y_step_q4,
+//                              int w, int h);
+// void vp9_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
+//                                   uint8_t *dst, ptrdiff_t dst_stride,
+//                                   const int16_t *filter_x, int x_step_q4,
+//                                   const int16_t *filter_y, int y_step_q4,
+//                                   int w, int h);
+// void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
+//                                  uint8_t *dst, ptrdiff_t dst_stride,
+//                                  const int16_t *filter_x, int x_step_q4,
+//                                  const int16_t *filter_y, int y_step_q4,
+//                                  int w, int h);
+FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
+FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
+FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
+FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2);
+
+// void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
+//                         uint8_t *dst, ptrdiff_t dst_stride,
+//                         const int16_t *filter_x, int x_step_q4,
+//                         const int16_t *filter_y, int y_step_q4,
+//                         int w, int h);
+// void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
+//                             uint8_t *dst, ptrdiff_t dst_stride,
+//                             const int16_t *filter_x, int x_step_q4,
+//                             const int16_t *filter_y, int y_step_q4,
+//                             int w, int h);
+FUN_CONV_2D(, sse2);
+FUN_CONV_2D(avg_ , sse2);
 #endif
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index 2f6149464..13a5b5a82 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -180,7 +180,7 @@ static INLINE void transpose_4x4(__m128i *res) {
   res[1] = _mm_unpackhi_epi16(tr0_0, tr0_1);
 }
 
-static void idct4_1d_sse2(__m128i *in) {
+static void idct4_sse2(__m128i *in) {
   const __m128i k__cospi_p16_p16 = pair_set_epi16(cospi_16_64, cospi_16_64);
   const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
   const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
@@ -216,7 +216,7 @@ static void idct4_1d_sse2(__m128i *in) {
   in[1] = _mm_shuffle_epi32(in[1], 0x4E);
 }
 
-static void iadst4_1d_sse2(__m128i *in) {
+static void iadst4_sse2(__m128i *in) {
   const __m128i k__sinpi_p01_p04 = pair_set_epi16(sinpi_1_9, sinpi_4_9);
   const __m128i k__sinpi_p03_p02 = pair_set_epi16(sinpi_3_9, sinpi_2_9);
   const __m128i k__sinpi_p02_m01 = pair_set_epi16(sinpi_2_9, -sinpi_1_9);
@@ -276,20 +276,20 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
 
   switch (tx_type) {
     case 0:  // DCT_DCT
-      idct4_1d_sse2(in);
-      idct4_1d_sse2(in);
+      idct4_sse2(in);
+      idct4_sse2(in);
       break;
     case 1:  // ADST_DCT
-      idct4_1d_sse2(in);
-      iadst4_1d_sse2(in);
+      idct4_sse2(in);
+      iadst4_sse2(in);
       break;
     case 2:  // DCT_ADST
-      iadst4_1d_sse2(in);
-      idct4_1d_sse2(in);
+      iadst4_sse2(in);
+      idct4_sse2(in);
       break;
     case 3:  // ADST_ADST
-      iadst4_1d_sse2(in);
-      iadst4_1d_sse2(in);
+      iadst4_sse2(in);
+      iadst4_sse2(in);
       break;
     default:
       assert(0);
@@ -455,7 +455,7 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
       res1 = _mm_packs_epi32(tmp2, tmp3); \
   }
 
-#define IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7, \
+#define IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, \
                  out0, out1, out2, out3, out4, out5, out6, out7)  \
   { \
   /* Stage1 */      \
@@ -573,7 +573,7 @@ void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
                   in0, in1, in2, in3, in4, in5, in6, in7);
 
     // 4-stage 1D idct8x8
-    IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+    IDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
              in0, in1, in2, in3, in4, in5, in6, in7);
   }
 
@@ -674,7 +674,7 @@ static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) {
   out[3] = _mm_unpackhi_epi64(tr1_2, tr1_6);
 }
 
-static void idct8_1d_sse2(__m128i *in) {
+static void idct8_sse2(__m128i *in) {
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
   const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
@@ -695,11 +695,11 @@ static void idct8_1d_sse2(__m128i *in) {
                 in0, in1, in2, in3, in4, in5, in6, in7);
 
   // 4-stage 1D idct8x8
-  IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+  IDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
            in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7]);
 }
 
-static void iadst8_1d_sse2(__m128i *in) {
+static void iadst8_sse2(__m128i *in) {
   const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
   const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
   const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64);
@@ -946,20 +946,20 @@ void vp9_iht8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride,
 
   switch (tx_type) {
     case 0:  // DCT_DCT
-      idct8_1d_sse2(in);
-      idct8_1d_sse2(in);
+      idct8_sse2(in);
+      idct8_sse2(in);
       break;
     case 1:  // ADST_DCT
-      idct8_1d_sse2(in);
-      iadst8_1d_sse2(in);
+      idct8_sse2(in);
+      iadst8_sse2(in);
       break;
     case 2:  // DCT_ADST
-      iadst8_1d_sse2(in);
-      idct8_1d_sse2(in);
+      iadst8_sse2(in);
+      idct8_sse2(in);
       break;
     case 3:  // ADST_ADST
-      iadst8_1d_sse2(in);
-      iadst8_1d_sse2(in);
+      iadst8_sse2(in);
+      iadst8_sse2(in);
       break;
     default:
       assert(0);
@@ -1104,7 +1104,7 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
 
   TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, in0, in1, in2, in3)
 
-  IDCT8_1D(in0, in1, in2, in3, zero, zero, zero, zero,
+  IDCT8(in0, in1, in2, in3, zero, zero, zero, zero,
            in0, in1, in2, in3, in4, in5, in6, in7);
   // Final rounding and shift
   in0 = _mm_adds_epi16(in0, final_rounding);
@@ -1135,7 +1135,7 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   RECON_AND_STORE(dest, in7);
 }
 
-#define IDCT16_1D \
+#define IDCT16 \
   /* Stage2 */ \
   { \
     const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], in[15]); \
@@ -1264,7 +1264,7 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
                            stp2_10, stp2_13, stp2_11, stp2_12) \
   }
 
-#define IDCT16_10_1D \
+#define IDCT16_10 \
     /* Stage2 */ \
     { \
       const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], zero); \
@@ -1437,7 +1437,7 @@ void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,
       array_transpose_8x8(in, in);
       array_transpose_8x8(in+8, in+8);
 
-      IDCT16_1D
+      IDCT16
 
       // Stage7
       curr1[0] = _mm_add_epi16(stp2_0, stp1_15);
@@ -1465,7 +1465,7 @@ void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,
       array_transpose_8x8(l+i*8, in);
       array_transpose_8x8(r+i*8, in+8);
 
-      IDCT16_1D
+      IDCT16
 
       // 2-D
       in[0] = _mm_add_epi16(stp2_0, stp1_15);
@@ -1590,7 +1590,7 @@ static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) {
   res0[15] = tbuf[7];
 }
 
-static void iadst16_1d_8col(__m128i *in) {
+static void iadst16_8col(__m128i *in) {
   // perform 16x16 1-D ADST for 8 columns
   __m128i s[16], x[16], u[32], v[32];
   const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64);
@@ -2060,7 +2060,7 @@ static void iadst16_1d_8col(__m128i *in) {
   in[15] = _mm_sub_epi16(kZero, s[1]);
 }
 
-static void idct16_1d_8col(__m128i *in) {
+static void idct16_8col(__m128i *in) {
   const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
   const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
   const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64);
@@ -2404,16 +2404,16 @@ static void idct16_1d_8col(__m128i *in) {
   in[15] = _mm_sub_epi16(s[0], s[15]);
 }
 
-static void idct16_1d_sse2(__m128i *in0, __m128i *in1) {
+static void idct16_sse2(__m128i *in0, __m128i *in1) {
   array_transpose_16x16(in0, in1);
-  idct16_1d_8col(in0);
-  idct16_1d_8col(in1);
+  idct16_8col(in0);
+  idct16_8col(in1);
 }
 
-static void iadst16_1d_sse2(__m128i *in0, __m128i *in1) {
+static void iadst16_sse2(__m128i *in0, __m128i *in1) {
   array_transpose_16x16(in0, in1);
-  iadst16_1d_8col(in0);
-  iadst16_1d_8col(in1);
+  iadst16_8col(in0);
+  iadst16_8col(in1);
 }
 
 static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) {
@@ -2502,20 +2502,20 @@ void vp9_iht16x16_256_add_sse2(const int16_t *input, uint8_t *dest, int stride,
 
   switch (tx_type) {
     case 0:  // DCT_DCT
-      idct16_1d_sse2(in0, in1);
-      idct16_1d_sse2(in0, in1);
+      idct16_sse2(in0, in1);
+      idct16_sse2(in0, in1);
       break;
     case 1:  // ADST_DCT
-      idct16_1d_sse2(in0, in1);
-      iadst16_1d_sse2(in0, in1);
+      idct16_sse2(in0, in1);
+      iadst16_sse2(in0, in1);
       break;
     case 2:  // DCT_ADST
-      iadst16_1d_sse2(in0, in1);
-      idct16_1d_sse2(in0, in1);
+      iadst16_sse2(in0, in1);
+      idct16_sse2(in0, in1);
       break;
     case 3:  // ADST_ADST
-      iadst16_1d_sse2(in0, in1);
-      iadst16_1d_sse2(in0, in1);
+      iadst16_sse2(in0, in1);
+      iadst16_sse2(in0, in1);
       break;
     default:
       assert(0);
@@ -2732,7 +2732,7 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
   for (i = 0; i < 2; i++) {
     array_transpose_4X8(l + 8*i, in);
 
-    IDCT16_10_1D
+    IDCT16_10
 
     // Stage7
     in[0] = _mm_add_epi16(stp2_0, stp1_15);
@@ -2814,7 +2814,7 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
     input += 8; \
   }  \
 
-#define IDCT32_1D_34 \
+#define IDCT32_34 \
 /* Stage1 */ \
 { \
   const __m128i zero = _mm_setzero_si128();\
@@ -3115,7 +3115,7 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
 }
 
 
-#define IDCT32_1D \
+#define IDCT32 \
 /* Stage1 */ \
 { \
   const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], in[31]); \
@@ -3554,7 +3554,7 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
   array_transpose_8x8(in+16, in+16);
   array_transpose_8x8(in+24, in+24);
 
-  IDCT32_1D
+  IDCT32
 
   // 1_D: Store 32 intermediate results for each 8x32 block.
   col[0] = _mm_add_epi16(stp1_0, stp1_31);
@@ -3593,7 +3593,7 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
       const __m128i zero = _mm_setzero_si128();
       // Transpose 32x8 block to 8x32 block
       array_transpose_8x8(col+i*8, in);
-      IDCT32_1D_34
+      IDCT32_34
 
       // 2_D: Calculate the results and store them to destination.
       in[0] = _mm_add_epi16(stp1_0, stp1_31);
@@ -3922,7 +3922,7 @@ void vp9_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,
       array_transpose_8x8(in+16, in+16);
       array_transpose_8x8(in+24, in+24);
 
-      IDCT32_1D
+      IDCT32
 
       // 1_D: Store 32 intermediate results for each 8x32 block.
       col[i32 + 0] = _mm_add_epi16(stp1_0, stp1_31);
@@ -3969,7 +3969,7 @@ void vp9_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,
       array_transpose_8x8(col+j+64, in+16);
       array_transpose_8x8(col+j+96, in+24);
 
-      IDCT32_1D
+      IDCT32
 
       // 2_D: Calculate the results and store them to destination.
       in[0] = _mm_add_epi16(stp1_0, stp1_31);
diff --git a/vp9/common/x86/vp9_postproc_x86.h b/vp9/common/x86/vp9_postproc_x86.h
index 8870215a2..cab9d34f2 100644
--- a/vp9/common/x86/vp9_postproc_x86.h
+++ b/vp9/common/x86/vp9_postproc_x86.h
@@ -12,6 +12,10 @@
 #ifndef VP9_COMMON_X86_VP9_POSTPROC_X86_H_
 #define VP9_COMMON_X86_VP9_POSTPROC_X86_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Note:
  *
  * This platform is commonly built for runtime CPU detection. If you modify
@@ -61,4 +65,8 @@ extern prototype_postproc_addnoise(vp9_plane_add_noise_wmt);
 #endif
 #endif
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_COMMON_X86_VP9_POSTPROC_X86_H_
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index d66ee2730..be8176a3f 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -33,18 +33,12 @@
 #include "vp9/decoder/vp9_detokenize.h"
 #include "vp9/decoder/vp9_decodemv.h"
 #include "vp9/decoder/vp9_dsubexp.h"
+#include "vp9/decoder/vp9_dthread.h"
 #include "vp9/decoder/vp9_onyxd_int.h"
 #include "vp9/decoder/vp9_read_bit_buffer.h"
 #include "vp9/decoder/vp9_reader.h"
 #include "vp9/decoder/vp9_thread.h"
 
-typedef struct TileWorkerData {
-  VP9_COMMON *cm;
-  vp9_reader bit_reader;
-  DECLARE_ALIGNED(16, MACROBLOCKD, xd);
-  DECLARE_ALIGNED(16, int16_t,  dqcoeff[MAX_MB_PLANE][64 * 64]);
-} TileWorkerData;
-
 static int read_be32(const uint8_t *p) {
   return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
 }
@@ -421,8 +415,7 @@ static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd,
     if (has_second_ref(mbmi))
       set_ref(cm, xd, 1, mi_row, mi_col);
 
-    xd->subpix.filter_x = xd->subpix.filter_y =
-        vp9_get_filter_kernel(mbmi->interp_filter);
+    xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
 
     // Prediction
     vp9_dec_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
@@ -655,14 +648,13 @@ static void setup_quantization(VP9_COMMON *const cm, MACROBLOCKD *const xd,
   xd->itxm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
 }
 
-static INTERPOLATION_TYPE read_interp_filter_type(
-                              struct vp9_read_bit_buffer *rb) {
-  const INTERPOLATION_TYPE literal_to_type[] = { EIGHTTAP_SMOOTH,
-                                                 EIGHTTAP,
-                                                 EIGHTTAP_SHARP,
-                                                 BILINEAR };
+static INTERP_FILTER read_interp_filter(struct vp9_read_bit_buffer *rb) {
+  const INTERP_FILTER literal_to_filter[] = { EIGHTTAP_SMOOTH,
+                                              EIGHTTAP,
+                                              EIGHTTAP_SHARP,
+                                              BILINEAR };
   return vp9_rb_read_bit(rb) ? SWITCHABLE
-                             : literal_to_type[vp9_rb_read_literal(rb, 2)];
+                             : literal_to_filter[vp9_rb_read_literal(rb, 2)];
 }
 
 static void read_frame_size(struct vp9_read_bit_buffer *rb,
@@ -699,21 +691,9 @@ static void apply_frame_size(VP9D_COMP *pbi, int width, int height) {
     vp9_update_frame_size(cm);
   }
 
-  if (cm->fb_list != NULL) {
-    vpx_codec_frame_buffer_t *const ext_fb = &cm->fb_list[cm->new_fb_idx];
-    if (vp9_realloc_frame_buffer(get_frame_new_buffer(cm),
-                                 cm->width, cm->height,
-                                 cm->subsampling_x, cm->subsampling_y,
-                                 VP9_DEC_BORDER_IN_PIXELS, ext_fb,
-                                 cm->realloc_fb_cb, cm->user_priv)) {
-      vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
-                         "Failed to allocate external frame buffer");
-    }
-  } else {
-    vp9_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height,
-                             cm->subsampling_x, cm->subsampling_y,
-                             VP9_DEC_BORDER_IN_PIXELS, NULL, NULL, NULL);
-  }
+  vp9_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height,
+                           cm->subsampling_x, cm->subsampling_y,
+                           VP9_DEC_BORDER_IN_PIXELS);
 }
 
 static void setup_frame_size(VP9D_COMP *pbi,
@@ -743,7 +723,7 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi,
   if (!found)
     read_frame_size(rb, &width, &height);
 
-  if (!width || !height)
+  if (width <= 0 || height <= 0)
     vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
                        "Referenced frame with invalid size");
 
@@ -996,7 +976,6 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) {
       ++pbi->num_tile_workers;
 
       vp9_worker_init(worker);
-      worker->hook = (VP9WorkerHook)tile_worker_hook;
       CHECK_MEM_ERROR(cm, worker->data1,
                       vpx_memalign(32, sizeof(TileWorkerData)));
       CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo)));
@@ -1007,6 +986,11 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) {
     }
   }
 
+  // Reset tile decoding hook
+  for (n = 0; n < pbi->num_tile_workers; ++n) {
+    pbi->tile_workers[n].hook = (VP9WorkerHook)tile_worker_hook;
+  }
+
   // Note: this memset assumes above_context[0], [1] and [2]
   // are allocated as part of the same buffer.
   vpx_memset(pbi->above_context[0], 0,
@@ -1131,7 +1115,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
   if (cm->show_existing_frame) {
     // Show an existing frame directly.
     int frame_to_show = cm->ref_frame_map[vp9_rb_read_literal(rb, 3)];
-    ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->new_fb_idx, frame_to_show);
+    ref_cnt_fb(cm->frame_bufs, &cm->new_fb_idx, frame_to_show);
     pbi->refresh_frame_flags = 0;
     cm->lf.filter_level = 0;
     cm->show_frame = 1;
@@ -1191,14 +1175,14 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
         const int ref = vp9_rb_read_literal(rb, REF_FRAMES_LOG2);
         const int idx = cm->ref_frame_map[ref];
         cm->frame_refs[i].idx = idx;
-        cm->frame_refs[i].buf = &cm->yv12_fb[idx];
+        cm->frame_refs[i].buf = &cm->frame_bufs[idx].buf;
         cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb);
       }
 
       setup_frame_size_with_refs(pbi, rb);
 
       cm->allow_high_precision_mv = vp9_rb_read_bit(rb);
-      cm->mcomp_filter_type = read_interp_filter_type(rb);
+      cm->interp_filter = read_interp_filter(rb);
 
       for (i = 0; i < REFS_PER_FRAME; ++i) {
         RefBuffer *const ref_buf = &cm->frame_refs[i];
@@ -1259,8 +1243,8 @@ static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data,
     read_tx_mode_probs(&fc->tx_probs, &r);
   read_coef_probs(fc, cm->tx_mode, &r);
 
-  for (k = 0; k < MBSKIP_CONTEXTS; ++k)
-    vp9_diff_update_prob(&r, &fc->mbskip_probs[k]);
+  for (k = 0; k < SKIP_CONTEXTS; ++k)
+    vp9_diff_update_prob(&r, &fc->skip_probs[k]);
 
   if (!frame_is_intra_only(cm)) {
     nmv_context *const nmvc = &fc->nmvc;
@@ -1268,7 +1252,7 @@ static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data,
 
     read_inter_mode_probs(fc, &r);
 
-    if (cm->mcomp_filter_type == SWITCHABLE)
+    if (cm->interp_filter == SWITCHABLE)
       read_switchable_interp_probs(fc, &r);
 
     for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
@@ -1335,8 +1319,7 @@ static void debug_check_frame_counts(const VP9_COMMON *const cm) {
   assert(!memcmp(cm->counts.comp_ref, zero_counts.comp_ref,
                  sizeof(cm->counts.comp_ref)));
   assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx)));
-  assert(!memcmp(cm->counts.mbskip, zero_counts.mbskip,
-                 sizeof(cm->counts.mbskip)));
+  assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip)));
   assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv)));
 }
 #endif  // NDEBUG
@@ -1407,9 +1390,6 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
     *p_data_end = decode_tiles(pbi, data + first_partition_size);
   }
 
-  cm->last_width = cm->width;
-  cm->last_height = cm->height;
-
   new_fb->corrupted |= xd->corrupted;
 
   if (!pbi->decoded_key_frame) {
diff --git a/vp9/decoder/vp9_decodeframe.h b/vp9/decoder/vp9_decodeframe.h
index 7245a9845..4537bc824 100644
--- a/vp9/decoder/vp9_decodeframe.h
+++ b/vp9/decoder/vp9_decodeframe.h
@@ -12,10 +12,18 @@
 #ifndef VP9_DECODER_VP9_DECODEFRAME_H_
 #define VP9_DECODER_VP9_DECODEFRAME_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct VP9Common;
 struct VP9Decompressor;
 
 void vp9_init_dequantizer(struct VP9Common *cm);
 int vp9_decode_frame(struct VP9Decompressor *cpi, const uint8_t **p_data_end);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_DECODER_VP9_DECODEFRAME_H_
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 2eb99ea15..e671f0dba 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -152,9 +152,9 @@ static int read_skip_coeff(VP9_COMMON *cm, const MACROBLOCKD *xd,
     return 1;
   } else {
     const int ctx = vp9_get_skip_context(xd);
-    const int skip = vp9_read(r, cm->fc.mbskip_probs[ctx]);
+    const int skip = vp9_read(r, cm->fc.skip_probs[ctx]);
     if (!cm->frame_parallel_decoding_mode)
-      ++cm->counts.mbskip[ctx][skip];
+      ++cm->counts.skip[ctx][skip];
     return skip;
   }
 }
@@ -313,7 +313,7 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd,
 }
 
 
-static INLINE INTERPOLATION_TYPE read_switchable_filter_type(
+static INLINE INTERP_FILTER read_switchable_interp_filter(
     VP9_COMMON *const cm, MACROBLOCKD *const xd, vp9_reader *r) {
   const int ctx = vp9_get_pred_context_switchable_interp(xd);
   const int type = vp9_read_tree(r, vp9_switchable_interp_tree,
@@ -459,9 +459,9 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm,
     }
   }
 
-  mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE)
-                      ? read_switchable_filter_type(cm, xd, r)
-                      : cm->mcomp_filter_type;
+  mbmi->interp_filter = (cm->interp_filter == SWITCHABLE)
+                      ? read_switchable_interp_filter(cm, xd, r)
+                      : cm->interp_filter;
 
   if (bsize < BLOCK_8X8) {
     const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];  // 1 or 2
diff --git a/vp9/decoder/vp9_decodemv.h b/vp9/decoder/vp9_decodemv.h
index 95272bfb3..539c9840e 100644
--- a/vp9/decoder/vp9_decodemv.h
+++ b/vp9/decoder/vp9_decodemv.h
@@ -14,10 +14,18 @@
 #include "vp9/decoder/vp9_onyxd_int.h"
 #include "vp9/decoder/vp9_reader.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct TileInfo;
 
 void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd,
                         const struct TileInfo *const tile,
                         int mi_row, int mi_col, vp9_reader *r);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_DECODER_VP9_DECODEMV_H_
diff --git a/vp9/decoder/vp9_detokenize.h b/vp9/decoder/vp9_detokenize.h
index 1780a0fb5..ce3d7653d 100644
--- a/vp9/decoder/vp9_detokenize.h
+++ b/vp9/decoder/vp9_detokenize.h
@@ -15,8 +15,16 @@
 #include "vp9/decoder/vp9_onyxd_int.h"
 #include "vp9/decoder/vp9_reader.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
                             int plane, int block, BLOCK_SIZE plane_bsize,
                             int x, int y, TX_SIZE tx_size, vp9_reader *r);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_DECODER_VP9_DETOKENIZE_H_
diff --git a/vp9/decoder/vp9_dsubexp.h b/vp9/decoder/vp9_dsubexp.h
index 137f546b8..436f434fb 100644
--- a/vp9/decoder/vp9_dsubexp.h
+++ b/vp9/decoder/vp9_dsubexp.h
@@ -14,6 +14,14 @@
 
 #include "vp9/decoder/vp9_reader.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp9_diff_update_prob(vp9_reader *r, vp9_prob* p);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_DECODER_VP9_DSUBEXP_H_
diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c
new file mode 100644
index 000000000..280e351ae
--- /dev/null
+++ b/vp9/decoder/vp9_dthread.c
@@ -0,0 +1,259 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+#include "vp9/common/vp9_reconinter.h"
+#include "vp9/decoder/vp9_dthread.h"
+#include "vp9/decoder/vp9_onyxd_int.h"
+#include "vpx_mem/vpx_mem.h"
+
+#if CONFIG_MULTITHREAD
+static INLINE void mutex_lock(pthread_mutex_t *const mutex) {
+  const int kMaxTryLocks = 4000;
+  int locked = 0;
+  int i;
+
+  for (i = 0; i < kMaxTryLocks; ++i) {
+    if (!pthread_mutex_trylock(mutex)) {
+      locked = 1;
+      break;
+    }
+  }
+
+  if (!locked)
+    pthread_mutex_lock(mutex);
+}
+#endif  // CONFIG_MULTITHREAD
+
+static INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) {
+#if CONFIG_MULTITHREAD
+  const int nsync = lf_sync->sync_range;
+
+  if (r && !(c & (nsync - 1))) {
+    mutex_lock(&lf_sync->mutex_[r - 1]);
+
+    while (c > lf_sync->cur_sb_col[r - 1] - nsync) {
+      pthread_cond_wait(&lf_sync->cond_[r - 1],
+                        &lf_sync->mutex_[r - 1]);
+    }
+    pthread_mutex_unlock(&lf_sync->mutex_[r - 1]);
+  }
+#else
+  (void)lf_sync;
+  (void)r;
+  (void)c;
+#endif  // CONFIG_MULTITHREAD
+}
+
+static INLINE void sync_write(VP9LfSync *const lf_sync, int r, int c,
+                              const int sb_cols) {
+#if CONFIG_MULTITHREAD
+  const int nsync = lf_sync->sync_range;
+  int cur;
+  // Only signal when there are enough filtered SB for next row to run.
+  int sig = 1;
+
+  if (c < sb_cols - 1) {
+    cur = c;
+    if (c % nsync)
+      sig = 0;
+  } else {
+    cur = sb_cols + nsync;
+  }
+
+  if (sig) {
+    mutex_lock(&lf_sync->mutex_[r]);
+
+    lf_sync->cur_sb_col[r] = cur;
+
+    pthread_cond_signal(&lf_sync->cond_[r]);
+    pthread_mutex_unlock(&lf_sync->mutex_[r]);
+  }
+#else
+  (void)lf_sync;
+  (void)r;
+  (void)c;
+  (void)sb_cols;
+#endif  // CONFIG_MULTITHREAD
+}
+
+// Implement row loopfiltering for each thread.
+static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer,
+                                VP9_COMMON *const cm, MACROBLOCKD *const xd,
+                                int start, int stop, int y_only,
+                                VP9LfSync *const lf_sync, int num_lf_workers) {
+  const int num_planes = y_only ? 1 : MAX_MB_PLANE;
+  int r, c;  // SB row and col
+  LOOP_FILTER_MASK lfm;
+  const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
+
+  for (r = start; r < stop; r += num_lf_workers) {
+    const int mi_row = r << MI_BLOCK_SIZE_LOG2;
+    MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mode_info_stride;
+
+    for (c = 0; c < sb_cols; ++c) {
+      const int mi_col = c << MI_BLOCK_SIZE_LOG2;
+      int plane;
+
+      sync_read(lf_sync, r, c);
+
+      setup_dst_planes(xd, frame_buffer, mi_row, mi_col);
+      vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mode_info_stride,
+                     &lfm);
+
+      for (plane = 0; plane < num_planes; ++plane) {
+        vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
+      }
+
+      sync_write(lf_sync, r, c, sb_cols);
+    }
+  }
+}
+
+// Row-based multi-threaded loopfilter hook
+static int loop_filter_row_worker(void *arg1, void *arg2) {
+  TileWorkerData *const tile_data = (TileWorkerData*)arg1;
+  LFWorkerData *const lf_data = &tile_data->lfdata;
+
+  loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, &lf_data->xd,
+                      lf_data->start, lf_data->stop, lf_data->y_only,
+                      lf_data->lf_sync, lf_data->num_lf_workers);
+  return 1;
+}
+
+// VP9 decoder: Implement multi-threaded loopfilter that uses the tile
+// threads.
+void vp9_loop_filter_frame_mt(VP9D_COMP *pbi,
+                              VP9_COMMON *cm,
+                              MACROBLOCKD *xd,
+                              int frame_filter_level,
+                              int y_only, int partial) {
+  // Number of superblock rows and cols
+  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
+  int i;
+
+  // Allocate memory used in thread synchronization.
+  // This always needs to be done even if frame_filter_level is 0.
+  if (!cm->current_video_frame || cm->last_height != cm->height) {
+    VP9LfSync *const lf_sync = &pbi->lf_row_sync;
+
+    if (cm->last_height != cm->height) {
+      const int aligned_last_height =
+          ALIGN_POWER_OF_TWO(cm->last_height, MI_SIZE_LOG2);
+      const int last_sb_rows =
+          mi_cols_aligned_to_sb(aligned_last_height >> MI_SIZE_LOG2) >>
+          MI_BLOCK_SIZE_LOG2;
+
+      vp9_loop_filter_dealloc(lf_sync, last_sb_rows);
+    }
+
+    vp9_loop_filter_alloc(cm, lf_sync, sb_rows, cm->width);
+  }
+
+  if (!frame_filter_level) return;
+
+  vp9_loop_filter_frame_init(cm, frame_filter_level);
+
+  // Initialize cur_sb_col to -1 for all SB rows.
+  vpx_memset(pbi->lf_row_sync.cur_sb_col, -1,
+             sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows);
+
+  // Set up loopfilter thread data.
+  for (i = 0; i < pbi->num_tile_workers; ++i) {
+    VP9Worker *const worker = &pbi->tile_workers[i];
+    TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
+    LFWorkerData *const lf_data = &tile_data->lfdata;
+
+    worker->hook = (VP9WorkerHook)loop_filter_row_worker;
+
+    // Loopfilter data
+    lf_data->frame_buffer = get_frame_new_buffer(cm);
+    lf_data->cm = cm;
+    lf_data->xd = pbi->mb;
+    lf_data->start = i;
+    lf_data->stop = sb_rows;
+    lf_data->y_only = y_only;   // always do all planes in decoder
+
+    lf_data->lf_sync = &pbi->lf_row_sync;
+    lf_data->num_lf_workers = pbi->num_tile_workers;
+
+    // Start loopfiltering
+    if (i == pbi->num_tile_workers - 1) {
+      vp9_worker_execute(worker);
+    } else {
+      vp9_worker_launch(worker);
+    }
+  }
+
+  // Wait till all rows are finished
+  for (i = 0; i < pbi->num_tile_workers; ++i) {
+    vp9_worker_sync(&pbi->tile_workers[i]);
+  }
+}
+
+// Set up nsync by width.
+static int get_sync_range(int width) {
+  // nsync numbers are picked by testing. For example, for 4k
+  // video, using 4 gives best performance.
+  if (width < 640)
+    return 1;
+  else if (width <= 1280)
+    return 2;
+  else if (width <= 4096)
+    return 4;
+  else
+    return 8;
+}
+
+// Allocate memory for lf row synchronization
+void vp9_loop_filter_alloc(VP9_COMMON *cm, VP9LfSync *lf_sync, int rows,
+                           int width) {
+#if CONFIG_MULTITHREAD
+  int i;
+
+  CHECK_MEM_ERROR(cm, lf_sync->mutex_,
+                  vpx_malloc(sizeof(*lf_sync->mutex_) * rows));
+  CHECK_MEM_ERROR(cm, lf_sync->cond_,
+                  vpx_malloc(sizeof(*lf_sync->cond_) * rows));
+
+  for (i = 0; i < rows; ++i) {
+    pthread_mutex_init(&lf_sync->mutex_[i], NULL);
+    pthread_cond_init(&lf_sync->cond_[i], NULL);
+  }
+#endif  // CONFIG_MULTITHREAD
+
+  CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
+                  vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));
+
+  // Set up nsync.
+  lf_sync->sync_range = get_sync_range(width);
+}
+
+// Deallocate lf synchronization related mutex and data
+void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows) {
+#if CONFIG_MULTITHREAD
+  if (lf_sync != NULL) {
+    int i;
+
+    for (i = 0; i < rows; ++i) {
+      pthread_mutex_destroy(&lf_sync->mutex_[i]);
+      pthread_cond_destroy(&lf_sync->cond_[i]);
+    }
+
+    vpx_free(lf_sync->mutex_);
+    vpx_free(lf_sync->cond_);
+    vpx_free(lf_sync->cur_sb_col);
+  }
+#else
+  (void)rows;
+  if (lf_sync != NULL)
+    vpx_free(lf_sync->cur_sb_col);
+#endif  // CONFIG_MULTITHREAD
+}
diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h
new file mode 100644
index 000000000..4478354ba
--- /dev/null
+++ b/vp9/decoder/vp9_dthread.h
@@ -0,0 +1,60 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_DECODER_VP9_DTHREAD_H_
+#define VP9_DECODER_VP9_DTHREAD_H_
+
+#include "./vpx_config.h"
+#include "vp9/common/vp9_loopfilter.h"
+#include "vp9/decoder/vp9_reader.h"
+#include "vp9/decoder/vp9_thread.h"
+
+struct macroblockd;
+struct VP9Common;
+struct VP9Decompressor;
+
+typedef struct TileWorkerData {
+  struct VP9Common *cm;
+  vp9_reader bit_reader;
+  DECLARE_ALIGNED(16, struct macroblockd, xd);
+  DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
+
+  // Row-based parallel loopfilter data
+  LFWorkerData lfdata;
+} TileWorkerData;
+
+// Loopfilter row synchronization
+typedef struct VP9LfSyncData {
+#if CONFIG_MULTITHREAD
+  pthread_mutex_t *mutex_;
+  pthread_cond_t *cond_;
+#endif
+  // Allocate memory to store the loop-filtered superblock index in each row.
+  int *cur_sb_col;
+  // The optimal sync_range for different resolution and platform should be
+  // determined by testing. Currently, it is chosen to be a power-of-2 number.
+  int sync_range;
+} VP9LfSync;
+
+// Allocate memory for loopfilter row synchronization.
+void vp9_loop_filter_alloc(struct VP9Common *cm, struct VP9LfSyncData *lf_sync,
+                           int rows, int width);
+
+// Deallocate loopfilter synchronization related mutex and data.
+void vp9_loop_filter_dealloc(struct VP9LfSyncData *lf_sync, int rows);
+
+// Multi-threaded loopfilter that uses the tile threads.
+void vp9_loop_filter_frame_mt(struct VP9Decompressor *pbi,
+                              struct VP9Common *cm,
+                              struct macroblockd *xd,
+                              int frame_filter_level,
+                              int y_only, int partial);
+
+#endif  // VP9_DECODER_VP9_DTHREAD_H_
diff --git a/vp9/decoder/vp9_onyxd.h b/vp9/decoder/vp9_onyxd.h
index a4b9c24fc..0fc9d578b 100644
--- a/vp9/decoder/vp9_onyxd.h
+++ b/vp9/decoder/vp9_onyxd.h
@@ -11,14 +11,14 @@
 #ifndef VP9_DECODER_VP9_ONYXD_H_
 #define VP9_DECODER_VP9_ONYXD_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "vpx_scale/yv12config.h"
 #include "vp9/common/vp9_ppflags.h"
 #include "vpx/vpx_codec.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef void *VP9D_PTR;
 
 typedef struct {
@@ -63,7 +63,7 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf);
 void vp9_remove_decompressor(VP9D_PTR comp);
 
 #ifdef __cplusplus
-}
+}  // extern "C"
 #endif
 
 #endif  // VP9_DECODER_VP9_ONYXD_H_
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index 75d52c25a..803d536ba 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -27,6 +27,7 @@
 #include "vpx_ports/vpx_timer.h"
 #include "vp9/decoder/vp9_decodeframe.h"
 #include "vp9/decoder/vp9_detokenize.h"
+#include "vp9/decoder/vp9_dthread.h"
 #include "./vpx_scale_rtcd.h"
 
 #define WRITE_RECON_BUFFER 0
@@ -177,13 +178,24 @@ void vp9_remove_decompressor(VP9D_PTR ptr) {
     vpx_free(worker->data2);
   }
   vpx_free(pbi->tile_workers);
+
+  if (pbi->num_tile_workers) {
+    VP9_COMMON *const cm = &pbi->common;
+    const int sb_rows =
+        mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
+    VP9LfSync *const lf_sync = &pbi->lf_row_sync;
+
+    vp9_loop_filter_dealloc(lf_sync, sb_rows);
+  }
+
   vpx_free(pbi->mi_streams);
   vpx_free(pbi->above_context[0]);
   vpx_free(pbi->above_seg_context);
   vpx_free(pbi);
 }
 
-static int equal_dimensions(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) {
+static int equal_dimensions(const YV12_BUFFER_CONFIG *a,
+                            const YV12_BUFFER_CONFIG *b) {
     return a->y_height == b->y_height && a->y_width == b->y_width &&
            a->uv_height == b->uv_height && a->uv_width == b->uv_width;
 }
@@ -200,7 +212,8 @@ vpx_codec_err_t vp9_copy_reference_dec(VP9D_PTR ptr,
    * later commit that adds VP9-specific controls for this functionality.
    */
   if (ref_frame_flag == VP9_LAST_FLAG) {
-    YV12_BUFFER_CONFIG *cfg = &cm->yv12_fb[cm->ref_frame_map[0]];
+    const YV12_BUFFER_CONFIG *const cfg =
+        &cm->frame_bufs[cm->ref_frame_map[0]].buf;
     if (!equal_dimensions(cfg, sd))
       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                          "Incorrect buffer dimensions");
@@ -246,13 +259,13 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_PTR ptr, VP9_REFFRAME ref_frame_flag,
 
     // Find an empty frame buffer.
     const int free_fb = get_free_fb(cm);
-    // Decrease fb_idx_ref_cnt since it will be increased again in
+    // Decrease ref_count since it will be increased again in
     // ref_cnt_fb() below.
-    cm->fb_idx_ref_cnt[free_fb]--;
+    cm->frame_bufs[free_fb].ref_count--;
 
     // Manage the reference counters and copy image.
-    ref_cnt_fb(cm->fb_idx_ref_cnt, ref_fb_ptr, free_fb);
-    ref_buf->buf = &cm->yv12_fb[*ref_fb_ptr];
+    ref_cnt_fb(cm->frame_bufs, ref_fb_ptr, free_fb);
+    ref_buf->buf = &cm->frame_bufs[*ref_fb_ptr].buf;
     vp8_yv12_copy_frame(sd, ref_buf->buf);
   }
 
@@ -267,7 +280,7 @@ int vp9_get_reference_dec(VP9D_PTR ptr, int index, YV12_BUFFER_CONFIG **fb) {
   if (index < 0 || index >= REF_FRAMES)
     return -1;
 
-  *fb = &cm->yv12_fb[cm->ref_frame_map[index]];
+  *fb = &cm->frame_bufs[cm->ref_frame_map[index]].buf;
   return 0;
 }
 
@@ -278,13 +291,13 @@ static void swap_frame_buffers(VP9D_COMP *pbi) {
 
   for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
     if (mask & 1)
-      ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->ref_frame_map[ref_index],
+      ref_cnt_fb(cm->frame_bufs, &cm->ref_frame_map[ref_index],
                  cm->new_fb_idx);
     ++ref_index;
   }
 
   cm->frame_to_show = get_frame_new_buffer(cm);
-  cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
+  cm->frame_bufs[cm->new_fb_idx].ref_count--;
 
   // Invalidate these references until the next frame starts.
   for (ref_index = 0; ref_index < 3; ref_index++)
@@ -340,8 +353,8 @@ int vp9_receive_compressed_data(VP9D_PTR ptr,
     if (cm->frame_refs[0].idx != INT_MAX)
       cm->frame_refs[0].buf->corrupted = 1;
 
-    if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
-      cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
+    if (cm->frame_bufs[cm->new_fb_idx].ref_count > 0)
+      cm->frame_bufs[cm->new_fb_idx].ref_count--;
 
     return -1;
   }
@@ -353,8 +366,8 @@ int vp9_receive_compressed_data(VP9D_PTR ptr,
   if (retcode < 0) {
     cm->error.error_code = VPX_CODEC_ERROR;
     cm->error.setjmp = 0;
-    if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
-      cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
+    if (cm->frame_bufs[cm->new_fb_idx].ref_count > 0)
+      cm->frame_bufs[cm->new_fb_idx].ref_count--;
     return retcode;
   }
 
@@ -370,7 +383,13 @@ int vp9_receive_compressed_data(VP9D_PTR ptr,
 #endif
 
   if (!pbi->do_loopfilter_inline) {
-    vp9_loop_filter_frame(cm, &pbi->mb, pbi->common.lf.filter_level, 0, 0);
+    // If multiple threads are used to decode tiles, then we use those threads
+    // to do parallel loopfiltering.
+    if (pbi->num_tile_workers) {
+      vp9_loop_filter_frame_mt(pbi, cm, &pbi->mb, cm->lf.filter_level, 0, 0);
+    } else {
+      vp9_loop_filter_frame(cm, &pbi->mb, cm->lf.filter_level, 0, 0);
+    }
   }
 
 #if WRITE_RECON_BUFFER == 2
@@ -390,7 +409,11 @@ int vp9_receive_compressed_data(VP9D_PTR ptr,
 
   vp9_clear_system_state();
 
-  cm->last_show_frame = cm->show_frame;
+  cm->last_width = cm->width;
+  cm->last_height = cm->height;
+
+  if (!cm->show_existing_frame)
+    cm->last_show_frame = cm->show_frame;
   if (cm->show_frame) {
     if (!cm->show_existing_frame) {
       // current mip will be the prev_mip for the next frame
diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h
index e90f8923c..6c6c23926 100644
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -14,9 +14,14 @@
 #include "./vpx_config.h"
 
 #include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/decoder/vp9_dthread.h"
 #include "vp9/decoder/vp9_onyxd.h"
 #include "vp9/decoder/vp9_thread.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef struct VP9Decompressor {
   DECLARE_ALIGNED(16, MACROBLOCKD, mb);
 
@@ -45,6 +50,8 @@ typedef struct VP9Decompressor {
   VP9Worker *tile_workers;
   int num_tile_workers;
 
+  VP9LfSync lf_row_sync;
+
   /* Each tile column has its own MODE_INFO stream. This array indexes them by
      tile column index. */
   MODE_INFO **mi_streams;
@@ -53,4 +60,8 @@ typedef struct VP9Decompressor {
   PARTITION_CONTEXT *above_seg_context;
 } VP9D_COMP;
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_DECODER_VP9_ONYXD_INT_H_
diff --git a/vp9/decoder/vp9_read_bit_buffer.h b/vp9/decoder/vp9_read_bit_buffer.h
index 41a686837..619e39f1e 100644
--- a/vp9/decoder/vp9_read_bit_buffer.h
+++ b/vp9/decoder/vp9_read_bit_buffer.h
@@ -15,6 +15,10 @@
 
 #include "vpx/vpx_integer.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef void (*vp9_rb_error_handler)(void *data, size_t bit_offset);
 
 struct vp9_read_bit_buffer {
@@ -57,4 +61,8 @@ static int vp9_rb_read_signed_literal(struct vp9_read_bit_buffer *rb,
   return vp9_rb_read_bit(rb) ? -value : value;
 }
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_DECODER_VP9_READ_BIT_BUFFER_H_
diff --git a/vp9/decoder/vp9_reader.h b/vp9/decoder/vp9_reader.h
index 38cf0f621..8fe6acbc2 100644
--- a/vp9/decoder/vp9_reader.h
+++ b/vp9/decoder/vp9_reader.h
@@ -20,6 +20,10 @@
 
 #include "vp9/common/vp9_prob.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef size_t BD_VALUE;
 
 #define BD_VALUE_SIZE ((int)sizeof(BD_VALUE) * CHAR_BIT)
@@ -100,4 +104,8 @@ static int vp9_read_tree(vp9_reader *r, const vp9_tree_index *tree,
   return -i;
 }
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_DECODER_VP9_READER_H_
diff --git a/vp9/decoder/vp9_thread.c b/vp9/decoder/vp9_thread.c
index d953e72b3..5d31d3d98 100644
--- a/vp9/decoder/vp9_thread.c
+++ b/vp9/decoder/vp9_thread.c
@@ -24,116 +24,6 @@ extern "C" {
 
 #if CONFIG_MULTITHREAD
 
-#if defined(_WIN32)
-
-//------------------------------------------------------------------------------
-// simplistic pthread emulation layer
-
-#include <process.h>  // NOLINT
-
-// _beginthreadex requires __stdcall
-#define THREADFN unsigned int __stdcall
-#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val)
-
-static int pthread_create(pthread_t* const thread, const void* attr,
-                          unsigned int (__stdcall *start)(void*), void* arg) {
-  (void)attr;
-  *thread = (pthread_t)_beginthreadex(NULL,   /* void *security */
-                                      0,      /* unsigned stack_size */
-                                      start,
-                                      arg,
-                                      0,      /* unsigned initflag */
-                                      NULL);  /* unsigned *thrdaddr */
-  if (*thread == NULL) return 1;
-  SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL);
-  return 0;
-}
-
-static int pthread_join(pthread_t thread, void** value_ptr) {
-  (void)value_ptr;
-  return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 ||
-          CloseHandle(thread) == 0);
-}
-
-// Mutex
-static int pthread_mutex_init(pthread_mutex_t* const mutex, void* mutexattr) {
-  (void)mutexattr;
-  InitializeCriticalSection(mutex);
-  return 0;
-}
-
-static int pthread_mutex_lock(pthread_mutex_t* const mutex) {
-  EnterCriticalSection(mutex);
-  return 0;
-}
-
-static int pthread_mutex_unlock(pthread_mutex_t* const mutex) {
-  LeaveCriticalSection(mutex);
-  return 0;
-}
-
-static int pthread_mutex_destroy(pthread_mutex_t* const mutex) {
-  DeleteCriticalSection(mutex);
-  return 0;
-}
-
-// Condition
-static int pthread_cond_destroy(pthread_cond_t* const condition) {
-  int ok = 1;
-  ok &= (CloseHandle(condition->waiting_sem_) != 0);
-  ok &= (CloseHandle(condition->received_sem_) != 0);
-  ok &= (CloseHandle(condition->signal_event_) != 0);
-  return !ok;
-}
-
-static int pthread_cond_init(pthread_cond_t* const condition, void* cond_attr) {
-  (void)cond_attr;
-  condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
-  condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
-  condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);
-  if (condition->waiting_sem_ == NULL ||
-      condition->received_sem_ == NULL ||
-      condition->signal_event_ == NULL) {
-    pthread_cond_destroy(condition);
-    return 1;
-  }
-  return 0;
-}
-
-static int pthread_cond_signal(pthread_cond_t* const condition) {
-  int ok = 1;
-  if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
-    // a thread is waiting in pthread_cond_wait: allow it to be notified
-    ok = SetEvent(condition->signal_event_);
-    // wait until the event is consumed so the signaler cannot consume
-    // the event via its own pthread_cond_wait.
-    ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) !=
-           WAIT_OBJECT_0);
-  }
-  return !ok;
-}
-
-static int pthread_cond_wait(pthread_cond_t* const condition,
-                             pthread_mutex_t* const mutex) {
-  int ok;
-  // note that there is a consumer available so the signal isn't dropped in
-  // pthread_cond_signal
-  if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL))
-    return 1;
-  // now unlock the mutex so pthread_cond_signal may be issued
-  pthread_mutex_unlock(mutex);
-  ok = (WaitForSingleObject(condition->signal_event_, INFINITE) ==
-        WAIT_OBJECT_0);
-  ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL);
-  pthread_mutex_lock(mutex);
-  return !ok;
-}
-
-#else  // _WIN32
-# define THREADFN void*
-# define THREAD_RETURN(val) val
-#endif
-
 //------------------------------------------------------------------------------
 
 static THREADFN thread_loop(void *ptr) {    // thread loop
diff --git a/vp9/decoder/vp9_thread.h b/vp9/decoder/vp9_thread.h
index a624f3c2a..2f8728dcf 100644
--- a/vp9/decoder/vp9_thread.h
+++ b/vp9/decoder/vp9_thread.h
@@ -19,14 +19,15 @@
 
 #include "./vpx_config.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
 #if CONFIG_MULTITHREAD
 
 #if defined(_WIN32)
-
+#include <errno.h>  // NOLINT
+#include <process.h>  // NOLINT
 #include <windows.h>  // NOLINT
 typedef HANDLE pthread_t;
 typedef CRITICAL_SECTION pthread_mutex_t;
@@ -36,12 +37,120 @@ typedef struct {
   HANDLE signal_event_;
 } pthread_cond_t;
 
-#else
-
+//------------------------------------------------------------------------------
+// simplistic pthread emulation layer
+
+// _beginthreadex requires __stdcall
+#define THREADFN unsigned int __stdcall
+#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val)
+
+static INLINE int pthread_create(pthread_t* const thread, const void* attr,
+                                 unsigned int (__stdcall *start)(void*),
+                                 void* arg) {
+  (void)attr;
+  *thread = (pthread_t)_beginthreadex(NULL,   /* void *security */
+                                      0,      /* unsigned stack_size */
+                                      start,
+                                      arg,
+                                      0,      /* unsigned initflag */
+                                      NULL);  /* unsigned *thrdaddr */
+  if (*thread == NULL) return 1;
+  SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL);
+  return 0;
+}
+
+static INLINE int pthread_join(pthread_t thread, void** value_ptr) {
+  (void)value_ptr;
+  return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 ||
+          CloseHandle(thread) == 0);
+}
+
+// Mutex
+static INLINE int pthread_mutex_init(pthread_mutex_t *const mutex,
+                                     void* mutexattr) {
+  (void)mutexattr;
+  InitializeCriticalSection(mutex);
+  return 0;
+}
+
+static INLINE int pthread_mutex_trylock(pthread_mutex_t *const mutex) {
+  return TryEnterCriticalSection(mutex) ? 0 : EBUSY;
+}
+
+static INLINE int pthread_mutex_lock(pthread_mutex_t *const mutex) {
+  EnterCriticalSection(mutex);
+  return 0;
+}
+
+static INLINE int pthread_mutex_unlock(pthread_mutex_t *const mutex) {
+  LeaveCriticalSection(mutex);
+  return 0;
+}
+
+static INLINE int pthread_mutex_destroy(pthread_mutex_t *const mutex) {
+  DeleteCriticalSection(mutex);
+  return 0;
+}
+
+// Condition
+static INLINE int pthread_cond_destroy(pthread_cond_t *const condition) {
+  int ok = 1;
+  ok &= (CloseHandle(condition->waiting_sem_) != 0);
+  ok &= (CloseHandle(condition->received_sem_) != 0);
+  ok &= (CloseHandle(condition->signal_event_) != 0);
+  return !ok;
+}
+
+static INLINE int pthread_cond_init(pthread_cond_t *const condition,
+                                    void* cond_attr) {
+  (void)cond_attr;
+  condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
+  condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
+  condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);
+  if (condition->waiting_sem_ == NULL ||
+      condition->received_sem_ == NULL ||
+      condition->signal_event_ == NULL) {
+    pthread_cond_destroy(condition);
+    return 1;
+  }
+  return 0;
+}
+
+static INLINE int pthread_cond_signal(pthread_cond_t *const condition) {
+  int ok = 1;
+  if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
+    // a thread is waiting in pthread_cond_wait: allow it to be notified
+    ok = SetEvent(condition->signal_event_);
+    // wait until the event is consumed so the signaler cannot consume
+    // the event via its own pthread_cond_wait.
+    ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) !=
+           WAIT_OBJECT_0);
+  }
+  return !ok;
+}
+
+static INLINE int pthread_cond_wait(pthread_cond_t *const condition,
+                                    pthread_mutex_t *const mutex) {
+  int ok;
+  // note that there is a consumer available so the signal isn't dropped in
+  // pthread_cond_signal
+  if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL))
+    return 1;
+  // now unlock the mutex so pthread_cond_signal may be issued
+  pthread_mutex_unlock(mutex);
+  ok = (WaitForSingleObject(condition->signal_event_, INFINITE) ==
+        WAIT_OBJECT_0);
+  ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL);
+  pthread_mutex_lock(mutex);
+  return !ok;
+}
+#else  // _WIN32
 #include <pthread.h> // NOLINT
+# define THREADFN void*
+# define THREAD_RETURN(val) val
+#endif
 
-#endif    /* _WIN32 */
-#endif    /* CONFIG_MULTITHREAD */
+#endif  // CONFIG_MULTITHREAD
 
 // State of the worker thread object
 typedef enum {
@@ -91,7 +200,7 @@ void vp9_worker_end(VP9Worker* const worker);
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 2ab4c7907..ede744e7f 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -124,8 +124,8 @@ static int write_skip_coeff(const VP9_COMP *cpi, int segment_id, MODE_INFO *m,
 void vp9_update_skip_probs(VP9_COMMON *cm, vp9_writer *w) {
   int k;
 
-  for (k = 0; k < MBSKIP_CONTEXTS; ++k)
-    vp9_cond_prob_diff_update(w, &cm->fc.mbskip_probs[k], cm->counts.mbskip[k]);
+  for (k = 0; k < SKIP_CONTEXTS; ++k)
+    vp9_cond_prob_diff_update(w, &cm->fc.skip_probs[k], cm->counts.skip[k]);
 }
 
 static void update_switchable_interp_probs(VP9_COMP *cpi, vp9_writer *w) {
@@ -135,11 +135,6 @@ static void update_switchable_interp_probs(VP9_COMP *cpi, vp9_writer *w) {
     prob_diff_update(vp9_switchable_interp_tree,
                      cm->fc.switchable_interp_prob[j],
                      cm->counts.switchable_interp[j], SWITCHABLE_FILTERS, w);
-
-#ifdef MODE_STATS
-  if (!cpi->dummy_packing)
-    update_switchable_interp_stats(cm);
-#endif
 }
 
 static void pack_mb_tokens(vp9_writer* const w,
@@ -330,13 +325,13 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
       }
     }
 
-    if (cm->mcomp_filter_type == SWITCHABLE) {
+    if (cm->interp_filter == SWITCHABLE) {
       const int ctx = vp9_get_pred_context_switchable_interp(xd);
       vp9_write_token(bc, vp9_switchable_interp_tree,
                       cm->fc.switchable_interp_prob[ctx],
                       &switchable_interp_encodings[mi->interp_filter]);
     } else {
-      assert(mi->interp_filter == cm->mcomp_filter_type);
+      assert(mi->interp_filter == cm->interp_filter);
     }
 
     if (bsize < BLOCK_8X8) {
@@ -912,24 +907,20 @@ static void encode_txfm_probs(VP9_COMP *cpi, vp9_writer *w) {
         vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p32x32[i][j],
                                   ct_32x32p[j]);
     }
-#ifdef MODE_STATS
-    if (!cpi->dummy_packing)
-      update_tx_count_stats(cm);
-#endif
   }
 }
 
-static void write_interp_filter_type(INTERPOLATION_TYPE type,
-                                     struct vp9_write_bit_buffer *wb) {
-  const int type_to_literal[] = { 1, 0, 2, 3 };
+static void write_interp_filter(INTERP_FILTER filter,
+                                struct vp9_write_bit_buffer *wb) {
+  const int filter_to_literal[] = { 1, 0, 2, 3 };
 
-  vp9_wb_write_bit(wb, type == SWITCHABLE);
-  if (type != SWITCHABLE)
-    vp9_wb_write_literal(wb, type_to_literal[type], 2);
+  vp9_wb_write_bit(wb, filter == SWITCHABLE);
+  if (filter != SWITCHABLE)
+    vp9_wb_write_literal(wb, filter_to_literal[filter], 2);
 }
 
-static void fix_mcomp_filter_type(VP9_COMMON *cm) {
-  if (cm->mcomp_filter_type == SWITCHABLE) {
+static void fix_interp_filter(VP9_COMMON *cm) {
+  if (cm->interp_filter == SWITCHABLE) {
     // Check to see if only one of the filters is actually used
     int count[SWITCHABLE_FILTERS];
     int i, j, c = 0;
@@ -943,7 +934,7 @@ static void fix_mcomp_filter_type(VP9_COMMON *cm) {
       // Only one filter is used. So set the filter at frame level
       for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
         if (count[i]) {
-          cm->mcomp_filter_type = i;
+          cm->interp_filter = i;
           break;
         }
       }
@@ -1171,8 +1162,8 @@ static void write_uncompressed_header(VP9_COMP *cpi,
 
       vp9_wb_write_bit(wb, cm->allow_high_precision_mv);
 
-      fix_mcomp_filter_type(cm);
-      write_interp_filter_type(cm->mcomp_filter_type, wb);
+      fix_interp_filter(cm);
+      write_interp_filter(cm->interp_filter, wb);
     }
   }
 
@@ -1223,7 +1214,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
 
     vp9_zero(cm->counts.inter_mode);
 
-    if (cm->mcomp_filter_type == SWITCHABLE)
+    if (cm->interp_filter == SWITCHABLE)
       update_switchable_interp_probs(cpi, &header_bc);
 
     for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
diff --git a/vp9/encoder/vp9_bitstream.h b/vp9/encoder/vp9_bitstream.h
index 52be50272..94bec8a43 100644
--- a/vp9/encoder/vp9_bitstream.h
+++ b/vp9/encoder/vp9_bitstream.h
@@ -12,6 +12,14 @@
 #ifndef VP9_ENCODER_VP9_BITSTREAM_H_
 #define VP9_ENCODER_VP9_BITSTREAM_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp9_update_skip_probs(VP9_COMMON *cm, vp9_writer *bc);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_BITSTREAM_H_
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index c1b95817f..713cc5132 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -17,6 +17,10 @@
 #include "vpx_ports/mem.h"
 #include "vp9/common/vp9_onyxc_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 // motion search site
 typedef struct {
   MV mv;
@@ -58,7 +62,7 @@ typedef struct {
   // motion vector cache for adaptive motion search control in partition
   // search loop
   int_mv pred_mv[MAX_REF_FRAMES];
-  int pred_filter_type;
+  INTERP_FILTER pred_interp_filter;
 
   // Bit flag for each mode whether it has high error in comparison to others.
   unsigned int modes_with_high_error;
@@ -235,23 +239,8 @@ static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE bsize) {
   }
 }
 
-struct rdcost_block_args {
-  MACROBLOCK *x;
-  ENTROPY_CONTEXT t_above[16];
-  ENTROPY_CONTEXT t_left[16];
-  TX_SIZE tx_size;
-  int bw;
-  int bh;
-  int rate;
-  int64_t dist;
-  int64_t sse;
-  int this_rate;
-  int64_t this_dist;
-  int64_t this_sse;
-  int64_t this_rd;
-  int64_t best_rd;
-  int skip;
-  const int16_t *scan, *nb;
-};
+#ifdef __cplusplus
+}  // extern "C"
+#endif
 
 #endif  // VP9_ENCODER_VP9_BLOCK_H_
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index 0f4a6bb63..a840b480a 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -997,7 +997,7 @@ static INLINE int half_round_shift(int input) {
   return rv;
 }
 
-static void dct32_1d(const int *input, int *output, int round) {
+static void fdct32(const int *input, int *output, int round) {
   int step[32];
   // Stage 1
   step[0] = input[0] + input[(32 - 1)];
@@ -1329,7 +1329,7 @@ void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) {
     int temp_in[32], temp_out[32];
     for (j = 0; j < 32; ++j)
       temp_in[j] = input[j * stride + i] * 4;
-    dct32_1d(temp_in, temp_out, 0);
+    fdct32(temp_in, temp_out, 0);
     for (j = 0; j < 32; ++j)
       output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
   }
@@ -1339,13 +1339,13 @@ void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) {
     int temp_in[32], temp_out[32];
     for (j = 0; j < 32; ++j)
       temp_in[j] = output[j + i * 32];
-    dct32_1d(temp_in, temp_out, 0);
+    fdct32(temp_in, temp_out, 0);
     for (j = 0; j < 32; ++j)
       out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
   }
 }
 
-// Note that although we use dct_32_round in dct32_1d computation flow,
+// Note that although we use dct_32_round in dct32 computation flow,
 // this 2d fdct32x32 for rate-distortion optimization loop is operating
 // within 16 bits precision.
 void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
@@ -1357,7 +1357,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
     int temp_in[32], temp_out[32];
     for (j = 0; j < 32; ++j)
       temp_in[j] = input[j * stride + i] * 4;
-    dct32_1d(temp_in, temp_out, 0);
+    fdct32(temp_in, temp_out, 0);
     for (j = 0; j < 32; ++j)
       // TODO(cd): see quality impact of only doing
       //           output[j * 32 + i] = (temp_out[j] + 1) >> 2;
@@ -1370,7 +1370,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
     int temp_in[32], temp_out[32];
     for (j = 0; j < 32; ++j)
       temp_in[j] = output[j + i * 32];
-    dct32_1d(temp_in, temp_out, 1);
+    fdct32(temp_in, temp_out, 1);
     for (j = 0; j < 32; ++j)
       out[j + i * 32] = temp_out[j];
   }
diff --git a/vp9/encoder/vp9_dct.h b/vp9/encoder/vp9_dct.h
index aaf976d93..cf5f001a9 100644
--- a/vp9/encoder/vp9_dct.h
+++ b/vp9/encoder/vp9_dct.h
@@ -12,6 +12,10 @@
 #ifndef VP9_ENCODER_VP9_DCT_H_
 #define VP9_ENCODER_VP9_DCT_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output,
                 int stride);
 
@@ -21,4 +25,8 @@ void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output,
 void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,
                   int stride);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_DCT_H_
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 9d02c8f95..7b6da6c39 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -380,8 +380,10 @@ static void select_in_frame_q_segment(VP9_COMP *cpi,
       segment = 0;
     }
 
-    complexity_metric =
-      clamp((int)((projected_rate * 64) / target_rate), 16, 255);
+    if (target_rate > 0) {
+      complexity_metric =
+        clamp((int)((projected_rate * 64) / target_rate), 16, 255);
+    }
   }
 
   // Fill in the entires in the segment map corresponding to this SB64
@@ -505,7 +507,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
       vp9_update_mv_count(cpi, x, best_mv);
     }
 
-    if (cm->mcomp_filter_type == SWITCHABLE && is_inter_mode(mbmi->mode)) {
+    if (cm->interp_filter == SWITCHABLE && is_inter_mode(mbmi->mode)) {
       const int ctx = vp9_get_pred_context_switchable_interp(xd);
       ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
     }
@@ -1029,131 +1031,171 @@ static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) {
   }
   return 0;
 }
-
-// TODO(jingning) This currently serves as a test framework for non-RD mode
-// decision. To be continued on optimizing the partition type decisions.
-static void pick_partition_type(VP9_COMP *cpi,
-                                const TileInfo *const tile,
-                                MODE_INFO **mi_8x8, TOKENEXTRA **tp,
-                                int mi_row, int mi_col,
-                                BLOCK_SIZE bsize, int *rate, int64_t *dist,
-                                int do_recon) {
+static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
+                         BLOCK_SIZE bsize, int output_enabled) {
+  int i;
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
-  const int mi_stride = cm->mode_info_stride;
-  const int num_8x8_subsize = (num_8x8_blocks_wide_lookup[bsize] >> 1);
-  int i;
-  PARTITION_TYPE partition = PARTITION_NONE;
-  BLOCK_SIZE subsize;
-  BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type;
-  int sub_rate[4] = {0};
-  int64_t sub_dist[4] = {0};
-  int mi_offset;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  struct macroblock_plane *const p = x->plane;
+  struct macroblockd_plane *const pd = xd->plane;
+  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
 
-  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
-    return;
+  const int mb_mode_index = ctx->best_mode_index;
+  int max_plane;
 
-  partition = partition_lookup[b_width_log2(bsize)][bs_type];
-  subsize = get_subsize(bsize, partition);
+  max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1;
+  for (i = 0; i < max_plane; ++i) {
+    p[i].coeff = ctx->coeff_pbuf[i][1];
+    p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
+    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
+    p[i].eobs = ctx->eobs_pbuf[i][1];
+  }
+
+  for (i = max_plane; i < MAX_MB_PLANE; ++i) {
+    p[i].coeff = ctx->coeff_pbuf[i][2];
+    p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
+    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
+    p[i].eobs = ctx->eobs_pbuf[i][2];
+  }
+
+  x->skip = ctx->skip;
+
+  if (frame_is_intra_only(cm)) {
+#if CONFIG_INTERNAL_STATS
+    static const int kf_mode_index[] = {
+      THR_DC /*DC_PRED*/,
+      THR_V_PRED /*V_PRED*/,
+      THR_H_PRED /*H_PRED*/,
+      THR_D45_PRED /*D45_PRED*/,
+      THR_D135_PRED /*D135_PRED*/,
+      THR_D117_PRED /*D117_PRED*/,
+      THR_D153_PRED /*D153_PRED*/,
+      THR_D207_PRED /*D207_PRED*/,
+      THR_D63_PRED /*D63_PRED*/,
+      THR_TM /*TM_PRED*/,
+    };
+    ++cpi->mode_chosen_counts[kf_mode_index[mbmi->mode]];
+#endif
+  } else {
+    // Note how often each mode chosen as best
+    cpi->mode_chosen_counts[mb_mode_index]++;
+    if (is_inter_block(mbmi) &&
+        (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) {
+      int_mv best_mv[2];
+      for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
+        best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
+      vp9_update_mv_count(cpi, x, best_mv);
+    }
+
+    if (cm->interp_filter == SWITCHABLE && is_inter_mode(mbmi->mode)) {
+      const int ctx = vp9_get_pred_context_switchable_interp(xd);
+      ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
+    }
+  }
+}
+
+static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile,
+                     TOKENEXTRA **tp, int mi_row, int mi_col,
+                     int output_enabled, BLOCK_SIZE bsize) {
+  MACROBLOCK *const x = &cpi->mb;
 
   if (bsize < BLOCK_8X8) {
     // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
     // there is nothing to be done.
-    if (x->ab_index != 0) {
-      *rate = 0;
-      *dist = 0;
+    if (x->ab_index > 0)
       return;
-    }
+  }
+  set_offsets(cpi, tile, mi_row, mi_col, bsize);
+  update_state_rt(cpi, get_block_context(x, bsize), bsize, output_enabled);
+
+  encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
+  update_stats(cpi);
+
+  (*tp)->token = EOSB_TOKEN;
+  (*tp)++;
+}
+
+static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile,
+                      TOKENEXTRA **tp, int mi_row, int mi_col,
+                      int output_enabled, BLOCK_SIZE bsize) {
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &cpi->mb;
+  const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
+  int ctx;
+  PARTITION_TYPE partition;
+  BLOCK_SIZE subsize;
+
+  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+    return;
+
+  if (bsize >= BLOCK_8X8) {
+    MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+    const int idx_str = xd->mode_info_stride * mi_row + mi_col;
+    MODE_INFO ** mi_8x8 = cm->mi_grid_visible + idx_str;
+    ctx = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
+                                 mi_row, mi_col, bsize);
+    subsize = mi_8x8[0]->mbmi.sb_type;
+
   } else {
-    *(get_sb_partitioning(x, bsize)) = subsize;
+    ctx = 0;
+    subsize = BLOCK_4X4;
   }
 
+  partition = partition_lookup[bsl][subsize];
+
   switch (partition) {
     case PARTITION_NONE:
-      rd_pick_sb_modes(cpi, tile, mi_row, mi_col, rate, dist,
-                       bsize, get_block_context(x, bsize), INT64_MAX);
+      if (output_enabled && bsize >= BLOCK_8X8)
+        cm->counts.partition[ctx][PARTITION_NONE]++;
+      encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
       break;
-    case PARTITION_HORZ:
+    case PARTITION_VERT:
+      if (output_enabled)
+        cm->counts.partition[ctx][PARTITION_VERT]++;
       *get_sb_index(x, subsize) = 0;
-      rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sub_rate[0], &sub_dist[0],
-                       subsize, get_block_context(x, subsize), INT64_MAX);
-      if (bsize >= BLOCK_8X8 && mi_row + num_8x8_subsize < cm->mi_rows) {
-        update_state(cpi, get_block_context(x, subsize), subsize, 0);
-        encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
+      encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
+      if (mi_col + hbs < cm->mi_cols) {
         *get_sb_index(x, subsize) = 1;
-        rd_pick_sb_modes(cpi, tile, mi_row + num_8x8_subsize, mi_col,
-                         &sub_rate[1], &sub_dist[1], subsize,
-                         get_block_context(x, subsize), INT64_MAX);
+        encode_b_rt(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled,
+                    subsize);
       }
-      *rate = sub_rate[0] + sub_rate[1];
-      *dist = sub_dist[0] + sub_dist[1];
       break;
-    case PARTITION_VERT:
+    case PARTITION_HORZ:
+      if (output_enabled)
+        cm->counts.partition[ctx][PARTITION_HORZ]++;
       *get_sb_index(x, subsize) = 0;
-      rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sub_rate[0], &sub_dist[0],
-                       subsize, get_block_context(x, subsize), INT64_MAX);
-      if (bsize >= BLOCK_8X8 && mi_col + num_8x8_subsize < cm->mi_cols) {
-        update_state(cpi, get_block_context(x, subsize), subsize, 0);
-        encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
+      encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
+      if (mi_row + hbs < cm->mi_rows) {
         *get_sb_index(x, subsize) = 1;
-        rd_pick_sb_modes(cpi, tile, mi_row, mi_col + num_8x8_subsize,
-                         &sub_rate[1], &sub_dist[1], subsize,
-                         get_block_context(x, subsize), INT64_MAX);
+        encode_b_rt(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled,
+                    subsize);
       }
-      *rate = sub_rate[0] + sub_rate[1];
-      *dist = sub_dist[1] + sub_dist[1];
       break;
     case PARTITION_SPLIT:
-      *get_sb_index(x, subsize) = 0;
-      pick_partition_type(cpi, tile, mi_8x8, tp, mi_row, mi_col, subsize,
-                          &sub_rate[0], &sub_dist[0], 0);
-
-      if ((mi_col + num_8x8_subsize) < cm->mi_cols) {
-        *get_sb_index(x, subsize) = 1;
-        pick_partition_type(cpi, tile, mi_8x8 + num_8x8_subsize, tp,
-                            mi_row, mi_col + num_8x8_subsize, subsize,
-                            &sub_rate[1], &sub_dist[1], 0);
-      }
-
-      if ((mi_row + num_8x8_subsize) < cm->mi_rows) {
-        *get_sb_index(x, subsize) = 2;
-        pick_partition_type(cpi, tile, mi_8x8 + num_8x8_subsize * mi_stride, tp,
-                            mi_row + num_8x8_subsize, mi_col, subsize,
-                            &sub_rate[2], &sub_dist[2], 0);
-      }
-
-      if ((mi_col + num_8x8_subsize) < cm->mi_cols &&
-          (mi_row + num_8x8_subsize) < cm->mi_rows) {
-        *get_sb_index(x, subsize) = 3;
-        mi_offset = num_8x8_subsize * mi_stride + num_8x8_subsize;
-        pick_partition_type(cpi, tile, mi_8x8 + mi_offset, tp,
-                            mi_row + num_8x8_subsize, mi_col + num_8x8_subsize,
-                            subsize, &sub_rate[3], &sub_dist[3], 0);
-      }
-
-      for (i = 0; i < 4; ++i) {
-        *rate += sub_rate[i];
-        *dist += sub_dist[i];
-      }
+      subsize = get_subsize(bsize, PARTITION_SPLIT);
+      if (output_enabled)
+        cm->counts.partition[ctx][PARTITION_SPLIT]++;
 
+      *get_sb_index(x, subsize) = 0;
+      encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
+      *get_sb_index(x, subsize) = 1;
+      encode_sb_rt(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled,
+                   subsize);
+      *get_sb_index(x, subsize) = 2;
+      encode_sb_rt(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled,
+                   subsize);
+      *get_sb_index(x, subsize) = 3;
+      encode_sb_rt(cpi, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled,
+                subsize);
       break;
     default:
-      assert(0);
+      assert("Invalid partition type.");
   }
 
-  if (do_recon) {
-    int output_enabled = (bsize == BLOCK_64X64);
-
-    // Check the projected output rate for this SB against it's target
-    // and and if necessary apply a Q delta using segmentation to get
-    // closer to the target.
-    if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) {
-      select_in_frame_q_segment(cpi, mi_row, mi_col,
-                                output_enabled, *rate);
-    }
-
-    encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize);
-  }
+  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
+    update_partition_context(cpi->above_seg_context, cpi->left_seg_context,
+                             mi_row, mi_col, subsize, bsize);
 }
 
 static void rd_use_partition(VP9_COMP *cpi,
@@ -1444,15 +1486,19 @@ static void rd_use_partition(VP9_COMP *cpi,
 }
 
 static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
-  BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
-  BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8,
-  BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16
+  BLOCK_4X4,   BLOCK_4X4,   BLOCK_4X4,
+  BLOCK_4X4,   BLOCK_4X4,   BLOCK_4X4,
+  BLOCK_8X8,   BLOCK_8X8,   BLOCK_8X8,
+  BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,
+  BLOCK_16X16
 };
 
 static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
-  BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,
-  BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, BLOCK_64X64,
-  BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64
+  BLOCK_8X8,   BLOCK_16X16, BLOCK_16X16,
+  BLOCK_16X16, BLOCK_32X32, BLOCK_32X32,
+  BLOCK_32X32, BLOCK_64X64, BLOCK_64X64,
+  BLOCK_64X64, BLOCK_64X64, BLOCK_64X64,
+  BLOCK_64X64
 };
 
 // Look at all the mode_info entries for blocks that are part of this
@@ -1538,9 +1584,11 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
     }
   }
 
-  // Give a bit of leaway either side of the observed min and max
-  *min_block_size = min_partition_size[*min_block_size];
-  *max_block_size = max_partition_size[*max_block_size];
+  // adjust observed min and max
+  if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
+    *min_block_size = min_partition_size[*min_block_size];
+    *max_block_size = max_partition_size[*max_block_size];
+  }
 
   // Check border cases where max and min from neighbours may not be legal.
   *max_block_size = find_partition_size(*max_block_size,
@@ -1788,9 +1836,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
       *get_sb_index(x, subsize) = i;
       if (cpi->sf.adaptive_motion_search)
         load_pred_mv(x, get_block_context(x, bsize));
-      if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 &&
+      if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
           partition_none_allowed)
-        get_block_context(x, subsize)->pred_filter_type =
+        get_block_context(x, subsize)->pred_interp_filter =
             get_block_context(x, bsize)->mic.mbmi.interp_filter;
       rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, subsize,
                         &this_rate, &this_dist, i != 3, best_rd - sum_rd);
@@ -1839,9 +1887,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
     *get_sb_index(x, subsize) = 0;
     if (cpi->sf.adaptive_motion_search)
       load_pred_mv(x, get_block_context(x, bsize));
-    if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 &&
+    if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
         partition_none_allowed)
-      get_block_context(x, subsize)->pred_filter_type =
+      get_block_context(x, subsize)->pred_interp_filter =
           get_block_context(x, bsize)->mic.mbmi.interp_filter;
     rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
                      get_block_context(x, subsize), best_rd);
@@ -1854,9 +1902,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
       *get_sb_index(x, subsize) = 1;
       if (cpi->sf.adaptive_motion_search)
         load_pred_mv(x, get_block_context(x, bsize));
-      if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 &&
+      if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
           partition_none_allowed)
-        get_block_context(x, subsize)->pred_filter_type =
+        get_block_context(x, subsize)->pred_interp_filter =
             get_block_context(x, bsize)->mic.mbmi.interp_filter;
       rd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rate,
                        &this_dist, subsize, get_block_context(x, subsize),
@@ -1892,9 +1940,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
     *get_sb_index(x, subsize) = 0;
     if (cpi->sf.adaptive_motion_search)
       load_pred_mv(x, get_block_context(x, bsize));
-    if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 &&
+    if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
         partition_none_allowed)
-      get_block_context(x, subsize)->pred_filter_type =
+      get_block_context(x, subsize)->pred_interp_filter =
           get_block_context(x, bsize)->mic.mbmi.interp_filter;
     rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
                      get_block_context(x, subsize), best_rd);
@@ -1906,9 +1954,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
       *get_sb_index(x, subsize) = 1;
       if (cpi->sf.adaptive_motion_search)
         load_pred_mv(x, get_block_context(x, bsize));
-      if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 &&
+      if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
           partition_none_allowed)
-        get_block_context(x, subsize)->pred_filter_type =
+        get_block_context(x, subsize)->pred_interp_filter =
             get_block_context(x, bsize)->mic.mbmi.interp_filter;
       rd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rate,
                        &this_dist, subsize, get_block_context(x, subsize),
@@ -1996,34 +2044,6 @@ static void rd_pick_reference_frame(VP9_COMP *cpi, const TileInfo *const tile,
   restore_context(cpi, mi_row, mi_col, a, l, sa, sl, BLOCK_64X64);
 }
 
-static void encode_sb_row_rt(VP9_COMP *cpi, const TileInfo *const tile,
-                             int mi_row, TOKENEXTRA **tp) {
-  VP9_COMMON *const cm = &cpi->common;
-  int mi_col;
-
-  cpi->sf.always_this_block_size = BLOCK_8X8;
-
-  // Initialize the left context for the new SB row
-  vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context));
-  vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context));
-
-  // Code each SB in the row
-  for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
-       mi_col += MI_BLOCK_SIZE) {
-    int dummy_rate;
-    int64_t dummy_dist;
-    const int idx_str = cm->mode_info_stride * mi_row + mi_col;
-    MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
-
-    vp9_zero(cpi->mb.pred_mv);
-
-    set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
-    set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col);
-    pick_partition_type(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                        &dummy_rate, &dummy_dist, 1);
-  }
-}
-
 static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                           int mi_row, TOKENEXTRA **tp) {
   VP9_COMMON *const cm = &cpi->common;
@@ -2048,7 +2068,7 @@ static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
       for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index)
         for (x->mb_index = 0; x->mb_index < 4; ++x->mb_index)
           for (x->b_index = 0; x->b_index < 16 / num_4x4_blk; ++x->b_index)
-            get_block_context(x, i)->pred_filter_type = SWITCHABLE;
+            get_block_context(x, i)->pred_interp_filter = SWITCHABLE;
     }
 
     vp9_zero(cpi->mb.pred_mv);
@@ -2136,7 +2156,7 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
   vp9_zero(cm->counts.single_ref);
   vp9_zero(cm->counts.comp_ref);
   vp9_zero(cm->counts.tx);
-  vp9_zero(cm->counts.mbskip);
+  vp9_zero(cm->counts.skip);
 
   // Note: this memset assumes above_context[0], [1] and [2]
   // are allocated as part of the same buffer.
@@ -2250,11 +2270,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
           vp9_tile_init(&tile, cm, tile_row, tile_col);
           for (mi_row = tile.mi_row_start;
                mi_row < tile.mi_row_end; mi_row += 8)
-#if 1
             encode_sb_row(cpi, &tile, mi_row, &tp);
-#else
-            encode_sb_row_rt(cpi, &tile, mi_row, &tp);
-#endif
 
           cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
           assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
@@ -2395,15 +2411,15 @@ static void reset_skip_txfm_size(VP9_COMMON *cm, TX_SIZE txfm_max) {
   }
 }
 
-static int get_frame_type(VP9_COMP *cpi) {
+static MV_REFERENCE_FRAME get_frame_type(VP9_COMP *cpi) {
   if (frame_is_intra_only(&cpi->common))
-    return 0;
+    return INTRA_FRAME;
   else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame)
-    return 3;
+    return ALTREF_FRAME;
   else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
-    return 1;
+    return LAST_FRAME;
   else
-    return 2;
+    return GOLDEN_FRAME;
 }
 
 static void select_tx_mode(VP9_COMP *cpi) {
@@ -2433,6 +2449,264 @@ static void select_tx_mode(VP9_COMP *cpi) {
     }
   }
 }
+// Start RTC Exploration
+typedef enum {
+  BOTH_ZERO = 0,
+  ZERO_PLUS_PREDICTED = 1,
+  BOTH_PREDICTED = 2,
+  NEW_PLUS_NON_INTRA = 3,
+  BOTH_NEW = 4,
+  INTRA_PLUS_NON_INTRA = 5,
+  BOTH_INTRA = 6,
+  INVALID_CASE = 9
+} motion_vector_context;
+
+static void set_mode_info(MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,
+                          MB_PREDICTION_MODE mode, int mi_row, int mi_col) {
+  mbmi->interp_filter = EIGHTTAP;
+  mbmi->mode = mode;
+  mbmi->mv[0].as_int = 0;
+  mbmi->mv[1].as_int = 0;
+  if (mode < NEARESTMV) {
+    mbmi->ref_frame[0] = INTRA_FRAME;
+  } else {
+    mbmi->ref_frame[0] = LAST_FRAME;
+  }
+
+  mbmi->ref_frame[1] = INTRA_FRAME;
+  mbmi->tx_size = max_txsize_lookup[bsize];
+  mbmi->uv_mode = mode;
+  mbmi->skip_coeff = 0;
+  mbmi->sb_type = bsize;
+  mbmi->segment_id = 0;
+}
+static INLINE int get_block_row(int b32i, int b16i, int b8i) {
+  return ((b32i >> 1) << 2) + ((b16i >> 1) << 1) + (b8i >> 1);
+}
+static INLINE int get_block_col(int b32i, int b16i, int b8i) {
+  return ((b32i & 1) << 2) + ((b16i & 1) << 1) + (b8i & 1);
+}
+static void rtc_use_partition(VP9_COMP *cpi,
+                             const TileInfo *const tile,
+                             MODE_INFO **mi_8x8,
+                             TOKENEXTRA **tp, int mi_row, int mi_col,
+                             BLOCK_SIZE bsize, int *rate, int64_t *dist,
+                             int do_recon) {
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &cpi->mb;
+  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+  const int mis = cm->mode_info_stride;
+  int mi_width = num_8x8_blocks_wide_lookup[cpi->sf.always_this_block_size];
+  int mi_height = num_8x8_blocks_high_lookup[cpi->sf.always_this_block_size];
+  int i, j;
+  int chosen_rate = INT_MAX;
+  int64_t chosen_dist = INT_MAX;
+  MB_PREDICTION_MODE mode = DC_PRED;
+  int row8x8_remaining = tile->mi_row_end - mi_row;
+  int col8x8_remaining = tile->mi_col_end - mi_col;
+  int b32i;
+  x->fast_ms = 0;
+  x->subblock_ref = 0;
+  for (b32i = 0; b32i < 4; b32i++) {
+    int b16i;
+    for (b16i = 0; b16i < 4; b16i++) {
+      int b8i;
+      int block_row = get_block_row(b32i, b16i, 0);
+      int block_col = get_block_col(b32i, b16i, 0);
+      int index = block_row * mis + block_col;
+      int rate;
+      int64_t dist;
+
+      int_mv frame_nearest_mv[MAX_REF_FRAMES];
+      int_mv frame_near_mv[MAX_REF_FRAMES];
+      struct buf_2d yv12_mb[MAX_REF_FRAMES][MAX_MB_PLANE];
+
+      // Find a partition size that fits
+      bsize = find_partition_size(cpi->sf.always_this_block_size,
+                                  (row8x8_remaining - block_row),
+                                  (col8x8_remaining - block_col),
+                                  &mi_height, &mi_width);
+      mi_8x8[index] = mi_8x8[0] + index;
+
+      set_mi_row_col(xd, tile, mi_row + block_row, mi_height,
+                     mi_col + block_col, mi_width, cm->mi_rows, cm->mi_cols);
+
+      xd->mi_8x8 = mi_8x8 + index;
+
+      if (cm->frame_type != KEY_FRAME) {
+        set_offsets(cpi, tile, mi_row + block_row, mi_col + block_col, bsize);
+
+        vp9_pick_inter_mode(cpi, x, tile,
+                            mi_row + block_row, mi_col + block_col,
+                            &rate, &dist, bsize);
+      } else {
+        set_mode_info(&mi_8x8[index]->mbmi, bsize, mode,
+                      mi_row + block_row, mi_col + block_col);
+        vp9_setup_buffer_inter(cpi, x, tile,
+                               LAST_FRAME, cpi->sf.always_this_block_size,
+                               mi_row + block_row, mi_col + block_col,
+                               frame_nearest_mv, frame_near_mv, yv12_mb);
+      }
+
+      for (j = 0; j < mi_height; j++)
+        for (i = 0; i < mi_width; i++)
+          if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > i
+            && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > j) {
+            mi_8x8[index+ i + j * mis] = mi_8x8[index];
+          }
+
+      for (b8i = 0; b8i < 4; b8i++) {
+      }
+    }
+  }
+  encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, BLOCK_64X64);
+
+  *rate = chosen_rate;
+  *dist = chosen_dist;
+}
+
+static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
+                              int mi_row, TOKENEXTRA **tp) {
+  VP9_COMMON * const cm = &cpi->common;
+  int mi_col;
+
+  // Initialize the left context for the new SB row
+  vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context));
+  vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context));
+
+  // Code each SB in the row
+  for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
+       mi_col += MI_BLOCK_SIZE) {
+    int dummy_rate;
+    int64_t dummy_dist;
+
+    const int idx_str = cm->mode_info_stride * mi_row + mi_col;
+    MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
+
+    cpi->mb.source_variance = UINT_MAX;
+    set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
+    set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col);
+    rtc_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
+                     &dummy_rate, &dummy_dist, 1);
+  }
+}
+
+
+static void encode_rtc_frame_internal(VP9_COMP *cpi) {
+  int mi_row;
+  MACROBLOCK * const x = &cpi->mb;
+  VP9_COMMON * const cm = &cpi->common;
+  MACROBLOCKD * const xd = &x->e_mbd;
+
+//  fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
+//           cpi->common.current_video_frame, cpi->common.show_frame,
+//           cm->frame_type);
+
+// debug output
+#if DBG_PRNT_SEGMAP
+  {
+    FILE *statsfile;
+    statsfile = fopen("segmap2.stt", "a");
+    fprintf(statsfile, "\n");
+    fclose(statsfile);
+  }
+#endif
+
+  vp9_zero(cm->counts.switchable_interp);
+  vp9_zero(cpi->tx_stepdown_count);
+
+  xd->mi_8x8 = cm->mi_grid_visible;
+  // required for vp9_frame_init_quantizer
+  xd->mi_8x8[0] = cm->mi;
+
+  xd->last_mi = cm->prev_mi;
+
+  vp9_zero(cpi->common.counts.mv);
+  vp9_zero(cpi->coef_counts);
+  vp9_zero(cm->counts.eob_branch);
+
+  cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0
+      && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
+  switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless);
+
+  vp9_frame_init_quantizer(cpi);
+
+  vp9_initialize_rd_consts(cpi);
+  vp9_initialize_me_consts(cpi, cm->base_qindex);
+  switch_tx_mode(cpi);
+  cpi->sf.always_this_block_size = BLOCK_16X16;
+
+  if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
+    // Initialize encode frame context.
+    init_encode_frame_mb_context(cpi);
+
+    // Build a frame level activity map
+    build_activity_map(cpi);
+  }
+
+  // Re-initialize encode frame context.
+  init_encode_frame_mb_context(cpi);
+
+  vp9_zero(cpi->rd_comp_pred_diff);
+  vp9_zero(cpi->rd_filter_diff);
+  vp9_zero(cpi->rd_tx_select_diff);
+  vp9_zero(cpi->rd_tx_select_threshes);
+
+  set_prev_mi(cm);
+
+  {
+    struct vpx_usec_timer emr_timer;
+    vpx_usec_timer_start(&emr_timer);
+
+    {
+      // Take tiles into account and give start/end MB
+      int tile_col, tile_row;
+      TOKENEXTRA *tp = cpi->tok;
+      const int tile_cols = 1 << cm->log2_tile_cols;
+      const int tile_rows = 1 << cm->log2_tile_rows;
+
+      for (tile_row = 0; tile_row < tile_rows; tile_row++) {
+        for (tile_col = 0; tile_col < tile_cols; tile_col++) {
+          TileInfo tile;
+          TOKENEXTRA *tp_old = tp;
+
+          // For each row of SBs in the frame
+          vp9_tile_init(&tile, cm, tile_row, tile_col);
+          for (mi_row = tile.mi_row_start;
+               mi_row < tile.mi_row_end; mi_row += 8)
+            encode_rtc_sb_row(cpi, &tile, mi_row, &tp);
+
+          cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
+          assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
+        }
+      }
+    }
+
+    vpx_usec_timer_mark(&emr_timer);
+    cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
+  }
+
+  if (cpi->sf.skip_encode_sb) {
+    int j;
+    unsigned int intra_count = 0, inter_count = 0;
+    for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
+      intra_count += cm->counts.intra_inter[j][0];
+      inter_count += cm->counts.intra_inter[j][1];
+    }
+    cpi->sf.skip_encode_frame = ((intra_count << 2) < inter_count);
+    cpi->sf.skip_encode_frame &= (cm->frame_type != KEY_FRAME);
+    cpi->sf.skip_encode_frame &= cm->show_frame;
+  } else {
+    cpi->sf.skip_encode_frame = 0;
+  }
+
+#if 0
+  // Keep record of the total distortion this time around for future use
+  cpi->last_frame_distortion = cpi->frame_distortion;
+#endif
+}
+// end RTC play code
+
 
 void vp9_encode_frame(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
@@ -2460,7 +2734,6 @@ void vp9_encode_frame(VP9_COMP *cpi) {
   if (cpi->sf.RD) {
     int i;
     REFERENCE_MODE reference_mode;
-    INTERPOLATION_TYPE filter_type;
     /*
      * This code does a single RD pass over the whole frame assuming
      * either compound, single or hybrid prediction as per whatever has
@@ -2470,7 +2743,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
      * that for subsequent frames.
      * It does the same analysis for transform size selection also.
      */
-    const int frame_type = get_frame_type(cpi);
+    const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
     const int64_t *mode_thresh = cpi->rd_prediction_type_threshes[frame_type];
     const int64_t *filter_thresh = cpi->rd_filter_threshes[frame_type];
 
@@ -2488,22 +2761,18 @@ void vp9_encode_frame(VP9_COMP *cpi) {
     else
       reference_mode = REFERENCE_MODE_SELECT;
 
-    /* filter type selection */
-    // FIXME(rbultje) for some odd reason, we often select smooth_filter
-    // as default filter for ARF overlay frames. This is a REALLY BAD
-    // IDEA so we explicitly disable it here.
-    if (frame_type != 3 &&
-        filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP] &&
-        filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP_SHARP] &&
-        filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[SWITCHABLE - 1]) {
-      filter_type = EIGHTTAP_SMOOTH;
-    } else if (filter_thresh[EIGHTTAP_SHARP] > filter_thresh[EIGHTTAP] &&
-               filter_thresh[EIGHTTAP_SHARP] > filter_thresh[SWITCHABLE - 1]) {
-      filter_type = EIGHTTAP_SHARP;
-    } else if (filter_thresh[EIGHTTAP] > filter_thresh[SWITCHABLE - 1]) {
-      filter_type = EIGHTTAP;
-    } else {
-      filter_type = SWITCHABLE;
+    if (cm->interp_filter == SWITCHABLE) {
+      if (frame_type != ALTREF_FRAME &&
+          filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP] &&
+          filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP_SHARP] &&
+          filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[SWITCHABLE - 1]) {
+        cm->interp_filter = EIGHTTAP_SMOOTH;
+      } else if (filter_thresh[EIGHTTAP_SHARP] > filter_thresh[EIGHTTAP] &&
+          filter_thresh[EIGHTTAP_SHARP] > filter_thresh[SWITCHABLE - 1]) {
+        cm->interp_filter = EIGHTTAP_SHARP;
+      } else if (filter_thresh[EIGHTTAP] > filter_thresh[SWITCHABLE - 1]) {
+        cm->interp_filter = EIGHTTAP;
+      }
     }
 
     cpi->mb.e_mbd.lossless = cpi->oxcf.lossless;
@@ -2511,8 +2780,11 @@ void vp9_encode_frame(VP9_COMP *cpi) {
     /* transform size selection (4x4, 8x8, 16x16 or select-per-mb) */
     select_tx_mode(cpi);
     cm->reference_mode = reference_mode;
-    cm->mcomp_filter_type = filter_type;
-    encode_frame_internal(cpi);
+
+    if (cpi->sf.super_fast_rtc)
+      encode_rtc_frame_internal(cpi);
+    else
+      encode_frame_internal(cpi);
 
     for (i = 0; i < REFERENCE_MODES; ++i) {
       const int diff = (int) (cpi->rd_comp_pred_diff[i] / cm->MBs);
@@ -2590,7 +2862,12 @@ void vp9_encode_frame(VP9_COMP *cpi) {
       }
     }
   } else {
-    encode_frame_internal(cpi);
+    // Force the usage of the BILINEAR interp_filter.
+    cm->interp_filter = BILINEAR;
+    if (cpi->sf.super_fast_rtc)
+      encode_rtc_frame_internal(cpi);
+    else
+      encode_frame_internal(cpi);
   }
 }
 
@@ -2666,7 +2943,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
   const int mi_height = num_8x8_blocks_high_lookup[bsize];
   x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 &&
-                   (cpi->oxcf.aq_mode != COMPLEXITY_AQ);
+                   (cpi->oxcf.aq_mode != COMPLEXITY_AQ) &&
+                   !cpi->sf.super_fast_rtc;
   x->skip_optimize = ctx->is_coded;
   ctx->is_coded = 1;
   x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
@@ -2681,7 +2959,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
       vp9_update_zbin_extra(cpi, x);
     }
   } else {
-    vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
+    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+    xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
 
     if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
       // Adjust the zbin based on this MB rate.
@@ -2721,7 +3000,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
   } else {
     mbmi->skip_coeff = 1;
     if (output_enabled)
-      cm->counts.mbskip[vp9_get_skip_context(xd)][1]++;
+      cm->counts.skip[vp9_get_skip_context(xd)][1]++;
     reset_skip_context(xd, MAX(bsize, BLOCK_8X8));
   }
 
diff --git a/vp9/encoder/vp9_encodeframe.h b/vp9/encoder/vp9_encodeframe.h
index 3e9f5381c..f7d17c301 100644
--- a/vp9/encoder/vp9_encodeframe.h
+++ b/vp9/encoder/vp9_encodeframe.h
@@ -12,6 +12,10 @@
 #ifndef VP9_ENCODER_VP9_ENCODEFRAME_H_
 #define VP9_ENCODER_VP9_ENCODEFRAME_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct macroblock;
 struct yv12_buffer_config;
 
@@ -19,4 +23,8 @@ void vp9_setup_src_planes(struct macroblock *x,
                           const struct yv12_buffer_config *src,
                           int mi_row, int mi_col);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_ENCODEFRAME_H_
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 4bef67501..8ff23c79a 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -25,26 +25,6 @@
 #include "vp9/encoder/vp9_rdopt.h"
 #include "vp9/encoder/vp9_tokenize.h"
 
-void vp9_setup_interp_filters(MACROBLOCKD *xd,
-                              INTERPOLATION_TYPE mcomp_filter_type,
-                              VP9_COMMON *cm) {
-  if (xd->mi_8x8 && xd->mi_8x8[0]) {
-    MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
-
-    set_scale_factors(cm, xd, mbmi->ref_frame[0] - LAST_FRAME,
-                              mbmi->ref_frame[1] - LAST_FRAME);
-
-  } else {
-    set_scale_factors(cm, xd, -1, -1);
-  }
-
-  xd->subpix.filter_x = xd->subpix.filter_y =
-      vp9_get_filter_kernel(mcomp_filter_type == SWITCHABLE ?
-                               EIGHTTAP : mcomp_filter_type);
-
-  assert(((intptr_t)xd->subpix.filter_x & 0xff) == 0);
-}
-
 void vp9_subtract_block_c(int rows, int cols,
                           int16_t *diff_ptr, ptrdiff_t diff_stride,
                           const uint8_t *src_ptr, ptrdiff_t src_stride,
@@ -358,7 +338,6 @@ static void optimize_init_b(int plane, BLOCK_SIZE bsize,
                            pd->above_context, pd->left_context,
                            num_4x4_w, num_4x4_h);
 }
-
 void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize,
                      TX_SIZE tx_size, void *arg) {
   struct encode_b_args* const args = arg;
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index 207d573a5..9f6c9f069 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -16,6 +16,10 @@
 #include "vp9/encoder/vp9_onyx_int.h"
 #include "vp9/common/vp9_onyxc_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct optimize_ctx {
   ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
   ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
@@ -44,7 +48,9 @@ void vp9_encode_intra_block_y(MACROBLOCK *x, BLOCK_SIZE bsize);
 void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize);
 
 int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred);
-void vp9_setup_interp_filters(MACROBLOCKD *xd,
-                              INTERPOLATION_TYPE mcomp_filter_type,
-                              VP9_COMMON *cm);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_ENCODEMB_H_
diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h
index 761278fd1..c57b01db4 100644
--- a/vp9/encoder/vp9_encodemv.h
+++ b/vp9/encoder/vp9_encodemv.h
@@ -14,6 +14,10 @@
 
 #include "vp9/encoder/vp9_onyx_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp9_entropy_mv_init();
 
 void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer* const);
@@ -30,4 +34,8 @@ void vp9_build_nmv_cost_table(int *mvjoint,
 
 void vp9_update_mv_count(VP9_COMP *cpi, MACROBLOCK *x, int_mv best_ref_mv[2]);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_ENCODEMV_H_
diff --git a/vp9/encoder/vp9_extend.h b/vp9/encoder/vp9_extend.h
index 9b95ee437..058fe09cf 100644
--- a/vp9/encoder/vp9_extend.h
+++ b/vp9/encoder/vp9_extend.h
@@ -14,6 +14,10 @@
 #include "vpx_scale/yv12config.h"
 #include "vpx/vpx_integer.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 
 void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
                                YV12_BUFFER_CONFIG *dst);
@@ -22,4 +26,8 @@ void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
                                          YV12_BUFFER_CONFIG *dst,
                                          int srcy, int srcx,
                                          int srch, int srcw);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_EXTEND_H_
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 56872682a..a03cbdd86 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -49,14 +49,15 @@
 
 #define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001)
 
+#define MIN_BOOST        300
+#define KEY_FRAME_BOOST 2000
+
 static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) {
   YV12_BUFFER_CONFIG temp = *a;
   *a = *b;
   *b = temp;
 }
 
-static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame);
-
 static int select_cq_level(int qindex) {
   int ret_val = QINDEX_RANGE - 1;
   int i;
@@ -369,14 +370,11 @@ static vp9_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) {
   }
 }
 
-static unsigned int zz_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
-                                     YV12_BUFFER_CONFIG *recon_buffer,
-                                     int recon_yoffset) {
-  MACROBLOCKD *const xd = &x->e_mbd;
+static unsigned int zz_motion_search(const VP9_COMP *cpi, const MACROBLOCK *x) {
+  const MACROBLOCKD *const xd = &x->e_mbd;
   const uint8_t *const src = x->plane[0].src.buf;
   const int src_stride = x->plane[0].src.stride;
-  const uint8_t *const ref = xd->plane[0].pre[0].buf
-                           = recon_buffer->y_buffer + recon_yoffset;
+  const uint8_t *const ref = xd->plane[0].pre[0].buf;
   const int ref_stride = xd->plane[0].pre[0].stride;
 
   unsigned int sse;
@@ -387,8 +385,7 @@ static unsigned int zz_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
 
 static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                      const MV *ref_mv, MV *best_mv,
-                                     YV12_BUFFER_CONFIG *recon_buffer,
-                                     int *best_motion_err, int recon_yoffset) {
+                                     int *best_motion_err) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MV tmp_mv = {0, 0};
   MV ref_mv_full = {ref_mv->row >> 3, ref_mv->col >> 3};
@@ -411,9 +408,6 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
   // override the default variance function to use MSE
   v_fn_ptr.vf = get_block_variance_fn(bsize);
 
-  // Set up pointers for this macro block recon buffer
-  xd->plane[0].pre[0].buf = recon_buffer->y_buffer + recon_yoffset;
-
   // Initial step/diamond search centred on best mv
   tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv,
                                     step_param,
@@ -456,6 +450,16 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
   }
 }
 
+static BLOCK_SIZE get_bsize(const VP9_COMMON *cm, int mb_row, int mb_col) {
+  if (2 * mb_col + 1 < cm->mi_cols) {
+    return 2 * mb_row + 1 < cm->mi_rows ? BLOCK_16X16
+                                        : BLOCK_16X8;
+  } else {
+    return 2 * mb_row + 1 < cm->mi_rows ? BLOCK_8X16
+                                        : BLOCK_8X8;
+  }
+}
+
 void vp9_first_pass(VP9_COMP *cpi) {
   int mb_row, mb_col;
   MACROBLOCK *const x = &cpi->mb;
@@ -480,7 +484,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
 
   int sum_mvr = 0, sum_mvc = 0;
   int sum_mvr_abs = 0, sum_mvc_abs = 0;
-  int sum_mvrs = 0, sum_mvcs = 0;
+  int64_t sum_mvrs = 0, sum_mvcs = 0;
   int mvcount = 0;
   int intercount = 0;
   int second_ref_count = 0;
@@ -490,10 +494,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
   int sum_in_vectors = 0;
   uint32_t lastmv_as_int = 0;
   struct twopass_rc *const twopass = &cpi->twopass;
-
-  int_mv zero_ref_mv;
-
-  zero_ref_mv.as_int = 0;
+  const MV zero_mv = {0, 0};
 
   vp9_clear_system_state();  // __asm emms;
 
@@ -502,8 +503,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
   setup_dst_planes(xd, new_yv12, 0, 0);
 
   xd->mi_8x8 = cm->mi_grid_visible;
-  // required for vp9_frame_init_quantizer
-  xd->mi_8x8[0] = cm->mi;
+  xd->mi_8x8[0] = cm->mi;  // required for vp9_frame_init_quantizer
 
   setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
 
@@ -517,14 +517,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
   }
   x->skip_recode = 0;
 
-
-  // Initialise the MV cost table to the defaults
-  // if( cm->current_video_frame == 0)
-  // if ( 0 )
-  {
-    vp9_init_mv_probs(cm);
-    vp9_initialize_rd_consts(cpi);
-  }
+  vp9_init_mv_probs(cm);
+  vp9_initialize_rd_consts(cpi);
 
   // tiling is ignored in the first pass
   vp9_tile_init(&tile, cm, 0, 0);
@@ -549,9 +543,9 @@ void vp9_first_pass(VP9_COMP *cpi) {
     // for each macroblock col in image
     for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
       int this_error;
-      int gf_motion_error = INT_MAX;
-      int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
+      const int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
       double error_weight = 1.0;
+      const BLOCK_SIZE bsize = get_bsize(cm, mb_row, mb_col);
 
       vp9_clear_system_state();  // __asm emms;
 
@@ -559,30 +553,15 @@ void vp9_first_pass(VP9_COMP *cpi) {
       xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset;
       xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset;
       xd->left_available = (mb_col != 0);
-
-      if (mb_col * 2 + 1 < cm->mi_cols) {
-        if (mb_row * 2 + 1 < cm->mi_rows) {
-          xd->mi_8x8[0]->mbmi.sb_type = BLOCK_16X16;
-        } else {
-          xd->mi_8x8[0]->mbmi.sb_type = BLOCK_16X8;
-        }
-      } else {
-        if (mb_row * 2 + 1 < cm->mi_rows) {
-          xd->mi_8x8[0]->mbmi.sb_type = BLOCK_8X16;
-        } else {
-          xd->mi_8x8[0]->mbmi.sb_type = BLOCK_8X8;
-        }
-      }
+      xd->mi_8x8[0]->mbmi.sb_type = bsize;
       xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME;
       set_mi_row_col(xd, &tile,
-                     mb_row << 1,
-                     num_8x8_blocks_high_lookup[xd->mi_8x8[0]->mbmi.sb_type],
-                     mb_col << 1,
-                     num_8x8_blocks_wide_lookup[xd->mi_8x8[0]->mbmi.sb_type],
+                     mb_row << 1, num_8x8_blocks_high_lookup[bsize],
+                     mb_col << 1, num_8x8_blocks_wide_lookup[bsize],
                      cm->mi_rows, cm->mi_cols);
 
       if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
-        int energy = vp9_block_energy(cpi, x, xd->mi_8x8[0]->mbmi.sb_type);
+        const int energy = vp9_block_energy(cpi, x, bsize);
         error_weight = vp9_vaq_inv_q_ratio(energy);
       }
 
@@ -608,21 +587,22 @@ void vp9_first_pass(VP9_COMP *cpi) {
       // Set up limit values for motion vectors to prevent them extending
       // outside the UMV borders.
       x->mv_col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16);
-      x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
-                      + BORDER_MV_PIXELS_B16;
+      x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16;
 
       // Other than for the first frame do a motion search
       if (cm->current_video_frame > 0) {
-        int tmp_err;
-        int motion_error = zz_motion_search(cpi, x, lst_yv12, recon_yoffset);
+        int tmp_err, motion_error;
         int_mv mv, tmp_mv;
+
+        xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset;
+        motion_error = zz_motion_search(cpi, x);
         // Simple 0,0 motion with no mv overhead
         mv.as_int = tmp_mv.as_int = 0;
 
         // Test last reference frame using the previous best mv as the
         // starting point (best reference) for the search
         first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv,
-                                 lst_yv12, &motion_error, recon_yoffset);
+                                 &motion_error);
         if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
           vp9_clear_system_state();  // __asm emms;
           motion_error *= error_weight;
@@ -632,8 +612,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
         // based search as well.
         if (best_ref_mv.as_int) {
           tmp_err = INT_MAX;
-          first_pass_motion_search(cpi, x, &zero_ref_mv.as_mv, &tmp_mv.as_mv,
-                                   lst_yv12, &tmp_err, recon_yoffset);
+          first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
+                                   &tmp_err);
           if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
             vp9_clear_system_state();  // __asm emms;
             tmp_err *= error_weight;
@@ -648,19 +628,20 @@ void vp9_first_pass(VP9_COMP *cpi) {
         // Experimental search in an older reference frame
         if (cm->current_video_frame > 1) {
           // Simple 0,0 motion with no mv overhead
-          gf_motion_error = zz_motion_search(cpi, x, gld_yv12, recon_yoffset);
+          int gf_motion_error;
+
+          xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
+          gf_motion_error = zz_motion_search(cpi, x);
 
-          first_pass_motion_search(cpi, x, &zero_ref_mv.as_mv, &tmp_mv.as_mv,
-                                   gld_yv12, &gf_motion_error, recon_yoffset);
+          first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
+                                   &gf_motion_error);
           if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
             vp9_clear_system_state();  // __asm emms;
             gf_motion_error *= error_weight;
           }
 
-          if ((gf_motion_error < motion_error) &&
-              (gf_motion_error < this_error)) {
+          if (gf_motion_error < motion_error && gf_motion_error < this_error)
             second_ref_count++;
-          }
 
           // Reset to last frame as reference buffer
           xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset;
@@ -697,9 +678,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
           xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;
           xd->mi_8x8[0]->mbmi.ref_frame[0] = LAST_FRAME;
           xd->mi_8x8[0]->mbmi.ref_frame[1] = NONE;
-          vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1,
-                                         xd->mi_8x8[0]->mbmi.sb_type);
-          vp9_encode_sby(x, xd->mi_8x8[0]->mbmi.sb_type);
+          vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize);
+          vp9_encode_sby(x, bsize);
           sum_mvr += mv.as_mv.row;
           sum_mvr_abs += abs(mv.as_mv.row);
           sum_mvc += mv.as_mv.col;
@@ -789,13 +769,11 @@ void vp9_first_pass(VP9_COMP *cpi) {
       fps.mvr_abs = (double)sum_mvr_abs / mvcount;
       fps.MVc = (double)sum_mvc / mvcount;
       fps.mvc_abs = (double)sum_mvc_abs / mvcount;
-      fps.MVrv = ((double)sum_mvrs - (fps.MVr * fps.MVr / mvcount)) /
-                     mvcount;
-      fps.MVcv = ((double)sum_mvcs - (fps.MVc * fps.MVc / mvcount)) /
-                     mvcount;
+      fps.MVrv = ((double)sum_mvrs - (fps.MVr * fps.MVr / mvcount)) / mvcount;
+      fps.MVcv = ((double)sum_mvcs - (fps.MVc * fps.MVc / mvcount)) / mvcount;
       fps.mv_in_out_count = (double)sum_in_vectors / (mvcount * 2);
       fps.new_mv_count = new_mv_count;
-      fps.pcnt_motion = (double)mvcount / cpi->common.MBs;
+      fps.pcnt_motion = (double)mvcount / cm->MBs;
     } else {
       fps.MVr = 0.0;
       fps.mvr_abs = 0.0;
@@ -923,11 +901,10 @@ static double calc_correction_factor(double err_per_mb,
   return fclamp(pow(error_term, power_term), 0.05, 5.0);
 }
 
-static int estimate_max_q(VP9_COMP *cpi,
-                          FIRSTPASS_STATS *fpstats,
+static int estimate_max_q(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
                           int section_target_bandwitdh) {
   int q;
-  int num_mbs = cpi->common.MBs;
+  const int num_mbs = cpi->common.MBs;
   int target_norm_bits_per_mb;
   RATE_CONTROL *const rc = &cpi->rc;
 
@@ -953,9 +930,8 @@ static int estimate_max_q(VP9_COMP *cpi,
   }
 
   // Restriction on active max q for constrained quality mode.
-  if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY &&
-      q < cpi->cq_target_quality)
-    q = cpi->cq_target_quality;
+  if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY)
+    q = MAX(q, cpi->cq_target_quality);
 
   return q;
 }
@@ -1018,6 +994,7 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
   FIRSTPASS_STATS this_frame;
   FIRSTPASS_STATS *start_pos;
   struct twopass_rc *const twopass = &cpi->twopass;
+  const VP9_CONFIG *const oxcf = &cpi->oxcf;
 
   zero_stats(&twopass->total_stats);
   zero_stats(&twopass->total_left_stats);
@@ -1036,9 +1013,9 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
   vp9_new_framerate(cpi, 10000000.0 * twopass->total_stats.count /
                         twopass->total_stats.duration);
 
-  cpi->output_framerate = cpi->oxcf.framerate;
+  cpi->output_framerate = oxcf->framerate;
   twopass->bits_left = (int64_t)(twopass->total_stats.duration *
-                                 cpi->oxcf.target_bandwidth / 10000000.0);
+                                 oxcf->target_bandwidth / 10000000.0);
 
   // Calculate a minimum intra value to be used in determining the IIratio
   // scores used in the second pass. We have this minimum to make sure
@@ -1054,15 +1031,12 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
   // ratio for the sequence.
   {
     double sum_iiratio = 0.0;
-    double IIRatio;
-
     start_pos = twopass->stats_in;  // Note the starting "file" position.
 
     while (input_stats(twopass, &this_frame) != EOF) {
-      IIRatio = this_frame.intra_error
-                / DOUBLE_DIVIDE_CHECK(this_frame.coded_error);
-      IIRatio = (IIRatio < 1.0) ? 1.0 : (IIRatio > 20.0) ? 20.0 : IIRatio;
-      sum_iiratio += IIRatio;
+      const double iiratio = this_frame.intra_error /
+                                 DOUBLE_DIVIDE_CHECK(this_frame.coded_error);
+      sum_iiratio += fclamp(iiratio, 1.0, 20.0);
     }
 
     twopass->avg_iiratio = sum_iiratio /
@@ -1082,9 +1056,9 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
 
     twopass->modified_error_total = 0.0;
     twopass->modified_error_min =
-      (av_error * cpi->oxcf.two_pass_vbrmin_section) / 100;
+      (av_error * oxcf->two_pass_vbrmin_section) / 100;
     twopass->modified_error_max =
-      (av_error * cpi->oxcf.two_pass_vbrmax_section) / 100;
+      (av_error * oxcf->two_pass_vbrmax_section) / 100;
 
     while (input_stats(twopass, &this_frame) != EOF) {
       twopass->modified_error_total +=
@@ -1101,12 +1075,12 @@ void vp9_end_second_pass(VP9_COMP *cpi) {
 
 // This function gives and estimate of how badly we believe
 // the prediction quality is decaying from frame to frame.
-static double get_prediction_decay_rate(VP9_COMP *cpi,
-                                        FIRSTPASS_STATS *next_frame) {
+static double get_prediction_decay_rate(const VP9_COMMON *cm,
+                                        const FIRSTPASS_STATS *next_frame) {
   // Look at the observed drop in prediction quality between the last frame
   // and the GF buffer (which contains an older frame).
   const double mb_sr_err_diff = (next_frame->sr_coded_error -
-                                     next_frame->coded_error) / cpi->common.MBs;
+                                     next_frame->coded_error) / cm->MBs;
   const double second_ref_decay = mb_sr_err_diff <= 512.0
       ? fclamp(pow(1.0 - (mb_sr_err_diff / 512.0), 0.5), 0.85, 1.0)
       : 0.85;
@@ -1134,7 +1108,6 @@ static int detect_transition_to_still(
     int j;
     FIRSTPASS_STATS *position = cpi->twopass.stats_in;
     FIRSTPASS_STATS tmp_next_frame;
-    double zz_inter;
 
     // Look ahead a few frames to see if static condition
     // persists...
@@ -1142,11 +1115,10 @@ static int detect_transition_to_still(
       if (EOF == input_stats(&cpi->twopass, &tmp_next_frame))
         break;
 
-      zz_inter = (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion);
-      if (zz_inter < 0.999)
+      if (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion < 0.999)
         break;
     }
-    // Reset file position
+
     reset_fpf_position(&cpi->twopass, position);
 
     // Only if it does do we signal a transition to still
@@ -1160,14 +1132,14 @@ static int detect_transition_to_still(
 // This function detects a flash through the high relative pcnt_second_ref
 // score in the frame following a flash frame. The offset passed in should
 // reflect this
-static int detect_flash(VP9_COMP *cpi, int offset) {
+static int detect_flash(const struct twopass_rc *twopass, int offset) {
   FIRSTPASS_STATS next_frame;
 
   int flash_detected = 0;
 
   // Read the frame data.
   // The return is FALSE (no flash detected) if not a valid frame
-  if (read_frame_stats(&cpi->twopass, &next_frame, offset) != EOF) {
+  if (read_frame_stats(twopass, &next_frame, offset) != EOF) {
     // What we are looking for here is a situation where there is a
     // brief break in prediction (such as a flash) but subsequent frames
     // are reasonably well predicted by an earlier (pre flash) frame.
@@ -1188,9 +1160,6 @@ static void accumulate_frame_motion_stats(
   double *mv_in_out_accumulator,
   double *abs_mv_in_out_accumulator,
   double *mv_ratio_accumulator) {
-  // double this_frame_mv_in_out;
-  double this_frame_mvr_ratio;
-  double this_frame_mvc_ratio;
   double motion_pct;
 
   // Accumulate motion stats.
@@ -1199,35 +1168,30 @@ static void accumulate_frame_motion_stats(
   // Accumulate Motion In/Out of frame stats
   *this_frame_mv_in_out = this_frame->mv_in_out_count * motion_pct;
   *mv_in_out_accumulator += this_frame->mv_in_out_count * motion_pct;
-  *abs_mv_in_out_accumulator +=
-    fabs(this_frame->mv_in_out_count * motion_pct);
+  *abs_mv_in_out_accumulator += fabs(this_frame->mv_in_out_count * motion_pct);
 
   // Accumulate a measure of how uniform (or conversely how random)
   // the motion field is. (A ratio of absmv / mv)
   if (motion_pct > 0.05) {
-    this_frame_mvr_ratio = fabs(this_frame->mvr_abs) /
+    const double this_frame_mvr_ratio = fabs(this_frame->mvr_abs) /
                            DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVr));
 
-    this_frame_mvc_ratio = fabs(this_frame->mvc_abs) /
+    const double this_frame_mvc_ratio = fabs(this_frame->mvc_abs) /
                            DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVc));
 
-    *mv_ratio_accumulator +=
-      (this_frame_mvr_ratio < this_frame->mvr_abs)
+    *mv_ratio_accumulator += (this_frame_mvr_ratio < this_frame->mvr_abs)
       ? (this_frame_mvr_ratio * motion_pct)
       : this_frame->mvr_abs * motion_pct;
 
-    *mv_ratio_accumulator +=
-      (this_frame_mvc_ratio < this_frame->mvc_abs)
+    *mv_ratio_accumulator += (this_frame_mvc_ratio < this_frame->mvc_abs)
       ? (this_frame_mvc_ratio * motion_pct)
       : this_frame->mvc_abs * motion_pct;
   }
 }
 
 // Calculate a baseline boost number for the current frame.
-static double calc_frame_boost(
-  VP9_COMP *cpi,
-  FIRSTPASS_STATS *this_frame,
-  double this_frame_mv_in_out) {
+static double calc_frame_boost(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame,
+                               double this_frame_mv_in_out) {
   double frame_boost;
 
   // Underlying boost factor is based on inter intra error ratio
@@ -1248,18 +1212,14 @@ static double calc_frame_boost(
   else
     frame_boost += frame_boost * (this_frame_mv_in_out / 2.0);
 
-  // Clip to maximum
-  if (frame_boost > GF_RMAX)
-    frame_boost = GF_RMAX;
-
-  return frame_boost;
+  return MIN(frame_boost, GF_RMAX);
 }
 
 static int calc_arf_boost(VP9_COMP *cpi, int offset,
                           int f_frames, int b_frames,
                           int *f_boost, int *b_boost) {
   FIRSTPASS_STATS this_frame;
-
+  struct twopass_rc *const twopass = &cpi->twopass;
   int i;
   double boost_score = 0.0;
   double mv_ratio_accumulator = 0.0;
@@ -1272,7 +1232,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
 
   // Search forward from the proposed arf/next gf position
   for (i = 0; i < f_frames; i++) {
-    if (read_frame_stats(&cpi->twopass, &this_frame, (i + offset)) == EOF)
+    if (read_frame_stats(twopass, &this_frame, (i + offset)) == EOF)
       break;
 
     // Update the motion related elements to the boost calculation
@@ -1283,12 +1243,12 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
 
     // We want to discount the flash frame itself and the recovery
     // frame that follows as both will have poor scores.
-    flash_detected = detect_flash(cpi, (i + offset)) ||
-                     detect_flash(cpi, (i + offset + 1));
+    flash_detected = detect_flash(twopass, i + offset) ||
+                     detect_flash(twopass, i + offset + 1);
 
     // Cumulative effect of prediction quality decay
     if (!flash_detected) {
-      decay_accumulator *= get_prediction_decay_rate(cpi, &this_frame);
+      decay_accumulator *= get_prediction_decay_rate(&cpi->common, &this_frame);
       decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
                           ? MIN_DECAY_FACTOR : decay_accumulator;
     }
@@ -1309,7 +1269,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
 
   // Search backward towards last gf position
   for (i = -1; i >= -b_frames; i--) {
-    if (read_frame_stats(&cpi->twopass, &this_frame, (i + offset)) == EOF)
+    if (read_frame_stats(twopass, &this_frame, (i + offset)) == EOF)
       break;
 
     // Update the motion related elements to the boost calculation
@@ -1320,12 +1280,12 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
 
     // We want to discount the the flash frame itself and the recovery
     // frame that follows as both will have poor scores.
-    flash_detected = detect_flash(cpi, (i + offset)) ||
-                     detect_flash(cpi, (i + offset + 1));
+    flash_detected = detect_flash(twopass, i + offset) ||
+                     detect_flash(twopass, i + offset + 1);
 
     // Cumulative effect of prediction quality decay
     if (!flash_detected) {
-      decay_accumulator *= get_prediction_decay_rate(cpi, &this_frame);
+      decay_accumulator *= get_prediction_decay_rate(&cpi->common, &this_frame);
       decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
                               ? MIN_DECAY_FACTOR : decay_accumulator;
     }
@@ -1485,6 +1445,7 @@ void define_fixed_arf_period(VP9_COMP *cpi) {
 static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   FIRSTPASS_STATS next_frame = { 0 };
   FIRSTPASS_STATS *start_pos;
+  struct twopass_rc *const twopass = &cpi->twopass;
   int i;
   double boost_score = 0.0;
   double old_boost_score = 0.0;
@@ -1505,8 +1466,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   double mv_ratio_accumulator_thresh;
   int max_bits = frame_max_bits(cpi);     // Max for a single frame
 
-  unsigned int allow_alt_ref =
-    cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames;
+  unsigned int allow_alt_ref = cpi->oxcf.play_alternate &&
+                               cpi->oxcf.lag_in_frames;
 
   int f_boost = 0;
   int b_boost = 0;
@@ -1514,11 +1475,11 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   int active_max_gf_interval;
   RATE_CONTROL *const rc = &cpi->rc;
 
-  cpi->twopass.gf_group_bits = 0;
+  twopass->gf_group_bits = 0;
 
   vp9_clear_system_state();  // __asm emms;
 
-  start_pos = cpi->twopass.stats_in;
+  start_pos = twopass->stats_in;
 
   // Load stats for the current frame.
   mod_frame_err = calculate_modified_err(cpi, this_frame);
@@ -1549,20 +1510,19 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
     active_max_gf_interval = rc->max_gf_interval;
 
   i = 0;
-  while ((i < cpi->twopass.static_scene_max_gf_interval) &&
-         (i < rc->frames_to_key)) {
+  while (i < twopass->static_scene_max_gf_interval && i < rc->frames_to_key) {
     i++;    // Increment the loop counter
 
     // Accumulate error score of frames in this gf group
     mod_frame_err = calculate_modified_err(cpi, this_frame);
     gf_group_err += mod_frame_err;
 
-    if (EOF == input_stats(&cpi->twopass, &next_frame))
+    if (EOF == input_stats(twopass, &next_frame))
       break;
 
     // Test for the case where there is a brief flash but the prediction
     // quality back to an earlier frame is then restored.
-    flash_detected = detect_flash(cpi, 0);
+    flash_detected = detect_flash(twopass, 0);
 
     // Update the motion related elements to the boost calculation
     accumulate_frame_motion_stats(&next_frame,
@@ -1573,14 +1533,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
     // Cumulative effect of prediction quality decay
     if (!flash_detected) {
       last_loop_decay_rate = loop_decay_rate;
-      loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
+      loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame);
       decay_accumulator = decay_accumulator * loop_decay_rate;
 
       // Monitor for static sections.
       if ((next_frame.pcnt_inter - next_frame.pcnt_motion) <
           zero_motion_accumulator) {
-        zero_motion_accumulator =
-          (next_frame.pcnt_inter - next_frame.pcnt_motion);
+        zero_motion_accumulator = next_frame.pcnt_inter -
+                                      next_frame.pcnt_motion;
       }
 
       // Break clause to detect very still sections after motion
@@ -1618,14 +1578,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
     old_boost_score = boost_score;
   }
 
-  cpi->twopass.gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0);
+  twopass->gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0);
 
   // Don't allow a gf too near the next kf
   if ((rc->frames_to_key - i) < MIN_GF_INTERVAL) {
     while (i < (rc->frames_to_key + !rc->next_key_frame_forced)) {
       i++;
 
-      if (EOF == input_stats(&cpi->twopass, this_frame))
+      if (EOF == input_stats(twopass, this_frame))
         break;
 
       if (i < rc->frames_to_key) {
@@ -1927,186 +1887,6 @@ static int test_for_kf_one_pass(VP9_COMP *cpi) {
   return 0;
 }
 
-void vp9_get_svc_params(VP9_COMP *cpi) {
-  VP9_COMMON *const cm = &cpi->common;
-  if ((cm->current_video_frame == 0) ||
-      (cm->frame_flags & FRAMEFLAGS_KEY) ||
-      (cpi->oxcf.auto_key && (cpi->rc.frames_since_key %
-                              cpi->key_frame_frequency == 0))) {
-    cm->frame_type = KEY_FRAME;
-  } else {
-    cm->frame_type = INTER_FRAME;
-  }
-  cpi->rc.frames_till_gf_update_due = INT_MAX;
-  cpi->rc.baseline_gf_interval = INT_MAX;
-}
-
-void vp9_get_one_pass_params(VP9_COMP *cpi) {
-  VP9_COMMON *const cm = &cpi->common;
-  if (!cpi->refresh_alt_ref_frame &&
-      (cm->current_video_frame == 0 ||
-       cm->frame_flags & FRAMEFLAGS_KEY ||
-       cpi->rc.frames_to_key == 0 ||
-       (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
-    cm->frame_type = KEY_FRAME;
-    cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 &&
-                                    cpi->rc.frames_to_key == 0;
-    cpi->rc.frames_to_key = cpi->key_frame_frequency;
-    cpi->rc.kf_boost = 300;
-  } else {
-    cm->frame_type = INTER_FRAME;
-  }
-  if (cpi->rc.frames_till_gf_update_due == 0) {
-    cpi->rc.frames_till_gf_update_due = cpi->rc.baseline_gf_interval;
-    cpi->refresh_golden_frame = 1;
-  }
-}
-
-void vp9_get_one_pass_cbr_params(VP9_COMP *cpi) {
-  VP9_COMMON *const cm = &cpi->common;
-  if ((cm->current_video_frame == 0 ||
-      cm->frame_flags & FRAMEFLAGS_KEY ||
-      cpi->rc.frames_to_key == 0 ||
-      (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
-    cm->frame_type = KEY_FRAME;
-    cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 &&
-                                    cpi->rc.frames_to_key == 0;
-    cpi->rc.frames_to_key = cpi->key_frame_frequency;
-    cpi->rc.kf_boost = 300;
-  } else {
-    cm->frame_type = INTER_FRAME;
-  }
-  // Don't use gf_update by default in CBR mode.
-  cpi->rc.frames_till_gf_update_due = INT_MAX;
-  cpi->rc.baseline_gf_interval = INT_MAX;
-}
-
-void vp9_get_first_pass_params(VP9_COMP *cpi) {
-  VP9_COMMON *const cm = &cpi->common;
-  if (!cpi->refresh_alt_ref_frame &&
-      (cm->current_video_frame == 0 ||
-       cm->frame_flags & FRAMEFLAGS_KEY)) {
-    cm->frame_type = KEY_FRAME;
-  } else {
-    cm->frame_type = INTER_FRAME;
-  }
-  // Do not use periodic key frames
-  cpi->rc.frames_to_key = INT_MAX;
-}
-
-void vp9_get_second_pass_params(VP9_COMP *cpi) {
-  int tmp_q;
-  int frames_left = (int)(cpi->twopass.total_stats.count -
-                          cpi->common.current_video_frame);
-
-  FIRSTPASS_STATS this_frame;
-  FIRSTPASS_STATS this_frame_copy;
-  RATE_CONTROL *rc = &cpi->rc;
-
-  double this_frame_intra_error;
-  double this_frame_coded_error;
-
-  if (cpi->refresh_alt_ref_frame) {
-    cpi->common.frame_type = INTER_FRAME;
-    return;
-  }
-  if (!cpi->twopass.stats_in)
-    return;
-
-  vp9_clear_system_state();
-
-  if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
-    rc->active_worst_quality = cpi->oxcf.cq_level;
-  } else if (cpi->common.current_video_frame == 0) {
-    // Special case code for first frame.
-    int section_target_bandwidth =
-        (int)(cpi->twopass.bits_left / frames_left);
-
-    tmp_q = estimate_max_q(cpi, &cpi->twopass.total_left_stats,
-                           section_target_bandwidth);
-
-    rc->active_worst_quality = tmp_q;
-    rc->ni_av_qi = tmp_q;
-    rc->avg_q = vp9_convert_qindex_to_q(tmp_q);
-
-    // Limit the maxq value returned subsequently.
-    // This increases the risk of overspend or underspend if the initial
-    // estimate for the clip is bad, but helps prevent excessive
-    // variation in Q, especially near the end of a clip
-    // where for example a small overspend may cause Q to crash
-    // adjust_maxq_qrange(cpi);
-  }
-  vp9_zero(this_frame);
-  if (EOF == input_stats(&cpi->twopass, &this_frame))
-    return;
-
-  this_frame_intra_error = this_frame.intra_error;
-  this_frame_coded_error = this_frame.coded_error;
-
-  // keyframe and section processing !
-  if (rc->frames_to_key == 0 ||
-      (cpi->common.frame_flags & FRAMEFLAGS_KEY)) {
-    // Define next KF group and assign bits to it
-    this_frame_copy = this_frame;
-    find_next_key_frame(cpi, &this_frame_copy);
-  } else {
-    cpi->common.frame_type = INTER_FRAME;
-  }
-
-  // Is this a GF / ARF (Note that a KF is always also a GF)
-  if (rc->frames_till_gf_update_due == 0) {
-    // Define next gf group and assign bits to it
-    this_frame_copy = this_frame;
-
-#if CONFIG_MULTIPLE_ARF
-    if (cpi->multi_arf_enabled) {
-      define_fixed_arf_period(cpi);
-    } else {
-#endif
-      define_gf_group(cpi, &this_frame_copy);
-#if CONFIG_MULTIPLE_ARF
-    }
-#endif
-
-    if (cpi->twopass.gf_zeromotion_pct > 995) {
-      // As long as max_thresh for encode breakout is small enough, it is ok
-      // to enable it for no-show frame, i.e. set enable_encode_breakout to 2.
-      if (!cpi->common.show_frame)
-        cpi->enable_encode_breakout = 0;
-      else
-        cpi->enable_encode_breakout = 2;
-    }
-
-    rc->frames_till_gf_update_due = rc->baseline_gf_interval;
-    cpi->refresh_golden_frame = 1;
-  } else {
-    // Otherwise this is an ordinary frame
-    // Assign bits from those allocated to the GF group
-    this_frame_copy =  this_frame;
-    assign_std_frame_bits(cpi, &this_frame_copy);
-  }
-
-  // Keep a globally available copy of this and the next frame's iiratio.
-  cpi->twopass.this_iiratio = (int)(this_frame_intra_error /
-                              DOUBLE_DIVIDE_CHECK(this_frame_coded_error));
-  {
-    FIRSTPASS_STATS next_frame;
-    if (lookup_next_frame_stats(&cpi->twopass, &next_frame) != EOF) {
-      cpi->twopass.next_iiratio = (int)(next_frame.intra_error /
-                                  DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
-    }
-  }
-
-  // Set nominal per second bandwidth for this frame
-  cpi->target_bandwidth = (int)(rc->per_frame_bandwidth *
-                                   cpi->output_framerate);
-  if (cpi->target_bandwidth < 0)
-    cpi->target_bandwidth = 0;
-
-  // Update the total stats remaining structure
-  subtract_stats(&cpi->twopass.total_left_stats, &this_frame);
-}
-
 static int test_candidate_kf(VP9_COMP *cpi,
                              FIRSTPASS_STATS *last_frame,
                              FIRSTPASS_STATS *this_frame,
@@ -2137,7 +1917,6 @@ static int test_candidate_kf(VP9_COMP *cpi,
     double boost_score = 0.0;
     double old_boost_score = 0.0;
     double decay_accumulator = 1.0;
-    double next_iiratio;
 
     local_next_frame = *next_frame;
 
@@ -2146,8 +1925,8 @@ static int test_candidate_kf(VP9_COMP *cpi,
 
     // Examine how well the key frame predicts subsequent frames
     for (i = 0; i < 16; i++) {
-      next_iiratio = (IIKFACTOR1 * local_next_frame.intra_error /
-                      DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error));
+      double next_iiratio = (IIKFACTOR1 * local_next_frame.intra_error /
+                             DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error));
 
       if (next_iiratio > RMAX)
         next_iiratio = RMAX;
@@ -2269,7 +2048,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
 
 
       // How fast is prediction quality decaying
-      loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
+      loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame);
 
       // We want to know something about the recent past... rather than
       // as used elsewhere where we are concened with decay in prediction
@@ -2403,9 +2182,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
         r = RMAX;
 
       // How fast is prediction quality decaying
-      if (!detect_flash(cpi, 0)) {
-        loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
-        decay_accumulator = decay_accumulator * loop_decay_rate;
+      if (!detect_flash(twopass, 0)) {
+        loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame);
+        decay_accumulator *= loop_decay_rate;
         decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
                               ? MIN_DECAY_FACTOR : decay_accumulator;
       }
@@ -2443,8 +2222,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
     if (kf_boost < (rc->frames_to_key * 3))
       kf_boost = (rc->frames_to_key * 3);
 
-    if (kf_boost < 300)  // Min KF boost
-      kf_boost = 300;
+    if (kf_boost < MIN_BOOST)
+      kf_boost = MIN_BOOST;
 
     // Make a note of baseline boost and the zero motion
     // accumulator value for use elsewhere.
@@ -2526,6 +2305,199 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   twopass->modified_error_left -= kf_group_err;
 }
 
+void vp9_get_svc_params(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  if ((cm->current_video_frame == 0) ||
+      (cm->frame_flags & FRAMEFLAGS_KEY) ||
+      (cpi->oxcf.auto_key && (cpi->rc.frames_since_key %
+                              cpi->key_frame_frequency == 0))) {
+    cm->frame_type = KEY_FRAME;
+    cpi->rc.source_alt_ref_active = 0;
+  } else {
+    cm->frame_type = INTER_FRAME;
+  }
+  cpi->rc.frames_till_gf_update_due = INT_MAX;
+  cpi->rc.baseline_gf_interval = INT_MAX;
+}
+
+// Use this macro to turn on/off use of alt-refs in one-pass mode.
+#define USE_ALTREF_FOR_ONE_PASS   1
+
+void vp9_get_one_pass_params(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  if (!cpi->refresh_alt_ref_frame &&
+      (cm->current_video_frame == 0 ||
+       cm->frame_flags & FRAMEFLAGS_KEY ||
+       cpi->rc.frames_to_key == 0 ||
+       (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
+    cm->frame_type = KEY_FRAME;
+    cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 &&
+                                    cpi->rc.frames_to_key == 0;
+    cpi->rc.frames_to_key = cpi->key_frame_frequency;
+    cpi->rc.kf_boost = KEY_FRAME_BOOST;
+    cpi->rc.source_alt_ref_active = 0;
+  } else {
+    cm->frame_type = INTER_FRAME;
+  }
+  if (cpi->rc.frames_till_gf_update_due == 0) {
+    cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL;
+    cpi->rc.frames_till_gf_update_due = cpi->rc.baseline_gf_interval;
+    // NOTE: frames_till_gf_update_due must be <= frames_to_key.
+    if (cpi->rc.frames_till_gf_update_due > cpi->rc.frames_to_key)
+      cpi->rc.frames_till_gf_update_due = cpi->rc.frames_to_key;
+    cpi->refresh_golden_frame = 1;
+    cpi->rc.source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
+    cpi->rc.gfu_boost = 1000;
+  }
+}
+
+void vp9_get_one_pass_cbr_params(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  if ((cm->current_video_frame == 0 ||
+      cm->frame_flags & FRAMEFLAGS_KEY ||
+      cpi->rc.frames_to_key == 0 ||
+      (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
+    cm->frame_type = KEY_FRAME;
+    cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 &&
+                                    cpi->rc.frames_to_key == 0;
+    cpi->rc.frames_to_key = cpi->key_frame_frequency;
+    cpi->rc.kf_boost = KEY_FRAME_BOOST;
+    cpi->rc.source_alt_ref_active = 0;
+  } else {
+    cm->frame_type = INTER_FRAME;
+  }
+  // Don't use gf_update by default in CBR mode.
+  cpi->rc.frames_till_gf_update_due = INT_MAX;
+  cpi->rc.baseline_gf_interval = INT_MAX;
+}
+
+void vp9_get_first_pass_params(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  if (!cpi->refresh_alt_ref_frame &&
+      (cm->current_video_frame == 0 ||
+       cm->frame_flags & FRAMEFLAGS_KEY)) {
+    cm->frame_type = KEY_FRAME;
+  } else {
+    cm->frame_type = INTER_FRAME;
+  }
+  // Do not use periodic key frames
+  cpi->rc.frames_to_key = INT_MAX;
+}
+
+void vp9_get_second_pass_params(VP9_COMP *cpi) {
+  int tmp_q;
+  int frames_left = (int)(cpi->twopass.total_stats.count -
+                          cpi->common.current_video_frame);
+
+  FIRSTPASS_STATS this_frame;
+  FIRSTPASS_STATS this_frame_copy;
+  RATE_CONTROL *rc = &cpi->rc;
+
+  double this_frame_intra_error;
+  double this_frame_coded_error;
+
+  if (!cpi->twopass.stats_in)
+    return;
+  if (cpi->refresh_alt_ref_frame) {
+    cpi->common.frame_type = INTER_FRAME;
+    rc->per_frame_bandwidth = cpi->twopass.gf_bits;
+    return;
+  }
+
+  vp9_clear_system_state();
+
+  if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
+    rc->active_worst_quality = cpi->oxcf.cq_level;
+  } else if (cpi->common.current_video_frame == 0) {
+    // Special case code for first frame.
+    int section_target_bandwidth =
+        (int)(cpi->twopass.bits_left / frames_left);
+
+    tmp_q = estimate_max_q(cpi, &cpi->twopass.total_left_stats,
+                           section_target_bandwidth);
+
+    rc->active_worst_quality = tmp_q;
+    rc->ni_av_qi = tmp_q;
+    rc->avg_q = vp9_convert_qindex_to_q(tmp_q);
+
+    // Limit the maxq value returned subsequently.
+    // This increases the risk of overspend or underspend if the initial
+    // estimate for the clip is bad, but helps prevent excessive
+    // variation in Q, especially near the end of a clip
+    // where for example a small overspend may cause Q to crash
+    // adjust_maxq_qrange(cpi);
+  }
+  vp9_zero(this_frame);
+  if (EOF == input_stats(&cpi->twopass, &this_frame))
+    return;
+
+  this_frame_intra_error = this_frame.intra_error;
+  this_frame_coded_error = this_frame.coded_error;
+
+  // keyframe and section processing !
+  if (rc->frames_to_key == 0 ||
+      (cpi->common.frame_flags & FRAMEFLAGS_KEY)) {
+    // Define next KF group and assign bits to it
+    this_frame_copy = this_frame;
+    find_next_key_frame(cpi, &this_frame_copy);
+  } else {
+    cpi->common.frame_type = INTER_FRAME;
+  }
+
+  // Is this a GF / ARF (Note that a KF is always also a GF)
+  if (rc->frames_till_gf_update_due == 0) {
+    // Define next gf group and assign bits to it
+    this_frame_copy = this_frame;
+
+#if CONFIG_MULTIPLE_ARF
+    if (cpi->multi_arf_enabled) {
+      define_fixed_arf_period(cpi);
+    } else {
+#endif
+      define_gf_group(cpi, &this_frame_copy);
+#if CONFIG_MULTIPLE_ARF
+    }
+#endif
+
+    if (cpi->twopass.gf_zeromotion_pct > 995) {
+      // As long as max_thresh for encode breakout is small enough, it is ok
+      // to enable it for no-show frame, i.e. set enable_encode_breakout to 2.
+      if (!cpi->common.show_frame)
+        cpi->enable_encode_breakout = 0;
+      else
+        cpi->enable_encode_breakout = 2;
+    }
+
+    rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+    cpi->refresh_golden_frame = 1;
+  } else {
+    // Otherwise this is an ordinary frame
+    // Assign bits from those allocated to the GF group
+    this_frame_copy =  this_frame;
+    assign_std_frame_bits(cpi, &this_frame_copy);
+  }
+
+  // Keep a globally available copy of this and the next frame's iiratio.
+  cpi->twopass.this_iiratio = (int)(this_frame_intra_error /
+                              DOUBLE_DIVIDE_CHECK(this_frame_coded_error));
+  {
+    FIRSTPASS_STATS next_frame;
+    if (lookup_next_frame_stats(&cpi->twopass, &next_frame) != EOF) {
+      cpi->twopass.next_iiratio = (int)(next_frame.intra_error /
+                                  DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
+    }
+  }
+
+  // Set nominal per second bandwidth for this frame
+  cpi->target_bandwidth = (int)(rc->per_frame_bandwidth *
+                                   cpi->output_framerate);
+  if (cpi->target_bandwidth < 0)
+    cpi->target_bandwidth = 0;
+
+  // Update the total stats remaining structure
+  subtract_stats(&cpi->twopass.total_left_stats, &this_frame);
+}
+
 void vp9_twopass_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
 #ifdef DISABLE_RC_LONG_TERM_MEM
   cpi->twopass.bits_left -=  cpi->rc.this_frame_target;
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index f89e4cb1c..ca5b10080 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -12,6 +12,10 @@
 #define VP9_ENCODER_VP9_FIRSTPASS_H_
 #include "vp9/encoder/vp9_onyx_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp9_init_first_pass(VP9_COMP *cpi);
 void vp9_first_pass(VP9_COMP *cpi);
 void vp9_end_first_pass(VP9_COMP *cpi);
@@ -25,4 +29,8 @@ void vp9_get_one_pass_params(VP9_COMP *cpi);
 void vp9_get_one_pass_cbr_params(VP9_COMP *cpi);
 void vp9_get_svc_params(VP9_COMP *cpi);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_FIRSTPASS_H_
diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c
index ee73ff15a..e6e59c05a 100644
--- a/vp9/encoder/vp9_lookahead.c
+++ b/vp9/encoder/vp9_lookahead.c
@@ -173,7 +173,6 @@ struct lookahead_entry * vp9_lookahead_peek(struct lookahead_ctx *ctx,
                                             int index) {
   struct lookahead_entry *buf = NULL;
 
-  assert(index < (int)ctx->max_sz);
   if (index < (int)ctx->sz) {
     index += ctx->read_idx;
     if (index >= (int)ctx->max_sz)
diff --git a/vp9/encoder/vp9_lookahead.h b/vp9/encoder/vp9_lookahead.h
index c773f8fcc..1c00c462d 100644
--- a/vp9/encoder/vp9_lookahead.h
+++ b/vp9/encoder/vp9_lookahead.h
@@ -14,6 +14,10 @@
 #include "vpx_scale/yv12config.h"
 #include "vpx/vpx_integer.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define MAX_LAG_BUFFERS 25
 
 struct lookahead_entry {
@@ -94,4 +98,8 @@ struct lookahead_entry *vp9_lookahead_peek(struct lookahead_ctx *ctx,
  */
 unsigned int vp9_lookahead_depth(struct lookahead_ctx *ctx);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_LOOKAHEAD_H_
diff --git a/vp9/encoder/vp9_mbgraph.h b/vp9/encoder/vp9_mbgraph.h
index c5bca4d01..79dd2bc95 100644
--- a/vp9/encoder/vp9_mbgraph.h
+++ b/vp9/encoder/vp9_mbgraph.h
@@ -11,6 +11,14 @@
 #ifndef VP9_ENCODER_VP9_MBGRAPH_H_
 #define VP9_ENCODER_VP9_MBGRAPH_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp9_update_mbgraph_stats(VP9_COMP *cpi);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_MBGRAPH_H_
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index b63fbd56b..4c4ac5dfa 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -184,12 +184,6 @@ static INLINE int sp(int x) {
   return (x & 7) << 1;
 }
 
-#define IFMVCV(r, c, s, e)                                \
-    if (c >= minc && c <= maxc && r >= minr && r <= maxr) \
-      s                                                   \
-    else                                                  \
-      e;
-
 static INLINE uint8_t *pre(uint8_t *buf, int stride, int r, int c, int offset) {
   return &buf[(r >> 3) * stride + (c >> 3) - offset];
 }
@@ -201,17 +195,18 @@ static INLINE uint8_t *pre(uint8_t *buf, int stride, int r, int c, int offset) {
 
 /* checks if (r, c) has better score than previous best */
 #define CHECK_BETTER(v, r, c) \
-    IFMVCV(r, c, {                                                       \
-      thismse = (DIST(r, c));                                            \
-      if ((v = MVC(r, c) + thismse) < besterr) {                         \
-        besterr = v;                                                     \
-        br = r;                                                          \
-        bc = c;                                                          \
-        *distortion = thismse;                                           \
-        *sse1 = sse;                                                     \
-      }                                                                  \
-    },                                                                   \
-    v = INT_MAX;)
+  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {              \
+    thismse = (DIST(r, c));                                            \
+    if ((v = MVC(r, c) + thismse) < besterr) {                         \
+      besterr = v;                                                     \
+      br = r;                                                          \
+      bc = c;                                                          \
+      *distortion = thismse;                                           \
+      *sse1 = sse;                                                     \
+    }                                                                  \
+  } else {                                                             \
+    v = INT_MAX;                                                       \
+  }
 
 #define FIRST_LEVEL_CHECKS                              \
   {                                                     \
@@ -469,7 +464,6 @@ int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
 #undef MVC
 #undef PRE
 #undef DIST
-#undef IFMVCV
 #undef CHECK_BETTER
 #undef SP
 
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index eee28a7ba..c3a8be212 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -15,6 +15,10 @@
 #include "vp9/encoder/vp9_block.h"
 #include "vp9/encoder/vp9_variance.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 // The maximum number of steps in a step search given the largest
 // allowed initial step
 #define MAX_MVSEARCH_STEPS 11
@@ -129,4 +133,8 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
                              int *mvjcost, int *mvcost[2],
                              const MV *center_mv, const uint8_t *second_pred,
                              int w, int h);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_MCOMP_H_
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 27531d232..a9b0718c8 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -93,14 +93,6 @@ FILE *kf_list;
 FILE *keyfile;
 #endif
 
-
-#ifdef MODE_STATS
-extern void init_tx_count_stats();
-extern void write_tx_count_stats();
-extern void init_switchable_interp_stats();
-extern void write_switchable_interp_stats();
-#endif
-
 #ifdef SPEEDSTATS
 unsigned int frames_at_speed[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                     0, 0, 0};
@@ -463,14 +455,17 @@ static void update_reference_segmentation_map(VP9_COMP *cpi) {
     cache_ptr += cm->mi_cols;
   }
 }
+static int is_slowest_mode(int mode) {
+  return (mode == MODE_SECONDPASS_BEST || mode == MODE_BESTQUALITY);
+}
 
-static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode) {
+static void set_rd_speed_thresholds(VP9_COMP *cpi) {
   SPEED_FEATURES *sf = &cpi->sf;
   int i;
 
   // Set baseline threshold values
   for (i = 0; i < MAX_MODES; ++i)
-    sf->thresh_mult[i] = mode == 0 ? -500 : 0;
+    sf->thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0;
 
   sf->thresh_mult[THR_NEARESTMV] = 0;
   sf->thresh_mult[THR_NEARESTG] = 0;
@@ -546,12 +541,12 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode) {
   }
 }
 
-static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi, int mode) {
+static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
   SPEED_FEATURES *sf = &cpi->sf;
   int i;
 
   for (i = 0; i < MAX_REFS; ++i)
-    sf->thresh_mult_sub8x8[i] = mode == 0 ? -500 : 0;
+    sf->thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode)  ? -500 : 0;
 
   sf->thresh_mult_sub8x8[THR_LAST] += 2500;
   sf->thresh_mult_sub8x8[THR_GOLD] += 2500;
@@ -601,7 +596,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
 
     sf->use_rd_breakout = 1;
     sf->adaptive_motion_search = 1;
-    sf->adaptive_pred_filter_type = 1;
+    sf->adaptive_pred_interp_filter = 1;
     sf->auto_mv_step_size = 1;
     sf->adaptive_rd_thresh = 2;
     sf->recode_loop = 2;
@@ -627,14 +622,14 @@ static void set_good_speed_feature(VP9_COMMON *cm,
                                  FLAG_SKIP_INTRA_LOWVAR;
     sf->use_rd_breakout = 1;
     sf->adaptive_motion_search = 1;
-    sf->adaptive_pred_filter_type = 2;
+    sf->adaptive_pred_interp_filter = 2;
     sf->reference_masking = 1;
     sf->auto_mv_step_size = 1;
 
     sf->disable_filter_search_var_thresh = 50;
     sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
 
-    sf->auto_min_max_partition_size = 1;
+    sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
     sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
     sf->adjust_partitioning_from_last_frame = 1;
     sf->last_partitioning_redo_frequency = 3;
@@ -664,14 +659,14 @@ static void set_good_speed_feature(VP9_COMMON *cm,
 
     sf->use_rd_breakout = 1;
     sf->adaptive_motion_search = 1;
-    sf->adaptive_pred_filter_type = 2;
+    sf->adaptive_pred_interp_filter = 2;
     sf->reference_masking = 1;
     sf->auto_mv_step_size = 1;
 
     sf->disable_filter_search_var_thresh = 100;
     sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
 
-    sf->auto_min_max_partition_size = 1;
+    sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
     sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
     sf->adjust_partitioning_from_last_frame = 1;
     sf->last_partitioning_redo_frequency = 3;
@@ -699,14 +694,14 @@ static void set_good_speed_feature(VP9_COMMON *cm,
 
     sf->use_rd_breakout = 1;
     sf->adaptive_motion_search = 1;
-    sf->adaptive_pred_filter_type = 2;
+    sf->adaptive_pred_interp_filter = 2;
     sf->reference_masking = 1;
     sf->auto_mv_step_size = 1;
 
     sf->disable_filter_search_var_thresh = 200;
     sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
 
-    sf->auto_min_max_partition_size = 1;
+    sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
     sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
     sf->adjust_partitioning_from_last_frame = 1;
     sf->last_partitioning_redo_frequency = 3;
@@ -772,7 +767,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
 
     sf->use_rd_breakout = 1;
     sf->adaptive_motion_search = 1;
-    sf->adaptive_pred_filter_type = 1;
+    sf->adaptive_pred_interp_filter = 1;
     sf->auto_mv_step_size = 1;
     sf->adaptive_rd_thresh = 2;
     sf->recode_loop = 2;
@@ -798,14 +793,14 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
 
     sf->use_rd_breakout = 1;
     sf->adaptive_motion_search = 1;
-    sf->adaptive_pred_filter_type = 2;
+    sf->adaptive_pred_interp_filter = 2;
     sf->auto_mv_step_size = 1;
     sf->reference_masking = 1;
 
     sf->disable_filter_search_var_thresh = 50;
     sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
 
-    sf->auto_min_max_partition_size = 1;
+    sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
     sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
     sf->adjust_partitioning_from_last_frame = 1;
     sf->last_partitioning_redo_frequency = 3;
@@ -847,17 +842,24 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
   if (speed >= 5) {
     int i;
     sf->disable_split_mask = DISABLE_ALL_SPLIT;
+    sf->auto_min_max_partition_size = frame_is_intra_only(cm) ?
+        RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX;
+    sf->subpel_force_stop = 1;
     for (i = 0; i < TX_SIZES; i++) {
       sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
       sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY;
     }
+    sf->use_fast_lpf_pick = 2;
+    sf->RD = 0;
+  }
+  if (speed >= 6) {
+    sf->super_fast_rtc = 1;
   }
 }
 
 void vp9_set_speed_features(VP9_COMP *cpi) {
   SPEED_FEATURES *sf = &cpi->sf;
   VP9_COMMON *cm = &cpi->common;
-  int mode = cpi->compressor_speed;
   int speed = cpi->speed;
   int i;
 
@@ -874,6 +876,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->recode_loop = 1;
   sf->subpel_search_method = SUBPEL_TREE;
   sf->subpel_iters_per_step = 2;
+  sf->subpel_force_stop = 0;
   sf->optimize_coefficients = !cpi->oxcf.lossless;
   sf->reduce_first_step_size = 0;
   sf->auto_mv_step_size = 0;
@@ -884,12 +887,12 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->tx_size_search_method = USE_FULL_RD;
   sf->use_lp32x32fdct = 0;
   sf->adaptive_motion_search = 0;
-  sf->adaptive_pred_filter_type = 0;
+  sf->adaptive_pred_interp_filter = 0;
   sf->reference_masking = 0;
   sf->use_one_partition_size_always = 0;
   sf->less_rectangular_check = 0;
   sf->use_square_partition_only = 0;
-  sf->auto_min_max_partition_size = 0;
+  sf->auto_min_max_partition_size = NOT_IN_USE;
   sf->max_partition_size = BLOCK_64X64;
   sf->min_partition_size = BLOCK_4X4;
   sf->adjust_partitioning_from_last_frame = 0;
@@ -909,23 +912,26 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->use_fast_coef_updates = 0;
   sf->using_small_partition_info = 0;
   sf->mode_skip_start = MAX_MODES;  // Mode index at which mode skip mask set
+  sf->super_fast_rtc = 0;
 
-  switch (mode) {
-    case 0:  // This is the best quality mode.
+  switch (cpi->oxcf.mode) {
+    case MODE_BESTQUALITY:
+    case MODE_SECONDPASS_BEST:  // This is the best quality mode.
       cpi->diamond_search_sad = vp9_full_range_search;
       break;
-    case 1:
+    case MODE_FIRSTPASS:
+    case MODE_GOODQUALITY:
+    case MODE_SECONDPASS:
       set_good_speed_feature(cm, sf, speed);
       break;
-      break;
-    case 2:
+    case MODE_REALTIME:
       set_rt_speed_feature(cm, sf, speed);
       break;
   }; /* switch */
 
   // Set rd thresholds based on mode and speed setting
-  set_rd_speed_thresholds(cpi, mode);
-  set_rd_speed_thresholds_sub8x8(cpi, mode);
+  set_rd_speed_thresholds(cpi);
+  set_rd_speed_thresholds_sub8x8(cpi);
 
   // Slow quant, dct and trellis not worthwhile for first pass
   // so make sure they are always turned off.
@@ -969,7 +975,7 @@ static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
   if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer,
                                cpi->oxcf.width, cpi->oxcf.height,
                                cm->subsampling_x, cm->subsampling_y,
-                               VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
+                               VP9_ENC_BORDER_IN_PIXELS))
     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                        "Failed to allocate altref buffer");
 }
@@ -1037,14 +1043,14 @@ static void update_frame_size(VP9_COMP *cpi) {
   if (vp9_realloc_frame_buffer(&cpi->last_frame_uf,
                                cm->width, cm->height,
                                cm->subsampling_x, cm->subsampling_y,
-                               VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
+                               VP9_ENC_BORDER_IN_PIXELS))
     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                        "Failed to reallocate last frame buffer");
 
   if (vp9_realloc_frame_buffer(&cpi->scaled_source,
                                cm->width, cm->height,
                                cm->subsampling_x, cm->subsampling_y,
-                               VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
+                               VP9_ENC_BORDER_IN_PIXELS))
     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                        "Failed to reallocate scaled source buffer");
 
@@ -1246,24 +1252,24 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
       // Real time and one pass deprecated in test code base
     case MODE_GOODQUALITY:
       cpi->pass = 0;
-      cpi->compressor_speed = 2;
       cpi->oxcf.cpu_used = clamp(cpi->oxcf.cpu_used, -5, 5);
       break;
 
     case MODE_FIRSTPASS:
       cpi->pass = 1;
-      cpi->compressor_speed = 1;
       break;
 
     case MODE_SECONDPASS:
       cpi->pass = 2;
-      cpi->compressor_speed = 1;
       cpi->oxcf.cpu_used = clamp(cpi->oxcf.cpu_used, -5, 5);
       break;
 
     case MODE_SECONDPASS_BEST:
       cpi->pass = 2;
-      cpi->compressor_speed = 0;
+      break;
+
+    case MODE_REALTIME:
+      cpi->pass = 0;
       break;
   }
 
@@ -1339,7 +1345,7 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
 
   cpi->cq_target_quality = cpi->oxcf.cq_level;
 
-  cm->mcomp_filter_type = DEFAULT_INTERP_FILTER;
+  cm->interp_filter = DEFAULT_INTERP_FILTER;
 
   cpi->target_bandwidth = cpi->oxcf.target_bandwidth;
 
@@ -1631,11 +1637,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
     init_context_counters();
 #endif
 
-#ifdef MODE_STATS
-  init_tx_count_stats();
-  init_switchable_interp_stats();
-#endif
-
   /*Initialize the feed-forward activity masking.*/
   cpi->activity_avg = 90 << 12;
   cpi->key_frame_frequency = cpi->oxcf.key_freq;
@@ -1892,13 +1893,6 @@ void vp9_remove_compressor(VP9_PTR *ptr) {
       vp9_end_second_pass(cpi);
     }
 
-#ifdef MODE_STATS
-    if (cpi->pass != 1) {
-      write_tx_count_stats();
-      write_switchable_interp_stats();
-    }
-#endif
-
 #if CONFIG_INTERNAL_STATS
 
     vp9_clear_system_state();
@@ -2203,7 +2197,7 @@ int vp9_get_reference_enc(VP9_PTR ptr, int index, YV12_BUFFER_CONFIG **fb) {
   if (index < 0 || index >= REF_FRAMES)
     return -1;
 
-  *fb = &cm->yv12_fb[cm->ref_frame_map[index]];
+  *fb = &cm->frame_bufs[cm->ref_frame_map[index]].buf;
   return 0;
 }
 
@@ -2495,9 +2489,9 @@ static void update_reference_frames(VP9_COMP * const cpi) {
   // At this point the new frame has been encoded.
   // If any buffer copy / swapping is signaled it should be done here.
   if (cm->frame_type == KEY_FRAME) {
-    ref_cnt_fb(cm->fb_idx_ref_cnt,
+    ref_cnt_fb(cm->frame_bufs,
                &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
-    ref_cnt_fb(cm->fb_idx_ref_cnt,
+    ref_cnt_fb(cm->frame_bufs,
                &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
   }
 #if CONFIG_MULTIPLE_ARF
@@ -2518,7 +2512,7 @@ static void update_reference_frames(VP9_COMP * const cpi) {
      */
     int tmp;
 
-    ref_cnt_fb(cm->fb_idx_ref_cnt,
+    ref_cnt_fb(cm->frame_bufs,
                &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
 
     tmp = cpi->alt_fb_idx;
@@ -2532,18 +2526,18 @@ static void update_reference_frames(VP9_COMP * const cpi) {
         arf_idx = cpi->arf_buffer_idx[cpi->sequence_number + 1];
       }
 #endif
-      ref_cnt_fb(cm->fb_idx_ref_cnt,
+      ref_cnt_fb(cm->frame_bufs,
                  &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
     }
 
     if (cpi->refresh_golden_frame) {
-      ref_cnt_fb(cm->fb_idx_ref_cnt,
+      ref_cnt_fb(cm->frame_bufs,
                  &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
     }
   }
 
   if (cpi->refresh_last_frame) {
-    ref_cnt_fb(cm->fb_idx_ref_cnt,
+    ref_cnt_fb(cm->frame_bufs,
                &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx);
   }
 }
@@ -2581,20 +2575,20 @@ static void scale_references(VP9_COMP *cpi) {
 
   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
     const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
-    YV12_BUFFER_CONFIG *ref = &cm->yv12_fb[idx];
+    YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf;
 
     if (ref->y_crop_width != cm->width ||
         ref->y_crop_height != cm->height) {
       const int new_fb = get_free_fb(cm);
-      vp9_realloc_frame_buffer(&cm->yv12_fb[new_fb],
+      vp9_realloc_frame_buffer(&cm->frame_bufs[new_fb].buf,
                                cm->width, cm->height,
                                cm->subsampling_x, cm->subsampling_y,
-                               VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
-      scale_and_extend_frame(ref, &cm->yv12_fb[new_fb]);
+                               VP9_ENC_BORDER_IN_PIXELS);
+      scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf);
       cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
     } else {
       cpi->scaled_ref_idx[ref_frame - 1] = idx;
-      cm->fb_idx_ref_cnt[idx]++;
+      cm->frame_bufs[idx].ref_count++;
     }
   }
 }
@@ -2604,7 +2598,7 @@ static void release_scaled_references(VP9_COMP *cpi) {
   int i;
 
   for (i = 0; i < 3; i++)
-    cm->fb_idx_ref_cnt[cpi->scaled_ref_idx[i]]--;
+    cm->frame_bufs[cpi->scaled_ref_idx[i]].ref_count--;
 }
 
 static void full_to_model_count(unsigned int *model_count,
@@ -2749,7 +2743,9 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
     if (cpi->sf.recode_loop != 0) {
       vp9_save_coding_context(cpi);
       cpi->dummy_packing = 1;
-      vp9_pack_bitstream(cpi, dest, size);
+      if (!cpi->sf.super_fast_rtc)
+        vp9_pack_bitstream(cpi, dest, size);
+
       cpi->rc.projected_frame_size = (*size) << 3;
       vp9_restore_coding_context(cpi);
 
@@ -2972,15 +2968,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   // Clear down mmx registers to allow floating point in what follows.
   vp9_clear_system_state();
 
-  // For an alt ref frame in 2 pass we skip the call to the second
-  // pass function that sets the target bandwidth so we must set it here.
-  if (cpi->refresh_alt_ref_frame) {
-    // Set a per frame bit target for the alt ref frame.
-    cpi->rc.per_frame_bandwidth = cpi->twopass.gf_bits;
-    // Set a per second target bitrate.
-    cpi->target_bandwidth = (int)(cpi->twopass.gf_bits * cpi->output_framerate);
-  }
-
   // Clear zbin over-quant value and mode boost values.
   cpi->zbin_mode_boost = 0;
 
@@ -3107,13 +3094,24 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
                                    &frame_under_shoot_limit,
                                    &frame_over_shoot_limit);
 
-  // Decide q and q bounds
+  // Decide q and q bounds.
   q = vp9_rc_pick_q_and_adjust_q_bounds(cpi,
                                         &bottom_index,
                                         &top_index);
 
+  // JBB : This is realtime mode.  In real time mode the first frame
+  // should be larger. Q of 0 is disabled because we force tx size to be
+  // 16x16...
+  if (cpi->sf.super_fast_rtc) {
+    if (cpi->common.current_video_frame == 0)
+      q /= 3;
+
+    if (q == 0)
+      q++;
+  }
+
   if (!frame_is_intra_only(cm)) {
-    cm->mcomp_filter_type = DEFAULT_INTERP_FILTER;
+    cm->interp_filter = DEFAULT_INTERP_FILTER;
     /* TODO: Decide this more intelligently */
     set_high_precision_mv(cpi, (q < HIGH_PRECISION_MV_QTHRESH));
   }
@@ -3253,7 +3251,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   cm->last_height = cm->height;
 
   // reset to normal state now that we are done.
-  cm->last_show_frame = cm->show_frame;
+  if (!cm->show_existing_frame)
+    cm->last_show_frame = cm->show_frame;
   if (cm->show_frame) {
     // current mip will be the prev_mip for the next frame
     MODE_INFO *temp = cm->prev_mip;
@@ -3312,7 +3311,6 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size,
 
   vp9_get_second_pass_params(cpi);
   encode_frame_to_data_rate(cpi, size, dest, frame_flags);
-  // vp9_print_modes_and_motion_vectors(&cpi->common, "encode.stt");
 
   vp9_twopass_postencode_update(cpi, *size);
 }
@@ -3334,6 +3332,7 @@ int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags,
                           YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
                           int64_t end_time) {
   VP9_COMP              *cpi = (VP9_COMP *) ptr;
+  VP9_COMMON             *cm = &cpi->common;
   struct vpx_usec_timer  timer;
   int                    res = 0;
   const int    subsampling_x = sd->uv_width  < sd->y_width;
@@ -3347,6 +3346,12 @@ int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags,
   vpx_usec_timer_mark(&timer);
   cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
 
+  if (cm->version == 0 && (subsampling_x != 1 || subsampling_y != 1)) {
+    vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
+                       "Non-4:2:0 color space requires profile >= 1");
+    res = -1;
+  }
+
   return res;
 }
 
@@ -3414,6 +3419,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
                             int64_t *time_stamp, int64_t *time_end, int flush) {
   VP9_COMP *cpi = (VP9_COMP *) ptr;
   VP9_COMMON *cm = &cpi->common;
+  MACROBLOCKD *xd = &cpi->mb.e_mbd;
   struct vpx_usec_timer  cmptimer;
   YV12_BUFFER_CONFIG *force_src_buffer = NULL;
   MV_REFERENCE_FRAME ref_frame;
@@ -3461,8 +3467,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
       if (cpi->oxcf.arnr_max_frames > 0) {
         // Produce the filtered ARF frame.
         // TODO(agrange) merge these two functions.
-        configure_arnr_filter(cpi, cm->current_video_frame + frames_to_arf,
-                              cpi->rc.gfu_boost);
+        vp9_configure_arnr_filter(cpi, frames_to_arf, cpi->rc.gfu_boost);
         vp9_temporal_filter_prepare(cpi, frames_to_arf);
         vp9_extend_frame_borders(&cpi->alt_ref_buffer,
                                  cm->subsampling_x, cm->subsampling_y);
@@ -3478,7 +3483,9 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
 #if CONFIG_MULTIPLE_ARF
       if (!cpi->multi_arf_enabled)
 #endif
-        cpi->rc.source_alt_ref_pending = 0;   // Clear Pending altf Ref flag.
+        cpi->rc.source_alt_ref_pending = 0;
+    } else {
+      cpi->rc.source_alt_ref_pending = 0;
     }
   }
 
@@ -3560,7 +3567,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
   /* find a free buffer for the new frame, releasing the reference previously
    * held.
    */
-  cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
+  cm->frame_bufs[cm->new_fb_idx].ref_count--;
   cm->new_fb_idx = get_free_fb(cm);
 
 #if CONFIG_MULTIPLE_ARF
@@ -3580,13 +3587,11 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
   vp9_realloc_frame_buffer(get_frame_new_buffer(cm),
                            cm->width, cm->height,
                            cm->subsampling_x, cm->subsampling_y,
-                           VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
-
+                           VP9_ENC_BORDER_IN_PIXELS);
 
   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
     const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
-    YV12_BUFFER_CONFIG *const buf = &cm->yv12_fb[idx];
-
+    YV12_BUFFER_CONFIG *const buf = &cm->frame_bufs[idx].buf;
     RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1];
     ref_buf->buf = buf;
     ref_buf->idx = idx;
@@ -3598,11 +3603,12 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
       vp9_extend_frame_borders(buf, cm->subsampling_x, cm->subsampling_y);
   }
 
-  vp9_setup_interp_filters(&cpi->mb.e_mbd, DEFAULT_INTERP_FILTER, cm);
+  set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
+  xd->interp_kernel = vp9_get_interp_kernel(
+      DEFAULT_INTERP_FILTER == SWITCHABLE ? EIGHTTAP : DEFAULT_INTERP_FILTER);
 
-  if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
-      vp9_vaq_init();
-  }
+  if (cpi->oxcf.aq_mode == VARIANCE_AQ)
+    vp9_vaq_init();
 
   if (cpi->use_svc) {
     SvcEncode(cpi, size, dest, frame_flags);
@@ -3872,24 +3878,25 @@ void vp9_set_svc(VP9_PTR comp, int use_svc) {
   return;
 }
 
-int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) {
+int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source,
+                    const YV12_BUFFER_CONFIG *reference) {
   int i, j;
   int total = 0;
 
-  uint8_t *src = source->y_buffer;
-  uint8_t *dst = dest->y_buffer;
+  const uint8_t *src = source->y_buffer;
+  const uint8_t *ref = reference->y_buffer;
 
   // Loop through the Y plane raw and reconstruction data summing
   // (square differences)
   for (i = 0; i < source->y_height; i += 16) {
     for (j = 0; j < source->y_width; j += 16) {
       unsigned int sse;
-      total += vp9_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride,
-                            &sse);
+      total += vp9_mse16x16(src + j, source->y_stride,
+                            ref + j, reference->y_stride, &sse);
     }
 
     src += 16 * source->y_stride;
-    dst += 16 * dest->y_stride;
+    ref += 16 * reference->y_stride;
   }
 
   return total;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index a665bf859..d928312b6 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -8,25 +8,32 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-
 #ifndef VP9_ENCODER_VP9_ONYX_INT_H_
 #define VP9_ENCODER_VP9_ONYX_INT_H_
 
 #include <stdio.h>
+
 #include "./vpx_config.h"
+#include "vpx_ports/mem.h"
+#include "vpx/internal/vpx_codec_internal.h"
+
+#include "vp9/common/vp9_entropy.h"
+#include "vp9/common/vp9_entropymode.h"
 #include "vp9/common/vp9_onyx.h"
-#include "vp9/encoder/vp9_treewriter.h"
-#include "vp9/encoder/vp9_tokenize.h"
 #include "vp9/common/vp9_onyxc_int.h"
-#include "vp9/encoder/vp9_variance.h"
+
 #include "vp9/encoder/vp9_encodemb.h"
-#include "vp9/encoder/vp9_quantize.h"
-#include "vp9/common/vp9_entropy.h"
-#include "vp9/common/vp9_entropymode.h"
-#include "vpx_ports/mem.h"
-#include "vpx/internal/vpx_codec_internal.h"
-#include "vp9/encoder/vp9_mcomp.h"
 #include "vp9/encoder/vp9_lookahead.h"
+#include "vp9/encoder/vp9_mcomp.h"
+#include "vp9/encoder/vp9_quantize.h"
+#include "vp9/encoder/vp9_ratectrl.h"
+#include "vp9/encoder/vp9_tokenize.h"
+#include "vp9/encoder/vp9_treewriter.h"
+#include "vp9/encoder/vp9_variance.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 #define DISABLE_RC_LONG_TERM_MEM 0
 // #define MODE_TEST_HIT_STATS
@@ -68,7 +75,6 @@ typedef struct {
   // 0 = ZERO_MV, MV
   signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
 
-  int inter_mode_counts[INTER_MODE_CONTEXTS][INTER_MODES - 1][2];
   FRAME_CONTEXT fc;
 } CODING_CONTEXT;
 
@@ -95,18 +101,6 @@ typedef struct {
 } FIRSTPASS_STATS;
 
 typedef struct {
-  int frames_so_far;
-  double frame_intra_error;
-  double frame_coded_error;
-  double frame_pcnt_inter;
-  double frame_pcnt_motion;
-  double frame_mvr;
-  double frame_mvr_abs;
-  double frame_mvc;
-  double frame_mvc_abs;
-} ONEPASS_FRAMESTATS;
-
-typedef struct {
   struct {
     int err;
     union {
@@ -187,6 +181,12 @@ typedef enum {
 } TX_SIZE_SEARCH_METHOD;
 
 typedef enum {
+  NOT_IN_USE = 0,
+  RELAXED_NEIGHBORING_MIN_MAX = 1,
+  STRICT_NEIGHBORING_MIN_MAX = 2
+} AUTO_MIN_MAX_MODE;
+
+typedef enum {
   // Values should be powers of 2 so that they can be selected as bits of
   // an integer flags field
 
@@ -253,6 +253,9 @@ typedef struct {
   // Maximum number of steps in logarithmic subpel search before giving up.
   int subpel_iters_per_step;
 
+  // Control when to stop subpel search
+  int subpel_force_stop;
+
   // Thresh_mult is used to set a threshold for the rd score. A higher value
   // means that we will accept the best mode so far more often. This number
   // is used in combination with the current block size, and thresh_freq_fact
@@ -340,9 +343,8 @@ typedef struct {
   BLOCK_SIZE always_this_block_size;
 
   // Sets min and max partition sizes for this 64x64 region based on the
-  // same superblock in last encoded frame, and the left and above neighbor
-  // in this block.
-  int auto_min_max_partition_size;
+  // same 64x64 in last encoded frame, and the left and above neighbor.
+  AUTO_MIN_MAX_MODE auto_min_max_partition_size;
 
   // Min and max partition size we enable (block_size) as per auto
   // min max, but also used by adjust partitioning, and pick_partitioning.
@@ -376,7 +378,7 @@ typedef struct {
   // best for 8x8 mode. If set to 0 we always re check all the filters for
   // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter
   // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected.
-  int adaptive_pred_filter_type;
+  int adaptive_pred_interp_filter;
 
   // Implements various heuristics to skip searching modes
   // The heuristics selected are based on  flags
@@ -405,75 +407,19 @@ typedef struct {
   // final encode.
   int use_uv_intra_rd_estimate;
 
-  // This picks a loop filter strength by trying a small portion of the image
-  // with different values.
+  // This feature controls how the loop filter level is determined:
+  // 0: Try the full image with different values.
+  // 1: Try a small portion of the image with different values.
+  // 2: Estimate the level based on quantizer and frame type
   int use_fast_lpf_pick;
 
   // This feature limits the number of coefficients updates we actually do
   // by only looking at counts from 1/2 the bands.
   int use_fast_coef_updates;  // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced
-} SPEED_FEATURES;
 
-typedef struct {
-  // Rate targetting variables
-  int this_frame_target;
-  int projected_frame_size;
-  int sb64_target_rate;
-  int last_q[3];                   // Separate values for Intra/Inter/ARF-GF
-  int last_boosted_qindex;         // Last boosted GF/KF/ARF q
-
-  int gfu_boost;
-  int last_boost;
-  int kf_boost;
-
-  double rate_correction_factor;
-  double key_frame_rate_correction_factor;
-  double gf_rate_correction_factor;
-
-  unsigned int frames_since_golden;
-  unsigned int frames_till_gf_update_due;  // Count down till next GF
-  unsigned int max_gf_interval;
-  unsigned int baseline_gf_interval;
-  unsigned int frames_to_key;
-  unsigned int frames_since_key;
-  unsigned int this_key_frame_forced;
-  unsigned int next_key_frame_forced;
-  unsigned int source_alt_ref_pending;
-  unsigned int source_alt_ref_active;
-  unsigned int is_src_frame_alt_ref;
-
-  int per_frame_bandwidth;        // Current section per frame bandwidth target
-  int av_per_frame_bandwidth;     // Average frame size target for clip
-  int min_frame_bandwidth;        // Minimum allocation used for any frame
-  int max_frame_bandwidth;        // Maximum burst rate allowed for a frame.
-
-  int ni_av_qi;
-  int ni_tot_qi;
-  int ni_frames;
-  int avg_frame_qindex[3];  // 0 - KEY, 1 - INTER, 2 - ARF/GF
-  double tot_q;
-  double avg_q;
-
-  int buffer_level;
-  int bits_off_target;
-
-  int decimation_factor;
-  int decimation_count;
-
-  int rolling_target_bits;
-  int rolling_actual_bits;
-
-  int long_rolling_target_bits;
-  int long_rolling_actual_bits;
-
-  int64_t total_actual_bits;
-  int total_target_vs_actual;        // debug stats
-
-  int worst_quality;
-  int active_worst_quality;
-  int best_quality;
-  // int active_best_quality;
-} RATE_CONTROL;
+  // This flag control the use of the new super fast rtc mode
+  int super_fast_rtc;
+} SPEED_FEATURES;
 
 typedef struct VP9_COMP {
   DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
@@ -496,7 +442,6 @@ typedef struct VP9_COMP {
   MACROBLOCK mb;
   VP9_COMMON common;
   VP9_CONFIG oxcf;
-  struct rdcost_block_args rdcost_stack;
   struct lookahead_ctx    *lookahead;
   struct lookahead_entry  *source;
 #if CONFIG_MULTIPLE_ARF
@@ -601,11 +546,6 @@ typedef struct VP9_COMP {
   int64_t target_bandwidth;
   struct vpx_codec_pkt_list  *output_pkt_list;
 
-#if 0
-  // Experimental code for lagged and one pass
-  ONEPASS_FRAMESTATS one_pass_frame_stats[MAX_LAG_BUFFERS];
-  int one_pass_frame_index;
-#endif
   MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS];
   int mbgraph_n_frames;             // number of frames filled in the above
   int static_mb_pct;                // % forced skip mbs by segmentation
@@ -613,12 +553,11 @@ typedef struct VP9_COMP {
 
   // for real time encoding
   int speed;
-  int compressor_speed;
 
   int cpu_used;
   int pass;
 
-  vp9_prob last_skip_false_probs[3][MBSKIP_CONTEXTS];
+  vp9_prob last_skip_false_probs[3][SKIP_CONTEXTS];
   int last_skip_probs_q[3];
 
   int ref_frame_flags;
@@ -780,7 +719,8 @@ typedef struct VP9_COMP {
   PARTITION_CONTEXT left_seg_context[8];
 } VP9_COMP;
 
-static int get_ref_frame_idx(VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
+static int get_ref_frame_idx(const VP9_COMP *cpi,
+                             MV_REFERENCE_FRAME ref_frame) {
   if (ref_frame == LAST_FRAME) {
     return cpi->lst_fb_idx;
   } else if (ref_frame == GOLDEN_FRAME) {
@@ -790,21 +730,11 @@ static int get_ref_frame_idx(VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
   }
 }
 
-static int get_scale_ref_frame_idx(VP9_COMP *cpi,
-                                   MV_REFERENCE_FRAME ref_frame) {
-  if (ref_frame == LAST_FRAME) {
-    return 0;
-  } else if (ref_frame == GOLDEN_FRAME) {
-    return 1;
-  } else {
-    return 2;
-  }
-}
-
 static YV12_BUFFER_CONFIG *get_ref_frame_buffer(VP9_COMP *cpi,
                                                 MV_REFERENCE_FRAME ref_frame) {
   VP9_COMMON *const cm = &cpi->common;
-  return &cm->yv12_fb[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]];
+  return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi,
+                                                             ref_frame)]].buf;
 }
 
 void vp9_encode_frame(VP9_COMP *cpi);
@@ -815,7 +745,8 @@ void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x);
 
 void vp9_set_speed_features(VP9_COMP *cpi);
 
-int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest);
+int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source,
+                    const YV12_BUFFER_CONFIG *reference);
 
 void vp9_alloc_compressor_data(VP9_COMP *cpi);
 
@@ -825,4 +756,16 @@ static int get_token_alloc(int mb_rows, int mb_cols) {
   return mb_rows * mb_cols * (48 * 16 + 4);
 }
 
+static void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
+                         MV_REFERENCE_FRAME ref0, MV_REFERENCE_FRAME ref1) {
+  xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME
+                                                         : 0];
+  xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME
+                                                         : 0];
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_ONYX_INT_H_
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
index a4ceabdf1..0c0a20f90 100644
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -14,6 +14,7 @@
 #include "vp9/encoder/vp9_onyx_int.h"
 #include "vp9/encoder/vp9_picklpf.h"
 #include "vp9/encoder/vp9_quantize.h"
+#include "vp9/common/vp9_quant_common.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_scale/vpx_scale.h"
 #include "vp9/common/vp9_alloccommon.h"
@@ -33,40 +34,53 @@ static int get_max_filter_level(VP9_COMP *cpi, int base_qindex) {
 void vp9_set_alt_lf_level(VP9_COMP *cpi, int filt_val) {
 }
 
-void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial) {
+static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi,
+                            MACROBLOCKD *const xd, VP9_COMMON *const cm,
+                            int filt_level, int partial) {
+  int filt_err;
+
+  vp9_set_alt_lf_level(cpi, filt_level);
+  vp9_loop_filter_frame(cm, xd, filt_level, 1, partial);
+
+  filt_err = vp9_calc_ss_err(sd, cm->frame_to_show);
+
+  // Re-instate the unfiltered frame
+  vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
+
+  return filt_err;
+}
+
+static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
+                                int partial) {
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
   VP9_COMMON *const cm = &cpi->common;
   struct loopfilter *const lf = &cm->lf;
   const int min_filter_level = get_min_filter_level(cpi, cm->base_qindex);
   const int max_filter_level = get_max_filter_level(cpi, cm->base_qindex);
-  int best_err = 0;
-  int filt_err = 0;
+  int best_err;
   int filt_best;
   int filt_direction = 0;
   // Start the search at the previous frame filter level unless it is now out of
   // range.
   int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level);
   int filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
+  // Sum squared error at each filter level
+  int ss_err[MAX_LOOP_FILTER + 1];
 
-  lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0
-                                                    : cpi->oxcf.sharpness;
+  // Set each entry to -1
+  vpx_memset(ss_err, 0xFF, sizeof(ss_err));
 
   //  Make a copy of the unfiltered / processed recon buffer
   vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
 
-  // Get baseline error score
-  vp9_set_alt_lf_level(cpi, filt_mid);
-  vp9_loop_filter_frame(cm, xd, filt_mid, 1, partial);
-
-  best_err = vp9_calc_ss_err(sd, cm->frame_to_show);
+  best_err = try_filter_frame(sd, cpi, xd, cm, filt_mid, partial);
   filt_best = filt_mid;
-
-  //  Re-instate the unfiltered frame
-  vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
+  ss_err[filt_mid] = best_err;
 
   while (filter_step > 0) {
     const int filt_high = MIN(filt_mid + filter_step, max_filter_level);
     const int filt_low = MAX(filt_mid - filter_step, min_filter_level);
+    int filt_err;
 
     // Bias against raising loop filter in favor of lowering it.
     int bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;
@@ -80,14 +94,12 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial) {
 
     if (filt_direction <= 0 && filt_low != filt_mid) {
       // Get Low filter error score
-      vp9_set_alt_lf_level(cpi, filt_low);
-      vp9_loop_filter_frame(cm, xd, filt_low, 1, partial);
-
-      filt_err = vp9_calc_ss_err(sd, cm->frame_to_show);
-
-      // Re-instate the unfiltered frame
-      vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
-
+      if (ss_err[filt_low] < 0) {
+        filt_err = try_filter_frame(sd, cpi, xd, cm, filt_low, partial);
+        ss_err[filt_low] = filt_err;
+      } else {
+        filt_err = ss_err[filt_low];
+      }
       // If value is close to the best so far then bias towards a lower loop
       // filter value.
       if ((filt_err - bias) < best_err) {
@@ -101,14 +113,12 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial) {
 
     // Now look at filt_high
     if (filt_direction >= 0 && filt_high != filt_mid) {
-      vp9_set_alt_lf_level(cpi, filt_high);
-      vp9_loop_filter_frame(cm, xd, filt_high, 1, partial);
-
-      filt_err = vp9_calc_ss_err(sd, cm->frame_to_show);
-
-      //  Re-instate the unfiltered frame
-      vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
-
+      if (ss_err[filt_high] < 0) {
+        filt_err = try_filter_frame(sd, cpi, xd, cm, filt_high, partial);
+        ss_err[filt_high] = filt_err;
+      } else {
+        filt_err = ss_err[filt_high];
+      }
       // Was it better than the previous best?
       if (filt_err < (best_err - bias)) {
         best_err = filt_err;
@@ -128,3 +138,27 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial) {
 
   lf->filter_level = filt_best;
 }
+
+void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
+                           int method) {
+  VP9_COMMON *const cm = &cpi->common;
+  struct loopfilter *const lf = &cm->lf;
+
+  lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0
+                                                    : cpi->oxcf.sharpness;
+
+  if (method == 2) {
+    const int min_filter_level = get_min_filter_level(cpi, cm->base_qindex);
+    const int max_filter_level = get_max_filter_level(cpi, cm->base_qindex);
+    const int q = vp9_ac_quant(cm->base_qindex, 0);
+    // These values were determined by linear fitting the result of the
+    // searched level
+    // filt_guess = q * 0.316206 + 3.87252
+    int filt_guess = (q * 20723 + 1015158 + (1 << 17)) >> 18;
+    if (cm->frame_type == KEY_FRAME)
+      filt_guess -= 4;
+    lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
+  } else {
+    search_filter_level(sd, cpi, method == 1);
+  }
+}
diff --git a/vp9/encoder/vp9_picklpf.h b/vp9/encoder/vp9_picklpf.h
index 9de4cf849..0fc1f88b3 100644
--- a/vp9/encoder/vp9_picklpf.h
+++ b/vp9/encoder/vp9_picklpf.h
@@ -12,11 +12,19 @@
 #ifndef VP9_ENCODER_VP9_PICKLPF_H_
 #define VP9_ENCODER_VP9_PICKLPF_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct yv12_buffer_config;
 struct VP9_COMP;
 
 void vp9_set_alt_lf_level(struct VP9_COMP *cpi, int filt_val);
 
-void vp9_pick_filter_level(struct yv12_buffer_config *sd,
-                           struct VP9_COMP *cpi, int partial);
+void vp9_pick_filter_level(const struct yv12_buffer_config *sd,
+                           struct VP9_COMP *cpi, int method);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_PICKLPF_H_
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index f317f2a0d..bd28ea51e 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -60,8 +60,8 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
   int buf_offset;
   int stride = xd->plane[0].pre[0].stride;
 
-  YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, ref);
-
+  const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
+                                                                        ref);
   if (scaled_ref_frame) {
     int i;
     // Swap out the reference frame for a version that's been scaled to
@@ -80,7 +80,7 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
   step_param = 6;
   further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
 
-  for (i = LAST_FRAME; i <= ALTREF_FRAME && cpi->common.show_frame; ++i) {
+  for (i = LAST_FRAME; i <= LAST_FRAME && cpi->common.show_frame; ++i) {
     if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
       tmp_mv->as_int = INVALID_MV;
 
@@ -124,8 +124,8 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                    stride, 0x7fffffff);
 
   // scale to 1/8 pixel resolution
-  tmp_mv->as_mv.row = tmp_mv->as_mv.row << 3;
-  tmp_mv->as_mv.col = tmp_mv->as_mv.col << 3;
+  tmp_mv->as_mv.row = tmp_mv->as_mv.row * 8;
+  tmp_mv->as_mv.col = tmp_mv->as_mv.col * 8;
 
   // calculate the bit cost on motion vector
   *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv,
@@ -142,8 +142,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                             int mi_row, int mi_col,
                             int *returnrate,
                             int64_t *returndistortion,
-                            BLOCK_SIZE bsize,
-                            PICK_MODE_CONTEXT *ctx) {
+                            BLOCK_SIZE bsize) {
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
   const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
@@ -155,6 +154,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                     VP9_ALT_FLAG };
   int64_t best_rd = INT64_MAX;
   int64_t this_rd;
+  int64_t cost[4]= { 0, 100, 150,  205 };
 
   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
 
@@ -171,7 +171,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   mbmi->tx_size = MIN(max_txsize_lookup[bsize],
                       tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
 
-  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+  for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) {
     x->pred_mv_sad[ref_frame] = INT_MAX;
     if (cpi->ref_frame_flags & flag_list[ref_frame]) {
       vp9_setup_buffer_inter(cpi, x, tile,
@@ -182,7 +182,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
     frame_mv[ZEROMV][ref_frame].as_int = 0;
   }
 
-  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+  for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) {
     int rate_mv = 0;
 
     if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
@@ -191,29 +191,42 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
     // Select prediction reference frames.
     xd->plane[0].pre[0] = yv12_mb[ref_frame][0];
 
-
-    x->mode_sad[ref_frame][INTER_OFFSET(NEWMV)] =
-        full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
-                                 &frame_mv[NEWMV][ref_frame], &rate_mv);
-
-    if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV)
-      continue;
-
     clamp_mv2(&frame_mv[NEARESTMV][ref_frame].as_mv, xd);
     clamp_mv2(&frame_mv[NEARMV][ref_frame].as_mv, xd);
 
     for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
-      int rate = x->inter_mode_cost[mbmi->mode_context[ref_frame]]
-                                   [INTER_OFFSET(this_mode)];
-      int64_t dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)] *
-                      x->mode_sad[ref_frame][INTER_OFFSET(this_mode)];
-      this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
+      int rate = cost[this_mode - NEARESTMV];
+      int64_t dist;
+
+      if (this_mode == NEWMV) {
+        if (this_rd < 300)
+          continue;
+
+        x->mode_sad[ref_frame][INTER_OFFSET(NEWMV)] =
+            full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
+                                     &frame_mv[NEWMV][ref_frame], &rate_mv);
+
+        if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV)
+          continue;
+      }
+
+      dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)];
+      this_rd = rate + dist;
 
       if (this_rd < best_rd) {
         best_rd = this_rd;
         mbmi->mode = this_mode;
         mbmi->ref_frame[0] = ref_frame;
         mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
+        xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
+        mbmi->interp_filter = EIGHTTAP;
+
+        mbmi->ref_frame[1] = INTRA_FRAME;
+        mbmi->tx_size = max_txsize_lookup[bsize];
+        mbmi->uv_mode = this_mode;
+        mbmi->skip_coeff = 0;
+        mbmi->sb_type = bsize;
+        mbmi->segment_id = 0;
       }
     }
   }
@@ -223,8 +236,5 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   // TODO(jingning) intra prediction search, if the best SAD is above a certain
   // threshold.
 
-  // store mode decisions
-  ctx->mic = *xd->mi_8x8[0];
-
   return INT64_MAX;
 }
diff --git a/vp9/encoder/vp9_pickmode.h b/vp9/encoder/vp9_pickmode.h
index 32750fa69..05ff18762 100644
--- a/vp9/encoder/vp9_pickmode.h
+++ b/vp9/encoder/vp9_pickmode.h
@@ -8,12 +8,24 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#ifndef VP9_ENCODER_VP9_PICKMODE_H_
+#define VP9_ENCODER_VP9_PICKMODE_H_
+
 #include "vp9/encoder/vp9_onyx_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                             const struct TileInfo *const tile,
                             int mi_row, int mi_col,
                             int *returnrate,
                             int64_t *returndistortion,
-                            BLOCK_SIZE bsize,
-                            PICK_MODE_CONTEXT *ctx);
+                            BLOCK_SIZE bsize);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP9_ENCODER_VP9_PICKMODE_H_
diff --git a/vp9/encoder/vp9_psnr.h b/vp9/encoder/vp9_psnr.h
index 15dd8366b..ffe00ed2c 100644
--- a/vp9/encoder/vp9_psnr.h
+++ b/vp9/encoder/vp9_psnr.h
@@ -12,6 +12,14 @@
 #ifndef VP9_ENCODER_VP9_PSNR_H_
 #define VP9_ENCODER_VP9_PSNR_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 double vp9_mse2psnr(double samples, double peak, double mse);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_PSNR_H_
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index 41cfa5283..680cf4aec 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -13,6 +13,10 @@
 
 #include "vp9/encoder/vp9_block.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
                                 const int16_t *scan, const int16_t *iscan);
 
@@ -28,4 +32,8 @@ void vp9_mb_init_quantizer(struct VP9_COMP *cpi, MACROBLOCK *x);
 
 void vp9_init_quantizer(struct VP9_COMP *cpi);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_QUANTIZE_H_
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 3ebf98c0f..74eb98fb0 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -218,7 +218,7 @@ static void calc_iframe_target_size(VP9_COMP *cpi) {
   vp9_clear_system_state();  // __asm emms;
 
   // For 1-pass.
-  if (cpi->pass == 0) {
+  if (cpi->pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
     if (cpi->common.current_video_frame == 0) {
       target = oxcf->starting_buffer_level / 2;
     } else {
@@ -246,7 +246,7 @@ static void calc_iframe_target_size(VP9_COMP *cpi) {
 
   if (oxcf->rc_max_intra_bitrate_pct) {
     const int max_rate = rc->per_frame_bandwidth *
-                             oxcf->rc_max_intra_bitrate_pct / 100;
+        oxcf->rc_max_intra_bitrate_pct / 100;
     target = MIN(target, max_rate);
   }
   rc->this_frame_target = target;
@@ -375,27 +375,22 @@ static int target_size_from_buffer_level(const VP9_CONFIG *oxcf,
 static void calc_pframe_target_size(VP9_COMP *const cpi) {
   RATE_CONTROL *const rc = &cpi->rc;
   const VP9_CONFIG *const oxcf = &cpi->oxcf;
-  int min_frame_target = MAX(rc->min_frame_bandwidth,
-                             rc->av_per_frame_bandwidth >> 5);
-  if (cpi->refresh_alt_ref_frame) {
-    // Special alt reference frame case
-    // Per frame bit target for the alt ref frame
-    rc->per_frame_bandwidth = cpi->twopass.gf_bits;
-    rc->this_frame_target = rc->per_frame_bandwidth;
-  } else {
-    // Normal frames (gf and inter).
-    rc->this_frame_target = rc->per_frame_bandwidth;
-    // Set target frame size based on buffer level, for 1 pass CBR.
-    if (cpi->pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
-      // Need to decide how low min_frame_target should be for 1-pass CBR.
-      // For now, use: cpi->rc.av_per_frame_bandwidth / 16:
-      min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4,
-                             FRAME_OVERHEAD_BITS);
-      rc->this_frame_target = target_size_from_buffer_level(oxcf, rc);
-      // Adjust qp-max based on buffer level.
-      rc->active_worst_quality =
-          adjust_active_worst_quality_from_buffer_level(oxcf, rc);
-    }
+  int min_frame_target;
+  rc->this_frame_target = rc->per_frame_bandwidth;
+
+  if (cpi->pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
+    // Need to decide how low min_frame_target should be for 1-pass CBR.
+    // For now, use: cpi->rc.av_per_frame_bandwidth / 16:
+    min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4,
+                           FRAME_OVERHEAD_BITS);
+    rc->this_frame_target = target_size_from_buffer_level(oxcf, rc);
+    // Adjust qp-max based on buffer level.
+    rc->active_worst_quality =
+        adjust_active_worst_quality_from_buffer_level(oxcf, rc);
+
+    if (rc->this_frame_target < min_frame_target)
+      rc->this_frame_target = min_frame_target;
+    return;
   }
 
   // Check that the total sum of adjustments is not above the maximum allowed.
@@ -404,6 +399,9 @@ static void calc_pframe_target_size(VP9_COMP *const cpi) {
   // not capable of recovering all the extra bits we have spent in the KF or GF,
   // then the remainder will have to be recovered over a longer time span via
   // other buffer / rate control mechanisms.
+  min_frame_target = MAX(rc->min_frame_bandwidth,
+                         rc->av_per_frame_bandwidth >> 5);
+
   if (rc->this_frame_target < min_frame_target)
     rc->this_frame_target = min_frame_target;
 
@@ -468,8 +466,8 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) {
 
   // Work out a size correction factor.
   if (projected_size_based_on_q > 0)
-    correction_factor =
-        (100 * cpi->rc.projected_frame_size) / projected_size_based_on_q;
+    correction_factor = (100 * cpi->rc.projected_frame_size) /
+                            projected_size_based_on_q;
 
   // More heavily damped adjustment used if we have been oscillating either side
   // of target.
@@ -514,26 +512,25 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) {
 
 int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
                       int active_best_quality, int active_worst_quality) {
+  const VP9_COMMON *const cm = &cpi->common;
   int q = active_worst_quality;
   int last_error = INT_MAX;
-  int i, target_bits_per_mb, bits_per_mb_at_this_q;
+  int i, target_bits_per_mb;
   const double correction_factor = get_rate_correction_factor(cpi);
 
   // Calculate required scaling factor based on target frame size and size of
   // frame produced using previous Q.
   if (target_bits_per_frame >= (INT_MAX >> BPER_MB_NORMBITS))
-    target_bits_per_mb =
-        (target_bits_per_frame / cpi->common.MBs)
-        << BPER_MB_NORMBITS;  // Case where we would overflow int
+    // Case where we would overflow int
+    target_bits_per_mb = (target_bits_per_frame / cm->MBs) << BPER_MB_NORMBITS;
   else
-    target_bits_per_mb =
-        (target_bits_per_frame << BPER_MB_NORMBITS) / cpi->common.MBs;
+    target_bits_per_mb = (target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs;
 
   i = active_best_quality;
 
   do {
-    bits_per_mb_at_this_q = (int)vp9_rc_bits_per_mb(cpi->common.frame_type, i,
-                                                    correction_factor);
+    const int bits_per_mb_at_this_q = (int)vp9_rc_bits_per_mb(cm->frame_type, i,
+                                                             correction_factor);
 
     if (bits_per_mb_at_this_q <= target_bits_per_mb) {
       if ((target_bits_per_mb - bits_per_mb_at_this_q) <= last_error)
@@ -550,25 +547,19 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
   return q;
 }
 
-static int get_active_quality(int q,
-                              int gfu_boost,
-                              int low,
-                              int high,
-                              int *low_motion_minq,
-                              int *high_motion_minq) {
-  int active_best_quality;
+static int get_active_quality(int q, int gfu_boost, int low, int high,
+                              int *low_motion_minq, int *high_motion_minq) {
   if (gfu_boost > high) {
-    active_best_quality = low_motion_minq[q];
+    return low_motion_minq[q];
   } else if (gfu_boost < low) {
-    active_best_quality = high_motion_minq[q];
+    return high_motion_minq[q];
   } else {
     const int gap = high - low;
     const int offset = high - gfu_boost;
     const int qdiff = high_motion_minq[q] - low_motion_minq[q];
     const int adjustment = ((offset * qdiff) + (gap >> 1)) / gap;
-    active_best_quality = low_motion_minq[q] + adjustment;
+    return low_motion_minq[q] + adjustment;
   }
-  return active_best_quality;
 }
 
 int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
@@ -615,8 +606,8 @@ int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
       // Convert the adjustment factor to a qindex delta
       // on active_best_quality.
       q_val = vp9_convert_qindex_to_q(active_best_quality);
-      active_best_quality +=
-          vp9_compute_qdelta(cpi, q_val, (q_val * q_adj_factor));
+      active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val *
+                                                   q_adj_factor);
     }
 #else
     double current_q;
@@ -720,15 +711,12 @@ int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
 #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
   // Limit Q range for the adaptive loop.
   if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
-    if (!(cpi->pass == 0 && cm->current_video_frame == 0)) {
-      *top_index =
-          (active_worst_quality + active_best_quality * 3) / 4;
-    }
+    if (!(cpi->pass == 0 && cm->current_video_frame == 0))
+      *top_index = (active_worst_quality + active_best_quality * 3) / 4;
   } else if (!rc->is_src_frame_alt_ref &&
              (oxcf->end_usage != USAGE_STREAM_FROM_SERVER) &&
              (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
-    *top_index =
-      (active_worst_quality + active_best_quality) / 2;
+    *top_index = (active_worst_quality + active_best_quality) / 2;
   }
 #endif
 
@@ -818,7 +806,8 @@ void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi,
 
 // return of 0 means drop frame
 int vp9_rc_pick_frame_size_target(VP9_COMP *cpi) {
-  VP9_COMMON *cm = &cpi->common;
+  const VP9_COMMON *const cm = &cpi->common;
+  RATE_CONTROL *const rc = &cpi->rc;
 
   if (cm->frame_type == KEY_FRAME)
     calc_iframe_target_size(cpi);
@@ -826,12 +815,12 @@ int vp9_rc_pick_frame_size_target(VP9_COMP *cpi) {
     calc_pframe_target_size(cpi);
 
   // Clip the frame target to the maximum allowed value.
-  if (cpi->rc.this_frame_target > cpi->rc.max_frame_bandwidth)
-    cpi->rc.this_frame_target = cpi->rc.max_frame_bandwidth;
+  if (rc->this_frame_target > rc->max_frame_bandwidth)
+    rc->this_frame_target = rc->max_frame_bandwidth;
 
   // Target rate per SB64 (including partial SB64s.
-  cpi->rc.sb64_target_rate = ((int64_t)cpi->rc.this_frame_target * 64 * 64) /
-                             (cpi->common.width * cpi->common.height);
+  rc->sb64_target_rate = ((int64_t)rc->this_frame_target * 64 * 64) /
+                             (cm->width * cm->height);
   return 1;
 }
 
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index 086755af8..eba4b7a92 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -12,61 +12,130 @@
 #ifndef VP9_ENCODER_VP9_RATECTRL_H_
 #define VP9_ENCODER_VP9_RATECTRL_H_
 
-#include "vp9/encoder/vp9_onyx_int.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 #define FRAME_OVERHEAD_BITS 200
 
-void vp9_save_coding_context(VP9_COMP *cpi);
-void vp9_restore_coding_context(VP9_COMP *cpi);
-
-void vp9_setup_key_frame(VP9_COMP *cpi);
-void vp9_setup_inter_frame(VP9_COMP *cpi);
+typedef struct {
+  // Rate targetting variables
+  int this_frame_target;
+  int projected_frame_size;
+  int sb64_target_rate;
+  int last_q[3];                   // Separate values for Intra/Inter/ARF-GF
+  int last_boosted_qindex;         // Last boosted GF/KF/ARF q
+
+  int gfu_boost;
+  int last_boost;
+  int kf_boost;
+
+  double rate_correction_factor;
+  double key_frame_rate_correction_factor;
+  double gf_rate_correction_factor;
+
+  unsigned int frames_since_golden;
+  unsigned int frames_till_gf_update_due;  // Count down till next GF
+  unsigned int max_gf_interval;
+  unsigned int baseline_gf_interval;
+  unsigned int frames_to_key;
+  unsigned int frames_since_key;
+  unsigned int this_key_frame_forced;
+  unsigned int next_key_frame_forced;
+  unsigned int source_alt_ref_pending;
+  unsigned int source_alt_ref_active;
+  unsigned int is_src_frame_alt_ref;
+
+  int per_frame_bandwidth;        // Current section per frame bandwidth target
+  int av_per_frame_bandwidth;     // Average frame size target for clip
+  int min_frame_bandwidth;        // Minimum allocation used for any frame
+  int max_frame_bandwidth;        // Maximum burst rate allowed for a frame.
+
+  int ni_av_qi;
+  int ni_tot_qi;
+  int ni_frames;
+  int avg_frame_qindex[3];  // 0 - KEY, 1 - INTER, 2 - ARF/GF
+  double tot_q;
+  double avg_q;
+
+  int buffer_level;
+  int bits_off_target;
+
+  int decimation_factor;
+  int decimation_count;
+
+  int rolling_target_bits;
+  int rolling_actual_bits;
+
+  int long_rolling_target_bits;
+  int long_rolling_actual_bits;
+
+  int64_t total_actual_bits;
+  int total_target_vs_actual;        // debug stats
+
+  int worst_quality;
+  int active_worst_quality;
+  int best_quality;
+  // int active_best_quality;
+} RATE_CONTROL;
+
+struct VP9_COMP;
+
+void vp9_save_coding_context(struct VP9_COMP *cpi);
+void vp9_restore_coding_context(struct VP9_COMP *cpi);
+
+void vp9_setup_key_frame(struct VP9_COMP *cpi);
+void vp9_setup_inter_frame(struct VP9_COMP *cpi);
 
 double vp9_convert_qindex_to_q(int qindex);
 
 // Updates rate correction factors
-void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var);
+void vp9_rc_update_rate_correction_factors(struct VP9_COMP *cpi, int damp_var);
 
 // initialize luts for minq
 void vp9_rc_init_minq_luts(void);
 
 // return of 0 means drop frame
 // Changes only rc.this_frame_target and rc.sb64_rate_target
-int vp9_rc_pick_frame_size_target(VP9_COMP *cpi);
+int vp9_rc_pick_frame_size_target(struct VP9_COMP *cpi);
 
-void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi,
+void vp9_rc_compute_frame_size_bounds(const struct VP9_COMP *cpi,
                                       int this_frame_target,
                                       int *frame_under_shoot_limit,
                                       int *frame_over_shoot_limit);
 
 // Picks q and q bounds given the target for bits
-int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
+int vp9_rc_pick_q_and_adjust_q_bounds(const struct VP9_COMP *cpi,
                                       int *bottom_index,
                                       int *top_index);
 
 // Estimates q to achieve a target bits per frame
-int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
+int vp9_rc_regulate_q(const struct VP9_COMP *cpi, int target_bits_per_frame,
                       int active_best_quality, int active_worst_quality);
 
 // Post encode update of the rate control parameters based
 // on bytes used
-void vp9_rc_postencode_update(VP9_COMP *cpi,
+void vp9_rc_postencode_update(struct VP9_COMP *cpi,
                               uint64_t bytes_used);
 // for dropped frames
-void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi);
+void vp9_rc_postencode_update_drop_frame(struct VP9_COMP *cpi);
 
 // estimates bits per mb for a given qindex and correction factor
 int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex,
                        double correction_factor);
 
 // Post encode update of the rate control parameters for 2-pass
-void vp9_twopass_postencode_update(VP9_COMP *cpi,
+void vp9_twopass_postencode_update(struct VP9_COMP *cpi,
                                    uint64_t bytes_used);
 
 // Decide if we should drop this frame: For 1-pass CBR.
-int vp9_drop_frame(VP9_COMP *cpi);
+int vp9_drop_frame(struct VP9_COMP *cpi);
 
 // Update the buffer level.
-void vp9_update_buffer_level(VP9_COMP *cpi, int encoded_frame_size);
+void vp9_update_buffer_level(struct VP9_COMP *cpi, int encoded_frame_size);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
 
 #endif  // VP9_ENCODER_VP9_RATECTRL_H_
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 242aa8710..9cca3bd52 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -55,6 +55,22 @@ typedef struct {
   MV_REFERENCE_FRAME ref_frame[2];
 } REF_DEFINITION;
 
+struct rdcost_block_args {
+  MACROBLOCK *x;
+  ENTROPY_CONTEXT t_above[16];
+  ENTROPY_CONTEXT t_left[16];
+  int rate;
+  int64_t dist;
+  int64_t sse;
+  int this_rate;
+  int64_t this_dist;
+  int64_t this_sse;
+  int64_t this_rd;
+  int64_t best_rd;
+  int skip;
+  const int16_t *scan, *nb;
+};
+
 const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
   {NEARESTMV, {LAST_FRAME,   NONE}},
   {NEARESTMV, {ALTREF_FRAME, NONE}},
@@ -280,22 +296,24 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
 
   fill_token_costs(x->token_costs, cm->fc.coef_probs);
 
-  for (i = 0; i < PARTITION_CONTEXTS; i++)
-    vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
-                    vp9_partition_tree);
+  if (!cpi->sf.super_fast_rtc) {
+    for (i = 0; i < PARTITION_CONTEXTS; i++)
+      vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
+                      vp9_partition_tree);
 
-  fill_mode_costs(cpi);
+    fill_mode_costs(cpi);
 
-  if (!frame_is_intra_only(cm)) {
-    vp9_build_nmv_cost_table(x->nmvjointcost,
-                             cm->allow_high_precision_mv ? x->nmvcost_hp
-                                                         : x->nmvcost,
-                             &cm->fc.nmvc,
-                             cm->allow_high_precision_mv, 1, 1);
+    if (!frame_is_intra_only(cm)) {
+      vp9_build_nmv_cost_table(x->nmvjointcost,
+                               cm->allow_high_precision_mv ? x->nmvcost_hp
+                                                           : x->nmvcost,
+                               &cm->fc.nmvc,
+                               cm->allow_high_precision_mv, 1, 1);
 
-    for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
-      vp9_cost_tokens((int *)x->inter_mode_cost[i],
-                      cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
+      for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
+        vp9_cost_tokens((int *)x->inter_mode_cost[i],
+                        cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
+    }
   }
 }
 
@@ -419,16 +437,26 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
     struct macroblock_plane *const p = &x->plane[i];
     struct macroblockd_plane *const pd = &xd->plane[i];
     const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
+
     (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                               pd->dst.buf, pd->dst.stride, &sse);
+
     if (i == 0)
       x->pred_sse[ref] = sse;
-
-    dist_sum += (int)sse;
+    if (cpi->sf.super_fast_rtc) {
+      dist_sum += (int)sse;
+    } else {
+      int rate;
+      int64_t dist;
+      model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
+                               pd->dequant[1] >> 3, &rate, &dist);
+      rate_sum += rate;
+      dist_sum += (int)dist;
+    }
   }
 
   *out_rate_sum = rate_sum;
-  *out_dist_sum = dist_sum << 4;
+  *out_dist_sum = (int64_t)dist_sum << 4;
 }
 
 static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
@@ -575,15 +603,15 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
   return cost;
 }
 
-static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
+static void dist_block(int plane, int block, TX_SIZE tx_size,
+                       struct rdcost_block_args* args) {
   const int ss_txfrm_size = tx_size << 1;
-  struct rdcost_block_args* args = arg;
   MACROBLOCK* const x = args->x;
   MACROBLOCKD* const xd = &x->e_mbd;
   struct macroblock_plane *const p = &x->plane[plane];
   struct macroblockd_plane *const pd = &xd->plane[plane];
   int64_t this_sse;
-  int shift = args->tx_size == TX_32X32 ? 0 : 2;
+  int shift = tx_size == TX_32X32 ? 0 : 2;
   int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
   int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
@@ -600,14 +628,12 @@ static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
 }
 
 static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
-                       TX_SIZE tx_size, void *arg) {
-  struct rdcost_block_args* args = arg;
-
+                       TX_SIZE tx_size, struct rdcost_block_args* args) {
   int x_idx, y_idx;
-  txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);
+  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
 
   args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
-                           args->t_left + y_idx, args->tx_size,
+                           args->t_left + y_idx, tx_size,
                            args->scan, args->nb);
 }
 
@@ -684,24 +710,19 @@ void vp9_get_entropy_contexts(TX_SIZE tx_size,
   }
 }
 
-static void init_rdcost_stack(MACROBLOCK *x, TX_SIZE tx_size,
-                              const int num_4x4_w, const int num_4x4_h,
-                              const int64_t ref_rdcost,
+static void init_rdcost_stack(MACROBLOCK *x, const int64_t ref_rdcost,
                               struct rdcost_block_args *arg) {
   vpx_memset(arg, 0, sizeof(struct rdcost_block_args));
   arg->x = x;
-  arg->tx_size = tx_size;
-  arg->bw = num_4x4_w;
-  arg->bh = num_4x4_h;
   arg->best_rd = ref_rdcost;
 }
 
 static void txfm_rd_in_plane(MACROBLOCK *x,
-                             struct rdcost_block_args *rd_stack,
                              int *rate, int64_t *distortion,
                              int *skippable, int64_t *sse,
                              int64_t ref_best_rd, int plane,
                              BLOCK_SIZE bsize, TX_SIZE tx_size) {
+  struct rdcost_block_args rd_stack;
   MACROBLOCKD *const xd = &x->e_mbd;
   struct macroblockd_plane *const pd = &xd->plane[plane];
   const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
@@ -709,30 +730,29 @@ static void txfm_rd_in_plane(MACROBLOCK *x,
   const int num_4x4_h = num_4x4_blocks_high_lookup[bs];
   const scan_order *so;
 
-  init_rdcost_stack(x, tx_size, num_4x4_w, num_4x4_h,
-                    ref_best_rd, rd_stack);
+  init_rdcost_stack(x, ref_best_rd, &rd_stack);
   if (plane == 0)
     xd->mi_8x8[0]->mbmi.tx_size = tx_size;
 
-  vp9_get_entropy_contexts(tx_size, rd_stack->t_above, rd_stack->t_left,
+  vp9_get_entropy_contexts(tx_size, rd_stack.t_above, rd_stack.t_left,
                            pd->above_context, pd->left_context,
                            num_4x4_w, num_4x4_h);
 
   so = get_scan(xd, tx_size, pd->plane_type, 0);
-  rd_stack->scan = so->scan;
-  rd_stack->nb = so->neighbors;
+  rd_stack.scan = so->scan;
+  rd_stack.nb = so->neighbors;
 
   foreach_transformed_block_in_plane(xd, bsize, plane,
-                                     block_rd_txfm, rd_stack);
-  if (rd_stack->skip) {
+                                     block_rd_txfm, &rd_stack);
+  if (rd_stack.skip) {
     *rate       = INT_MAX;
     *distortion = INT64_MAX;
     *sse        = INT64_MAX;
     *skippable  = 0;
   } else {
-    *distortion = rd_stack->this_dist;
-    *rate       = rd_stack->this_rate;
-    *sse        = rd_stack->this_sse;
+    *distortion = rd_stack.this_dist;
+    *rate       = rd_stack.this_rate;
+    *sse        = rd_stack.this_sse;
     *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
   }
 }
@@ -750,7 +770,7 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
 
   mbmi->tx_size = MIN(max_tx_size, largest_tx_size);
 
-  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
+  txfm_rd_in_plane(x, rate, distortion, skip,
                    &sse[mbmi->tx_size], ref_best_rd, 0, bs,
                    mbmi->tx_size);
   cpi->tx_stepdown_count[0]++;
@@ -881,7 +901,7 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
 
   // Actually encode using the chosen mode if a model was used, but do not
   // update the r, d costs
-  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
+  txfm_rd_in_plane(x, rate, distortion, skip,
                    &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size);
 
   if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
@@ -904,7 +924,6 @@ static void super_block_yrd(VP9_COMP *cpi,
   int64_t d[TX_SIZES], sse[TX_SIZES];
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
-  struct rdcost_block_args *rdcost_stack = &cpi->rdcost_stack;
   const int b_inter_mode = is_inter_block(mbmi);
   const TX_SIZE max_tx_size = max_txsize_lookup[bs];
   TX_SIZE tx_size;
@@ -934,7 +953,7 @@ static void super_block_yrd(VP9_COMP *cpi,
                                   skip, sse, ref_best_rd, bs);
   } else {
     for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
-      txfm_rd_in_plane(x, rdcost_stack, &r[tx_size][0], &d[tx_size],
+      txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
                        &s[tx_size], &sse[tx_size],
                        ref_best_rd, 0, bs, tx_size);
     choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
@@ -1263,7 +1282,7 @@ static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x,
   *skippable = 1;
 
   for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
-    txfm_rd_in_plane(x, &cpi->rdcost_stack, &pnrate, &pndist, &pnskip, &pnsse,
+    txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
                      ref_best_rd, plane, bsize, uv_txfm_size);
     if (pnrate == INT_MAX)
       goto term;
@@ -1517,8 +1536,8 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
     vp9_build_inter_predictor(pre, pd->pre[ref].stride,
                               dst, pd->dst.stride,
                               &mi->bmi[i].as_mv[ref].as_mv,
-                              &xd->block_refs[ref]->sf,
-                              width, height, ref, &xd->subpix, MV_PRECISION_Q3,
+                              &xd->block_refs[ref]->sf, width, height, ref,
+                              xd->interp_kernel, MV_PRECISION_Q3,
                               mi_col * MI_SIZE + 4 * (i % 2),
                               mi_row * MI_SIZE + 4 * (i / 2));
   }
@@ -1749,7 +1768,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
           if (best_rd < label_mv_thresh)
             break;
 
-          if (cpi->compressor_speed) {
+          if (cpi->oxcf.mode != MODE_SECONDPASS_BEST &&
+              cpi->oxcf.mode != MODE_BESTQUALITY) {
             // use previous block's result as next block's MV predictor.
             if (i > 0) {
               bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
@@ -1813,7 +1833,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
           }
 
           // Should we do a full search (best quality only)
-          if (cpi->compressor_speed == 0) {
+          if (cpi->oxcf.mode == MODE_BESTQUALITY ||
+              cpi->oxcf.mode == MODE_SECONDPASS_BEST) {
             /* Check if mvp_full is within the range. */
             clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
                      x->mv_row_min, x->mv_row_max);
@@ -1840,7 +1861,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
                                          &bsi->ref_mv->as_mv,
                                          cm->allow_high_precision_mv,
                                          x->errorperbit, v_fn_ptr,
-                                         0, cpi->sf.subpel_iters_per_step,
+                                         cpi->sf.subpel_force_stop,
+                                         cpi->sf.subpel_iters_per_step,
                                          x->nmvjointcost, x->mvcost,
                                          &distortion,
                                          &x->pred_sse[mbmi->ref_frame[0]]);
@@ -2304,13 +2326,12 @@ void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
             ref_frame, block_size);
 }
 
-YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) {
-  YV12_BUFFER_CONFIG *scaled_ref_frame = NULL;
-  int fb = get_ref_frame_idx(cpi, ref_frame);
-  int fb_scale = get_scale_ref_frame_idx(cpi, ref_frame);
-  if (cpi->scaled_ref_idx[fb_scale] != cpi->common.ref_frame_map[fb])
-    scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb_scale]];
-  return scaled_ref_frame;
+const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
+                                                   int ref_frame) {
+  const VP9_COMMON *const cm = &cpi->common;
+  const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
+  const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
+  return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL;
 }
 
 static INLINE int get_switchable_rate(const MACROBLOCK *x) {
@@ -2342,7 +2363,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
   int tmp_row_min = x->mv_row_min;
   int tmp_row_max = x->mv_row_max;
 
-  YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, ref);
+  const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
+                                                                        ref);
 
   int_mv pred_mv[3];
   pred_mv[0] = mbmi->ref_mvs[ref][0];
@@ -2451,7 +2473,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                  cm->allow_high_precision_mv,
                                  x->errorperbit,
                                  &cpi->fn_ptr[bsize],
-                                 0, cpi->sf.subpel_iters_per_step,
+                                 cpi->sf.subpel_force_stop,
+                                 cpi->sf.subpel_iters_per_step,
                                  x->nmvjointcost, x->mvcost,
                                  &dis, &x->pred_sse[ref]);
   }
@@ -2489,7 +2512,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
   struct buf_2d backup_yv12[2][MAX_MB_PLANE];
   struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
   int last_besterr[2] = {INT_MAX, INT_MAX};
-  YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
+  const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
     vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
     vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
   };
@@ -2536,7 +2559,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                               &frame_mv[refs[!id]].as_mv,
                               &xd->block_refs[!id]->sf,
                               pw, ph, 0,
-                              &xd->subpix, MV_PRECISION_Q3,
+                              xd->interp_kernel, MV_PRECISION_Q3,
                               mi_col * MI_SIZE, mi_row * MI_SIZE);
 
     // Compound motion search on first ref frame.
@@ -2626,7 +2649,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                  int *rate_y, int64_t *distortion_y,
                                  int *rate_uv, int64_t *distortion_uv,
                                  int *mode_excluded, int *disable_skip,
-                                 INTERPOLATION_TYPE *best_filter,
+                                 INTERP_FILTER *best_filter,
                                  int_mv (*mode_mv)[MAX_REF_FRAMES],
                                  int mi_row, int mi_col,
                                  int_mv single_newmv[MAX_REF_FRAMES],
@@ -2769,7 +2792,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
     cpi->rd_filter_cache[i] = INT64_MAX;
 
-  if (cm->mcomp_filter_type != BILINEAR) {
+  if (cm->interp_filter != BILINEAR) {
     *best_filter = EIGHTTAP;
     if (x->source_variance <
         cpi->sf.disable_filter_search_var_thresh) {
@@ -2783,7 +2806,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         int j;
         int64_t rs_rd;
         mbmi->interp_filter = i;
-        vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
+        xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
         rs = get_switchable_rate(x);
         rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
 
@@ -2792,16 +2815,16 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
           cpi->rd_filter_cache[i] = rd;
           cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
               MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
-          if (cm->mcomp_filter_type == SWITCHABLE)
+          if (cm->interp_filter == SWITCHABLE)
             rd += rs_rd;
           cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);
         } else {
           int rate_sum = 0;
           int64_t dist_sum = 0;
-          if ((cm->mcomp_filter_type == SWITCHABLE &&
+          if ((cm->interp_filter == SWITCHABLE &&
                (!i || best_needs_copy)) ||
-              (cm->mcomp_filter_type != SWITCHABLE &&
-               (cm->mcomp_filter_type == mbmi->interp_filter ||
+              (cm->interp_filter != SWITCHABLE &&
+               (cm->interp_filter == mbmi->interp_filter ||
                 (i == 0 && intpel_mv)))) {
             restore_dst_buf(xd, orig_dst, orig_dst_stride);
           } else {
@@ -2817,7 +2840,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
           cpi->rd_filter_cache[i] = rd;
           cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
               MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
-          if (cm->mcomp_filter_type == SWITCHABLE)
+          if (cm->interp_filter == SWITCHABLE)
             rd += rs_rd;
           cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);
 
@@ -2838,13 +2861,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         if (newbest) {
           best_rd = rd;
           *best_filter = mbmi->interp_filter;
-          if (cm->mcomp_filter_type == SWITCHABLE && i && !intpel_mv)
+          if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
             best_needs_copy = !best_needs_copy;
         }
 
-        if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
-            (cm->mcomp_filter_type != SWITCHABLE &&
-             cm->mcomp_filter_type == mbmi->interp_filter)) {
+        if ((cm->interp_filter == SWITCHABLE && newbest) ||
+            (cm->interp_filter != SWITCHABLE &&
+             cm->interp_filter == mbmi->interp_filter)) {
           pred_exists = 1;
         }
       }
@@ -2852,10 +2875,10 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
     }
   }
   // Set the appropriate filter
-  mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
-      cm->mcomp_filter_type : *best_filter;
-  vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
-  rs = cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(x) : 0;
+  mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
+      cm->interp_filter : *best_filter;
+  xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
+  rs = cm->interp_filter == SWITCHABLE ? get_switchable_rate(x) : 0;
 
   if (pred_exists) {
     if (best_needs_copy) {
@@ -2884,7 +2907,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
     }
   }
 
-  if (cm->mcomp_filter_type == SWITCHABLE)
+  if (cm->interp_filter == SWITCHABLE)
     *rate2 += get_switchable_rate(x);
 
   if (!is_comp_pred && cpi->enable_encode_breakout) {
@@ -3129,7 +3152,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   int64_t best_inter_rd = INT64_MAX;
   MB_PREDICTION_MODE best_intra_mode = DC_PRED;
   MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
-  INTERPOLATION_TYPE tmp_best_filter = SWITCHABLE;
+  INTERP_FILTER tmp_best_filter = SWITCHABLE;
   int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
   int64_t dist_uv[TX_SIZES];
   int skip_uv[TX_SIZES];
@@ -3277,13 +3300,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
           continue;
     }
 
-    set_scale_factors(cm, xd, ref_frame - 1, second_ref_frame - 1);
+    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
     mbmi->uv_mode = DC_PRED;
 
     // Evaluate all sub-pel filters irrespective of whether we can use
     // them for this frame.
-    mbmi->interp_filter = cm->mcomp_filter_type;
-    vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
+    mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
+                                                          : cm->interp_filter;
+    xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
 
     if (comp_pred) {
       if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
@@ -3573,9 +3597,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
 
     /* keep record of best filter type */
     if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
-        cm->mcomp_filter_type != BILINEAR) {
-      int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
-                              SWITCHABLE_FILTERS : cm->mcomp_filter_type];
+        cm->interp_filter != BILINEAR) {
+      int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ?
+                              SWITCHABLE_FILTERS : cm->interp_filter];
 
       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
         int64_t adj_rd;
@@ -3649,8 +3673,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     }
   }
 
-  assert((cm->mcomp_filter_type == SWITCHABLE) ||
-         (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
+  assert((cm->interp_filter == SWITCHABLE) ||
+         (cm->interp_filter == best_mbmode.interp_filter) ||
          !is_inter_block(&best_mbmode));
 
   // Updating rd_thresh_freq_fact[] here means that the different
@@ -3692,7 +3716,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
       else
         best_filter_diff[i] = best_rd - best_filter_rd[i];
     }
-    if (cm->mcomp_filter_type == SWITCHABLE)
+    if (cm->interp_filter == SWITCHABLE)
       assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
   } else {
     vp9_zero(best_filter_diff);
@@ -3709,7 +3733,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     vp9_zero(best_tx_diff);
   }
 
-  set_scale_factors(cm, xd, mbmi->ref_frame[0] - 1, mbmi->ref_frame[1] - 1);
+  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
   store_coding_context(x, ctx, best_mode_index,
                        &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
                        &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
@@ -3754,7 +3778,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
   vp9_prob comp_mode_p;
   int64_t best_inter_rd = INT64_MAX;
   MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
-  INTERPOLATION_TYPE tmp_best_filter = SWITCHABLE;
+  INTERP_FILTER tmp_best_filter = SWITCHABLE;
   int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
   int64_t dist_uv[TX_SIZES];
   int skip_uv[TX_SIZES];
@@ -3902,13 +3926,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
         vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
       continue;
 
-    set_scale_factors(cm, xd, ref_frame - 1, second_ref_frame - 1);
+    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
     mbmi->uv_mode = DC_PRED;
 
     // Evaluate all sub-pel filters irrespective of whether we can use
     // them for this frame.
-    mbmi->interp_filter = cm->mcomp_filter_type;
-    vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
+    mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
+                                                          : cm->interp_filter;
+    xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
 
     if (comp_pred) {
       if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
@@ -4013,17 +4038,17 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
         cpi->rd_filter_cache[i] = INT64_MAX;
 
-      if (cm->mcomp_filter_type != BILINEAR) {
+      if (cm->interp_filter != BILINEAR) {
         tmp_best_filter = EIGHTTAP;
         if (x->source_variance <
             cpi->sf.disable_filter_search_var_thresh) {
           tmp_best_filter = EIGHTTAP;
-        } else if (cpi->sf.adaptive_pred_filter_type == 1 &&
-                   ctx->pred_filter_type < SWITCHABLE) {
-          tmp_best_filter = ctx->pred_filter_type;
-        } else if (cpi->sf.adaptive_pred_filter_type == 2) {
-          tmp_best_filter = ctx->pred_filter_type < SWITCHABLE ?
-                              ctx->pred_filter_type : 0;
+        } else if (cpi->sf.adaptive_pred_interp_filter == 1 &&
+                   ctx->pred_interp_filter < SWITCHABLE) {
+          tmp_best_filter = ctx->pred_interp_filter;
+        } else if (cpi->sf.adaptive_pred_interp_filter == 2) {
+          tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
+                              ctx->pred_interp_filter : 0;
         } else {
           for (switchable_filter_index = 0;
                switchable_filter_index < SWITCHABLE_FILTERS;
@@ -4031,8 +4056,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
             int newbest, rs;
             int64_t rs_rd;
             mbmi->interp_filter = switchable_filter_index;
-            vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
-
+            xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
             tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
                                                  &mbmi->ref_mvs[ref_frame][0],
                                                  second_ref,
@@ -4051,7 +4075,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
             cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
                 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
                     tmp_rd + rs_rd);
-            if (cm->mcomp_filter_type == SWITCHABLE)
+            if (cm->interp_filter == SWITCHABLE)
               tmp_rd += rs_rd;
 
             cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, tmp_rd);
@@ -4061,9 +4085,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
               tmp_best_filter = mbmi->interp_filter;
               tmp_best_rd = tmp_rd;
             }
-            if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
-                (mbmi->interp_filter == cm->mcomp_filter_type &&
-                 cm->mcomp_filter_type != SWITCHABLE)) {
+            if ((newbest && cm->interp_filter == SWITCHABLE) ||
+                (mbmi->interp_filter == cm->interp_filter &&
+                 cm->interp_filter != SWITCHABLE)) {
               tmp_best_rdu = tmp_rd;
               tmp_best_rate = rate;
               tmp_best_ratey = rate_y;
@@ -4095,9 +4119,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
       if (tmp_best_rdu == INT64_MAX && pred_exists)
         continue;
 
-      mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
-                             tmp_best_filter : cm->mcomp_filter_type);
-      vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
+      mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
+                             tmp_best_filter : cm->interp_filter);
+      xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
       if (!pred_exists) {
         // Handles the special case when a filter that is not in the
         // switchable list (bilinear, 6-tap) is indicated at the frame level
@@ -4113,7 +4137,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
         if (tmp_rd == INT64_MAX)
           continue;
       } else {
-        if (cm->mcomp_filter_type == SWITCHABLE) {
+        if (cm->interp_filter == SWITCHABLE) {
           int rs = get_switchable_rate(x);
           tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
         }
@@ -4131,7 +4155,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
       rate2 += rate;
       distortion2 += distortion;
 
-      if (cm->mcomp_filter_type == SWITCHABLE)
+      if (cm->interp_filter == SWITCHABLE)
         rate2 += get_switchable_rate(x);
 
       if (!mode_excluded)
@@ -4299,9 +4323,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
 
     /* keep record of best filter type */
     if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
-        cm->mcomp_filter_type != BILINEAR) {
-      int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
-                              SWITCHABLE_FILTERS : cm->mcomp_filter_type];
+        cm->interp_filter != BILINEAR) {
+      int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ?
+                              SWITCHABLE_FILTERS : cm->interp_filter];
       int64_t adj_rd;
       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
         if (ref == INT64_MAX)
@@ -4372,8 +4396,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
     return best_rd;
   }
 
-  assert((cm->mcomp_filter_type == SWITCHABLE) ||
-         (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
+  assert((cm->interp_filter == SWITCHABLE) ||
+         (cm->interp_filter == best_mbmode.interp_filter) ||
          !is_inter_block(&best_mbmode));
 
   // Updating rd_thresh_freq_fact[] here means that the different
@@ -4425,7 +4449,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
       else
         best_filter_diff[i] = best_rd - best_filter_rd[i];
     }
-    if (cm->mcomp_filter_type == SWITCHABLE)
+    if (cm->interp_filter == SWITCHABLE)
       assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
   } else {
     vp9_zero(best_filter_diff);
@@ -4442,7 +4466,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
     vp9_zero(best_tx_diff);
   }
 
-  set_scale_factors(cm, xd, mbmi->ref_frame[0] - 1, mbmi->ref_frame[1] - 1);
+  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
   store_coding_context(x, ctx, best_mode_index,
                        &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
                        &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index 9ac1f5404..96cea4216 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -13,6 +13,10 @@
 
 #include "vp9/encoder/vp9_onyx_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RDDIV_BITS          7
 
 #define RDCOST(RM, DM, R, D) \
@@ -46,7 +50,8 @@ void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
                             int_mv frame_near_mv[MAX_REF_FRAMES],
                             struct buf_2d yv12_mb[4][MAX_MB_PLANE]);
 
-YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame);
+const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
+                                                   int ref_frame);
 
 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
                                int *r, int64_t *d, BLOCK_SIZE bsize,
@@ -80,4 +85,8 @@ void vp9_get_entropy_contexts(TX_SIZE tx_size,
     const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
     int num_4x4_w, int num_4x4_h);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_RDOPT_H_
diff --git a/vp9/encoder/vp9_resize.c b/vp9/encoder/vp9_resize.c
index f15abc07d..0766b5107 100644
--- a/vp9/encoder/vp9_resize.c
+++ b/vp9/encoder/vp9_resize.c
@@ -16,7 +16,6 @@
 #include <string.h>
 #include "vp9/common/vp9_common.h"
 #include "vp9/encoder/vp9_resize.h"
-#include "vpx/vpx_integer.h"
 
 #define FILTER_BITS               7
 
@@ -30,8 +29,44 @@
 
 typedef int16_t interp_kernel[INTERP_TAPS];
 
-// Filters for interpolation - note this also filters integer pels.
-const interp_kernel vp9_filteredinterp_filters[(1 << SUBPEL_BITS)] = {
+// Filters for interpolation (0.5-band) - note this also filters integer pels.
+const interp_kernel vp9_filteredinterp_filters500[(1 << SUBPEL_BITS)] = {
+  {-3,  0, 35, 64, 35,  0, -3, 0},
+  {-3, -1, 34, 64, 36,  1, -3, 0},
+  {-3, -1, 32, 64, 38,  1, -3, 0},
+  {-2, -2, 31, 63, 39,  2, -3, 0},
+  {-2, -2, 29, 63, 41,  2, -3, 0},
+  {-2, -2, 28, 63, 42,  3, -4, 0},
+  {-2, -3, 27, 63, 43,  4, -4, 0},
+  {-2, -3, 25, 62, 45,  5, -4, 0},
+  {-2, -3, 24, 62, 46,  5, -4, 0},
+  {-2, -3, 23, 61, 47,  6, -4, 0},
+  {-2, -3, 21, 60, 49,  7, -4, 0},
+  {-1, -4, 20, 60, 50,  8, -4, -1},
+  {-1, -4, 19, 59, 51,  9, -4, -1},
+  {-1, -4, 17, 58, 52, 10, -4, 0},
+  {-1, -4, 16, 57, 53, 12, -4, -1},
+  {-1, -4, 15, 56, 54, 13, -4, -1},
+  {-1, -4, 14, 55, 55, 14, -4, -1},
+  {-1, -4, 13, 54, 56, 15, -4, -1},
+  {-1, -4, 12, 53, 57, 16, -4, -1},
+  {0, -4, 10, 52, 58, 17, -4, -1},
+  {-1, -4,  9, 51, 59, 19, -4, -1},
+  {-1, -4,  8, 50, 60, 20, -4, -1},
+  {0, -4,  7, 49, 60, 21, -3, -2},
+  {0, -4,  6, 47, 61, 23, -3, -2},
+  {0, -4,  5, 46, 62, 24, -3, -2},
+  {0, -4,  5, 45, 62, 25, -3, -2},
+  {0, -4,  4, 43, 63, 27, -3, -2},
+  {0, -4,  3, 42, 63, 28, -2, -2},
+  {0, -3,  2, 41, 63, 29, -2, -2},
+  {0, -3,  2, 39, 63, 31, -2, -2},
+  {0, -3,  1, 38, 64, 32, -1, -3},
+  {0, -3,  1, 36, 64, 34, -1, -3}
+};
+
+// Filters for interpolation (0.625-band) - note this also filters integer pels.
+const interp_kernel vp9_filteredinterp_filters625[(1 << SUBPEL_BITS)] = {
   {-1, -8, 33, 80, 33, -8, -1, 0},
   {-1, -8, 30, 80, 35, -8, -1, 1},
   {-1, -8, 28, 80, 37, -7, -2, 1},
@@ -66,10 +101,132 @@ const interp_kernel vp9_filteredinterp_filters[(1 << SUBPEL_BITS)] = {
   {1, -1, -8, 35, 80, 30, -8, -1},
 };
 
+// Filters for interpolation (0.75-band) - note this also filters integer pels.
+const interp_kernel vp9_filteredinterp_filters750[(1 << SUBPEL_BITS)] = {
+  {2, -11,  25,  96,  25, -11,   2, 0},
+  {2, -11,  22,  96,  28, -11,   2, 0},
+  {2, -10,  19,  95,  31, -11,   2, 0},
+  {2, -10,  17,  95,  34, -12,   2, 0},
+  {2,  -9,  14,  94,  37, -12,   2, 0},
+  {2,  -8,  12,  93,  40, -12,   1, 0},
+  {2,  -8,   9,  92,  43, -12,   1, 1},
+  {2,  -7,   7,  91,  46, -12,   1, 0},
+  {2,  -7,   5,  90,  49, -12,   1, 0},
+  {2,  -6,   3,  88,  52, -12,   0, 1},
+  {2,  -5,   1,  86,  55, -12,   0, 1},
+  {2,  -5,  -1,  84,  58, -11,   0, 1},
+  {2,  -4,  -2,  82,  61, -11,  -1, 1},
+  {2,  -4,  -4,  80,  64, -10,  -1, 1},
+  {1, -3, -5, 77, 67, -9, -1, 1},
+  {1, -3, -6, 75, 70, -8, -2, 1},
+  {1, -2, -7, 72, 72, -7, -2, 1},
+  {1, -2, -8, 70, 75, -6, -3, 1},
+  {1, -1, -9, 67, 77, -5, -3, 1},
+  {1,  -1, -10,  64,  80,  -4,  -4, 2},
+  {1,  -1, -11,  61,  82,  -2,  -4, 2},
+  {1,   0, -11,  58,  84,  -1,  -5, 2},
+  {1,   0, -12,  55,  86,   1,  -5, 2},
+  {1,   0, -12,  52,  88,   3,  -6, 2},
+  {0,   1, -12,  49,  90,   5,  -7, 2},
+  {0,   1, -12,  46,  91,   7,  -7, 2},
+  {1,   1, -12,  43,  92,   9,  -8, 2},
+  {0,   1, -12,  40,  93,  12,  -8, 2},
+  {0,   2, -12,  37,  94,  14,  -9, 2},
+  {0,   2, -12,  34,  95,  17, -10, 2},
+  {0,   2, -11,  31,  95,  19, -10, 2},
+  {0,   2, -11,  28,  96,  22, -11, 2}
+};
+
+// Filters for interpolation (0.875-band) - note this also filters integer pels.
+const interp_kernel vp9_filteredinterp_filters875[(1 << SUBPEL_BITS)] = {
+  {3,  -8,  13, 112,  13,  -8,   3, 0},
+  {3,  -7,  10, 112,  17,  -9,   3, -1},
+  {2,  -6,   7, 111,  21,  -9,   3, -1},
+  {2,  -5,   4, 111,  24, -10,   3, -1},
+  {2,  -4,   1, 110,  28, -11,   3, -1},
+  {1,  -3,  -1, 108,  32, -12,   4, -1},
+  {1,  -2,  -3, 106,  36, -13,   4, -1},
+  {1,  -1,  -6, 105,  40, -14,   4, -1},
+  {1,  -1,  -7, 102,  44, -14,   4, -1},
+  {1,   0,  -9, 100,  48, -15,   4, -1},
+  {1,   1, -11,  97,  53, -16,   4, -1},
+  {0,   1, -12,  95,  57, -16,   4, -1},
+  {0,   2, -13,  91,  61, -16,   4, -1},
+  {0,   2, -14,  88,  65, -16,   4, -1},
+  {0,   3, -15,  84,  69, -17,   4, 0},
+  {0,   3, -16,  81,  73, -16,   3, 0},
+  {0,   3, -16,  77,  77, -16,   3, 0},
+  {0,   3, -16,  73,  81, -16,   3, 0},
+  {0,   4, -17,  69,  84, -15,   3, 0},
+  {-1,   4, -16,  65,  88, -14,   2, 0},
+  {-1,   4, -16,  61,  91, -13,   2, 0},
+  {-1,   4, -16,  57,  95, -12,   1, 0},
+  {-1,   4, -16,  53,  97, -11,   1, 1},
+  {-1,   4, -15,  48, 100,  -9,   0, 1},
+  {-1,   4, -14,  44, 102,  -7,  -1, 1},
+  {-1,   4, -14,  40, 105,  -6,  -1, 1},
+  {-1,   4, -13,  36, 106,  -3,  -2, 1},
+  {-1,   4, -12,  32, 108,  -1,  -3, 1},
+  {-1,   3, -11,  28, 110,   1,  -4, 2},
+  {-1,   3, -10,  24, 111,   4,  -5, 2},
+  {-1,   3,  -9,  21, 111,   7,  -6, 2},
+  {-1,   3,  -9,  17, 112,  10,  -7, 3}
+};
+
+// Filters for interpolation (full-band) - no filtering for integer pixels
+const interp_kernel vp9_filteredinterp_filters1000[(1 << SUBPEL_BITS)] = {
+  {0,   0,   0, 128,   0,   0,   0, 0},
+  {0,   1,  -3, 128,   3,  -1,   0, 0},
+  {-1,   2,  -6, 127,   7,  -2,   1, 0},
+  {-1,   3,  -9, 126,  12,  -4,   1, 0},
+  {-1,   4, -12, 125,  16,  -5,   1, 0},
+  {-1,   4, -14, 123,  20,  -6,   2, 0},
+  {-1,   5, -15, 120,  25,  -8,   2, 0},
+  {-1,   5, -17, 118,  30,  -9,   3, -1},
+  {-1,   6, -18, 114,  35, -10,   3, -1},
+  {-1,   6, -19, 111,  41, -12,   3, -1},
+  {-1,   6, -20, 107,  46, -13,   4, -1},
+  {-1,   6, -21, 103,  52, -14,   4, -1},
+  {-1,   6, -21,  99,  57, -16,   5, -1},
+  {-1,   6, -21,  94,  63, -17,   5, -1},
+  {-1,   6, -20,  89,  68, -18,   5, -1},
+  {-1,   6, -20,  84,  73, -19,   6, -1},
+  {-1,   6, -20,  79,  79, -20,   6, -1},
+  {-1,   6, -19,  73,  84, -20,   6, -1},
+  {-1,   5, -18,  68,  89, -20,   6, -1},
+  {-1,   5, -17,  63,  94, -21,   6, -1},
+  {-1,   5, -16,  57,  99, -21,   6, -1},
+  {-1,   4, -14,  52, 103, -21,   6, -1},
+  {-1,   4, -13,  46, 107, -20,   6, -1},
+  {-1,   3, -12,  41, 111, -19,   6, -1},
+  {-1,   3, -10,  35, 114, -18,   6, -1},
+  {-1,   3,  -9,  30, 118, -17,   5, -1},
+  {0,   2,  -8,  25, 120, -15,   5, -1},
+  {0,   2,  -6,  20, 123, -14,   4, -1},
+  {0,   1,  -5,  16, 125, -12,   4, -1},
+  {0,   1,  -4,  12, 126,  -9,   3, -1},
+  {0,   1,  -2,   7, 127,  -6,   2, -1},
+  {0,   0,  -1,   3, 128,  -3,   1, 0}
+};
+
 // Filters for factor of 2 downsampling.
 static const int16_t vp9_down2_symeven_half_filter[] = {56, 12, -3, -1};
 static const int16_t vp9_down2_symodd_half_filter[] = {64, 35, 0, -3};
 
+static const interp_kernel *choose_interp_filter(int inlength, int outlength) {
+  int outlength16 = outlength * 16;
+  if (outlength16 >= inlength * 16)
+    return vp9_filteredinterp_filters1000;
+  else if (outlength16 >= inlength * 13)
+    return vp9_filteredinterp_filters875;
+  else if (outlength16 >= inlength * 11)
+    return vp9_filteredinterp_filters750;
+  else if (outlength16 >= inlength * 9)
+    return vp9_filteredinterp_filters625;
+  else
+    return vp9_filteredinterp_filters500;
+}
+
 static void interpolate(const uint8_t *const input, int inlength,
                         uint8_t *output, int outlength) {
   const int64_t delta = (((uint64_t)inlength << 32) + outlength / 2) /
@@ -81,6 +238,9 @@ static void interpolate(const uint8_t *const input, int inlength,
   int x, x1, x2, sum, k, int_pel, sub_pel;
   int64_t y;
 
+  const interp_kernel *interp_filters =
+      choose_interp_filter(inlength, outlength);
+
   x = 0;
   y = offset;
   while ((y >> INTERP_PRECISION_BITS) < (INTERP_TAPS / 2 - 1)) {
@@ -101,7 +261,7 @@ static void interpolate(const uint8_t *const input, int inlength,
       const int16_t *filter;
       int_pel = y >> INTERP_PRECISION_BITS;
       sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
-      filter = vp9_filteredinterp_filters[sub_pel];
+      filter = interp_filters[sub_pel];
       sum = 0;
       for (k = 0; k < INTERP_TAPS; ++k) {
         const int pk = int_pel - INTERP_TAPS / 2 + 1 + k;
@@ -116,7 +276,7 @@ static void interpolate(const uint8_t *const input, int inlength,
       const int16_t *filter;
       int_pel = y >> INTERP_PRECISION_BITS;
       sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
-      filter = vp9_filteredinterp_filters[sub_pel];
+      filter = interp_filters[sub_pel];
       sum = 0;
       for (k = 0; k < INTERP_TAPS; ++k)
         sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ?
@@ -129,7 +289,7 @@ static void interpolate(const uint8_t *const input, int inlength,
       const int16_t *filter;
       int_pel = y >> INTERP_PRECISION_BITS;
       sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
-      filter = vp9_filteredinterp_filters[sub_pel];
+      filter = interp_filters[sub_pel];
       sum = 0;
       for (k = 0; k < INTERP_TAPS; ++k)
         sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k];
@@ -140,7 +300,7 @@ static void interpolate(const uint8_t *const input, int inlength,
       const int16_t *filter;
       int_pel = y >> INTERP_PRECISION_BITS;
       sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
-      filter = vp9_filteredinterp_filters[sub_pel];
+      filter = interp_filters[sub_pel];
       sum = 0;
       for (k = 0; k < INTERP_TAPS; ++k)
         sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >=
diff --git a/vp9/encoder/vp9_resize.h b/vp9/encoder/vp9_resize.h
index c67595a3f..1818cd47e 100644
--- a/vp9/encoder/vp9_resize.h
+++ b/vp9/encoder/vp9_resize.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
@@ -12,6 +12,7 @@
 #define VP9_ENCODER_VP9_RESIZE_H_
 
 #include <stdio.h>
+#include "vpx/vpx_integer.h"
 
 void vp9_resize_plane(const uint8_t *const input,
                       int height,
diff --git a/vp9/encoder/vp9_segmentation.h b/vp9/encoder/vp9_segmentation.h
index 3c6eb7038..8238892e2 100644
--- a/vp9/encoder/vp9_segmentation.h
+++ b/vp9/encoder/vp9_segmentation.h
@@ -15,6 +15,10 @@
 #include "vp9/common/vp9_blockd.h"
 #include "vp9/encoder/vp9_onyx_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp9_enable_segmentation(VP9_PTR ptr);
 void vp9_disable_segmentation(VP9_PTR ptr);
 
@@ -45,4 +49,8 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi);
 
 void vp9_reset_segment_features(struct segmentation *seg);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_SEGMENTATION_H_
diff --git a/vp9/encoder/vp9_subexp.h b/vp9/encoder/vp9_subexp.h
index 1cafd8775..ab5659bf7 100644
--- a/vp9/encoder/vp9_subexp.h
+++ b/vp9/encoder/vp9_subexp.h
@@ -12,6 +12,10 @@
 #ifndef VP9_ENCODER_VP9_SUBEXP_H_
 #define VP9_ENCODER_VP9_SUBEXP_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp9_compute_update_table();
 
 
@@ -32,4 +36,8 @@ int vp9_prob_diff_update_savings_search_model(const unsigned int *ct,
                                               vp9_prob upd,
                                               int b, int r);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_SUBEXP_H_
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index c2eea0aaa..e822e4c64 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -60,7 +60,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
                             scale,
                             16, 16,
                             which_mv,
-                            &xd->subpix, MV_PRECISION_Q3, x, y);
+                            xd->interp_kernel, MV_PRECISION_Q3, x, y);
 
   vp9_build_inter_predictor(u_mb_ptr, uv_stride,
                             &pred[256], uv_block_size,
@@ -68,7 +68,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
                             scale,
                             uv_block_size, uv_block_size,
                             which_mv,
-                            &xd->subpix, mv_precision_uv, x, y);
+                            xd->interp_kernel, mv_precision_uv, x, y);
 
   vp9_build_inter_predictor(v_mb_ptr, uv_stride,
                             &pred[512], uv_block_size,
@@ -76,7 +76,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
                             scale,
                             uv_block_size, uv_block_size,
                             which_mv,
-                            &xd->subpix, mv_precision_uv, x, y);
+                            xd->interp_kernel, mv_precision_uv, x, y);
 }
 
 void vp9_temporal_filter_apply_c(uint8_t *frame1,
@@ -392,7 +392,6 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
   const int num_frames_backward = distance;
   const int num_frames_forward = vp9_lookahead_depth(cpi->lookahead)
                                - (num_frames_backward + 1);
-
   struct scale_factors sf;
 
   switch (blur_type) {
@@ -408,7 +407,6 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
 
     case 2:
       // Forward Blur
-
       frames_to_blur_forward = num_frames_forward;
 
       if (frames_to_blur_forward >= max_frames)
@@ -471,22 +469,24 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
                             strength, &sf);
 }
 
-void configure_arnr_filter(VP9_COMP *cpi, const unsigned int this_frame,
-                           const int group_boost) {
+void vp9_configure_arnr_filter(VP9_COMP *cpi,
+                               const unsigned int frames_to_arnr,
+                               const int group_boost) {
   int half_gf_int;
   int frames_after_arf;
   int frames_bwd = cpi->oxcf.arnr_max_frames - 1;
   int frames_fwd = cpi->oxcf.arnr_max_frames - 1;
   int q;
 
-  // Define the arnr filter width for this group of frames:
-  // We only filter frames that lie within a distance of half
-  // the GF interval from the ARF frame. We also have to trap
-  // cases where the filter extends beyond the end of clip.
-  // Note: this_frame->frame has been updated in the loop
-  // so it now points at the ARF frame.
+  // Define the arnr filter width for this group of frames. We only
+  // filter frames that lie within a distance of half the GF interval
+  // from the ARF frame. We also have to trap cases where the filter
+  // extends beyond the end of the lookahead buffer.
+  // Note: frames_to_arnr parameter is the offset of the arnr
+  // frame from the current frame.
   half_gf_int = cpi->rc.baseline_gf_interval >> 1;
-  frames_after_arf = (int)(cpi->twopass.total_stats.count - this_frame - 1);
+  frames_after_arf = vp9_lookahead_depth(cpi->lookahead)
+      - frames_to_arnr - 1;
 
   switch (cpi->oxcf.arnr_type) {
     case 1:  // Backward filter
diff --git a/vp9/encoder/vp9_temporal_filter.h b/vp9/encoder/vp9_temporal_filter.h
index c5f3b467e..3028d7884 100644
--- a/vp9/encoder/vp9_temporal_filter.h
+++ b/vp9/encoder/vp9_temporal_filter.h
@@ -11,8 +11,17 @@
 #ifndef VP9_ENCODER_VP9_TEMPORAL_FILTER_H_
 #define VP9_ENCODER_VP9_TEMPORAL_FILTER_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance);
-void configure_arnr_filter(VP9_COMP *cpi, const unsigned int this_frame,
-                           const int group_boost);
+void vp9_configure_arnr_filter(VP9_COMP *cpi,
+                               const unsigned int frames_to_arnr,
+                               const int group_boost);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
 
 #endif  // VP9_ENCODER_VP9_TEMPORAL_FILTER_H_
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index b04e3fe30..8e646f669 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -301,7 +301,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run,
   struct tokenize_b_args arg = {cpi, xd, t, mbmi->tx_size, cpi->mb.token_cache};
   if (mbmi->skip_coeff) {
     if (!dry_run)
-      cm->counts.mbskip[ctx][1] += skip_inc;
+      cm->counts.skip[ctx][1] += skip_inc;
     reset_skip_context(xd, bsize);
     if (dry_run)
       *t = t_backup;
@@ -309,7 +309,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run,
   }
 
   if (!dry_run) {
-    cm->counts.mbskip[ctx][0] += skip_inc;
+    cm->counts.skip[ctx][0] += skip_inc;
     foreach_transformed_block(xd, bsize, tokenize_b, &arg);
   } else {
     foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg);
diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h
index 67e6c9d3d..ea86240be 100644
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -16,6 +16,10 @@
 #include "vp9/encoder/vp9_block.h"
 #include "vp9/encoder/vp9_treewriter.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp9_tokenize_initialize();
 
 #define EOSB_TOKEN 127     // Not signalled, encoder only
@@ -50,4 +54,8 @@ extern const int *vp9_dct_value_cost_ptr;
  */
 extern const TOKENVALUE *vp9_dct_value_tokens_ptr;
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_TOKENIZE_H_
diff --git a/vp9/encoder/vp9_treewriter.h b/vp9/encoder/vp9_treewriter.h
index 09f80b0ba..fedfbe937 100644
--- a/vp9/encoder/vp9_treewriter.h
+++ b/vp9/encoder/vp9_treewriter.h
@@ -13,6 +13,10 @@
 
 #include "vp9/encoder/vp9_writer.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define vp9_cost_zero(prob) (vp9_prob_cost[prob])
 
 #define vp9_cost_one(prob) vp9_cost_zero(vp9_complement(prob))
@@ -69,4 +73,8 @@ static INLINE void vp9_write_token(vp9_writer *w, const vp9_tree_index *tree,
   vp9_write_tree(w, tree, probs, token->value, token->len, 0);
 }
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_TREEWRITER_H_
diff --git a/vp9/encoder/vp9_vaq.h b/vp9/encoder/vp9_vaq.h
index c45c479de..c73114aeb 100644
--- a/vp9/encoder/vp9_vaq.h
+++ b/vp9/encoder/vp9_vaq.h
@@ -14,6 +14,10 @@
 
 #include "vp9/encoder/vp9_onyx_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 unsigned int vp9_vaq_segment_id(int energy);
 double vp9_vaq_rdmult_ratio(int energy);
 double vp9_vaq_inv_q_ratio(int energy);
@@ -23,4 +27,8 @@ void vp9_vaq_frame_setup(VP9_COMP *cpi);
 
 int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_VAQ_H_
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index 2ded97c55..3bc2091f8 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -12,7 +12,10 @@
 #define VP9_ENCODER_VP9_VARIANCE_H_
 
 #include "vpx/vpx_integer.h"
-// #include "./vpx_config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 void variance(const uint8_t *src_ptr,
               int  source_stride,
@@ -112,4 +115,8 @@ static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
     ref += ref_stride;
   }
 }
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_VARIANCE_H_
diff --git a/vp9/encoder/vp9_write_bit_buffer.h b/vp9/encoder/vp9_write_bit_buffer.h
index dfed90370..5958b4806 100644
--- a/vp9/encoder/vp9_write_bit_buffer.h
+++ b/vp9/encoder/vp9_write_bit_buffer.h
@@ -15,6 +15,10 @@
 
 #include "vpx/vpx_integer.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct vp9_write_bit_buffer {
   uint8_t *bit_buffer;
   size_t bit_offset;
@@ -45,4 +49,8 @@ static void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb,
 }
 
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_WRITE_BIT_BUFFER_H_
diff --git a/vp9/encoder/vp9_writer.h b/vp9/encoder/vp9_writer.h
index 9cac7a84f..62f555c99 100644
--- a/vp9/encoder/vp9_writer.h
+++ b/vp9/encoder/vp9_writer.h
@@ -15,6 +15,10 @@
 
 #include "vp9/common/vp9_prob.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef struct {
   unsigned int lowvalue;
   unsigned int range;
@@ -105,4 +109,8 @@ static void vp9_write_literal(vp9_writer *w, int data, int bits) {
 
 #define vp9_write_prob(w, v) vp9_write_literal((w), (v), 8)
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_VP9_WRITER_H_
diff --git a/vp9/encoder/x86/vp9_dct_avx2.c b/vp9/encoder/x86/vp9_dct_avx2.c
index d81b72bba..ea031fb07 100644
--- a/vp9/encoder/x86/vp9_dct_avx2.c
+++ b/vp9/encoder/x86/vp9_dct_avx2.c
@@ -163,7 +163,7 @@ static INLINE void transpose_4x4_avx2(__m128i *res) {
   res[3] = _mm_unpackhi_epi64(res[2], res[2]);
 }
 
-void fdct4_1d_avx2(__m128i *in) {
+void fdct4_avx2(__m128i *in) {
   const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
   const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
   const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
@@ -196,7 +196,7 @@ void fdct4_1d_avx2(__m128i *in) {
   transpose_4x4_avx2(in);
 }
 
-void fadst4_1d_avx2(__m128i *in) {
+void fadst4_avx2(__m128i *in) {
   const __m128i k__sinpi_p01_p02 = pair_set_epi16(sinpi_1_9, sinpi_2_9);
   const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9);
   const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9);
@@ -250,20 +250,20 @@ void vp9_short_fht4x4_avx2(const int16_t *input, int16_t *output,
   load_buffer_4x4_avx2(input, in, stride);
   switch (tx_type) {
     case 0:  // DCT_DCT
-      fdct4_1d_avx2(in);
-      fdct4_1d_avx2(in);
+      fdct4_avx2(in);
+      fdct4_avx2(in);
       break;
     case 1:  // ADST_DCT
-      fadst4_1d_avx2(in);
-      fdct4_1d_avx2(in);
+      fadst4_avx2(in);
+      fdct4_avx2(in);
       break;
     case 2:  // DCT_ADST
-      fdct4_1d_avx2(in);
-      fadst4_1d_avx2(in);
+      fdct4_avx2(in);
+      fadst4_avx2(in);
       break;
     case 3:  // ADST_ADST
-      fadst4_1d_avx2(in);
-      fadst4_1d_avx2(in);
+      fadst4_avx2(in);
+      fadst4_avx2(in);
       break;
     default:
       assert(0);
@@ -658,7 +658,7 @@ static INLINE void array_transpose_8x8_avx2(__m128i *in, __m128i *res) {
   // 07 17 27 37 47 57 67 77
 }
 
-void fdct8_1d_avx2(__m128i *in) {
+void fdct8_avx2(__m128i *in) {
   // constants
   const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
   const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
@@ -798,7 +798,7 @@ void fdct8_1d_avx2(__m128i *in) {
   array_transpose_8x8_avx2(in, in);
 }
 
-void fadst8_1d_avx2(__m128i *in) {
+void fadst8_avx2(__m128i *in) {
   // Constants
   const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
   const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
@@ -1034,20 +1034,20 @@ void vp9_short_fht8x8_avx2(const int16_t *input, int16_t *output,
   load_buffer_8x8_avx2(input, in, stride);
   switch (tx_type) {
     case 0:  // DCT_DCT
-      fdct8_1d_avx2(in);
-      fdct8_1d_avx2(in);
+      fdct8_avx2(in);
+      fdct8_avx2(in);
       break;
     case 1:  // ADST_DCT
-      fadst8_1d_avx2(in);
-      fdct8_1d_avx2(in);
+      fadst8_avx2(in);
+      fdct8_avx2(in);
       break;
     case 2:  // DCT_ADST
-      fdct8_1d_avx2(in);
-      fadst8_1d_avx2(in);
+      fdct8_avx2(in);
+      fadst8_avx2(in);
       break;
     case 3:  // ADST_ADST
-      fadst8_1d_avx2(in);
-      fadst8_1d_avx2(in);
+      fadst8_avx2(in);
+      fadst8_avx2(in);
       break;
     default:
       assert(0);
@@ -1216,7 +1216,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
         step1_6 = _mm_sub_epi16(in01, in14);
         step1_7 = _mm_sub_epi16(in00, in15);
       }
-      // Work on the first eight values; fdct8_1d(input, even_results);
+      // Work on the first eight values; fdct8(input, even_results);
       {
         // Add/substract
         const __m128i q0 = _mm_add_epi16(input0, input7);
@@ -1730,7 +1730,7 @@ static INLINE void right_shift_16x16_avx2(__m128i *res0, __m128i *res1) {
   right_shift_8x8_avx2(res1 + 8, 2);
 }
 
-void fdct16_1d_8col_avx2(__m128i *in) {
+void fdct16_8col_avx2(__m128i *in) {
   // perform 16x16 1-D DCT for 8 columns
   __m128i i[8], s[8], p[8], t[8], u[16], v[16];
   const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
@@ -2052,7 +2052,7 @@ void fdct16_1d_8col_avx2(__m128i *in) {
   in[15] = _mm_packs_epi32(v[14], v[15]);
 }
 
-void fadst16_1d_8col_avx2(__m128i *in) {
+void fadst16_8col_avx2(__m128i *in) {
   // perform 16x16 1-D ADST for 8 columns
   __m128i s[16], x[16], u[32], v[32];
   const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64);
@@ -2522,15 +2522,15 @@ void fadst16_1d_8col_avx2(__m128i *in) {
   in[15] = _mm_sub_epi16(kZero, s[1]);
 }
 
-void fdct16_1d_avx2(__m128i *in0, __m128i *in1) {
-  fdct16_1d_8col_avx2(in0);
-  fdct16_1d_8col_avx2(in1);
+void fdct16_avx2(__m128i *in0, __m128i *in1) {
+  fdct16_8col_avx2(in0);
+  fdct16_8col_avx2(in1);
   array_transpose_16x16_avx2(in0, in1);
 }
 
-void fadst16_1d_avx2(__m128i *in0, __m128i *in1) {
-  fadst16_1d_8col_avx2(in0);
-  fadst16_1d_8col_avx2(in1);
+void fadst16_avx2(__m128i *in0, __m128i *in1) {
+  fadst16_8col_avx2(in0);
+  fadst16_8col_avx2(in1);
   array_transpose_16x16_avx2(in0, in1);
 }
 
@@ -2540,24 +2540,24 @@ void vp9_short_fht16x16_avx2(const int16_t *input, int16_t *output,
   load_buffer_16x16_avx2(input, in0, in1, stride);
   switch (tx_type) {
     case 0:  // DCT_DCT
-      fdct16_1d_avx2(in0, in1);
+      fdct16_avx2(in0, in1);
       right_shift_16x16_avx2(in0, in1);
-      fdct16_1d_avx2(in0, in1);
+      fdct16_avx2(in0, in1);
       break;
     case 1:  // ADST_DCT
-      fadst16_1d_avx2(in0, in1);
+      fadst16_avx2(in0, in1);
       right_shift_16x16_avx2(in0, in1);
-      fdct16_1d_avx2(in0, in1);
+      fdct16_avx2(in0, in1);
       break;
     case 2:  // DCT_ADST
-      fdct16_1d_avx2(in0, in1);
+      fdct16_avx2(in0, in1);
       right_shift_16x16_avx2(in0, in1);
-      fadst16_1d_avx2(in0, in1);
+      fadst16_avx2(in0, in1);
       break;
     case 3:  // ADST_ADST
-      fadst16_1d_avx2(in0, in1);
+      fadst16_avx2(in0, in1);
       right_shift_16x16_avx2(in0, in1);
-      fadst16_1d_avx2(in0, in1);
+      fadst16_avx2(in0, in1);
       break;
     default:
       assert(0);
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index 65431bdbf..c876cc273 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -161,7 +161,7 @@ static INLINE void transpose_4x4(__m128i *res) {
   res[3] = _mm_unpackhi_epi64(res[2], res[2]);
 }
 
-void fdct4_1d_sse2(__m128i *in) {
+void fdct4_sse2(__m128i *in) {
   const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
   const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
   const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
@@ -194,7 +194,7 @@ void fdct4_1d_sse2(__m128i *in) {
   transpose_4x4(in);
 }
 
-void fadst4_1d_sse2(__m128i *in) {
+void fadst4_sse2(__m128i *in) {
   const __m128i k__sinpi_p01_p02 = pair_set_epi16(sinpi_1_9, sinpi_2_9);
   const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9);
   const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9);
@@ -248,20 +248,20 @@ void vp9_short_fht4x4_sse2(const int16_t *input, int16_t *output,
   load_buffer_4x4(input, in, stride);
   switch (tx_type) {
     case 0:  // DCT_DCT
-      fdct4_1d_sse2(in);
-      fdct4_1d_sse2(in);
+      fdct4_sse2(in);
+      fdct4_sse2(in);
       break;
     case 1:  // ADST_DCT
-      fadst4_1d_sse2(in);
-      fdct4_1d_sse2(in);
+      fadst4_sse2(in);
+      fdct4_sse2(in);
       break;
     case 2:  // DCT_ADST
-      fdct4_1d_sse2(in);
-      fadst4_1d_sse2(in);
+      fdct4_sse2(in);
+      fadst4_sse2(in);
       break;
     case 3:  // ADST_ADST
-      fadst4_1d_sse2(in);
-      fadst4_1d_sse2(in);
+      fadst4_sse2(in);
+      fadst4_sse2(in);
       break;
     default:
       assert(0);
@@ -656,7 +656,7 @@ static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
   // 07 17 27 37 47 57 67 77
 }
 
-void fdct8_1d_sse2(__m128i *in) {
+void fdct8_sse2(__m128i *in) {
   // constants
   const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
   const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
@@ -796,7 +796,7 @@ void fdct8_1d_sse2(__m128i *in) {
   array_transpose_8x8(in, in);
 }
 
-void fadst8_1d_sse2(__m128i *in) {
+void fadst8_sse2(__m128i *in) {
   // Constants
   const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
   const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
@@ -1032,20 +1032,20 @@ void vp9_short_fht8x8_sse2(const int16_t *input, int16_t *output,
   load_buffer_8x8(input, in, stride);
   switch (tx_type) {
     case 0:  // DCT_DCT
-      fdct8_1d_sse2(in);
-      fdct8_1d_sse2(in);
+      fdct8_sse2(in);
+      fdct8_sse2(in);
       break;
     case 1:  // ADST_DCT
-      fadst8_1d_sse2(in);
-      fdct8_1d_sse2(in);
+      fadst8_sse2(in);
+      fdct8_sse2(in);
       break;
     case 2:  // DCT_ADST
-      fdct8_1d_sse2(in);
-      fadst8_1d_sse2(in);
+      fdct8_sse2(in);
+      fadst8_sse2(in);
       break;
     case 3:  // ADST_ADST
-      fadst8_1d_sse2(in);
-      fadst8_1d_sse2(in);
+      fadst8_sse2(in);
+      fadst8_sse2(in);
       break;
     default:
       assert(0);
@@ -1214,7 +1214,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
         step1_6 = _mm_sub_epi16(in01, in14);
         step1_7 = _mm_sub_epi16(in00, in15);
       }
-      // Work on the first eight values; fdct8_1d(input, even_results);
+      // Work on the first eight values; fdct8(input, even_results);
       {
         // Add/substract
         const __m128i q0 = _mm_add_epi16(input0, input7);
@@ -1728,7 +1728,7 @@ static INLINE void right_shift_16x16(__m128i *res0, __m128i *res1) {
   right_shift_8x8(res1 + 8, 2);
 }
 
-void fdct16_1d_8col(__m128i *in) {
+void fdct16_8col(__m128i *in) {
   // perform 16x16 1-D DCT for 8 columns
   __m128i i[8], s[8], p[8], t[8], u[16], v[16];
   const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
@@ -2050,7 +2050,7 @@ void fdct16_1d_8col(__m128i *in) {
   in[15] = _mm_packs_epi32(v[14], v[15]);
 }
 
-void fadst16_1d_8col(__m128i *in) {
+void fadst16_8col(__m128i *in) {
   // perform 16x16 1-D ADST for 8 columns
   __m128i s[16], x[16], u[32], v[32];
   const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64);
@@ -2520,15 +2520,15 @@ void fadst16_1d_8col(__m128i *in) {
   in[15] = _mm_sub_epi16(kZero, s[1]);
 }
 
-void fdct16_1d_sse2(__m128i *in0, __m128i *in1) {
-  fdct16_1d_8col(in0);
-  fdct16_1d_8col(in1);
+void fdct16_sse2(__m128i *in0, __m128i *in1) {
+  fdct16_8col(in0);
+  fdct16_8col(in1);
   array_transpose_16x16(in0, in1);
 }
 
-void fadst16_1d_sse2(__m128i *in0, __m128i *in1) {
-  fadst16_1d_8col(in0);
-  fadst16_1d_8col(in1);
+void fadst16_sse2(__m128i *in0, __m128i *in1) {
+  fadst16_8col(in0);
+  fadst16_8col(in1);
   array_transpose_16x16(in0, in1);
 }
 
@@ -2538,24 +2538,24 @@ void vp9_short_fht16x16_sse2(const int16_t *input, int16_t *output,
   load_buffer_16x16(input, in0, in1, stride);
   switch (tx_type) {
     case 0:  // DCT_DCT
-      fdct16_1d_sse2(in0, in1);
+      fdct16_sse2(in0, in1);
       right_shift_16x16(in0, in1);
-      fdct16_1d_sse2(in0, in1);
+      fdct16_sse2(in0, in1);
       break;
     case 1:  // ADST_DCT
-      fadst16_1d_sse2(in0, in1);
+      fadst16_sse2(in0, in1);
       right_shift_16x16(in0, in1);
-      fdct16_1d_sse2(in0, in1);
+      fdct16_sse2(in0, in1);
       break;
     case 2:  // DCT_ADST
-      fdct16_1d_sse2(in0, in1);
+      fdct16_sse2(in0, in1);
       right_shift_16x16(in0, in1);
-      fadst16_1d_sse2(in0, in1);
+      fadst16_sse2(in0, in1);
       break;
     case 3:  // ADST_ADST
-      fadst16_1d_sse2(in0, in1);
+      fadst16_sse2(in0, in1);
       right_shift_16x16(in0, in1);
-      fadst16_1d_sse2(in0, in1);
+      fadst16_sse2(in0, in1);
       break;
     default:
       assert(0);
diff --git a/vp9/encoder/x86/vp9_mcomp_x86.h b/vp9/encoder/x86/vp9_mcomp_x86.h
index e1fcf40f2..c15039ad8 100644
--- a/vp9/encoder/x86/vp9_mcomp_x86.h
+++ b/vp9/encoder/x86/vp9_mcomp_x86.h
@@ -12,6 +12,10 @@
 #ifndef VP9_ENCODER_X86_VP9_MCOMP_X86_H_
 #define VP9_ENCODER_X86_VP9_MCOMP_X86_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #if HAVE_SSE3
 #if !CONFIG_RUNTIME_CPU_DETECT
 
@@ -36,5 +40,9 @@
 #endif
 #endif
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VP9_ENCODER_X86_VP9_MCOMP_X86_H_
 
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 897ecd702..6b181710e 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -563,10 +563,21 @@ static void pick_quickcompress_mode(vpx_codec_alg_priv_t  *ctx,
   unsigned int new_qc;
 
   /* Use best quality mode if no deadline is given. */
-  if (deadline)
-    new_qc = MODE_GOODQUALITY;
-  else
-    new_qc = MODE_BESTQUALITY;
+  new_qc = MODE_BESTQUALITY;
+
+  if (deadline) {
+      uint64_t     duration_us;
+
+      /* Convert duration parameter from stream timebase to microseconds */
+      duration_us = (uint64_t)duration * 1000000
+                    * (uint64_t)ctx->cfg.g_timebase.num
+                    / (uint64_t)ctx->cfg.g_timebase.den;
+
+      /* If the deadline is more that the duration this frame is to be shown,
+       * use good quality mode. Otherwise use realtime mode.
+       */
+      new_qc = (deadline > duration_us) ? MODE_GOODQUALITY : MODE_REALTIME;
+  }
 
   if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS)
     new_qc = MODE_FIRSTPASS;
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index a03f7befc..92c6cd20c 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -59,13 +59,6 @@ struct vpx_codec_alg_priv {
   int                     img_setup;
   int                     img_avail;
   int                     invert_tile_order;
-  int                     fb_lru;
-
-  /* External buffer info to save for VP9 common. */
-  vpx_codec_frame_buffer_t *fb_list;  // External frame buffers
-  int fb_count;  // Total number of frame buffers
-  vpx_realloc_frame_buffer_cb_fn_t realloc_fb_cb;
-  void *user_priv;  // Private data associated with the external frame buffers.
 };
 
 static unsigned long priv_sz(const vpx_codec_dec_cfg_t *si,
@@ -298,32 +291,10 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
         ctx->postproc_cfg.noise_level = 0;
       }
 
-      if (!optr) {
+      if (!optr)
         res = VPX_CODEC_ERROR;
-      } else {
-        VP9D_COMP *const pbi = (VP9D_COMP*)optr;
-        VP9_COMMON *const cm = &pbi->common;
-        if (ctx->fb_list != NULL && ctx->realloc_fb_cb != NULL &&
-            ctx->fb_count > 0) {
-          cm->fb_list = ctx->fb_list;
-          cm->fb_count = ctx->fb_count;
-          cm->realloc_fb_cb = ctx->realloc_fb_cb;
-          cm->user_priv = ctx->user_priv;
-        } else {
-          cm->fb_count = FRAME_BUFFERS;
-        }
-        cm->fb_lru = ctx->fb_lru;
-        CHECK_MEM_ERROR(cm, cm->yv12_fb,
-                        vpx_calloc(cm->fb_count, sizeof(*cm->yv12_fb)));
-        CHECK_MEM_ERROR(cm, cm->fb_idx_ref_cnt,
-                        vpx_calloc(cm->fb_count, sizeof(*cm->fb_idx_ref_cnt)));
-        if (cm->fb_lru) {
-          CHECK_MEM_ERROR(cm, cm->fb_idx_ref_lru,
-                          vpx_calloc(cm->fb_count,
-                                     sizeof(*cm->fb_idx_ref_lru)));
-        }
+      else
         ctx->pbi = optr;
-      }
     }
 
     ctx->decoder_init = 1;
@@ -481,28 +452,6 @@ static vpx_image_t *vp9_get_frame(vpx_codec_alg_priv_t  *ctx,
   return img;
 }
 
-static vpx_codec_err_t vp9_set_frame_buffers(
-    vpx_codec_alg_priv_t *ctx,
-    vpx_codec_frame_buffer_t *fb_list, int fb_count,
-    vpx_realloc_frame_buffer_cb_fn_t cb, void *user_priv) {
-  if (fb_count < (VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS)) {
-    /* The application must pass in at least VP9_MAXIMUM_REF_BUFFERS +
-     * VPX_MAXIMUM_WORK_BUFFERS frame buffers. */
-    return VPX_CODEC_INVALID_PARAM;
-  } else if (!ctx->pbi) {
-    /* If the decoder has already been initialized, do not accept external
-     * frame buffers.
-     */
-    ctx->fb_list = fb_list;
-    ctx->fb_count = fb_count;
-    ctx->realloc_fb_cb = cb;
-    ctx->user_priv = user_priv;
-    return VPX_CODEC_OK;
-  }
-
-  return VPX_CODEC_ERROR;
-}
-
 static vpx_codec_err_t vp9_xma_get_mmap(const vpx_codec_ctx_t *ctx,
                                         vpx_codec_mmap_t *mmap,
                                         vpx_codec_iter_t *iter) {
@@ -713,21 +662,6 @@ static vpx_codec_err_t set_invert_tile_order(vpx_codec_alg_priv_t *ctx,
   return VPX_CODEC_OK;
 }
 
-static vpx_codec_err_t set_frame_buffer_lru_cache(vpx_codec_alg_priv_t *ctx,
-                                                  int ctr_id,
-                                                  va_list args) {
-  VP9D_COMP *const pbi = (VP9D_COMP*)ctx->pbi;
-
-  // Save for later to pass into vp9 common.
-  ctx->fb_lru = va_arg(args, int);
-
-  if (pbi) {
-    VP9_COMMON *const cm = &pbi->common;
-    cm->fb_lru = ctx->fb_lru;
-  }
-  return VPX_CODEC_OK;
-}
-
 static vpx_codec_ctrl_fn_map_t ctf_maps[] = {
   {VP8_SET_REFERENCE,             set_reference},
   {VP8_COPY_REFERENCE,            copy_reference},
@@ -741,7 +675,6 @@ static vpx_codec_ctrl_fn_map_t ctf_maps[] = {
   {VP9_GET_REFERENCE,             get_reference},
   {VP9D_GET_DISPLAY_SIZE,         get_display_size},
   {VP9_INVERT_TILE_DECODE_ORDER,  set_invert_tile_order},
-  {VP9D_SET_FRAME_BUFFER_LRU_CACHE, set_frame_buffer_lru_cache},
   { -1, NULL},
 };
 
@@ -752,8 +685,7 @@ static vpx_codec_ctrl_fn_map_t ctf_maps[] = {
 CODEC_INTERFACE(vpx_codec_vp9_dx) = {
   "WebM Project VP9 Decoder" VERSION_STRING,
   VPX_CODEC_INTERNAL_ABI_VERSION,
-  VPX_CODEC_CAP_DECODER | VP9_CAP_POSTPROC |
-      VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER,
+  VPX_CODEC_CAP_DECODER | VP9_CAP_POSTPROC,
   /* vpx_codec_caps_t          caps; */
   vp9_init,         /* vpx_codec_init_fn_t       init; */
   vp9_destroy,      /* vpx_codec_destroy_fn_t    destroy; */
@@ -765,7 +697,6 @@ CODEC_INTERFACE(vpx_codec_vp9_dx) = {
     vp9_get_si,       /* vpx_codec_get_si_fn_t     get_si; */
     vp9_decode,       /* vpx_codec_decode_fn_t     decode; */
     vp9_get_frame,    /* vpx_codec_frame_get_fn_t  frame_get; */
-    vp9_set_frame_buffers,    /* vpx_codec_set_frame_buffers_fn_t  set_fb; */
   },
   { // NOLINT
     /* encoder functions */
diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk
index b722200f7..de210f4b7 100644
--- a/vp9/vp9dx.mk
+++ b/vp9/vp9dx.mk
@@ -21,6 +21,8 @@ VP9_DX_SRCS-yes += decoder/vp9_decodemv.c
 VP9_DX_SRCS-yes += decoder/vp9_decodeframe.c
 VP9_DX_SRCS-yes += decoder/vp9_decodeframe.h
 VP9_DX_SRCS-yes += decoder/vp9_detokenize.c
+VP9_DX_SRCS-yes += decoder/vp9_dthread.c
+VP9_DX_SRCS-yes += decoder/vp9_dthread.h
 VP9_DX_SRCS-yes += decoder/vp9_reader.h
 VP9_DX_SRCS-yes += decoder/vp9_reader.c
 VP9_DX_SRCS-yes += decoder/vp9_read_bit_buffer.h
diff --git a/vp9_spatial_scalable_encoder.c b/vp9_spatial_scalable_encoder.c
index b637331c3..50f45c200 100644
--- a/vp9_spatial_scalable_encoder.c
+++ b/vp9_spatial_scalable_encoder.c
@@ -89,14 +89,6 @@ void usage_exit() {
   exit(EXIT_FAILURE);
 }
 
-static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
-  const char *detail = vpx_codec_error_detail(ctx);
-
-  printf("%s: %s\n", s, vpx_codec_error(ctx));
-  if (detail) printf("    %s\n", detail);
-  exit(EXIT_FAILURE);
-}
-
 static void parse_command_line(int argc, const char **argv_,
                                AppInput *app_input, SvcContext *svc_ctx,
                                vpx_codec_enc_cfg_t *enc_cfg) {
@@ -201,8 +193,6 @@ int main(int argc, const char **argv) {
   vpx_codec_err_t res;
   int pts = 0;            /* PTS starts at 0 */
   int frame_duration = 1; /* 1 timebase tick per frame */
-  vpx_codec_cx_pkt_t packet = {0};
-  packet.kind = VPX_CODEC_CX_FRAME_PKT;
 
   memset(&svc_ctx, 0, sizeof(svc_ctx));
   svc_ctx.log_print = 1;
@@ -242,9 +232,7 @@ int main(int argc, const char **argv) {
       die_codec(&codec, "Failed to encode frame");
     }
     if (vpx_svc_get_frame_size(&svc_ctx) > 0) {
-      packet.data.frame.pts = pts;
-      packet.data.frame.sz = vpx_svc_get_frame_size(&svc_ctx);
-      ivf_write_frame_header(outfile, &packet);
+      ivf_write_frame_header(outfile, pts, vpx_svc_get_frame_size(&svc_ctx));
       (void)fwrite(vpx_svc_get_buffer(&svc_ctx), 1,
                    vpx_svc_get_frame_size(&svc_ctx), outfile);
     }
diff --git a/vpx/exports_dec b/vpx/exports_dec
index d058c9bdb..ed121f7ec 100644
--- a/vpx/exports_dec
+++ b/vpx/exports_dec
@@ -7,4 +7,3 @@ text vpx_codec_peek_stream_info
 text vpx_codec_register_put_frame_cb
 text vpx_codec_register_put_slice_cb
 text vpx_codec_set_mem_map
-text vpx_codec_set_frame_buffers
diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index 6c644f045..0f42a1d20 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -47,6 +47,9 @@
 #include "../vpx_encoder.h"
 #include <stdarg.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 /*!\brief Current ABI version number
  *
@@ -56,7 +59,7 @@
  * types, removing or reassigning enums, adding/removing/rearranging
  * fields to structures
  */
-#define VPX_CODEC_INTERNAL_ABI_VERSION (5) /**<\hideinitializer*/
+#define VPX_CODEC_INTERNAL_ABI_VERSION (4) /**<\hideinitializer*/
 
 typedef struct vpx_codec_alg_priv  vpx_codec_alg_priv_t;
 typedef struct vpx_codec_priv_enc_mr_cfg vpx_codec_priv_enc_mr_cfg_t;
@@ -215,37 +218,6 @@ typedef vpx_codec_err_t (*vpx_codec_decode_fn_t)(vpx_codec_alg_priv_t  *ctx,
 typedef vpx_image_t *(*vpx_codec_get_frame_fn_t)(vpx_codec_alg_priv_t *ctx,
                                                  vpx_codec_iter_t     *iter);
 
-/*!\brief Pass in external frame buffers for the decoder to use.
- *
- * Registers a given function to be called when the current frame to
- * decode will be bigger than the external frame buffer size. This
- * function must be called before the first call to decode or libvpx
- * will assume the default behavior of allocating frame buffers internally.
- * Frame buffers with a size of 0 are valid.
- *
- * \param[in] ctx          Pointer to this instance's context
- * \param[in] fb_list      Pointer to array of frame buffers
- * \param[in] fb_count     Number of elements in frame buffer array
- * \param[in] cb           Pointer to the callback function
- * \param[in] user_priv    User's private data
- *
- * \retval #VPX_CODEC_OK
- *     External frame buffers will be used by libvpx.
- * \retval #VPX_CODEC_INVALID_PARAM
- *     fb_count was less than the value needed by the codec.
- * \retval #VPX_CODEC_ERROR
- *     Decoder context not initialized, or algorithm not capable of
- *     using external frame buffers.
- *
- * \note
- * When decoding VP9, the application must pass in at least
- * #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS external frame
- * buffers.
- */
-typedef vpx_codec_err_t (*vpx_codec_set_frame_buffers_fn_t)(
-    vpx_codec_alg_priv_t *ctx,
-    vpx_codec_frame_buffer_t *fb_list, int fb_count,
-    vpx_realloc_frame_buffer_cb_fn_t cb, void *user_priv);
 
 /*\brief eXternal Memory Allocation memory map get iterator
  *
@@ -336,7 +308,6 @@ struct vpx_codec_iface {
     vpx_codec_get_si_fn_t     get_si;      /**< \copydoc ::vpx_codec_get_si_fn_t */
     vpx_codec_decode_fn_t     decode;      /**< \copydoc ::vpx_codec_decode_fn_t */
     vpx_codec_get_frame_fn_t  get_frame;   /**< \copydoc ::vpx_codec_get_frame_fn_t */
-    vpx_codec_set_frame_buffers_fn_t set_fb; /**< \copydoc ::vpx_codec_set_frame_buffers_fn_t */
   } dec;
   struct vpx_codec_enc_iface {
     vpx_codec_enc_cfg_map_t           *cfg_maps;      /**< \copydoc ::vpx_codec_enc_cfg_map_t */
@@ -476,6 +447,7 @@ vpx_codec_pkt_list_get(struct vpx_codec_pkt_list *list,
 
 #include <stdio.h>
 #include <setjmp.h>
+
 struct vpx_internal_error_info {
   vpx_codec_err_t  error_code;
   int              has_detail;
@@ -532,4 +504,8 @@ vpx_codec_err_t vpx_validate_mmaps(const vpx_codec_stream_info_t *si,
                                    const vpx_codec_mmap_t *mmaps,
                                    const mem_req_t *mem_reqs, int nreqs,
                                    vpx_codec_flags_t init_flags);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VPX_INTERNAL_VPX_CODEC_INTERNAL_H_
diff --git a/vpx/src/vpx_decoder.c b/vpx/src/vpx_decoder.c
index 39fd217ea..a99e48f88 100644
--- a/vpx/src/vpx_decoder.c
+++ b/vpx/src/vpx_decoder.c
@@ -226,22 +226,3 @@ vpx_codec_err_t vpx_codec_set_mem_map(vpx_codec_ctx_t   *ctx,
 
   return SAVE_STATUS(ctx, res);
 }
-
-vpx_codec_err_t vpx_codec_set_frame_buffers(
-    vpx_codec_ctx_t *ctx,
-    vpx_codec_frame_buffer_t *fb_list, int fb_count,
-    vpx_realloc_frame_buffer_cb_fn_t cb, void *user_priv) {
-  vpx_codec_err_t res;
-
-  if (!ctx || !fb_list || fb_count <= 0 || !cb) {
-    res = VPX_CODEC_INVALID_PARAM;
-  } else if (!ctx->iface || !ctx->priv ||
-             !(ctx->iface->caps & VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER)) {
-    res = VPX_CODEC_ERROR;
-  } else {
-    res = ctx->iface->dec.set_fb(ctx->priv->alg_priv, fb_list, fb_count,
-                                 cb, user_priv);
-  }
-
-  return SAVE_STATUS(ctx, res);
-}
diff --git a/vpx/vp8dx.h b/vpx/vp8dx.h
index e89c0bcb1..bde77c24d 100644
--- a/vpx/vp8dx.h
+++ b/vpx/vp8dx.h
@@ -77,13 +77,6 @@ enum vp8_dec_control_id {
   /** For testing. */
   VP9_INVERT_TILE_DECODE_ORDER,
 
-  /** control function to set the vp9 decoder into using the least recently
-   * used frame buffer when a new buffer is requested. Takes an int and if
-   * the value is zero will turn off using lru cache. The value of zero is
-   * the default. If the value is anything besides zero, then that will turn
-   * on lru cache.*/
-  VP9D_SET_FRAME_BUFFER_LRU_CACHE,
-
   VP8_DECODER_CTRL_ID_MAX
 };
 
@@ -115,7 +108,6 @@ VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_USED,      int *)
 VPX_CTRL_USE_TYPE(VP8D_SET_DECRYPTOR,          vp8_decrypt_init *)
 VPX_CTRL_USE_TYPE(VP9D_GET_DISPLAY_SIZE,       int *)
 VPX_CTRL_USE_TYPE(VP9_INVERT_TILE_DECODE_ORDER, int)
-VPX_CTRL_USE_TYPE(VP9D_SET_FRAME_BUFFER_LRU_CACHE, int)
 
 /*! @} - end defgroup vp8_decoder */
 
diff --git a/vpx/vpx_codec.mk b/vpx/vpx_codec.mk
index df3ff6ef1..549c24908 100644
--- a/vpx/vpx_codec.mk
+++ b/vpx/vpx_codec.mk
@@ -27,7 +27,6 @@ API_DOC_SRCS-yes += vpx_codec.h
 API_DOC_SRCS-yes += vpx_decoder.h
 API_DOC_SRCS-yes += vpx_encoder.h
 API_DOC_SRCS-yes += vpx_image.h
-API_DOC_SRCS-yes += vpx_external_frame_buffer.h
 
 API_SRCS-yes                += src/vpx_decoder.c
 API_SRCS-yes                += vpx_decoder.h
@@ -39,5 +38,4 @@ API_SRCS-yes                += src/vpx_image.c
 API_SRCS-yes                += vpx_codec.h
 API_SRCS-yes                += vpx_codec.mk
 API_SRCS-yes                += vpx_image.h
-API_SRCS-yes                += vpx_external_frame_buffer.h
 API_SRCS-$(BUILD_LIBVPX)    += vpx_integer.h
diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h
index 24be82d76..7356baea3 100644
--- a/vpx/vpx_decoder.h
+++ b/vpx/vpx_decoder.h
@@ -30,7 +30,6 @@ extern "C" {
 #endif
 
 #include "./vpx_codec.h"
-#include "./vpx_external_frame_buffer.h"
 
   /*!\brief Current ABI version number
    *
@@ -40,7 +39,7 @@ extern "C" {
    * types, removing or reassigning enums, adding/removing/rearranging
    * fields to structures
    */
-#define VPX_DECODER_ABI_VERSION (3 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/
+#define VPX_DECODER_ABI_VERSION (2 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/
 
   /*! \brief Decoder capabilities bitfield
    *
@@ -67,8 +66,6 @@ extern "C" {
    */
 #define VPX_CODEC_CAP_FRAME_THREADING   0x200000 /**< Can support frame-based
                                                       multi-threading */
-#define VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER 0x400000 /**< Can support external
-                                                      frame buffers */
 
 #define VPX_CODEC_USE_POSTPROC   0x10000 /**< Postprocess decoded frame */
 #define VPX_CODEC_USE_ERROR_CONCEALMENT 0x20000 /**< Conceal errors in decoded
@@ -329,50 +326,6 @@ extern "C" {
 
   /*!@} - end defgroup cap_put_slice*/
 
-  /*!\defgroup cap_external_frame_buffer External Frame Buffer Functions
-   *
-   * The following section is required to be implemented for all decoders
-   * that advertise the VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER capability.
-   * Calling this function for codecs that don't advertise this capability
-   * will result in an error code being returned, usually VPX_CODEC_ERROR.
-   *
-   * \note
-   * Currently this only works with VP9.
-   * @{
-   */
-
-  /*!\brief Pass in external frame buffers for the decoder to use.
-   *
-   * Registers a given function to be called when the current frame to
-   * decode will be bigger than the external frame buffer size. This
-   * function must be called before the first call to decode or libvpx
-   * will assume the default behavior of allocating frame buffers internally.
-   * Frame buffers with a size of 0 are valid.
-   *
-   * \param[in] ctx          Pointer to this instance's context
-   * \param[in] fb_list      Pointer to array of frame buffers
-   * \param[in] fb_count     Number of elements in frame buffer array
-   * \param[in] cb           Pointer to the callback function
-   * \param[in] user_priv    User's private data
-   *
-   * \retval #VPX_CODEC_OK
-   *     External frame buffers passed into the decoder.
-   * \retval #VPX_CODEC_ERROR
-   *     Decoder context not initialized, or algorithm not capable of
-   *     using external frame buffers.
-   *
-   * \note
-   * When decoding VP9, the application must pass in at least
-   * #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS external frame
-   * buffers.
-   */
-  vpx_codec_err_t vpx_codec_set_frame_buffers(
-      vpx_codec_ctx_t *ctx,
-      vpx_codec_frame_buffer_t *fb_list, int fb_count,
-      vpx_realloc_frame_buffer_cb_fn_t cb, void *user_priv);
-
-  /*!@} - end defgroup cap_external_frame_buffer */
-
   /*!@} - end defgroup decoder*/
 #ifdef __cplusplus
 }
diff --git a/vpx/vpx_external_frame_buffer.h b/vpx/vpx_external_frame_buffer.h
deleted file mode 100644
index 98ce5fdfe..000000000
--- a/vpx/vpx_external_frame_buffer.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VPX_VPX_EXTERNAL_FRAME_BUFFER_H_
-#define VPX_VPX_EXTERNAL_FRAME_BUFFER_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "./vpx_integer.h"
-
-/*!\brief The maximum number of work buffers used by libvpx.
- */
-#define VPX_MAXIMUM_WORK_BUFFERS 1
-
-/*!\brief The maximum number of reference buffers that a VP9 encoder may use.
- */
-#define VP9_MAXIMUM_REF_BUFFERS 8
-
-/*!\brief External frame buffer
- *
- * This structure is used to hold external frame buffers passed into the
- * decoder by the application.
- */
-typedef struct vpx_codec_frame_buffer {
-  uint8_t *data;    /**< Pointer to the data buffer */
-  size_t size;      /**< Size of data in bytes */
-  void *frame_priv;  /**< Frame's private data */
-} vpx_codec_frame_buffer_t;
-
-/*!\brief realloc frame buffer callback prototype
- *
- * This callback is invoked by the decoder to notify the application one
- * of the external frame buffers must increase in size, in order for the
- * decode call to complete. The callback must allocate at least new_size in
- * bytes and assign it to fb->data. Then the callback must set fb->size to
- * the allocated size. The application does not need to align the allocated
- * data. The callback is usually triggered by a frame size change. On success
- * the callback must return 0. Any failure the callback must return a value
- * less than 0.
- *
- * \param[in] user_priv    User's private data
- * \param[in] new_size     Size in bytes needed by the buffer.
- * \param[in/out] fb       Pointer to frame buffer to increase size.
- */
-typedef int (*vpx_realloc_frame_buffer_cb_fn_t)(
-    void *user_priv, size_t new_size, vpx_codec_frame_buffer_t *fb);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // VPX_VPX_EXTERNAL_FRAME_BUFFER_H_
diff --git a/vpx/vpx_image.h b/vpx/vpx_image.h
index 79e11aa01..d27325cad 100644
--- a/vpx/vpx_image.h
+++ b/vpx/vpx_image.h
@@ -13,13 +13,13 @@
  * \brief Describes the vpx image descriptor and associated operations
  *
  */
+#ifndef VPX_VPX_IMAGE_H_
+#define VPX_VPX_IMAGE_H_
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#ifndef VPX_VPX_IMAGE_H_
-#define VPX_VPX_IMAGE_H_
-
   /*!\brief Current ABI version number
    *
    * \internal
@@ -237,7 +237,8 @@ extern "C" {
    */
   void vpx_img_free(vpx_image_t *img);
 
-#endif
 #ifdef __cplusplus
-}
+}  // extern "C"
+#endif
+
 #endif  // VPX_VPX_IMAGE_H_
diff --git a/vpx_ports/arm.h b/vpx_ports/arm.h
index 2562d9c1d..1e4a8e288 100644
--- a/vpx_ports/arm.h
+++ b/vpx_ports/arm.h
@@ -14,6 +14,10 @@
 #include <stdlib.h>
 #include "vpx_config.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*ARMv5TE "Enhanced DSP" instructions.*/
 #define HAS_EDSP  0x01
 /*ARMv6 "Parallel" or "Media" instructions.*/
@@ -23,5 +27,9 @@
 
 int arm_cpu_caps(void);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VPX_PORTS_ARM_H_
 
diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h
index a9d51a3bc..603e2b66c 100644
--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -14,6 +14,10 @@
 #include <stdlib.h>
 #include "vpx_config.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef enum {
   VPX_CPU_UNKNOWN = -1,
   VPX_CPU_AMD,
@@ -256,5 +260,9 @@ x87_set_double_precision(void) {
 
 
 extern void vpx_reset_mmx_state(void);
-#endif  // VPX_PORTS_X86_H_
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VPX_PORTS_X86_H_
diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c
index 994e8430c..693125a0f 100644
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -19,18 +19,10 @@
 /****************************************************************************
  *
  ****************************************************************************/
-
-#define yv12_align_addr(addr, align) \
-  (void*)(((size_t)(addr) + ((align) - 1)) & (size_t)-(align))
-
 int
 vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) {
   if (ybf) {
-    // If libvpx is using external frame buffers then buffer_alloc_sz must
-    // not be set.
-    if (ybf->buffer_alloc_sz > 0) {
-      vpx_free(ybf->buffer_alloc);
-    }
+    vpx_free(ybf->buffer_alloc);
 
     /* buffer_alloc isn't accessed by most functions.  Rather y_buffer,
       u_buffer and v_buffer point to buffer_alloc and are used.  Clear out
@@ -116,9 +108,7 @@ int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
 
 int vp9_free_frame_buffer(YV12_BUFFER_CONFIG *ybf) {
   if (ybf) {
-    if (ybf->buffer_alloc_sz > 0) {
-      vpx_free(ybf->buffer_alloc);
-    }
+    vpx_free(ybf->buffer_alloc);
 
     /* buffer_alloc isn't accessed by most functions.  Rather y_buffer,
       u_buffer and v_buffer point to buffer_alloc and are used.  Clear out
@@ -133,10 +123,7 @@ int vp9_free_frame_buffer(YV12_BUFFER_CONFIG *ybf) {
 
 int vp9_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
                              int width, int height,
-                             int ss_x, int ss_y, int border,
-                             vpx_codec_frame_buffer_t *ext_fb,
-                             vpx_realloc_frame_buffer_cb_fn_t cb,
-                             void *user_priv) {
+                             int ss_x, int ss_y, int border) {
   if (ybf) {
     const int aligned_width = (width + 7) & ~7;
     const int aligned_height = (height + 7) & ~7;
@@ -161,48 +148,25 @@ int vp9_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
 #else
     const int frame_size = yplane_size + 2 * uvplane_size;
 #endif
+    if (frame_size > ybf->buffer_alloc_sz) {
+      // Allocation to hold larger frame, or first allocation.
+      if (ybf->buffer_alloc)
+        vpx_free(ybf->buffer_alloc);
+      ybf->buffer_alloc = vpx_memalign(32, frame_size);
+      if (!ybf->buffer_alloc)
+        return -1;
 
-    if (ext_fb != NULL) {
-      const int align_addr_extra_size = 31;
-      const size_t external_frame_size = frame_size + align_addr_extra_size;
-      if (external_frame_size > ext_fb->size) {
-        // Allocation to hold larger frame, or first allocation.
-        if (cb(user_priv, external_frame_size, ext_fb) < 0) {
-          return -1;
-        }
-
-        if (ext_fb->data == NULL || ext_fb->size < external_frame_size) {
-          return -1;
-        }
-
-        // This memset is needed for fixing valgrind error from C loop filter
-        // due to access uninitialized memory in frame border. It could be
-        // removed if border is totally removed.
-        vpx_memset(ext_fb->data, 0, ext_fb->size);
-
-        ybf->buffer_alloc = yv12_align_addr(ext_fb->data, 32);
-      }
-    } else {
-      if (frame_size > ybf->buffer_alloc_sz) {
-        // Allocation to hold larger frame, or first allocation.
-        if (ybf->buffer_alloc)
-          vpx_free(ybf->buffer_alloc);
-        ybf->buffer_alloc = vpx_memalign(32, frame_size);
-        if (!ybf->buffer_alloc)
-          return -1;
-
-        ybf->buffer_alloc_sz = frame_size;
-
-        // This memset is needed for fixing valgrind error from C loop filter
-        // due to access uninitialized memory in frame boarder. It could be
-        // removed if border is totally removed.
-        vpx_memset(ybf->buffer_alloc, 0, ybf->buffer_alloc_sz);
-      }
+      ybf->buffer_alloc_sz = frame_size;
 
-      if (ybf->buffer_alloc_sz < frame_size)
-        return -1;
+      // This memset is needed for fixing valgrind error from C loop filter
+      // due to access uninitialized memory in frame boarder. It could be
+      // removed if border is totally removed.
+      vpx_memset(ybf->buffer_alloc, 0, ybf->buffer_alloc_sz);
     }
 
+    if (ybf->buffer_alloc_sz < frame_size)
+      return -1;
+
     /* Only support allocating buffers that have a border that's a multiple
      * of 32. The border restriction is required to get 16-byte alignment of
      * the start of the chroma rows without introducing an arbitrary gap
@@ -250,8 +214,7 @@ int vp9_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
                            int ss_x, int ss_y, int border) {
   if (ybf) {
     vp9_free_frame_buffer(ybf);
-    return vp9_realloc_frame_buffer(ybf, width, height, ss_x, ss_y, border,
-                                    NULL, NULL, NULL);
+    return vp9_realloc_frame_buffer(ybf, width, height, ss_x, ss_y, border);
   }
   return -2;
 }
diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h
index 610e7d280..8f39eb769 100644
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -15,7 +15,6 @@
 extern "C" {
 #endif
 
-#include "vpx/vpx_external_frame_buffer.h"
 #include "vpx/vpx_integer.h"
 
 #define VP8BORDERINPIXELS           32
@@ -66,20 +65,9 @@ extern "C" {
   int vp9_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
                              int width, int height, int ss_x, int ss_y,
                              int border);
-
-  // Updates the yv12 buffer config with the frame buffer. If ext_fb is not
-  // NULL then libvpx is using external frame buffers. The function will
-  // check if the frame buffer is big enough to fit the decoded frame and
-  // try to reallocate the frame buffer. If ext_fb is not NULL and the frame
-  // buffer is not big enough libvpx will call cb with minimum size in bytes.
-  //
-  // Returns 0 on success. Returns < 0 on failure.
   int vp9_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
                                int width, int height, int ss_x, int ss_y,
-                               int border,
-                               vpx_codec_frame_buffer_t *ext_fb,
-                               vpx_realloc_frame_buffer_cb_fn_t cb,
-                               void *user_priv);
+                               int border);
   int vp9_free_frame_buffer(YV12_BUFFER_CONFIG *ybf);
 
 #ifdef __cplusplus
diff --git a/vpxdec.c b/vpxdec.c
index fc344a162..6d5ca2469 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -39,7 +39,7 @@ static const char *exec_name;
 
 static const struct {
   char const *name;
-  const vpx_codec_iface_t *(*iface)(void);
+  vpx_codec_iface_t *(*iface)(void);
   uint32_t fourcc;
 } ifaces[] = {
 #if CONFIG_VP8_DECODER
@@ -87,10 +87,6 @@ static const arg_def_t error_concealment = ARG_DEF(NULL, "error-concealment", 0,
                                                    "Enable decoder error-concealment");
 static const arg_def_t scalearg = ARG_DEF("S", "scale", 0,
                                             "Scale output frames uniformly");
-static const arg_def_t fb_arg =
-    ARG_DEF(NULL, "frame-buffers", 1, "Number of frame buffers to use");
-static const arg_def_t fb_lru_arg =
-    ARG_DEF(NULL, "frame-buffers-lru", 1, "Turn on/off frame buffer lru");
 
 
 static const arg_def_t md5arg = ARG_DEF(NULL, "md5", 0,
@@ -99,7 +95,7 @@ static const arg_def_t md5arg = ARG_DEF(NULL, "md5", 0,
 static const arg_def_t *all_args[] = {
   &codecarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg,
   &progressarg, &limitarg, &skiparg, &postprocarg, &summaryarg, &outputfile,
-  &threadsarg, &verbosearg, &scalearg, &fb_arg, &fb_lru_arg,
+  &threadsarg, &verbosearg, &scalearg,
   &md5arg,
   &error_concealment,
   NULL
@@ -329,31 +325,6 @@ void show_progress(int frame_in, int frame_out, unsigned long dx_time) {
           (float)frame_out * 1000000.0 / (float)dx_time);
 }
 
-// Called by libvpx if the frame buffer size needs to increase.
-//
-// Parameters:
-// user_priv    Data passed into libvpx.
-// new_size     Minimum size needed by libvpx to decompress the next frame.
-// fb           Pointer to the frame buffer to update.
-//
-// Returns 0 on success. Returns < 0 on failure.
-int realloc_vp9_frame_buffer(void *user_priv, size_t new_size,
-                             vpx_codec_frame_buffer_t *fb) {
-  (void)user_priv;
-  if (!fb)
-    return -1;
-
-  free(fb->data);
-  fb->data = (uint8_t*)malloc(new_size);
-  if (!fb->data) {
-    fb->size = 0;
-    return -1;
-  }
-
-  fb->size = new_size;
-  return 0;
-}
-
 void generate_filename(const char *pattern, char *out, size_t q_len,
                        unsigned int d_w, unsigned int d_h,
                        unsigned int frame_in) {
@@ -499,9 +470,6 @@ int main_loop(int argc, const char **argv_) {
   int                     do_scale = 0;
   vpx_image_t             *scaled_img = NULL;
   int                     frame_avail, got_data;
-  int                     num_external_frame_buffers = 0;
-  int                     fb_lru_cache = 0;
-  vpx_codec_frame_buffer_t *frame_buffers = NULL;
 
   const char *outfile_pattern = NULL;
   char outfile_name[PATH_MAX] = {0};
@@ -568,10 +536,6 @@ int main_loop(int argc, const char **argv_) {
       quiet = 0;
     else if (arg_match(&arg, &scalearg, argi))
       do_scale = 1;
-    else if (arg_match(&arg, &fb_arg, argi))
-      num_external_frame_buffers = arg_parse_uint(&arg);
-    else if (arg_match(&arg, &fb_lru_arg, argi))
-      fb_lru_cache = arg_parse_uint(&arg);
 
 #if CONFIG_VP8_DECODER
     else if (arg_match(&arg, &addnoise_level, argi)) {
@@ -762,30 +726,6 @@ int main_loop(int argc, const char **argv_) {
     arg_skip--;
   }
 
-  if (num_external_frame_buffers > 0) {
-    // Allocate the frame buffer list, setting all of the values to 0.
-    // Including the size of frame buffers. Libvpx will request the
-    // application to realloc the frame buffer data if the size is too small.
-    frame_buffers = (vpx_codec_frame_buffer_t*)calloc(
-        num_external_frame_buffers, sizeof(*frame_buffers));
-    if (vpx_codec_set_frame_buffers(&decoder, frame_buffers,
-                                    num_external_frame_buffers,
-                                    realloc_vp9_frame_buffer,
-                                    NULL)) {
-      fprintf(stderr, "Failed to configure external frame buffers: %s\n",
-              vpx_codec_error(&decoder));
-      return EXIT_FAILURE;
-    }
-  }
-
-  if (fb_lru_cache > 0 &&
-      vpx_codec_control(&decoder, VP9D_SET_FRAME_BUFFER_LRU_CACHE,
-                        fb_lru_cache)) {
-    fprintf(stderr, "Failed to set frame buffer lru cache: %s\n",
-            vpx_codec_error(&decoder));
-    return EXIT_FAILURE;
-  }
-
   frame_avail = 1;
   got_data = 0;
 
@@ -839,13 +779,12 @@ int main_loop(int argc, const char **argv_) {
     if (progress)
       show_progress(frame_in, frame_out, dx_time);
 
-    if (!noblit) {
-      if (frame_out == 1 && img && single_file && !do_md5 && use_y4m)
-        y4m_write_file_header(outfile,
-                              vpx_input_ctx.width, vpx_input_ctx.height,
-                              &vpx_input_ctx.framerate, img->fmt);
+    if (!noblit && img) {
+      const int PLANES_YUV[] = {VPX_PLANE_Y, VPX_PLANE_U, VPX_PLANE_V};
+      const int PLANES_YVU[] = {VPX_PLANE_Y, VPX_PLANE_V, VPX_PLANE_U};
+      const int *planes = flipuv ? PLANES_YVU : PLANES_YUV;
 
-      if (img && do_scale) {
+      if (do_scale) {
         if (frame_out == 1) {
           // If the output frames are to be scaled to a fixed display size then
           // use the width and height specified in the container. If either of
@@ -876,33 +815,49 @@ int main_loop(int argc, const char **argv_) {
         }
       }
 
-      if (img) {
-        const int PLANES_YUV[] = {VPX_PLANE_Y, VPX_PLANE_U, VPX_PLANE_V};
-        const int PLANES_YVU[] = {VPX_PLANE_Y, VPX_PLANE_V, VPX_PLANE_U};
-
-        const int *planes = flipuv ? PLANES_YVU : PLANES_YUV;
+      if (single_file) {
+        if (use_y4m) {
+          char buf[Y4M_BUFFER_SIZE] = {0};
+          size_t len = 0;
+          if (frame_out == 1) {
+            // Y4M file header
+            len = y4m_write_file_header(buf, sizeof(buf),
+                                        vpx_input_ctx.width,
+                                        vpx_input_ctx.height,
+                                        &vpx_input_ctx.framerate, img->fmt);
+            if (do_md5) {
+              MD5Update(&md5_ctx, (md5byte *)buf, len);
+            } else {
+              fputs(buf, outfile);
+            }
+          }
 
-        if (!single_file) {
-          generate_filename(outfile_pattern, outfile_name, PATH_MAX,
-                            img->d_w, img->d_h, frame_in);
+          // Y4M frame header
+          len = y4m_write_frame_header(buf, sizeof(buf));
           if (do_md5) {
-            MD5Init(&md5_ctx);
-            update_image_md5(img, planes, &md5_ctx);
-            MD5Final(md5_digest, &md5_ctx);
-            print_md5(md5_digest, outfile_name);
+            MD5Update(&md5_ctx, (md5byte *)buf, len);
           } else {
-            outfile = open_outfile(outfile_name);
-            write_image_file(img, planes, outfile);
-            fclose(outfile);
+            fputs(buf, outfile);
           }
+        }
+
+        if (do_md5) {
+          update_image_md5(img, planes, &md5_ctx);
         } else {
-          if (do_md5) {
-            update_image_md5(img, planes, &md5_ctx);
-          } else {
-            if (use_y4m)
-              y4m_write_frame_header(outfile);
-            write_image_file(img, planes, outfile);
-          }
+          write_image_file(img, planes, outfile);
+        }
+      } else {
+        generate_filename(outfile_pattern, outfile_name, PATH_MAX,
+                          img->d_w, img->d_h, frame_in);
+        if (do_md5) {
+          MD5Init(&md5_ctx);
+          update_image_md5(img, planes, &md5_ctx);
+          MD5Final(md5_digest, &md5_ctx);
+          print_md5(md5_digest, outfile_name);
+        } else {
+          outfile = open_outfile(outfile_name);
+          write_image_file(img, planes, outfile);
+          fclose(outfile);
         }
       }
     }
@@ -942,10 +897,6 @@ fail:
     free(buf);
 
   if (scaled_img) vpx_img_free(scaled_img);
-  for (i = 0; i < num_external_frame_buffers; ++i) {
-    free(frame_buffers[i].data);
-  }
-  free(frame_buffers);
 
   fclose(infile);
   free(argv);
diff --git a/vpxenc.c b/vpxenc.c
index f19300acf..6ad3b9bd6 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -38,6 +38,7 @@
 #include "./tools_common.h"
 #include "vpx_ports/mem_ops.h"
 #include "vpx_ports/vpx_timer.h"
+#include "./rate_hist.h"
 #include "./vpxstats.h"
 #include "./warnings.h"
 #include "./webmenc.h"
@@ -61,8 +62,8 @@ static const char *exec_name;
 
 static const struct codec_item {
   char const              *name;
-  const vpx_codec_iface_t *(*iface)(void);
-  const vpx_codec_iface_t *(*dx_iface)(void);
+  vpx_codec_iface_t *(*iface)(void);
+  vpx_codec_iface_t *(*dx_iface)(void);
   unsigned int             fourcc;
 } codecs[] = {
 #if CONFIG_VP8_ENCODER && CONFIG_VP8_DECODER
@@ -472,274 +473,6 @@ void usage_exit() {
   exit(EXIT_FAILURE);
 }
 
-
-#define HIST_BAR_MAX 40
-struct hist_bucket {
-  int low, high, count;
-};
-
-
-static int merge_hist_buckets(struct hist_bucket *bucket,
-                              int *buckets_,
-                              int max_buckets) {
-  int small_bucket = 0, merge_bucket = INT_MAX, big_bucket = 0;
-  int buckets = *buckets_;
-  int i;
-
-  /* Find the extrema for this list of buckets */
-  big_bucket = small_bucket = 0;
-  for (i = 0; i < buckets; i++) {
-    if (bucket[i].count < bucket[small_bucket].count)
-      small_bucket = i;
-    if (bucket[i].count > bucket[big_bucket].count)
-      big_bucket = i;
-  }
-
-  /* If we have too many buckets, merge the smallest with an adjacent
-   * bucket.
-   */
-  while (buckets > max_buckets) {
-    int last_bucket = buckets - 1;
-
-    /* merge the small bucket with an adjacent one. */
-    if (small_bucket == 0)
-      merge_bucket = 1;
-    else if (small_bucket == last_bucket)
-      merge_bucket = last_bucket - 1;
-    else if (bucket[small_bucket - 1].count < bucket[small_bucket + 1].count)
-      merge_bucket = small_bucket - 1;
-    else
-      merge_bucket = small_bucket + 1;
-
-    assert(abs(merge_bucket - small_bucket) <= 1);
-    assert(small_bucket < buckets);
-    assert(big_bucket < buckets);
-    assert(merge_bucket < buckets);
-
-    if (merge_bucket < small_bucket) {
-      bucket[merge_bucket].high = bucket[small_bucket].high;
-      bucket[merge_bucket].count += bucket[small_bucket].count;
-    } else {
-      bucket[small_bucket].high = bucket[merge_bucket].high;
-      bucket[small_bucket].count += bucket[merge_bucket].count;
-      merge_bucket = small_bucket;
-    }
-
-    assert(bucket[merge_bucket].low != bucket[merge_bucket].high);
-
-    buckets--;
-
-    /* Remove the merge_bucket from the list, and find the new small
-     * and big buckets while we're at it
-     */
-    big_bucket = small_bucket = 0;
-    for (i = 0; i < buckets; i++) {
-      if (i > merge_bucket)
-        bucket[i] = bucket[i + 1];
-
-      if (bucket[i].count < bucket[small_bucket].count)
-        small_bucket = i;
-      if (bucket[i].count > bucket[big_bucket].count)
-        big_bucket = i;
-    }
-
-  }
-
-  *buckets_ = buckets;
-  return bucket[big_bucket].count;
-}
-
-
-static void show_histogram(const struct hist_bucket *bucket,
-                           int                       buckets,
-                           int                       total,
-                           int                       scale) {
-  const char *pat1, *pat2;
-  int i;
-
-  switch ((int)(log(bucket[buckets - 1].high) / log(10)) + 1) {
-    case 1:
-    case 2:
-      pat1 = "%4d %2s: ";
-      pat2 = "%4d-%2d: ";
-      break;
-    case 3:
-      pat1 = "%5d %3s: ";
-      pat2 = "%5d-%3d: ";
-      break;
-    case 4:
-      pat1 = "%6d %4s: ";
-      pat2 = "%6d-%4d: ";
-      break;
-    case 5:
-      pat1 = "%7d %5s: ";
-      pat2 = "%7d-%5d: ";
-      break;
-    case 6:
-      pat1 = "%8d %6s: ";
-      pat2 = "%8d-%6d: ";
-      break;
-    case 7:
-      pat1 = "%9d %7s: ";
-      pat2 = "%9d-%7d: ";
-      break;
-    default:
-      pat1 = "%12d %10s: ";
-      pat2 = "%12d-%10d: ";
-      break;
-  }
-
-  for (i = 0; i < buckets; i++) {
-    int len;
-    int j;
-    float pct;
-
-    pct = (float)(100.0 * bucket[i].count / total);
-    len = HIST_BAR_MAX * bucket[i].count / scale;
-    if (len < 1)
-      len = 1;
-    assert(len <= HIST_BAR_MAX);
-
-    if (bucket[i].low == bucket[i].high)
-      fprintf(stderr, pat1, bucket[i].low, "");
-    else
-      fprintf(stderr, pat2, bucket[i].low, bucket[i].high);
-
-    for (j = 0; j < HIST_BAR_MAX; j++)
-      fprintf(stderr, j < len ? "=" : " ");
-    fprintf(stderr, "\t%5d (%6.2f%%)\n", bucket[i].count, pct);
-  }
-}
-
-
-static void show_q_histogram(const int counts[64], int max_buckets) {
-  struct hist_bucket bucket[64];
-  int buckets = 0;
-  int total = 0;
-  int scale;
-  int i;
-
-
-  for (i = 0; i < 64; i++) {
-    if (counts[i]) {
-      bucket[buckets].low = bucket[buckets].high = i;
-      bucket[buckets].count = counts[i];
-      buckets++;
-      total += counts[i];
-    }
-  }
-
-  fprintf(stderr, "\nQuantizer Selection:\n");
-  scale = merge_hist_buckets(bucket, &buckets, max_buckets);
-  show_histogram(bucket, buckets, total, scale);
-}
-
-
-#define RATE_BINS (100)
-struct rate_hist {
-  int64_t            *pts;
-  int                *sz;
-  int                 samples;
-  int                 frames;
-  struct hist_bucket  bucket[RATE_BINS];
-  int                 total;
-};
-
-
-static void init_rate_histogram(struct rate_hist *hist,
-                                const vpx_codec_enc_cfg_t *cfg,
-                                const vpx_rational_t *fps) {
-  int i;
-
-  /* Determine the number of samples in the buffer. Use the file's framerate
-   * to determine the number of frames in rc_buf_sz milliseconds, with an
-   * adjustment (5/4) to account for alt-refs
-   */
-  hist->samples = cfg->rc_buf_sz * 5 / 4 * fps->num / fps->den / 1000;
-
-  /* prevent division by zero */
-  if (hist->samples == 0)
-    hist->samples = 1;
-
-  hist->pts = calloc(hist->samples, sizeof(*hist->pts));
-  hist->sz = calloc(hist->samples, sizeof(*hist->sz));
-  for (i = 0; i < RATE_BINS; i++) {
-    hist->bucket[i].low = INT_MAX;
-    hist->bucket[i].high = 0;
-    hist->bucket[i].count = 0;
-  }
-}
-
-
-static void destroy_rate_histogram(struct rate_hist *hist) {
-  free(hist->pts);
-  free(hist->sz);
-}
-
-
-static void update_rate_histogram(struct rate_hist          *hist,
-                                  const vpx_codec_enc_cfg_t *cfg,
-                                  const vpx_codec_cx_pkt_t  *pkt) {
-  int i, idx;
-  int64_t now, then, sum_sz = 0, avg_bitrate;
-
-  now = pkt->data.frame.pts * 1000
-        * (uint64_t)cfg->g_timebase.num / (uint64_t)cfg->g_timebase.den;
-
-  idx = hist->frames++ % hist->samples;
-  hist->pts[idx] = now;
-  hist->sz[idx] = (int)pkt->data.frame.sz;
-
-  if (now < cfg->rc_buf_initial_sz)
-    return;
-
-  then = now;
-
-  /* Sum the size over the past rc_buf_sz ms */
-  for (i = hist->frames; i > 0 && hist->frames - i < hist->samples; i--) {
-    int i_idx = (i - 1) % hist->samples;
-
-    then = hist->pts[i_idx];
-    if (now - then > cfg->rc_buf_sz)
-      break;
-    sum_sz += hist->sz[i_idx];
-  }
-
-  if (now == then)
-    return;
-
-  avg_bitrate = sum_sz * 8 * 1000 / (now - then);
-  idx = (int)(avg_bitrate * (RATE_BINS / 2) / (cfg->rc_target_bitrate * 1000));
-  if (idx < 0)
-    idx = 0;
-  if (idx > RATE_BINS - 1)
-    idx = RATE_BINS - 1;
-  if (hist->bucket[idx].low > avg_bitrate)
-    hist->bucket[idx].low = (int)avg_bitrate;
-  if (hist->bucket[idx].high < avg_bitrate)
-    hist->bucket[idx].high = (int)avg_bitrate;
-  hist->bucket[idx].count++;
-  hist->total++;
-}
-
-
-static void show_rate_histogram(struct rate_hist          *hist,
-                                const vpx_codec_enc_cfg_t *cfg,
-                                int                        max_buckets) {
-  int i, scale;
-  int buckets = 0;
-
-  for (i = 0; i < RATE_BINS; i++) {
-    if (hist->bucket[i].low == INT_MAX)
-      continue;
-    hist->bucket[buckets++] = hist->bucket[i];
-  }
-
-  fprintf(stderr, "\nRate (over %dms window):\n", cfg->rc_buf_sz);
-  scale = merge_hist_buckets(hist->bucket, &buckets, max_buckets);
-  show_histogram(hist->bucket, buckets, hist->total, scale);
-}
-
 #define mmin(a, b)  ((a) < (b) ? (a) : (b))
 static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2,
                           int yloc[4], int uloc[4], int vloc[4]) {
@@ -884,7 +617,7 @@ struct stream_state {
   struct stream_state      *next;
   struct stream_config      config;
   FILE                     *file;
-  struct rate_hist          rate_hist;
+  struct rate_hist         *rate_hist;
   struct EbmlGlobal         ebml;
   uint32_t                  hash;
   uint64_t                  psnr_sse_total;
@@ -1576,7 +1309,7 @@ static void get_cx_data(struct stream_state *stream,
         if (!global->quiet)
           fprintf(stderr, " %6luF", (unsigned long)pkt->data.frame.sz);
 
-        update_rate_histogram(&stream->rate_hist, cfg, pkt);
+        update_rate_histogram(stream->rate_hist, cfg, pkt);
         if (stream->config.write_webm) {
           /* Update the hash */
           if (!stream->ebml.debug)
@@ -1590,7 +1323,7 @@ static void get_cx_data(struct stream_state *stream,
             ivf_header_pos = ftello(stream->file);
             fsize = pkt->data.frame.sz;
 
-            ivf_write_frame_header(stream->file, pkt);
+            ivf_write_frame_header(stream->file, pkt->data.frame.pts, fsize);
           } else {
             fsize += pkt->data.frame.sz;
 
@@ -1881,9 +1614,9 @@ int main(int argc, const char **argv_) {
                       : VPX_IMG_FMT_YV12,
                       input.width, input.height, 32);
 
-      FOREACH_STREAM(init_rate_histogram(&stream->rate_hist,
-                                         &stream->config.cfg,
-                                         &global.framerate));
+      FOREACH_STREAM(stream->rate_hist =
+                         init_rate_histogram(&stream->config.cfg,
+                                             &global.framerate));
     }
 
     FOREACH_STREAM(setup_pass(stream, &global, pass));
@@ -2020,10 +1753,10 @@ int main(int argc, const char **argv_) {
                                     global.show_q_hist_buckets));
 
   if (global.show_rate_hist_buckets)
-    FOREACH_STREAM(show_rate_histogram(&stream->rate_hist,
+    FOREACH_STREAM(show_rate_histogram(stream->rate_hist,
                                        &stream->config.cfg,
                                        global.show_rate_hist_buckets));
-  FOREACH_STREAM(destroy_rate_histogram(&stream->rate_hist));
+  FOREACH_STREAM(destroy_rate_histogram(stream->rate_hist));
 
 #if CONFIG_INTERNAL_STATS
   /* TODO(jkoleszar): This doesn't belong in this executable. Do it for now,
diff --git a/vpxenc.h b/vpxenc.h
index 5cb3f853a..5103ee65a 100644
--- a/vpxenc.h
+++ b/vpxenc.h
@@ -12,6 +12,10 @@
 
 #include "vpx/vpx_encoder.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 enum TestDecodeFatality {
   TEST_DECODE_OFF,
   TEST_DECODE_FATAL,
@@ -42,4 +46,8 @@ struct VpxEncoderConfig {
   int disable_warning_prompt;
 };
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VPXENC_H_
diff --git a/vpxstats.h b/vpxstats.h
index 18b3acdc0..9ce9c5320 100644
--- a/vpxstats.h
+++ b/vpxstats.h
@@ -15,6 +15,10 @@
 
 #include "vpx/vpx_encoder.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* This structure is used to abstract the different ways of handling
  * first pass statistics
  */
@@ -34,4 +38,8 @@ vpx_fixed_buf_t stats_get(stats_io_t *stats);
 
 double vp8_mse2psnr(double samples, double peak, double mse);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VPXSTATS_H_
diff --git a/warnings.h b/warnings.h
index ac3a4b63e..6b8ae6796 100644
--- a/warnings.h
+++ b/warnings.h
@@ -10,6 +10,10 @@
 #ifndef WARNINGS_H_
 #define WARNINGS_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct vpx_codec_enc_cfg;
 struct VpxEncoderConfig;
 
@@ -22,4 +26,8 @@ void check_encoder_config(int disable_prompt,
                           const struct VpxEncoderConfig *global_config,
                           const struct vpx_codec_enc_cfg *stream_config);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // WARNINGS_H_
diff --git a/webmdec.h b/webmdec.h
index 002fbe645..fa5a52eaf 100644
--- a/webmdec.h
+++ b/webmdec.h
@@ -12,6 +12,10 @@
 
 #include "./tools_common.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct nestegg;
 struct nestegg_packet;
 struct VpxInputContext;
@@ -37,4 +41,8 @@ int webm_guess_framerate(struct WebmInputContext *webm_ctx,
 
 void webm_free(struct WebmInputContext *webm_ctx);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // WEBMDEC_H_
diff --git a/webmenc.h b/webmenc.h
index f3bc3ecd2..362aa895f 100644
--- a/webmenc.h
+++ b/webmenc.h
@@ -23,6 +23,10 @@ typedef __int64 off_t;
 #include "tools_common.h"
 #include "vpx/vpx_encoder.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef off_t EbmlLoc;
 
 struct cue_entry {
@@ -84,4 +88,8 @@ void write_webm_block(struct EbmlGlobal *glob,
 
 void write_webm_file_footer(struct EbmlGlobal *glob, int hash);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // WEBMENC_H_
diff --git a/y4menc.c b/y4menc.c
index 8321b432e..8b1c95e2b 100644
--- a/y4menc.c
+++ b/y4menc.c
@@ -10,21 +10,18 @@
 
 #include "./y4menc.h"
 
-void y4m_write_file_header(FILE *file, int width, int height,
-                           const struct VpxRational *framerate,
-                           vpx_img_fmt_t fmt) {
-  const char *color = fmt == VPX_IMG_FMT_444A ? "C444alpha\n" :
-                      fmt == VPX_IMG_FMT_I444 ? "C444\n" :
-                      fmt == VPX_IMG_FMT_I422 ? "C422\n" :
-                      "C420jpeg\n";
+int y4m_write_file_header(char *buf, size_t len, int width, int height,
+                          const struct VpxRational *framerate,
+                          vpx_img_fmt_t fmt) {
+  const char *const color = fmt == VPX_IMG_FMT_444A ? "C444alpha\n" :
+                            fmt == VPX_IMG_FMT_I444 ? "C444\n" :
+                            fmt == VPX_IMG_FMT_I422 ? "C422\n" :
+                            "C420jpeg\n";
 
-  // Note: We can't output an aspect ratio here because IVF doesn't
-  // store one, and neither does VP8.
-  // That will have to wait until these tools support WebM natively.*/
-  fprintf(file, "YUV4MPEG2 W%u H%u F%u:%u I%c %s", width, height,
-          framerate->numerator, framerate->denominator, 'p', color);
+  return snprintf(buf, len, "YUV4MPEG2 W%u H%u F%u:%u I%c %s", width, height,
+                  framerate->numerator, framerate->denominator, 'p', color);
 }
 
-void y4m_write_frame_header(FILE *file) {
-  fprintf(file, "FRAME\n");
+int y4m_write_frame_header(char *buf, size_t len) {
+  return snprintf(buf, len, "FRAME\n");
 }
diff --git a/y4menc.h b/y4menc.h
index e5f7978a7..0fabf56eb 100644
--- a/y4menc.h
+++ b/y4menc.h
@@ -11,17 +11,23 @@
 #ifndef Y4MENC_H_
 #define Y4MENC_H_
 
-#include <stdio.h>
-
 #include "./tools_common.h"
 
 #include "vpx/vpx_decoder.h"
 
-void y4m_write_file_header(FILE *file, int width, int height,
-                           const struct VpxRational *framerate,
-                           vpx_img_fmt_t fmt);
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define Y4M_BUFFER_SIZE 128
 
-void y4m_write_frame_header(FILE *file);
+int y4m_write_file_header(char *buf, size_t len, int width, int height,
+                          const struct VpxRational *framerate,
+                          vpx_img_fmt_t fmt);
+int y4m_write_frame_header(char *buf, size_t len);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
 
 #endif  // Y4MENC_H_
diff --git a/y4minput.h b/y4minput.h
index 615debe61..d53eb651b 100644
--- a/y4minput.h
+++ b/y4minput.h
@@ -17,6 +17,10 @@
 # include <stdio.h>
 # include "vpx/vpx_image.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 
 
 typedef struct y4m_input y4m_input;
@@ -62,4 +66,8 @@ int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,
 void y4m_input_close(y4m_input *_y4m);
 int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *img);
 
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // Y4MINPUT_H_