12 files changed, 262 insertions, 134 deletions
diff --git a/examples.mk b/examples.mk
index a1d4eb68c..e0da4caa2 100644
--- a/examples.mk
+++ b/examples.mk
@@ -72,6 +72,7 @@ vpxdec.SRCS                 += vpx_ports/vpx_timer.h
 vpxdec.SRCS                 += vpx/vpx_integer.h
 vpxdec.SRCS                 += args.c args.h
 vpxdec.SRCS                 += ivfdec.c ivfdec.h
+vpxdec.SRCS                 += y4minput.c y4minput.h
 vpxdec.SRCS                 += tools_common.c tools_common.h
 vpxdec.SRCS                 += y4menc.c y4menc.h
 ifeq ($(CONFIG_LIBYUV),yes)
@@ -113,6 +114,7 @@ vpxenc.DESCRIPTION           = Full featured encoder
 EXAMPLES-$(CONFIG_VP9_ENCODER)      += vp9_spatial_svc_encoder.c
 vp9_spatial_svc_encoder.SRCS        += args.c args.h
 vp9_spatial_svc_encoder.SRCS        += ivfenc.c ivfenc.h
+vp9_spatial_svc_encoder.SRCS        += y4minput.c y4minput.h
 vp9_spatial_svc_encoder.SRCS        += tools_common.c tools_common.h
 vp9_spatial_svc_encoder.SRCS        += video_common.h
 vp9_spatial_svc_encoder.SRCS        += video_writer.h video_writer.c
@@ -129,6 +131,7 @@ endif
 
 EXAMPLES-$(CONFIG_ENCODERS)          += vpx_temporal_svc_encoder.c
 vpx_temporal_svc_encoder.SRCS        += ivfenc.c ivfenc.h
+vpx_temporal_svc_encoder.SRCS        += y4minput.c y4minput.h
 vpx_temporal_svc_encoder.SRCS        += tools_common.c tools_common.h
 vpx_temporal_svc_encoder.SRCS        += video_common.h
 vpx_temporal_svc_encoder.SRCS        += video_writer.h video_writer.c
@@ -138,6 +141,7 @@ vpx_temporal_svc_encoder.DESCRIPTION = Temporal SVC Encoder
 EXAMPLES-$(CONFIG_DECODERS)        += simple_decoder.c
 simple_decoder.GUID                 = D3BBF1E9-2427-450D-BBFF-B2843C1D44CC
 simple_decoder.SRCS                += ivfdec.h ivfdec.c
+simple_decoder.SRCS                += y4minput.c y4minput.h
 simple_decoder.SRCS                += tools_common.h tools_common.c
 simple_decoder.SRCS                += video_common.h
 simple_decoder.SRCS                += video_reader.h video_reader.c
@@ -147,6 +151,7 @@ simple_decoder.SRCS                += vpx_ports/msvc.h
 simple_decoder.DESCRIPTION          = Simplified decoder loop
 EXAMPLES-$(CONFIG_DECODERS)        += postproc.c
 postproc.SRCS                      += ivfdec.h ivfdec.c
+postproc.SRCS                      += y4minput.c y4minput.h
 postproc.SRCS                      += tools_common.h tools_common.c
 postproc.SRCS                      += video_common.h
 postproc.SRCS                      += video_reader.h video_reader.c
@@ -158,6 +163,7 @@ postproc.DESCRIPTION                = Decoder postprocessor control
 EXAMPLES-$(CONFIG_DECODERS)        += decode_to_md5.c
 decode_to_md5.SRCS                 += md5_utils.h md5_utils.c
 decode_to_md5.SRCS                 += ivfdec.h ivfdec.c
+decode_to_md5.SRCS                 += y4minput.c y4minput.h
 decode_to_md5.SRCS                 += tools_common.h tools_common.c
 decode_to_md5.SRCS                 += video_common.h
 decode_to_md5.SRCS                 += video_reader.h video_reader.c
@@ -168,6 +174,7 @@ decode_to_md5.GUID                  = 59120B9B-2735-4BFE-B022-146CA340FE42
 decode_to_md5.DESCRIPTION           = Frame by frame MD5 checksum
 EXAMPLES-$(CONFIG_ENCODERS)     += simple_encoder.c
 simple_encoder.SRCS             += ivfenc.h ivfenc.c
+simple_encoder.SRCS             += y4minput.c y4minput.h
 simple_encoder.SRCS             += tools_common.h tools_common.c
 simple_encoder.SRCS             += video_common.h
 simple_encoder.SRCS             += video_writer.h video_writer.c
@@ -176,6 +183,7 @@ simple_encoder.GUID              = 4607D299-8A71-4D2C-9B1D-071899B6FBFD
 simple_encoder.DESCRIPTION       = Simplified encoder loop
 EXAMPLES-$(CONFIG_VP9_ENCODER)  += vp9_lossless_encoder.c
 vp9_lossless_encoder.SRCS       += ivfenc.h ivfenc.c
+vp9_lossless_encoder.SRCS       += y4minput.c y4minput.h
 vp9_lossless_encoder.SRCS       += tools_common.h tools_common.c
 vp9_lossless_encoder.SRCS       += video_common.h
 vp9_lossless_encoder.SRCS       += video_writer.h video_writer.c
@@ -184,6 +192,7 @@ vp9_lossless_encoder.GUID        = B63C7C88-5348-46DC-A5A6-CC151EF93366
 vp9_lossless_encoder.DESCRIPTION = Simplified lossless VP9 encoder
 EXAMPLES-$(CONFIG_ENCODERS)     += twopass_encoder.c
 twopass_encoder.SRCS            += ivfenc.h ivfenc.c
+twopass_encoder.SRCS            += y4minput.c y4minput.h
 twopass_encoder.SRCS            += tools_common.h tools_common.c
 twopass_encoder.SRCS            += video_common.h
 twopass_encoder.SRCS            += video_writer.h video_writer.c
@@ -192,6 +201,7 @@ twopass_encoder.GUID             = 73494FA6-4AF9-4763-8FBB-265C92402FD8
 twopass_encoder.DESCRIPTION      = Two-pass encoder loop
 EXAMPLES-$(CONFIG_DECODERS)     += decode_with_drops.c
 decode_with_drops.SRCS          += ivfdec.h ivfdec.c
+decode_with_drops.SRCS          += y4minput.c y4minput.h
 decode_with_drops.SRCS          += tools_common.h tools_common.c
 decode_with_drops.SRCS          += video_common.h
 decode_with_drops.SRCS          += video_reader.h video_reader.c
@@ -202,6 +212,7 @@ decode_with_drops.GUID           = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26
 decode_with_drops.DESCRIPTION    = Drops frames while decoding
 EXAMPLES-$(CONFIG_ENCODERS)        += set_maps.c
 set_maps.SRCS                      += ivfenc.h ivfenc.c
+set_maps.SRCS                      += y4minput.c y4minput.h
 set_maps.SRCS                      += tools_common.h tools_common.c
 set_maps.SRCS                      += video_common.h
 set_maps.SRCS                      += video_writer.h video_writer.c
@@ -210,6 +221,7 @@ set_maps.GUID                       = ECB2D24D-98B8-4015-A465-A4AF3DCC145F
 set_maps.DESCRIPTION                = Set active and ROI maps
 EXAMPLES-$(CONFIG_VP8_ENCODER)     += vp8cx_set_ref.c
 vp8cx_set_ref.SRCS                 += ivfenc.h ivfenc.c
+vp8cx_set_ref.SRCS                 += y4minput.c y4minput.h
 vp8cx_set_ref.SRCS                 += tools_common.h tools_common.c
 vp8cx_set_ref.SRCS                 += video_common.h
 vp8cx_set_ref.SRCS                 += video_writer.h video_writer.c
@@ -221,6 +233,7 @@ ifeq ($(CONFIG_VP9_ENCODER),yes)
 ifeq ($(CONFIG_DECODERS),yes)
 EXAMPLES-yes                       += vp9cx_set_ref.c
 vp9cx_set_ref.SRCS                 += ivfenc.h ivfenc.c
+vp9cx_set_ref.SRCS                 += y4minput.c y4minput.h
 vp9cx_set_ref.SRCS                 += tools_common.h tools_common.c
 vp9cx_set_ref.SRCS                 += video_common.h
 vp9cx_set_ref.SRCS                 += video_writer.h video_writer.c
@@ -233,6 +246,7 @@ ifeq ($(CONFIG_MULTI_RES_ENCODING),yes)
 ifeq ($(CONFIG_LIBYUV),yes)
 EXAMPLES-$(CONFIG_VP8_ENCODER)          += vp8_multi_resolution_encoder.c
 vp8_multi_resolution_encoder.SRCS       += ivfenc.h ivfenc.c
+vp8_multi_resolution_encoder.SRCS       += y4minput.c y4minput.h
 vp8_multi_resolution_encoder.SRCS       += tools_common.h tools_common.c
 vp8_multi_resolution_encoder.SRCS       += video_writer.h video_writer.c
 vp8_multi_resolution_encoder.SRCS       += vpx_ports/msvc.h
diff --git a/examples/vp8_multi_resolution_encoder.c b/examples/vp8_multi_resolution_encoder.c
index b14b1ff39..e72f8a019 100644
--- a/examples/vp8_multi_resolution_encoder.c
+++ b/examples/vp8_multi_resolution_encoder.c
@@ -61,7 +61,7 @@ void usage_exit(void) { exit(EXIT_FAILURE); }
 
 int (*read_frame_p)(FILE *f, vpx_image_t *img);
 
-static int read_frame(FILE *f, vpx_image_t *img) {
+static int mulres_read_frame(FILE *f, vpx_image_t *img) {
   size_t nbytes, to_read;
   int res = 1;
 
@@ -75,7 +75,7 @@ static int read_frame(FILE *f, vpx_image_t *img) {
   return res;
 }
 
-static int read_frame_by_row(FILE *f, vpx_image_t *img) {
+static int mulres_read_frame_by_row(FILE *f, vpx_image_t *img) {
   size_t nbytes, to_read;
   int res = 1;
   int plane;
@@ -471,9 +471,9 @@ int main(int argc, char **argv) {
       die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h);
 
   if (raw[0].stride[VPX_PLANE_Y] == (int)raw[0].d_w)
-    read_frame_p = read_frame;
+    read_frame_p = mulres_read_frame;
   else
-    read_frame_p = read_frame_by_row;
+    read_frame_p = mulres_read_frame_by_row;
 
   for (i = 0; i < NUM_ENCODERS; i++)
     if (outfile[i]) write_ivf_file_header(outfile[i], &cfg[i], 0);
diff --git a/examples/vp9_spatial_svc_encoder.c b/examples/vp9_spatial_svc_encoder.c
index f8093e1bf..92b310684 100644
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -30,6 +30,8 @@
 #include "vpx/vpx_encoder.h"
 #include "../vpxstats.h"
 #include "vp9/encoder/vp9_encoder.h"
+#include "./y4minput.h"
+
 #define OUTPUT_RC_STATS 1
 
 static const arg_def_t skip_frames_arg =
@@ -161,7 +163,6 @@ static const int32_t default_speed = -1;    // -1 means use library default.
 static const uint32_t default_threads = 0;  // zero means use library default.
 
 typedef struct {
-  const char *input_filename;
   const char *output_filename;
   uint32_t frames_to_code;
   uint32_t frames_to_skip;
@@ -393,10 +394,16 @@ static void parse_command_line(int argc, const char **argv_,
   if (argv[0] == NULL || argv[1] == 0) {
     usage_exit();
   }
-  app_input->input_filename = argv[0];
+  app_input->input_ctx.filename = argv[0];
   app_input->output_filename = argv[1];
   free(argv);
 
+  open_input_file(&app_input->input_ctx);
+  if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) {
+    enc_cfg->g_w = app_input->input_ctx.width;
+    enc_cfg->g_h = app_input->input_ctx.height;
+  }
+
   if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
       enc_cfg->g_h % 2)
     die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
@@ -752,7 +759,6 @@ int main(int argc, const char **argv) {
   vpx_codec_err_t res;
   int pts = 0;            /* PTS starts at 0 */
   int frame_duration = 1; /* 1 timebase tick per frame */
-  FILE *infile = NULL;
   int end_of_stream = 0;
   int frames_received = 0;
 #if OUTPUT_RC_STATS
@@ -773,6 +779,13 @@ int main(int argc, const char **argv) {
   memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t));
   memset(&rc, 0, sizeof(struct RateControlStats));
   exec_name = argv[0];
+
+  /* Setup default input stream settings */
+  app_input.input_ctx.framerate.numerator = 30;
+  app_input.input_ctx.framerate.denominator = 1;
+  app_input.input_ctx.only_i420 = 1;
+  app_input.input_ctx.bit_depth = 0;
+
   parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg);
 
 // Allocate image buffer
@@ -789,9 +802,6 @@ int main(int argc, const char **argv) {
   }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-  if (!(infile = fopen(app_input.input_filename, "rb")))
-    die("Failed to open %s for reading\n", app_input.input_filename);
-
   // Initialize codec
   if (vpx_svc_init(&svc_ctx, &codec, vpx_codec_vp9_cx(), &enc_cfg) !=
       VPX_CODEC_OK)
@@ -835,7 +845,8 @@ int main(int argc, const char **argv) {
 #endif
 
   // skip initial frames
-  for (i = 0; i < app_input.frames_to_skip; ++i) vpx_img_read(&raw, infile);
+  for (i = 0; i < app_input.frames_to_skip; ++i)
+    read_frame(&app_input.input_ctx, &raw);
 
   if (svc_ctx.speed != -1)
     vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
@@ -875,7 +886,8 @@ int main(int argc, const char **argv) {
     // layers, with SL0 only has TL0, and SL1 has both TL0 and TL1. This example
     // uses the extended API.
     int example_pattern = 0;
-    if (frame_cnt >= app_input.frames_to_code || !vpx_img_read(&raw, infile)) {
+    if (frame_cnt >= app_input.frames_to_code ||
+        !read_frame(&app_input.input_ctx, &raw)) {
       // We need one extra vpx_svc_encode call at end of stream to flush
       // encoder and get remaining data
       end_of_stream = 1;
@@ -1091,7 +1103,9 @@ int main(int argc, const char **argv) {
   }
 
   printf("Processed %d frames\n", frame_cnt);
-  fclose(infile);
+
+  close_input_file(&app_input.input_ctx);
+
 #if OUTPUT_RC_STATS
   if (svc_ctx.output_rc_stat) {
     printout_rate_control_summary(&rc, &enc_cfg, frame_cnt);
diff --git a/examples/vpx_temporal_svc_encoder.c b/examples/vpx_temporal_svc_encoder.c
index f49ef7b1d..aa2213a5b 100644
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -19,6 +19,7 @@
 #include <string.h>
 
 #include "./vpx_config.h"
+#include "./y4minput.h"
 #include "../vpx_ports/vpx_timer.h"
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"
@@ -594,7 +595,7 @@ int main(int argc, char **argv) {
 #endif
   vpx_svc_layer_id_t layer_id;
   const VpxInterface *encoder = NULL;
-  FILE *infile = NULL;
+  struct VpxInputContext input_ctx;
   struct RateControlMetrics rc;
   int64_t cx_time = 0;
   const int min_args_base = 13;
@@ -611,6 +612,13 @@ int main(int argc, char **argv) {
 
   zero(rc.layer_target_bitrate);
   memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t));
+  memset(&input_ctx, 0, sizeof(input_ctx));
+  /* Setup default input stream settings */
+  input_ctx.framerate.numerator = 30;
+  input_ctx.framerate.denominator = 1;
+  input_ctx.only_i420 = 1;
+  input_ctx.bit_depth = 0;
+
   exec_name = argv[0];
   // Check usage and arguments.
   if (argc < min_args) {
@@ -754,9 +762,18 @@ int main(int argc, char **argv) {
   // Set to layer_target_bitrate for highest layer (total bitrate).
   cfg.rc_target_bitrate = rc.layer_target_bitrate[cfg.ts_number_layers - 1];
 
-  // Open input file.
-  if (!(infile = fopen(argv[1], "rb"))) {
-    die("Failed to open %s for reading", argv[1]);
+  input_ctx.filename = argv[1];
+  open_input_file(&input_ctx);
+
+  if (input_ctx.file_type == FILE_TYPE_Y4M) {
+    if (input_ctx.width != cfg.g_w || input_ctx.height != cfg.g_h) {
+      die("Incorrect width or height: %d x %d", cfg.g_w, cfg.g_h);
+    }
+    if (input_ctx.framerate.numerator != cfg.g_timebase.den ||
+        input_ctx.framerate.denominator != cfg.g_timebase.num) {
+      die("Incorrect framerate: numerator %d denominator %d",
+          cfg.g_timebase.num, cfg.g_timebase.den);
+    }
   }
 
   framerate = cfg.g_timebase.den / cfg.g_timebase.num;
@@ -865,7 +882,7 @@ int main(int argc, char **argv) {
     }
     flags = layer_flags[frame_cnt % flag_periodicity];
     if (layering_mode == 0) flags = 0;
-    frame_avail = vpx_img_read(&raw, infile);
+    frame_avail = read_frame(&input_ctx, &raw);
     if (frame_avail) ++rc.layer_input_frames[layer_id.temporal_layer_id];
     vpx_usec_timer_start(&timer);
     if (vpx_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags,
@@ -933,7 +950,7 @@ int main(int argc, char **argv) {
     ++frame_cnt;
     pts += frame_duration;
   }
-  fclose(infile);
+  close_input_file(&input_ctx);
   printout_rate_control_summary(&rc, &cfg, frame_cnt);
   printf("\n");
   printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
diff --git a/test/tools_common.sh b/test/tools_common.sh
index 7caad9f06..844a12534 100755
--- a/test/tools_common.sh
+++ b/test/tools_common.sh
@@ -412,6 +412,8 @@ YUV_RAW_INPUT_HEIGHT=288
 
 Y4M_NOSQ_PAR_INPUT="${LIBVPX_TEST_DATA_PATH}/park_joy_90p_8_420_a10-1.y4m"
 Y4M_720P_INPUT="${LIBVPX_TEST_DATA_PATH}/niklas_1280_720_30.y4m"
+Y4M_720P_INPUT_WIDTH=1280
+Y4M_720P_INPUT_HEIGHT=720
 
 # Setup a trap function to clean up after tests complete.
 trap cleanup EXIT
diff --git a/test/vpx_temporal_svc_encoder.sh b/test/vpx_temporal_svc_encoder.sh
index fd1045e7f..5e5bac8fa 100755
--- a/test/vpx_temporal_svc_encoder.sh
+++ b/test/vpx_temporal_svc_encoder.sh
@@ -38,6 +38,7 @@ vpx_tsvc_encoder() {
   local output_file="${VPX_TEST_OUTPUT_DIR}/${output_file_base}"
   local timebase_num="1"
   local timebase_den="1000"
+  local timebase_den_y4m="30"
   local speed="6"
   local frame_drop_thresh="30"
   local max_threads="4"
@@ -58,6 +59,12 @@ vpx_tsvc_encoder() {
         "${YUV_RAW_INPUT_HEIGHT}" "${timebase_num}" "${timebase_den}" \
         "${speed}" "${frame_drop_thresh}" "${error_resilient}" "${threads}" \
         "$@" ${devnull}
+      # Test for y4m input.
+      eval "${VPX_TEST_PREFIX}" "${encoder}" "${Y4M_720P_INPUT}" \
+        "${output_file}" "${codec}" "${Y4M_720P_INPUT_WIDTH}" \
+        "${Y4M_720P_INPUT_HEIGHT}" "${timebase_num}" "${timebase_den_y4m}" \
+        "${speed}" "${frame_drop_thresh}" "${error_resilient}" "${threads}" \
+        "$@" ${devnull}
     else
       eval "${VPX_TEST_PREFIX}" "${encoder}" "${YUV_RAW_INPUT}" \
         "${output_file}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
diff --git a/tools_common.c b/tools_common.c
index b1c35e815..2cd8a0704 100644
--- a/tools_common.c
+++ b/tools_common.c
@@ -46,6 +46,14 @@
     va_end(ap);                        \
   } while (0)
 
+#if CONFIG_ENCODERS
+/* Swallow warnings about unused results of fread/fwrite */
+static size_t wrap_fread(void *ptr, size_t size, size_t nmemb, FILE *stream) {
+  return fread(ptr, size, nmemb, stream);
+}
+#define fread wrap_fread
+#endif
+
 FILE *set_binary_mode(FILE *stream) {
   (void)stream;
 #if defined(_WIN32) || defined(__OS2__)
@@ -264,6 +272,88 @@ double sse_to_psnr(double samples, double peak, double sse) {
   }
 }
 
+#if CONFIG_ENCODERS
+int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img) {
+  FILE *f = input_ctx->file;
+  y4m_input *y4m = &input_ctx->y4m;
+  int shortread = 0;
+
+  if (input_ctx->file_type == FILE_TYPE_Y4M) {
+    if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
+  } else {
+    shortread = read_yuv_frame(input_ctx, img);
+  }
+
+  return !shortread;
+}
+
+int file_is_y4m(const char detect[4]) {
+  if (memcmp(detect, "YUV4", 4) == 0) {
+    return 1;
+  }
+  return 0;
+}
+
+int fourcc_is_ivf(const char detect[4]) {
+  if (memcmp(detect, "DKIF", 4) == 0) {
+    return 1;
+  }
+  return 0;
+}
+
+void open_input_file(struct VpxInputContext *input) {
+  /* Parse certain options from the input file, if possible */
+  input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
+                                             : set_binary_mode(stdin);
+
+  if (!input->file) fatal("Failed to open input file");
+
+  if (!fseeko(input->file, 0, SEEK_END)) {
+    /* Input file is seekable. Figure out how long it is, so we can get
+     * progress info.
+     */
+    input->length = ftello(input->file);
+    rewind(input->file);
+  }
+
+  /* Default to 1:1 pixel aspect ratio. */
+  input->pixel_aspect_ratio.numerator = 1;
+  input->pixel_aspect_ratio.denominator = 1;
+
+  /* For RAW input sources, these bytes will applied on the first frame
+   *  in read_frame().
+   */
+  input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
+  input->detect.position = 0;
+
+  if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
+    if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4,
+                       input->only_i420) >= 0) {
+      input->file_type = FILE_TYPE_Y4M;
+      input->width = input->y4m.pic_w;
+      input->height = input->y4m.pic_h;
+      input->pixel_aspect_ratio.numerator = input->y4m.par_n;
+      input->pixel_aspect_ratio.denominator = input->y4m.par_d;
+      input->framerate.numerator = input->y4m.fps_n;
+      input->framerate.denominator = input->y4m.fps_d;
+      input->fmt = input->y4m.vpx_fmt;
+      input->bit_depth = input->y4m.bit_depth;
+    } else {
+      fatal("Unsupported Y4M stream.");
+    }
+  } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
+    fatal("IVF is not supported as input.");
+  } else {
+    input->file_type = FILE_TYPE_RAW;
+  }
+}
+
+void close_input_file(struct VpxInputContext *input) {
+  fclose(input->file);
+  if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
+}
+#endif
+
 // TODO(debargha): Consolidate the functions below into a separate file.
 #if CONFIG_VP9_HIGHBITDEPTH
 static void highbd_img_upshift(vpx_image_t *dst, vpx_image_t *src,
diff --git a/tools_common.h b/tools_common.h
index bb169351b..28695e4f1 100644
--- a/tools_common.h
+++ b/tools_common.h
@@ -152,6 +152,14 @@ int vpx_img_read(vpx_image_t *img, FILE *file);
 
 double sse_to_psnr(double samples, double peak, double mse);
 
+#if CONFIG_ENCODERS
+int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img);
+int file_is_y4m(const char detect[4]);
+int fourcc_is_ivf(const char detect[4]);
+void open_input_file(struct VpxInputContext *input);
+void close_input_file(struct VpxInputContext *input);
+#endif
+
 #if CONFIG_VP9_HIGHBITDEPTH
 void vpx_img_upshift(vpx_image_t *dst, vpx_image_t *src, int input_shift);
 void vpx_img_downshift(vpx_image_t *dst, vpx_image_t *src, int down_shift);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index eaeb3d96e..b8c86ea43 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2583,6 +2583,7 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
       for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) {
         vpx_free(cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize]);
       }
+      vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
     }
 #endif
     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
@@ -6014,22 +6015,27 @@ static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
 }
 
 #if CONFIG_NON_GREEDY_MV
-static void get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture,
-                                   int frame_idx, int rf_idx, int mi_row,
-                                   int mi_col, struct buf_2d *src,
-                                   struct buf_2d *pre) {
+static int get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture,
+                                  int frame_idx, int rf_idx, int mi_row,
+                                  int mi_col, struct buf_2d *src,
+                                  struct buf_2d *pre) {
   const int mb_y_offset =
       mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
   YV12_BUFFER_CONFIG *ref_frame = NULL;
   int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
   if (ref_frame_idx != -1) {
     ref_frame = gf_picture[ref_frame_idx].frame;
+    src->buf = xd->cur_buf->y_buffer + mb_y_offset;
+    src->stride = xd->cur_buf->y_stride;
+    pre->buf = ref_frame->y_buffer + mb_y_offset;
+    pre->stride = ref_frame->y_stride;
+    assert(src->stride == pre->stride);
+    return 1;
+  } else {
+    printf("invalid ref_frame_idx");
+    assert(ref_frame_idx != -1);
+    return 0;
   }
-  src->buf = xd->cur_buf->y_buffer + mb_y_offset;
-  src->stride = xd->cur_buf->y_stride;
-  pre->buf = ref_frame->y_buffer + mb_y_offset;
-  pre->stride = ref_frame->y_stride;
-  assert(src->stride == pre->stride);
 }
 
 #define kMvPreCheckLines 5
@@ -6135,18 +6141,60 @@ static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd,
   *mv = get_mv_from_mv_mode(mv_mode, cpi, tpl_frame, rf_idx, bsize, mi_row,
                             mi_col);
   full_mv = get_full_mv(&mv->as_mv);
-  get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col,
-                         &src, &pre);
-  // TODO(angiebird): Consider subpixel when computing the sse.
-  cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv),
-                        pre.stride, &sse);
-  return (double)sse;
+  if (get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col,
+                             &src, &pre)) {
+    // TODO(angiebird): Consider subpixel when computing the sse.
+    cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv),
+                          pre.stride, &sse);
+    return (double)sse;
+  } else {
+    assert(0);
+    return 0;
+  }
 }
 
-static double get_mv_cost(int mv_mode) {
-  // TODO(angiebird): Implement this function.
-  (void)mv_mode;
-  return 0;
+static int get_mv_mode_cost(int mv_mode) {
+  // TODO(angiebird): The probabilities are roughly inferred from
+  // default_inter_mode_probs. Check if there is a better way to set the
+  // probabilities.
+  const int zero_mv_prob = 9;
+  const int new_mv_prob = 77;
+  const int ref_mv_prob = 170;
+  assert(zero_mv_prob + new_mv_prob + ref_mv_prob == 256);
+  switch (mv_mode) {
+    case ZERO_MV_MODE: return vp9_prob_cost[zero_mv_prob]; break;
+    case NEW_MV_MODE: return vp9_prob_cost[new_mv_prob]; break;
+    case NEAREST_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
+    case NEAR_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
+    default: assert(0); return -1;
+  }
+}
+
+static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) {
+  double mv_diff_cost = log2(1 + abs(new_mv->row - ref_mv->row)) +
+                        log2(1 + abs(new_mv->col - ref_mv->col));
+  mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT);
+  return mv_diff_cost;
+}
+static double get_mv_cost(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
+                          int rf_idx, BLOCK_SIZE bsize, int mi_row,
+                          int mi_col) {
+  double mv_cost = get_mv_mode_cost(mv_mode);
+  if (mv_mode == NEW_MV_MODE) {
+    MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, tpl_frame, rf_idx, bsize,
+                                    mi_row, mi_col)
+                    .as_mv;
+    MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, tpl_frame, rf_idx,
+                                        bsize, mi_row, mi_col)
+                        .as_mv;
+    MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, tpl_frame, rf_idx,
+                                     bsize, mi_row, mi_col)
+                     .as_mv;
+    double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv);
+    double near_cost = get_mv_diff_cost(&new_mv, &near_mv);
+    mv_cost += nearest_cost < near_cost ? nearest_cost : near_cost;
+  }
+  return mv_cost;
 }
 
 static double rd_cost(int rdmult, int rddiv, double rate, double dist) {
@@ -6160,7 +6208,9 @@ static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x,
   MACROBLOCKD *xd = &x->e_mbd;
   double mv_dist = get_mv_dist(mv_mode, cpi, xd, gf_picture, frame_idx,
                                tpl_frame, rf_idx, bsize, mi_row, mi_col, mv);
-  double mv_cost = get_mv_cost(mv_mode);
+  double mv_cost =
+      get_mv_cost(mv_mode, cpi, tpl_frame, rf_idx, bsize, mi_row, mi_col);
+
   return rd_cost(x->rdmult, x->rddiv, mv_cost, mv_dist);
 }
 
@@ -6218,7 +6268,7 @@ static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
   // no new mv
   // diagnal scan order
   tmp_idx = 0;
-  for (idx = 0; idx < kMvPreCheckSize; ++idx) {
+  for (idx = 0; idx < kMvPreCheckLines; ++idx) {
     int r;
     for (r = 0; r <= idx; ++r) {
       int c = idx - r;
@@ -6245,7 +6295,7 @@ static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                            &select_mv_arr[mi_row * stride + mi_col]);
   // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE
   // beforehand.
-  for (idx = 1; idx < kMvPreCheckSize; ++idx) {
+  for (idx = 1; idx < kMvPreCheckLines; ++idx) {
     int r;
     for (r = 0; r <= idx; ++r) {
       int c = idx - r;
@@ -6266,7 +6316,7 @@ static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
   tmp_idx = 0;
   if (no_new_mv_rd < new_mv_rd) {
     *rd = no_new_mv_rd;
-    for (idx = 0; idx < kMvPreCheckSize; ++idx) {
+    for (idx = 0; idx < kMvPreCheckLines; ++idx) {
       int r;
       for (r = 0; r <= idx; ++r) {
         int c = idx - r;
@@ -6783,6 +6833,11 @@ static void init_tpl_buffer(VP9_COMP *cpi) {
                 sizeof(
                     *cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize])));
       }
+      vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
+      CHECK_MEM_ERROR(
+          cm, cpi->tpl_stats[frame].mv_mode_arr[rf_idx],
+          vpx_calloc(mi_rows * mi_cols * 4,
+                     sizeof(*cpi->tpl_stats[frame].mv_mode_arr[rf_idx])));
     }
 #endif
     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 0fdc61649..8cd1e6e31 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1943,8 +1943,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
   // For SVC with quality layers, when QP of lower layer is lower
   // than current layer: force check of GF-ZEROMV before early exit
   // due to skip flag.
-  if (svc->spatial_layer_id > 0 && usable_ref_frame == GOLDEN_FRAME &&
-      no_scaling && cm->base_qindex > svc->lower_layer_qindex + 10)
+  if (svc->spatial_layer_id > 0 && no_scaling &&
+      (cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) &&
+      cm->base_qindex > svc->lower_layer_qindex + 10)
     force_test_gf_zeromv = 1;
 
   for (idx = 0; idx < num_inter_modes + comp_modes; ++idx) {
@@ -2407,6 +2408,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
   // Perform intra prediction search, if the best SAD is above a certain
   // threshold.
   if (best_rdc.rdcost == INT64_MAX ||
+      (cpi->oxcf.content == VP9E_CONTENT_SCREEN && x->source_variance == 0 &&
+       !x->zero_temp_sad_source) ||
       (scene_change_detected && perform_intra_pred) ||
       ((!force_skip_low_temp_var || bsize < BLOCK_32X32 ||
         x->content_state_sb == kVeryHighSad) &&
@@ -2449,8 +2452,11 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
       const PREDICTION_MODE this_mode = intra_mode_list[i];
       THR_MODES mode_index = mode_idx[INTRA_FRAME][mode_offset(this_mode)];
       int mode_rd_thresh = rd_threshes[mode_index];
+      // For spatially flat blocks, under short_circuit_flat_blocks flag:
+      // only check DC mode for stationary blocks, otherwise also check
+      // H and V mode.
       if (sf->short_circuit_flat_blocks && x->source_variance == 0 &&
-          this_mode != DC_PRED) {
+          ((x->zero_temp_sad_source && this_mode != DC_PRED) || i > 2)) {
         continue;
       }
 
diff --git a/vpxdec.c b/vpxdec.c
index 7f544d4bc..c60eb5c30 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -265,8 +265,8 @@ static int raw_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read,
   return 1;
 }
 
-static int read_frame(struct VpxDecInputContext *input, uint8_t **buf,
-                      size_t *bytes_in_buffer, size_t *buffer_size) {
+static int dec_read_frame(struct VpxDecInputContext *input, uint8_t **buf,
+                          size_t *bytes_in_buffer, size_t *buffer_size) {
   switch (input->vpx_input_ctx->file_type) {
 #if CONFIG_WEBM_IO
     case FILE_TYPE_WEBM:
@@ -806,7 +806,7 @@ static int main_loop(int argc, const char **argv_) {
 
   if (arg_skip) fprintf(stderr, "Skipping first %d frames.\n", arg_skip);
   while (arg_skip) {
-    if (read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) break;
+    if (dec_read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) break;
     arg_skip--;
   }
 
@@ -837,7 +837,7 @@ static int main_loop(int argc, const char **argv_) {
 
     frame_avail = 0;
     if (!stop_after || frame_in < stop_after) {
-      if (!read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) {
+      if (!dec_read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) {
         frame_avail = 1;
         frame_in++;
 
diff --git a/vpxenc.c b/vpxenc.c
index b7841522d..a6d3d41d7 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -50,12 +50,6 @@
 #endif
 #include "./y4minput.h"
 
-/* Swallow warnings about unused results of fread/fwrite */
-static size_t wrap_fread(void *ptr, size_t size, size_t nmemb, FILE *stream) {
-  return fread(ptr, size, nmemb, stream);
-}
-#define fread wrap_fread
-
 static size_t wrap_fwrite(const void *ptr, size_t size, size_t nmemb,
                           FILE *stream) {
   return fwrite(ptr, size, nmemb, stream);
@@ -95,34 +89,6 @@ static void warn_or_exit_on_error(vpx_codec_ctx_t *ctx, int fatal,
   va_end(ap);
 }
 
-static int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img) {
-  FILE *f = input_ctx->file;
-  y4m_input *y4m = &input_ctx->y4m;
-  int shortread = 0;
-
-  if (input_ctx->file_type == FILE_TYPE_Y4M) {
-    if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
-  } else {
-    shortread = read_yuv_frame(input_ctx, img);
-  }
-
-  return !shortread;
-}
-
-static int file_is_y4m(const char detect[4]) {
-  if (memcmp(detect, "YUV4", 4) == 0) {
-    return 1;
-  }
-  return 0;
-}
-
-static int fourcc_is_ivf(const char detect[4]) {
-  if (memcmp(detect, "DKIF", 4) == 0) {
-    return 1;
-  }
-  return 0;
-}
-
 static const arg_def_t help =
     ARG_DEF(NULL, "help", 0, "Show usage options and exit");
 static const arg_def_t debugmode =
@@ -1020,57 +986,6 @@ static void parse_global_config(struct VpxEncoderConfig *global, char **argv) {
   }
 }
 
-static void open_input_file(struct VpxInputContext *input) {
-  /* Parse certain options from the input file, if possible */
-  input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
-                                             : set_binary_mode(stdin);
-
-  if (!input->file) fatal("Failed to open input file");
-
-  if (!fseeko(input->file, 0, SEEK_END)) {
-    /* Input file is seekable. Figure out how long it is, so we can get
-     * progress info.
-     */
-    input->length = ftello(input->file);
-    rewind(input->file);
-  }
-
-  /* Default to 1:1 pixel aspect ratio. */
-  input->pixel_aspect_ratio.numerator = 1;
-  input->pixel_aspect_ratio.denominator = 1;
-
-  /* For RAW input sources, these bytes will applied on the first frame
-   *  in read_frame().
-   */
-  input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
-  input->detect.position = 0;
-
-  if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
-    if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4,
-                       input->only_i420) >= 0) {
-      input->file_type = FILE_TYPE_Y4M;
-      input->width = input->y4m.pic_w;
-      input->height = input->y4m.pic_h;
-      input->pixel_aspect_ratio.numerator = input->y4m.par_n;
-      input->pixel_aspect_ratio.denominator = input->y4m.par_d;
-      input->framerate.numerator = input->y4m.fps_n;
-      input->framerate.denominator = input->y4m.fps_d;
-      input->fmt = input->y4m.vpx_fmt;
-      input->bit_depth = input->y4m.bit_depth;
-    } else
-      fatal("Unsupported Y4M stream.");
-  } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
-    fatal("IVF is not supported as input.");
-  } else {
-    input->file_type = FILE_TYPE_RAW;
-  }
-}
-
-static void close_input_file(struct VpxInputContext *input) {
-  fclose(input->file);
-  if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
-}
-
 static struct stream_state *new_stream(struct VpxEncoderConfig *global,
                                        struct stream_state *prev) {
   struct stream_state *stream;