9 files changed, 128 insertions, 35 deletions
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 694cac76f..3954fe6a7 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -1013,7 +1013,11 @@ static void write_frame_size_with_refs(VP9_COMP *cpi,
         ((cpi->svc.number_temporal_layers > 1 &&
          cpi->oxcf.rc_mode == VPX_CBR) ||
         (cpi->svc.number_spatial_layers > 1 &&
-         cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame))) {
+         cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame) ||
+        (is_two_pass_svc(cpi) &&
+         cpi->svc.encode_empty_frame_state == ENCODING &&
+         cpi->svc.layer_context[0].frames_from_key_frame <
+         cpi->svc.number_temporal_layers + 1))) {
       found = 0;
     }
     vp9_wb_write_bit(wb, found);
@@ -1105,8 +1109,7 @@ static void write_uncompressed_header(VP9_COMP *cpi,
     // will change to show_frame flag to 0, then add an one byte frame with
     // show_existing_frame flag which tells the decoder which frame we want to
     // show.
-    if (!cm->show_frame ||
-        (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0))
+    if (!cm->show_frame)
       vp9_wb_write_bit(wb, cm->intra_only);
 
     if (!cm->error_resilient_mode)
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index fa509e5bd..197f54cfc 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3159,7 +3159,6 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
     int64_t dummy_dist = 0;
     const int idx_str = cm->mi_stride * mi_row + mi_col;
     MODE_INFO *mi = cm->mi + idx_str;
-    MODE_INFO *prev_mi = (cm->prev_mip + cm->mi_stride + 1 + idx_str)->src_mi;
     BLOCK_SIZE bsize;
     x->in_static_area = 0;
     x->source_variance = UINT_MAX;
@@ -3197,7 +3196,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                                &dummy_rate, &dummy_dist, 1, INT64_MAX,
                                cpi->pc_root);
         } else {
-          copy_partitioning(cm, mi, prev_mi);
+          choose_partitioning(cpi, tile, mi_row, mi_col);
           nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col,
                               BLOCK_64X64, 1, &dummy_rate, &dummy_dist,
                               cpi->pc_root);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index dea93062b..0928c0bb7 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -225,6 +225,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
   }
   vpx_memset(&cpi->svc.scaled_frames[0], 0,
              MAX_LAG_BUFFERS * sizeof(cpi->svc.scaled_frames[0]));
+
+  vp9_free_frame_buffer(&cpi->svc.empty_frame.img);
+  vpx_memset(&cpi->svc.empty_frame, 0, sizeof(cpi->svc.empty_frame));
 }
 
 static void save_coding_context(VP9_COMP *cpi) {
@@ -2635,7 +2638,10 @@ void set_frame_size(VP9_COMP *cpi) {
 
   // For two pass encodes analyse the first pass stats and determine
   // the bit allocation and other parameters for this frame / group of frames.
-  if ((oxcf->pass == 2) && (!cpi->use_svc || is_two_pass_svc(cpi))) {
+  if ((oxcf->pass == 2) &&
+      (!cpi->use_svc ||
+       (is_two_pass_svc(cpi) &&
+        cpi->svc.encode_empty_frame_state != ENCODING))) {
     vp9_rc_get_second_pass_params(cpi);
   }
 
@@ -3134,7 +3140,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
     }
   }
   if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0) {
+    // Use the last frame context for the empty frame.
     cm->frame_context_idx =
+        (cpi->svc.encode_empty_frame_state == ENCODING) ? FRAME_CONTEXTS - 1 :
         cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers +
         cpi->svc.temporal_layer_id;
 
@@ -3269,7 +3277,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   cpi->ref_frame_flags = get_ref_frame_flags(cpi);
 
   cm->last_frame_type = cm->frame_type;
-  vp9_rc_postencode_update(cpi, *size);
+
+  if (!(is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING))
+    vp9_rc_postencode_update(cpi, *size);
 
 #if 0
   output_frame_level_debug_stats(cpi);
@@ -3293,12 +3303,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   cm->last_height = cm->height;
 
   // reset to normal state now that we are done.
-  if (!cm->show_existing_frame) {
-    if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0)
-      cm->last_show_frame = 0;
-    else
-      cm->last_show_frame = cm->show_frame;
-  }
+  if (!cm->show_existing_frame)
+    cm->last_show_frame = cm->show_frame;
 
   if (cm->show_frame) {
     vp9_swap_mi_and_prev_mi(cm);
@@ -3335,7 +3341,9 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size,
                         uint8_t *dest, unsigned int *frame_flags) {
   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
   encode_frame_to_data_rate(cpi, size, dest, frame_flags);
-  vp9_twopass_postencode_update(cpi);
+
+  if (!(is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING))
+    vp9_twopass_postencode_update(cpi);
 }
 
 static void check_initial_width(VP9_COMP *cpi,
@@ -3513,6 +3521,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
   if (is_two_pass_svc(cpi)) {
 #if CONFIG_SPATIAL_SVC
     vp9_svc_start_frame(cpi);
+    // Use a small empty frame instead of a real frame
+    if (cpi->svc.encode_empty_frame_state == ENCODING)
+      source = &cpi->svc.empty_frame;
 #endif
     if (oxcf->pass == 2)
       vp9_restore_layer_context(cpi);
@@ -3531,6 +3542,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 
   // Should we encode an arf frame.
   arf_src_index = get_arf_src_index(cpi);
+
+  // Skip alt frame if we encode the empty frame
+  if (is_two_pass_svc(cpi) && source != NULL)
+    arf_src_index = 0;
+
   if (arf_src_index) {
     assert(arf_src_index <= rc->frames_to_key);
 
@@ -3818,10 +3834,18 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 
 #endif
 
-  if (is_two_pass_svc(cpi) && cm->show_frame) {
-    ++cpi->svc.spatial_layer_to_encode;
-    if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
-      cpi->svc.spatial_layer_to_encode = 0;
+  if (is_two_pass_svc(cpi)) {
+    if (cpi->svc.encode_empty_frame_state == ENCODING)
+      cpi->svc.encode_empty_frame_state = ENCODED;
+
+    if (cm->show_frame) {
+      ++cpi->svc.spatial_layer_to_encode;
+      if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
+        cpi->svc.spatial_layer_to_encode = 0;
+
+      // May need the empty frame after an visible frame.
+      cpi->svc.encode_empty_frame_state = NEED_TO_ENCODE;
+    }
   }
   return 0;
 }
@@ -3912,10 +3936,6 @@ int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
 
   if (width) {
     cm->width = width;
-    if (cm->width * 5 < cpi->initial_width) {
-      cm->width = cpi->initial_width / 5 + 1;
-      printf("Warning: Desired width too small, changed to %d\n", cm->width);
-    }
     if (cm->width > cpi->initial_width) {
       cm->width = cpi->initial_width;
       printf("Warning: Desired width too large, changed to %d\n", cm->width);
@@ -3924,10 +3944,6 @@ int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
 
   if (height) {
     cm->height = height;
-    if (cm->height * 5 < cpi->initial_height) {
-      cm->height = cpi->initial_height / 5 + 1;
-      printf("Warning: Desired height too small, changed to %d\n", cm->height);
-    }
     if (cm->height > cpi->initial_height) {
       cm->height = cpi->initial_height;
       printf("Warning: Desired height too large, changed to %d\n", cm->height);
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 9e204d7e9..760ec320c 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -2406,6 +2406,9 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
         cpi->ref_frame_flags &=
             (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
         lc->frames_from_key_frame = 0;
+        // Reset the empty frame resolution since we have a key frame.
+        cpi->svc.empty_frame_width = cm->width;
+        cpi->svc.empty_frame_height = cm->height;
       }
     } else {
       cm->frame_type = INTER_FRAME;
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 8788be645..bec77d71f 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -271,6 +271,10 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
     sf->partition_search_type = REFERENCE_PARTITION;
     sf->use_nonrd_pick_mode = 1;
     sf->allow_skip_recode = 0;
+    sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO;
+    sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
+    sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
+    sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
   }
 
   if (speed >= 6) {
@@ -285,10 +289,6 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
     sf->partition_search_type = VAR_BASED_PARTITION;
     sf->search_type_check_frequency = 50;
     sf->mv.search_method = NSTEP;
-    sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO;
-    sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
-    sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
-    sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
 
     sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
 
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index eed681c96..1573557d4 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -14,6 +14,8 @@
 #include "vp9/encoder/vp9_svc_layercontext.h"
 #include "vp9/encoder/vp9_extend.h"
 
+#define SMALL_FRAME_FB_IDX 7
+
 void vp9_init_layer_context(VP9_COMP *const cpi) {
   SVC *const svc = &cpi->svc;
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
@@ -28,6 +30,25 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
     layer_end = svc->number_temporal_layers;
   } else {
     layer_end = svc->number_spatial_layers;
+
+    if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
+      if (vp9_realloc_frame_buffer(&cpi->svc.empty_frame.img,
+                                   cpi->common.width, cpi->common.height,
+                                   cpi->common.subsampling_x,
+                                   cpi->common.subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+                                 cpi->common.use_highbitdepth,
+#endif
+                                 VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
+        vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+                           "Failed to allocate empty frame for multiple frame "
+                           "contexts");
+
+      vpx_memset(cpi->svc.empty_frame.img.buffer_alloc, 0x80,
+                 cpi->svc.empty_frame.img.buffer_alloc_sz);
+      cpi->svc.empty_frame_width = cpi->common.width;
+      cpi->svc.empty_frame_height = cpi->common.height;
+    }
   }
 
   for (layer = 0; layer < layer_end; ++layer) {
@@ -310,6 +331,47 @@ int vp9_svc_start_frame(VP9_COMP *const cpi) {
   get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height,
                        lc->scaling_factor_num, lc->scaling_factor_den,
                        &width, &height);
+
+  // Workaround for multiple frame contexts. In some frames we can't use prev_mi
+  // since its previous frame could be changed during decoding time. The idea is
+  // we put a empty invisible frame in front of them, then we will not use
+  // prev_mi when encoding these frames.
+  if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2 &&
+      cpi->svc.encode_empty_frame_state == NEED_TO_ENCODE) {
+    if ((cpi->svc.number_temporal_layers > 1 &&
+         cpi->svc.temporal_layer_id < cpi->svc.number_temporal_layers - 1) ||
+        (cpi->svc.number_spatial_layers > 1 &&
+         cpi->svc.spatial_layer_id == 0)) {
+      struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead, 0);
+
+      if (buf != NULL) {
+        cpi->svc.empty_frame.ts_start = buf->ts_start;
+        cpi->svc.empty_frame.ts_end = buf->ts_end;
+        cpi->svc.encode_empty_frame_state = ENCODING;
+        cpi->common.show_frame = 0;
+        cpi->ref_frame_flags = 0;
+        cpi->common.frame_type = INTER_FRAME;
+        cpi->lst_fb_idx =
+            cpi->gld_fb_idx = cpi->alt_fb_idx = SMALL_FRAME_FB_IDX;
+
+        // Gradually make the empty frame smaller to save bits. Make it half of
+        // its previous size because of the scaling factor restriction.
+        cpi->svc.empty_frame_width >>= 1;
+        cpi->svc.empty_frame_width = (cpi->svc.empty_frame_width + 1) & ~1;
+        if (cpi->svc.empty_frame_width < 16)
+          cpi->svc.empty_frame_width = 16;
+
+        cpi->svc.empty_frame_height >>= 1;
+        cpi->svc.empty_frame_height = (cpi->svc.empty_frame_height + 1) & ~1;
+        if (cpi->svc.empty_frame_height < 16)
+          cpi->svc.empty_frame_height = 16;
+
+        width = cpi->svc.empty_frame_width;
+        height = cpi->svc.empty_frame_height;
+      }
+    }
+  }
+
   if (vp9_set_size_literal(cpi, width, height) != 0)
     return VPX_CODEC_INVALID_PARAM;
 
@@ -317,7 +379,6 @@ int vp9_svc_start_frame(VP9_COMP *const cpi) {
   cpi->oxcf.best_allowed_q = vp9_quantizer_to_qindex(lc->min_q);
 
   vp9_change_config(cpi, &cpi->oxcf);
-
   vp9_set_high_precision_mv(cpi, 1);
 
   cpi->alt_ref_source = get_layer_context(cpi)->alt_ref_source;
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index 47a5456b6..e9645ce9f 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -50,6 +50,16 @@ typedef struct {
 
   int spatial_layer_to_encode;
 
+  // Workaround for multiple frame contexts
+  enum {
+    ENCODED = 0,
+    ENCODING,
+    NEED_TO_ENCODE
+  }encode_empty_frame_state;
+  struct lookahead_entry empty_frame;
+  int empty_frame_width;
+  int empty_frame_height;
+
   // Store scaled source frames to be used for temporal filter to generate
   // a alt ref frame.
   YV12_BUFFER_CONFIG scaled_frames[MAX_LAG_BUFFERS];
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 9ae81e761..5599227ce 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -719,6 +719,9 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
           ++frame_used;
         }
       }
+      cm->mi = cm->mip + cm->mi_stride + 1;
+      cpi->mb.e_mbd.mi = cm->mi;
+      cpi->mb.e_mbd.mi[0].src_mi = &cpi->mb.e_mbd.mi[0];
     } else {
       // ARF is produced at the native frame size and resized when coded.
 #if CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 041ba27da..d0ca5242c 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -188,11 +188,9 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
     }
     if (alt_ref_sum > REF_FRAMES - cfg->ss_number_layers)
       ERROR("Not enough ref buffers for svc alt ref frames");
-    if ((cfg->ss_number_layers > 3 ||
-         cfg->ss_number_layers * cfg->ts_number_layers > 4) &&
+    if (cfg->ss_number_layers * cfg->ts_number_layers > 3 &&
         cfg->g_error_resilient == 0)
-    ERROR("Multiple frame context are not supported for more than 3 spatial "
-          "layers or more than 4 spatial x temporal layers");
+    ERROR("Multiple frame context are not supported for more than 3 layers");
   }
 #endif