7 files changed, 225 insertions, 43 deletions
diff --git a/vpx/internal/vpx_psnr.h b/vpx/internal/vpx_psnr.h
new file mode 100644
index 000000000..07d81bb8d
--- /dev/null
+++ b/vpx/internal/vpx_psnr.h
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_INTERNAL_VPX_PSNR_H_
+#define VPX_INTERNAL_VPX_PSNR_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// TODO(dkovalev) change vpx_sse_to_psnr signature: double -> int64_t
+
+/*!\brief Converts SSE to PSNR
+ *
+ * Converts sum of squared errros (SSE) to peak signal-to-noise ratio (PNSR).
+ *
+ * \param[in]    samples       Number of samples
+ * \param[in]    peak          Max sample value
+ * \param[in]    sse           Sum of squared errors
+ */
+double vpx_sse_to_psnr(double samples, double peak, double sse);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VPX_INTERNAL_VPX_PSNR_H_
diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c
index adce47637..c7837244f 100644
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -13,6 +13,7 @@
  * VP9 SVC encoding support via libvpx
  */
 
+#include <math.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -40,6 +41,7 @@ _CRTIMP char *__cdecl strtok_s(char *str, const char *delim, char **context);
 #define SUPERFRAME_SLOTS (8)
 #define SUPERFRAME_BUFFER_SIZE (SUPERFRAME_SLOTS * sizeof(uint32_t) + 2)
 #define OPTION_BUFFER_SIZE 256
+#define COMPONENTS 4  // psnr & sse statistics maintained for total, y, u, v
 
 static const char *DEFAULT_QUANTIZER_VALUES = "60,53,39,33,27";
 static const char *DEFAULT_SCALE_FACTORS = "4/16,5/16,7/16,11/16,16/16";
@@ -47,16 +49,20 @@ static const char *DEFAULT_SCALE_FACTORS = "4/16,5/16,7/16,11/16,16/16";
 typedef struct SvcInternal {
   char options[OPTION_BUFFER_SIZE];        // set by vpx_svc_set_options
   char quantizers[OPTION_BUFFER_SIZE];     // set by vpx_svc_set_quantizers
+  char quantizers_keyframe[OPTION_BUFFER_SIZE];  // set by
+                                                 // vpx_svc_set_quantizers
   char scale_factors[OPTION_BUFFER_SIZE];  // set by vpx_svc_set_scale_factors
 
   // values extracted from option, quantizers
   int scaling_factor_num[VPX_SS_MAX_LAYERS];
   int scaling_factor_den[VPX_SS_MAX_LAYERS];
+  int quantizer_keyframe[VPX_SS_MAX_LAYERS];
   int quantizer[VPX_SS_MAX_LAYERS];
 
   // accumulated statistics
-  double psnr_in_layer[VPX_SS_MAX_LAYERS];
-  uint32_t bytes_in_layer[VPX_SS_MAX_LAYERS];
+  double psnr_sum[VPX_SS_MAX_LAYERS][COMPONENTS];   // total/Y/U/V
+  uint64_t sse_sum[VPX_SS_MAX_LAYERS][COMPONENTS];
+  uint32_t bytes_sum[VPX_SS_MAX_LAYERS];
 
   // codec encoding values
   int width;    // width of highest layer
@@ -268,7 +274,8 @@ static vpx_codec_err_t set_option_encoding_mode(SvcContext *svc_ctx,
 }
 
 static vpx_codec_err_t parse_quantizer_values(SvcContext *svc_ctx,
-                                              const char *quantizer_values) {
+                                              const char *quantizer_values,
+                                              const int is_keyframe) {
   char *input_string;
   char *token;
   const char *delim = ",";
@@ -279,6 +286,11 @@ static vpx_codec_err_t parse_quantizer_values(SvcContext *svc_ctx,
   SvcInternal *const si = get_svc_internal(svc_ctx);
 
   if (quantizer_values == NULL || strlen(quantizer_values) == 0) {
+    if (is_keyframe) {
+      // If there non settings for key frame, we will apply settings from
+      // non key frame. So just simply return here.
+      return VPX_CODEC_INVALID_PARAM;
+    }
     input_string = strdup(DEFAULT_QUANTIZER_VALUES);
   } else {
     input_string = strdup(quantizer_values);
@@ -299,7 +311,12 @@ static vpx_codec_err_t parse_quantizer_values(SvcContext *svc_ctx,
     } else {
       q = 0;
     }
-    si->quantizer[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] = q;
+    if (is_keyframe) {
+      si->quantizer_keyframe[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers]
+      = q;
+    } else {
+      si->quantizer[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] = q;
+    }
   }
   if (res == VPX_CODEC_OK && found != svc_ctx->spatial_layers) {
     svc_log(svc_ctx, SVC_LOG_ERROR,
@@ -384,6 +401,7 @@ static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) {
   char *option_name;
   char *option_value;
   char *input_ptr;
+  int is_keyframe_qaunt_set = 0;
   vpx_codec_err_t res = VPX_CODEC_OK;
 
   if (options == NULL) return VPX_CODEC_OK;
@@ -409,8 +427,17 @@ static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) {
       res = parse_scale_factors(svc_ctx, option_value);
       if (res != VPX_CODEC_OK) break;
     } else if (strcmp("quantizers", option_name) == 0) {
-      res = parse_quantizer_values(svc_ctx, option_value);
+      res = parse_quantizer_values(svc_ctx, option_value, 0);
+      if (res != VPX_CODEC_OK) break;
+      if (!is_keyframe_qaunt_set) {
+        SvcInternal *const si = get_svc_internal(svc_ctx);
+        memcpy(get_svc_internal(svc_ctx)->quantizer_keyframe, si->quantizer,
+               sizeof(si->quantizer));
+      }
+    } else if (strcmp("quantizers-keyframe", option_name) == 0) {
+      res = parse_quantizer_values(svc_ctx, option_value, 1);
       if (res != VPX_CODEC_OK) break;
+      is_keyframe_qaunt_set = 1;
     } else {
       svc_log(svc_ctx, SVC_LOG_ERROR, "invalid option: %s\n", option_name);
       res = VPX_CODEC_INVALID_PARAM;
@@ -433,13 +460,19 @@ vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options) {
 }
 
 vpx_codec_err_t vpx_svc_set_quantizers(SvcContext *svc_ctx,
-                                       const char *quantizers) {
+                                       const char *quantizers,
+                                       const int is_for_keyframe) {
   SvcInternal *const si = get_svc_internal(svc_ctx);
   if (svc_ctx == NULL || quantizers == NULL || si == NULL) {
     return VPX_CODEC_INVALID_PARAM;
   }
-  strncpy(si->quantizers, quantizers, sizeof(si->quantizers));
-  si->quantizers[sizeof(si->quantizers) - 1] = '\0';
+  if (is_for_keyframe) {
+    strncpy(si->quantizers_keyframe, quantizers, sizeof(si->quantizers));
+    si->quantizers_keyframe[sizeof(si->quantizers_keyframe) - 1] = '\0';
+  } else {
+    strncpy(si->quantizers, quantizers, sizeof(si->quantizers));
+    si->quantizers[sizeof(si->quantizers) - 1] = '\0';
+  }
   return VPX_CODEC_OK;
 }
 
@@ -490,9 +523,13 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
   // for first frame
   si->layers = svc_ctx->spatial_layers;
 
-  res = parse_quantizer_values(svc_ctx, si->quantizers);
+  res = parse_quantizer_values(svc_ctx, si->quantizers, 0);
   if (res != VPX_CODEC_OK) return res;
 
+  res = parse_quantizer_values(svc_ctx, si->quantizers_keyframe, 1);
+  if (res != VPX_CODEC_OK)
+    memcpy(si->quantizer_keyframe, si->quantizer, sizeof(si->quantizer));
+
   res = parse_scale_factors(svc_ctx, si->scale_factors);
   if (res != VPX_CODEC_OK) return res;
 
@@ -500,6 +537,34 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
   res = parse_options(svc_ctx, si->options);
   if (res != VPX_CODEC_OK) return res;
 
+  // Assign target bitrate for each layer. We calculate the ratio
+  // from the resolution for now.
+  // TODO(Minghai): Optimize the mechanism of allocating bits after
+  // implementing svc two pass rate control.
+  if (si->layers > 1) {
+    int i;
+    float total = 0;
+    float alloc_ratio[VPX_SS_MAX_LAYERS] = {0};
+
+    for (i = 0; i < si->layers; ++i) {
+      int pos = i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers;
+      if (pos < VPX_SS_MAX_LAYERS && si->scaling_factor_den[pos] > 0) {
+        alloc_ratio[i] = (float)(si->scaling_factor_num[pos] * 1.0 /
+            si->scaling_factor_den[pos]);
+
+        alloc_ratio[i] *= alloc_ratio[i];
+        total += alloc_ratio[i];
+      }
+    }
+
+    for (i = 0; i < si->layers; ++i) {
+      if (total > 0) {
+        enc_cfg->ss_target_bitrate[i] = (unsigned int)
+            (enc_cfg->rc_target_bitrate * alloc_ratio[i] / total);
+      }
+    }
+  }
+
   // modify encoder configuration
   enc_cfg->ss_number_layers = si->layers;
   enc_cfg->ts_number_layers = 1;  // Temporal layers not used in this encoder.
@@ -713,8 +778,15 @@ static void set_svc_parameters(SvcContext *svc_ctx,
     svc_log(svc_ctx, SVC_LOG_ERROR, "vpx_svc_get_layer_resolution failed\n");
   }
   layer_index = layer + VPX_SS_MAX_LAYERS - si->layers;
-  svc_params.min_quantizer = si->quantizer[layer_index];
-  svc_params.max_quantizer = si->quantizer[layer_index];
+
+  if (vpx_svc_is_keyframe(svc_ctx)) {
+    svc_params.min_quantizer = si->quantizer_keyframe[layer_index];
+    svc_params.max_quantizer = si->quantizer_keyframe[layer_index];
+  } else {
+    svc_params.min_quantizer = si->quantizer[layer_index];
+    svc_params.max_quantizer = si->quantizer[layer_index];
+  }
+
   svc_params.distance_from_i_frame = si->frame_within_gop;
 
   // Use buffer i for layer i LST
@@ -814,7 +886,7 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
       switch (cx_pkt->kind) {
         case VPX_CODEC_CX_FRAME_PKT: {
           const uint32_t frame_pkt_size = (uint32_t)(cx_pkt->data.frame.sz);
-          si->bytes_in_layer[si->layer] += frame_pkt_size;
+          si->bytes_sum[si->layer] += frame_pkt_size;
           svc_log(svc_ctx, SVC_LOG_DEBUG,
                   "SVC frame: %d, layer: %d, size: %u\n",
                   si->encode_frame_count, si->layer, frame_pkt_size);
@@ -832,13 +904,23 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
           break;
         }
         case VPX_CODEC_PSNR_PKT: {
+          int i;
           svc_log(svc_ctx, SVC_LOG_DEBUG,
                   "SVC frame: %d, layer: %d, PSNR(Total/Y/U/V): "
                   "%2.3f  %2.3f  %2.3f  %2.3f \n",
                   si->encode_frame_count, si->layer,
                   cx_pkt->data.psnr.psnr[0], cx_pkt->data.psnr.psnr[1],
                   cx_pkt->data.psnr.psnr[2], cx_pkt->data.psnr.psnr[3]);
-          si->psnr_in_layer[si->layer] += cx_pkt->data.psnr.psnr[0];
+          svc_log(svc_ctx, SVC_LOG_DEBUG,
+                  "SVC frame: %d, layer: %d, SSE(Total/Y/U/V): "
+                  "%2.3f  %2.3f  %2.3f  %2.3f \n",
+                  si->encode_frame_count, si->layer,
+                  cx_pkt->data.psnr.sse[0], cx_pkt->data.psnr.sse[1],
+                  cx_pkt->data.psnr.sse[2], cx_pkt->data.psnr.sse[3]);
+          for (i = 0; i < COMPONENTS; i++) {
+            si->psnr_sum[si->layer][i] += cx_pkt->data.psnr.psnr[i];
+            si->sse_sum[si->layer][i] += cx_pkt->data.psnr.sse[i];
+          }
           break;
         }
         default: {
@@ -916,11 +998,21 @@ void vpx_svc_set_keyframe(SvcContext *svc_ctx) {
   si->frame_within_gop = 0;
 }
 
+static double calc_psnr(double d) {
+  if (d == 0) return 100;
+  return -10.0 * log(d) / log(10.0);
+}
+
 // dump accumulated statistics and reset accumulated values
 const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) {
   int number_of_frames, number_of_keyframes, encode_frame_count;
-  int i;
+  int i, j;
   uint32_t bytes_total = 0;
+  double scale[COMPONENTS];
+  double psnr[COMPONENTS];
+  double mse[COMPONENTS];
+  double y_scale;
+
   SvcInternal *const si = get_svc_internal(svc_ctx);
   if (svc_ctx == NULL || si == NULL) return NULL;
 
@@ -938,12 +1030,36 @@ const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) {
         (i == 1 || i == 3)) {
       number_of_frames -= number_of_keyframes;
     }
-    svc_log(svc_ctx, SVC_LOG_INFO, "Layer %d PSNR=[%2.3f], Bytes=[%u]\n", i,
-            (double)si->psnr_in_layer[i] / number_of_frames,
-            si->bytes_in_layer[i]);
-    bytes_total += si->bytes_in_layer[i];
-    si->psnr_in_layer[i] = 0;
-    si->bytes_in_layer[i] = 0;
+    svc_log(svc_ctx, SVC_LOG_INFO,
+            "Layer %d Average PSNR=[%2.3f, %2.3f, %2.3f, %2.3f], Bytes=[%u]\n",
+            i, (double)si->psnr_sum[i][0] / number_of_frames,
+            (double)si->psnr_sum[i][1] / number_of_frames,
+            (double)si->psnr_sum[i][2] / number_of_frames,
+            (double)si->psnr_sum[i][3] / number_of_frames, si->bytes_sum[i]);
+    // the following psnr calculation is deduced from ffmpeg.c#print_report
+    y_scale = si->width * si->height * 255.0 * 255.0 * number_of_frames;
+    scale[1] = y_scale;
+    scale[2] = scale[3] = y_scale / 4;  // U or V
+    scale[0] = y_scale * 1.5;           // total
+
+    for (j = 0; j < COMPONENTS; j++) {
+      psnr[j] = calc_psnr(si->sse_sum[i][j] / scale[j]);
+      mse[j] = si->sse_sum[i][j] * 255.0 * 255.0 / scale[j];
+    }
+    svc_log(svc_ctx, SVC_LOG_INFO,
+            "Layer %d Overall PSNR=[%2.3f, %2.3f, %2.3f, %2.3f]\n", i, psnr[0],
+            psnr[1], psnr[2], psnr[3]);
+    svc_log(svc_ctx, SVC_LOG_INFO,
+            "Layer %d Overall MSE=[%2.3f, %2.3f, %2.3f, %2.3f]\n", i, mse[0],
+            mse[1], mse[2], mse[3]);
+
+    bytes_total += si->bytes_sum[i];
+    // clear sums for next time
+    si->bytes_sum[i] = 0;
+    for (j = 0; j < COMPONENTS; ++j) {
+      si->psnr_sum[i][j] = 0;
+      si->sse_sum[i][j] = 0;
+    }
   }
 
   // only display statistics once
diff --git a/vpx/src/vpx_encoder.c b/vpx/src/vpx_encoder.c
index 23742c8e8..e69d96efb 100644
--- a/vpx/src/vpx_encoder.c
+++ b/vpx/src/vpx_encoder.c
@@ -255,8 +255,8 @@ vpx_codec_err_t  vpx_codec_encode(vpx_codec_ctx_t            *ctx,
 }
 
 
-const vpx_codec_cx_pkt_t *vpx_codec_get_cx_data(vpx_codec_ctx_t   *ctx,
-                                                vpx_codec_iter_t  *iter) {
+const vpx_codec_cx_pkt_t *vpx_codec_get_cx_data(vpx_codec_ctx_t *ctx,
+                                                vpx_codec_iter_t *iter) {
   const vpx_codec_cx_pkt_t *pkt = NULL;
 
   if (ctx) {
@@ -271,32 +271,30 @@ const vpx_codec_cx_pkt_t *vpx_codec_get_cx_data(vpx_codec_ctx_t   *ctx,
   }
 
   if (pkt && pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
-    /* If the application has specified a destination area for the
-     * compressed data, and the codec has not placed the data there,
-     * and it fits, copy it.
-     */
-    char *dst_buf = ctx->priv->enc.cx_data_dst_buf.buf;
-
-    if (dst_buf
-        && pkt->data.raw.buf != dst_buf
-        && pkt->data.raw.sz
-        + ctx->priv->enc.cx_data_pad_before
-        + ctx->priv->enc.cx_data_pad_after
-        <= ctx->priv->enc.cx_data_dst_buf.sz) {
-      vpx_codec_cx_pkt_t *modified_pkt = &ctx->priv->enc.cx_data_pkt;
-
-      memcpy(dst_buf + ctx->priv->enc.cx_data_pad_before,
-             pkt->data.raw.buf, pkt->data.raw.sz);
+    // If the application has specified a destination area for the
+    // compressed data, and the codec has not placed the data there,
+    // and it fits, copy it.
+    vpx_codec_priv_t *const priv = ctx->priv;
+    char *const dst_buf = (char *)priv->enc.cx_data_dst_buf.buf;
+
+    if (dst_buf &&
+        pkt->data.raw.buf != dst_buf &&
+        pkt->data.raw.sz + priv->enc.cx_data_pad_before +
+            priv->enc.cx_data_pad_after <= priv->enc.cx_data_dst_buf.sz) {
+      vpx_codec_cx_pkt_t *modified_pkt = &priv->enc.cx_data_pkt;
+
+      memcpy(dst_buf + priv->enc.cx_data_pad_before, pkt->data.raw.buf,
+             pkt->data.raw.sz);
       *modified_pkt = *pkt;
       modified_pkt->data.raw.buf = dst_buf;
-      modified_pkt->data.raw.sz += ctx->priv->enc.cx_data_pad_before
-                                   + ctx->priv->enc.cx_data_pad_after;
+      modified_pkt->data.raw.sz += priv->enc.cx_data_pad_before +
+                                       priv->enc.cx_data_pad_after;
       pkt = modified_pkt;
     }
 
     if (dst_buf == pkt->data.raw.buf) {
-      ctx->priv->enc.cx_data_dst_buf.buf = dst_buf + pkt->data.raw.sz;
-      ctx->priv->enc.cx_data_dst_buf.sz -= pkt->data.raw.sz;
+      priv->enc.cx_data_dst_buf.buf = dst_buf + pkt->data.raw.sz;
+      priv->enc.cx_data_dst_buf.sz -= pkt->data.raw.sz;
     }
   }
 
diff --git a/vpx/src/vpx_psnr.c b/vpx/src/vpx_psnr.c
new file mode 100644
index 000000000..05843acb6
--- /dev/null
+++ b/vpx/src/vpx_psnr.c
@@ -0,0 +1,24 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+
+#include "vpx/internal/vpx_psnr.h"
+
+#define MAX_PSNR 100.0
+
+double vpx_sse_to_psnr(double samples, double peak, double sse) {
+  if (sse > 0.0) {
+    const double psnr = 10.0 * log10(samples * peak * peak / sse);
+    return psnr > MAX_PSNR ? MAX_PSNR : psnr;
+  } else {
+    return MAX_PSNR;
+  }
+}
diff --git a/vpx/svc_context.h b/vpx/svc_context.h
index f675fb684..98474ca91 100644
--- a/vpx/svc_context.h
+++ b/vpx/svc_context.h
@@ -64,7 +64,8 @@ vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options);
  * e.g., "60,53,39,33,27"
  */
 vpx_codec_err_t vpx_svc_set_quantizers(SvcContext *svc_ctx,
-                                       const char *quantizer_values);
+                                       const char *quantizer_values,
+                                       const int is_for_keyframe);
 
 /**
  * Set SVC scale factors
diff --git a/vpx/vpx_codec.mk b/vpx/vpx_codec.mk
index 111c87e53..98d1d567c 100644
--- a/vpx/vpx_codec.mk
+++ b/vpx/vpx_codec.mk
@@ -34,8 +34,10 @@ API_SRCS-yes                += vpx_decoder.h
 API_SRCS-yes                += src/vpx_encoder.c
 API_SRCS-yes                += vpx_encoder.h
 API_SRCS-yes                += internal/vpx_codec_internal.h
+API_SRCS-yes                += internal/vpx_psnr.h
 API_SRCS-yes                += src/vpx_codec.c
 API_SRCS-yes                += src/vpx_image.c
+API_SRCS-yes                += src/vpx_psnr.c
 API_SRCS-yes                += vpx_codec.h
 API_SRCS-yes                += vpx_codec.mk
 API_SRCS-yes                += vpx_frame_buffer.h
diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h
index 1d9f0c9b7..851ff1ae8 100644
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -610,6 +610,13 @@ extern "C" {
      */
     unsigned int           ss_number_layers;
 
+    /*!\brief Target bitrate for each spatial layer.
+     *
+     * These values specify the target coding bitrate to be used for each
+     * spatial layer.
+     */
+    unsigned int           ss_target_bitrate[VPX_SS_MAX_LAYERS];
+
     /*!\brief Number of temporal coding layers.
      *
      * This value specifies the number of temporal layers to be used.