diff options
Diffstat (limited to 'vpx')
-rw-r--r-- | vpx/internal/vpx_psnr.h | 34 | ||||
-rw-r--r-- | vpx/src/svc_encodeframe.c | 156 | ||||
-rw-r--r-- | vpx/src/vpx_encoder.c | 42 | ||||
-rw-r--r-- | vpx/src/vpx_psnr.c | 24 | ||||
-rw-r--r-- | vpx/svc_context.h | 3 | ||||
-rw-r--r-- | vpx/vpx_codec.mk | 2 | ||||
-rw-r--r-- | vpx/vpx_encoder.h | 7 |
7 files changed, 225 insertions, 43 deletions
diff --git a/vpx/internal/vpx_psnr.h b/vpx/internal/vpx_psnr.h new file mode 100644 index 000000000..07d81bb8d --- /dev/null +++ b/vpx/internal/vpx_psnr.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_INTERNAL_VPX_PSNR_H_ +#define VPX_INTERNAL_VPX_PSNR_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +// TODO(dkovalev) change vpx_sse_to_psnr signature: double -> int64_t + +/*!\brief Converts SSE to PSNR + * + * Converts sum of squared errors (SSE) to peak signal-to-noise ratio (PSNR). + * + * \param[in] samples Number of samples + * \param[in] peak Max sample value + * \param[in] sse Sum of squared errors + */ +double vpx_sse_to_psnr(double samples, double peak, double sse); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_INTERNAL_VPX_PSNR_H_ diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c index adce47637..c7837244f 100644 --- a/vpx/src/svc_encodeframe.c +++ b/vpx/src/svc_encodeframe.c @@ -13,6 +13,7 @@ * VP9 SVC encoding support via libvpx */ +#include <math.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> @@ -40,6 +41,7 @@ _CRTIMP char *__cdecl strtok_s(char *str, const char *delim, char **context); #define SUPERFRAME_SLOTS (8) #define SUPERFRAME_BUFFER_SIZE (SUPERFRAME_SLOTS * sizeof(uint32_t) + 2) #define OPTION_BUFFER_SIZE 256 +#define COMPONENTS 4 // psnr & sse statistics maintained for total, y, u, v static const char *DEFAULT_QUANTIZER_VALUES = "60,53,39,33,27"; static const char *DEFAULT_SCALE_FACTORS = "4/16,5/16,7/16,11/16,16/16"; @@ -47,16 +49,20 @@ static const char *DEFAULT_SCALE_FACTORS = 
"4/16,5/16,7/16,11/16,16/16"; typedef struct SvcInternal { char options[OPTION_BUFFER_SIZE]; // set by vpx_svc_set_options char quantizers[OPTION_BUFFER_SIZE]; // set by vpx_svc_set_quantizers + char quantizers_keyframe[OPTION_BUFFER_SIZE]; // set by + // vpx_svc_set_quantizers char scale_factors[OPTION_BUFFER_SIZE]; // set by vpx_svc_set_scale_factors // values extracted from option, quantizers int scaling_factor_num[VPX_SS_MAX_LAYERS]; int scaling_factor_den[VPX_SS_MAX_LAYERS]; + int quantizer_keyframe[VPX_SS_MAX_LAYERS]; int quantizer[VPX_SS_MAX_LAYERS]; // accumulated statistics - double psnr_in_layer[VPX_SS_MAX_LAYERS]; - uint32_t bytes_in_layer[VPX_SS_MAX_LAYERS]; + double psnr_sum[VPX_SS_MAX_LAYERS][COMPONENTS]; // total/Y/U/V + uint64_t sse_sum[VPX_SS_MAX_LAYERS][COMPONENTS]; + uint32_t bytes_sum[VPX_SS_MAX_LAYERS]; // codec encoding values int width; // width of highest layer @@ -268,7 +274,8 @@ static vpx_codec_err_t set_option_encoding_mode(SvcContext *svc_ctx, } static vpx_codec_err_t parse_quantizer_values(SvcContext *svc_ctx, - const char *quantizer_values) { + const char *quantizer_values, + const int is_keyframe) { char *input_string; char *token; const char *delim = ","; @@ -279,6 +286,11 @@ static vpx_codec_err_t parse_quantizer_values(SvcContext *svc_ctx, SvcInternal *const si = get_svc_internal(svc_ctx); if (quantizer_values == NULL || strlen(quantizer_values) == 0) { + if (is_keyframe) { + // If there are no settings for key frame, we will apply settings from + // non key frame. So just simply return here. 
+ return VPX_CODEC_INVALID_PARAM; + } input_string = strdup(DEFAULT_QUANTIZER_VALUES); } else { input_string = strdup(quantizer_values); @@ -299,7 +311,12 @@ static vpx_codec_err_t parse_quantizer_values(SvcContext *svc_ctx, } else { q = 0; } - si->quantizer[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] = q; + if (is_keyframe) { + si->quantizer_keyframe[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] + = q; + } else { + si->quantizer[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] = q; + } } if (res == VPX_CODEC_OK && found != svc_ctx->spatial_layers) { svc_log(svc_ctx, SVC_LOG_ERROR, @@ -384,6 +401,7 @@ static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) { char *option_name; char *option_value; char *input_ptr; + int is_keyframe_qaunt_set = 0; vpx_codec_err_t res = VPX_CODEC_OK; if (options == NULL) return VPX_CODEC_OK; @@ -409,8 +427,17 @@ static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) { res = parse_scale_factors(svc_ctx, option_value); if (res != VPX_CODEC_OK) break; } else if (strcmp("quantizers", option_name) == 0) { - res = parse_quantizer_values(svc_ctx, option_value); + res = parse_quantizer_values(svc_ctx, option_value, 0); + if (res != VPX_CODEC_OK) break; + if (!is_keyframe_qaunt_set) { + SvcInternal *const si = get_svc_internal(svc_ctx); + memcpy(get_svc_internal(svc_ctx)->quantizer_keyframe, si->quantizer, + sizeof(si->quantizer)); + } + } else if (strcmp("quantizers-keyframe", option_name) == 0) { + res = parse_quantizer_values(svc_ctx, option_value, 1); if (res != VPX_CODEC_OK) break; + is_keyframe_qaunt_set = 1; } else { svc_log(svc_ctx, SVC_LOG_ERROR, "invalid option: %s\n", option_name); res = VPX_CODEC_INVALID_PARAM; @@ -433,13 +460,19 @@ vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options) { } vpx_codec_err_t vpx_svc_set_quantizers(SvcContext *svc_ctx, - const char *quantizers) { + const char *quantizers, + const int is_for_keyframe) { SvcInternal *const si 
= get_svc_internal(svc_ctx); if (svc_ctx == NULL || quantizers == NULL || si == NULL) { return VPX_CODEC_INVALID_PARAM; } - strncpy(si->quantizers, quantizers, sizeof(si->quantizers)); - si->quantizers[sizeof(si->quantizers) - 1] = '\0'; + if (is_for_keyframe) { + strncpy(si->quantizers_keyframe, quantizers, sizeof(si->quantizers)); + si->quantizers_keyframe[sizeof(si->quantizers_keyframe) - 1] = '\0'; + } else { + strncpy(si->quantizers, quantizers, sizeof(si->quantizers)); + si->quantizers[sizeof(si->quantizers) - 1] = '\0'; + } return VPX_CODEC_OK; } @@ -490,9 +523,13 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, // for first frame si->layers = svc_ctx->spatial_layers; - res = parse_quantizer_values(svc_ctx, si->quantizers); + res = parse_quantizer_values(svc_ctx, si->quantizers, 0); if (res != VPX_CODEC_OK) return res; + res = parse_quantizer_values(svc_ctx, si->quantizers_keyframe, 1); + if (res != VPX_CODEC_OK) + memcpy(si->quantizer_keyframe, si->quantizer, sizeof(si->quantizer)); + res = parse_scale_factors(svc_ctx, si->scale_factors); if (res != VPX_CODEC_OK) return res; @@ -500,6 +537,34 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, res = parse_options(svc_ctx, si->options); if (res != VPX_CODEC_OK) return res; + // Assign target bitrate for each layer. We calculate the ratio + // from the resolution for now. + // TODO(Minghai): Optimize the mechanism of allocating bits after + // implementing svc two pass rate control. 
+ if (si->layers > 1) { + int i; + float total = 0; + float alloc_ratio[VPX_SS_MAX_LAYERS] = {0}; + + for (i = 0; i < si->layers; ++i) { + int pos = i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers; + if (pos < VPX_SS_MAX_LAYERS && si->scaling_factor_den[pos] > 0) { + alloc_ratio[i] = (float)(si->scaling_factor_num[pos] * 1.0 / + si->scaling_factor_den[pos]); + + alloc_ratio[i] *= alloc_ratio[i]; + total += alloc_ratio[i]; + } + } + + for (i = 0; i < si->layers; ++i) { + if (total > 0) { + enc_cfg->ss_target_bitrate[i] = (unsigned int) + (enc_cfg->rc_target_bitrate * alloc_ratio[i] / total); + } + } + } + // modify encoder configuration enc_cfg->ss_number_layers = si->layers; enc_cfg->ts_number_layers = 1; // Temporal layers not used in this encoder. @@ -713,8 +778,15 @@ static void set_svc_parameters(SvcContext *svc_ctx, svc_log(svc_ctx, SVC_LOG_ERROR, "vpx_svc_get_layer_resolution failed\n"); } layer_index = layer + VPX_SS_MAX_LAYERS - si->layers; - svc_params.min_quantizer = si->quantizer[layer_index]; - svc_params.max_quantizer = si->quantizer[layer_index]; + + if (vpx_svc_is_keyframe(svc_ctx)) { + svc_params.min_quantizer = si->quantizer_keyframe[layer_index]; + svc_params.max_quantizer = si->quantizer_keyframe[layer_index]; + } else { + svc_params.min_quantizer = si->quantizer[layer_index]; + svc_params.max_quantizer = si->quantizer[layer_index]; + } + svc_params.distance_from_i_frame = si->frame_within_gop; // Use buffer i for layer i LST @@ -814,7 +886,7 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, switch (cx_pkt->kind) { case VPX_CODEC_CX_FRAME_PKT: { const uint32_t frame_pkt_size = (uint32_t)(cx_pkt->data.frame.sz); - si->bytes_in_layer[si->layer] += frame_pkt_size; + si->bytes_sum[si->layer] += frame_pkt_size; svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, layer: %d, size: %u\n", si->encode_frame_count, si->layer, frame_pkt_size); @@ -832,13 +904,23 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, 
vpx_codec_ctx_t *codec_ctx, break; } case VPX_CODEC_PSNR_PKT: { + int i; svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, layer: %d, PSNR(Total/Y/U/V): " "%2.3f %2.3f %2.3f %2.3f \n", si->encode_frame_count, si->layer, cx_pkt->data.psnr.psnr[0], cx_pkt->data.psnr.psnr[1], cx_pkt->data.psnr.psnr[2], cx_pkt->data.psnr.psnr[3]); - si->psnr_in_layer[si->layer] += cx_pkt->data.psnr.psnr[0]; + svc_log(svc_ctx, SVC_LOG_DEBUG, + "SVC frame: %d, layer: %d, SSE(Total/Y/U/V): " + "%2.3f %2.3f %2.3f %2.3f \n", + si->encode_frame_count, si->layer, + cx_pkt->data.psnr.sse[0], cx_pkt->data.psnr.sse[1], + cx_pkt->data.psnr.sse[2], cx_pkt->data.psnr.sse[3]); + for (i = 0; i < COMPONENTS; i++) { + si->psnr_sum[si->layer][i] += cx_pkt->data.psnr.psnr[i]; + si->sse_sum[si->layer][i] += cx_pkt->data.psnr.sse[i]; + } break; } default: { @@ -916,11 +998,21 @@ void vpx_svc_set_keyframe(SvcContext *svc_ctx) { si->frame_within_gop = 0; } +static double calc_psnr(double d) { + if (d == 0) return 100; + return -10.0 * log(d) / log(10.0); +} + // dump accumulated statistics and reset accumulated values const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) { int number_of_frames, number_of_keyframes, encode_frame_count; - int i; + int i, j; uint32_t bytes_total = 0; + double scale[COMPONENTS]; + double psnr[COMPONENTS]; + double mse[COMPONENTS]; + double y_scale; + SvcInternal *const si = get_svc_internal(svc_ctx); if (svc_ctx == NULL || si == NULL) return NULL; @@ -938,12 +1030,36 @@ const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) { (i == 1 || i == 3)) { number_of_frames -= number_of_keyframes; } - svc_log(svc_ctx, SVC_LOG_INFO, "Layer %d PSNR=[%2.3f], Bytes=[%u]\n", i, - (double)si->psnr_in_layer[i] / number_of_frames, - si->bytes_in_layer[i]); - bytes_total += si->bytes_in_layer[i]; - si->psnr_in_layer[i] = 0; - si->bytes_in_layer[i] = 0; + svc_log(svc_ctx, SVC_LOG_INFO, + "Layer %d Average PSNR=[%2.3f, %2.3f, %2.3f, %2.3f], Bytes=[%u]\n", + i, (double)si->psnr_sum[i][0] / 
number_of_frames, + (double)si->psnr_sum[i][1] / number_of_frames, + (double)si->psnr_sum[i][2] / number_of_frames, + (double)si->psnr_sum[i][3] / number_of_frames, si->bytes_sum[i]); + // the following psnr calculation is deduced from ffmpeg.c#print_report + y_scale = si->width * si->height * 255.0 * 255.0 * number_of_frames; + scale[1] = y_scale; + scale[2] = scale[3] = y_scale / 4; // U or V + scale[0] = y_scale * 1.5; // total + + for (j = 0; j < COMPONENTS; j++) { + psnr[j] = calc_psnr(si->sse_sum[i][j] / scale[j]); + mse[j] = si->sse_sum[i][j] * 255.0 * 255.0 / scale[j]; + } + svc_log(svc_ctx, SVC_LOG_INFO, + "Layer %d Overall PSNR=[%2.3f, %2.3f, %2.3f, %2.3f]\n", i, psnr[0], + psnr[1], psnr[2], psnr[3]); + svc_log(svc_ctx, SVC_LOG_INFO, + "Layer %d Overall MSE=[%2.3f, %2.3f, %2.3f, %2.3f]\n", i, mse[0], + mse[1], mse[2], mse[3]); + + bytes_total += si->bytes_sum[i]; + // clear sums for next time + si->bytes_sum[i] = 0; + for (j = 0; j < COMPONENTS; ++j) { + si->psnr_sum[i][j] = 0; + si->sse_sum[i][j] = 0; + } } // only display statistics once diff --git a/vpx/src/vpx_encoder.c b/vpx/src/vpx_encoder.c index 23742c8e8..e69d96efb 100644 --- a/vpx/src/vpx_encoder.c +++ b/vpx/src/vpx_encoder.c @@ -255,8 +255,8 @@ vpx_codec_err_t vpx_codec_encode(vpx_codec_ctx_t *ctx, } -const vpx_codec_cx_pkt_t *vpx_codec_get_cx_data(vpx_codec_ctx_t *ctx, - vpx_codec_iter_t *iter) { +const vpx_codec_cx_pkt_t *vpx_codec_get_cx_data(vpx_codec_ctx_t *ctx, + vpx_codec_iter_t *iter) { const vpx_codec_cx_pkt_t *pkt = NULL; if (ctx) { @@ -271,32 +271,30 @@ const vpx_codec_cx_pkt_t *vpx_codec_get_cx_data(vpx_codec_ctx_t *ctx, } if (pkt && pkt->kind == VPX_CODEC_CX_FRAME_PKT) { - /* If the application has specified a destination area for the - * compressed data, and the codec has not placed the data there, - * and it fits, copy it. 
- */ - char *dst_buf = ctx->priv->enc.cx_data_dst_buf.buf; - - if (dst_buf - && pkt->data.raw.buf != dst_buf - && pkt->data.raw.sz - + ctx->priv->enc.cx_data_pad_before - + ctx->priv->enc.cx_data_pad_after - <= ctx->priv->enc.cx_data_dst_buf.sz) { - vpx_codec_cx_pkt_t *modified_pkt = &ctx->priv->enc.cx_data_pkt; - - memcpy(dst_buf + ctx->priv->enc.cx_data_pad_before, - pkt->data.raw.buf, pkt->data.raw.sz); + // If the application has specified a destination area for the + // compressed data, and the codec has not placed the data there, + // and it fits, copy it. + vpx_codec_priv_t *const priv = ctx->priv; + char *const dst_buf = (char *)priv->enc.cx_data_dst_buf.buf; + + if (dst_buf && + pkt->data.raw.buf != dst_buf && + pkt->data.raw.sz + priv->enc.cx_data_pad_before + + priv->enc.cx_data_pad_after <= priv->enc.cx_data_dst_buf.sz) { + vpx_codec_cx_pkt_t *modified_pkt = &priv->enc.cx_data_pkt; + + memcpy(dst_buf + priv->enc.cx_data_pad_before, pkt->data.raw.buf, + pkt->data.raw.sz); *modified_pkt = *pkt; modified_pkt->data.raw.buf = dst_buf; - modified_pkt->data.raw.sz += ctx->priv->enc.cx_data_pad_before - + ctx->priv->enc.cx_data_pad_after; + modified_pkt->data.raw.sz += priv->enc.cx_data_pad_before + + priv->enc.cx_data_pad_after; pkt = modified_pkt; } if (dst_buf == pkt->data.raw.buf) { - ctx->priv->enc.cx_data_dst_buf.buf = dst_buf + pkt->data.raw.sz; - ctx->priv->enc.cx_data_dst_buf.sz -= pkt->data.raw.sz; + priv->enc.cx_data_dst_buf.buf = dst_buf + pkt->data.raw.sz; + priv->enc.cx_data_dst_buf.sz -= pkt->data.raw.sz; } } diff --git a/vpx/src/vpx_psnr.c b/vpx/src/vpx_psnr.c new file mode 100644 index 000000000..05843acb6 --- /dev/null +++ b/vpx/src/vpx_psnr.c @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <math.h> + +#include "vpx/internal/vpx_psnr.h" + +#define MAX_PSNR 100.0 + +double vpx_sse_to_psnr(double samples, double peak, double sse) { + if (sse > 0.0) { + const double psnr = 10.0 * log10(samples * peak * peak / sse); + return psnr > MAX_PSNR ? MAX_PSNR : psnr; + } else { + return MAX_PSNR; + } +} diff --git a/vpx/svc_context.h b/vpx/svc_context.h index f675fb684..98474ca91 100644 --- a/vpx/svc_context.h +++ b/vpx/svc_context.h @@ -64,7 +64,8 @@ vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options); * e.g., "60,53,39,33,27" */ vpx_codec_err_t vpx_svc_set_quantizers(SvcContext *svc_ctx, - const char *quantizer_values); + const char *quantizer_values, + const int is_for_keyframe); /** * Set SVC scale factors diff --git a/vpx/vpx_codec.mk b/vpx/vpx_codec.mk index 111c87e53..98d1d567c 100644 --- a/vpx/vpx_codec.mk +++ b/vpx/vpx_codec.mk @@ -34,8 +34,10 @@ API_SRCS-yes += vpx_decoder.h API_SRCS-yes += src/vpx_encoder.c API_SRCS-yes += vpx_encoder.h API_SRCS-yes += internal/vpx_codec_internal.h +API_SRCS-yes += internal/vpx_psnr.h API_SRCS-yes += src/vpx_codec.c API_SRCS-yes += src/vpx_image.c +API_SRCS-yes += src/vpx_psnr.c API_SRCS-yes += vpx_codec.h API_SRCS-yes += vpx_codec.mk API_SRCS-yes += vpx_frame_buffer.h diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h index 1d9f0c9b7..851ff1ae8 100644 --- a/vpx/vpx_encoder.h +++ b/vpx/vpx_encoder.h @@ -610,6 +610,13 @@ extern "C" { */ unsigned int ss_number_layers; + /*!\brief Target bitrate for each spatial layer. + * + * These values specify the target coding bitrate to be used for each + * spatial layer. + */ + unsigned int ss_target_bitrate[VPX_SS_MAX_LAYERS]; + /*!\brief Number of temporal coding layers. * * This value specifies the number of temporal layers to be used. |