diff options
-rw-r--r-- | examples.mk | 14 | ||||
-rw-r--r-- | examples/vp8_multi_resolution_encoder.c | 8 | ||||
-rw-r--r-- | examples/vp9_spatial_svc_encoder.c | 32 | ||||
-rw-r--r-- | examples/vpx_temporal_svc_encoder.c | 29 | ||||
-rwxr-xr-x | test/tools_common.sh | 2 | ||||
-rwxr-xr-x | test/vpx_temporal_svc_encoder.sh | 7 | ||||
-rw-r--r-- | tools_common.c | 90 | ||||
-rw-r--r-- | tools_common.h | 8 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 101 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 12 | ||||
-rw-r--r-- | vpxdec.c | 8 | ||||
-rw-r--r-- | vpxenc.c | 85 |
12 files changed, 262 insertions, 134 deletions
diff --git a/examples.mk b/examples.mk index a1d4eb68c..e0da4caa2 100644 --- a/examples.mk +++ b/examples.mk @@ -72,6 +72,7 @@ vpxdec.SRCS += vpx_ports/vpx_timer.h vpxdec.SRCS += vpx/vpx_integer.h vpxdec.SRCS += args.c args.h vpxdec.SRCS += ivfdec.c ivfdec.h +vpxdec.SRCS += y4minput.c y4minput.h vpxdec.SRCS += tools_common.c tools_common.h vpxdec.SRCS += y4menc.c y4menc.h ifeq ($(CONFIG_LIBYUV),yes) @@ -113,6 +114,7 @@ vpxenc.DESCRIPTION = Full featured encoder EXAMPLES-$(CONFIG_VP9_ENCODER) += vp9_spatial_svc_encoder.c vp9_spatial_svc_encoder.SRCS += args.c args.h vp9_spatial_svc_encoder.SRCS += ivfenc.c ivfenc.h +vp9_spatial_svc_encoder.SRCS += y4minput.c y4minput.h vp9_spatial_svc_encoder.SRCS += tools_common.c tools_common.h vp9_spatial_svc_encoder.SRCS += video_common.h vp9_spatial_svc_encoder.SRCS += video_writer.h video_writer.c @@ -129,6 +131,7 @@ endif EXAMPLES-$(CONFIG_ENCODERS) += vpx_temporal_svc_encoder.c vpx_temporal_svc_encoder.SRCS += ivfenc.c ivfenc.h +vpx_temporal_svc_encoder.SRCS += y4minput.c y4minput.h vpx_temporal_svc_encoder.SRCS += tools_common.c tools_common.h vpx_temporal_svc_encoder.SRCS += video_common.h vpx_temporal_svc_encoder.SRCS += video_writer.h video_writer.c @@ -138,6 +141,7 @@ vpx_temporal_svc_encoder.DESCRIPTION = Temporal SVC Encoder EXAMPLES-$(CONFIG_DECODERS) += simple_decoder.c simple_decoder.GUID = D3BBF1E9-2427-450D-BBFF-B2843C1D44CC simple_decoder.SRCS += ivfdec.h ivfdec.c +simple_decoder.SRCS += y4minput.c y4minput.h simple_decoder.SRCS += tools_common.h tools_common.c simple_decoder.SRCS += video_common.h simple_decoder.SRCS += video_reader.h video_reader.c @@ -147,6 +151,7 @@ simple_decoder.SRCS += vpx_ports/msvc.h simple_decoder.DESCRIPTION = Simplified decoder loop EXAMPLES-$(CONFIG_DECODERS) += postproc.c postproc.SRCS += ivfdec.h ivfdec.c +postproc.SRCS += y4minput.c y4minput.h postproc.SRCS += tools_common.h tools_common.c postproc.SRCS += video_common.h postproc.SRCS += video_reader.h video_reader.c @@ -158,6 +163,7 @@ postproc.DESCRIPTION = Decoder postprocessor control EXAMPLES-$(CONFIG_DECODERS) += decode_to_md5.c decode_to_md5.SRCS += md5_utils.h md5_utils.c decode_to_md5.SRCS += ivfdec.h ivfdec.c +decode_to_md5.SRCS += y4minput.c y4minput.h decode_to_md5.SRCS += tools_common.h tools_common.c decode_to_md5.SRCS += video_common.h decode_to_md5.SRCS += video_reader.h video_reader.c @@ -168,6 +174,7 @@ decode_to_md5.GUID = 59120B9B-2735-4BFE-B022-146CA340FE42 decode_to_md5.DESCRIPTION = Frame by frame MD5 checksum EXAMPLES-$(CONFIG_ENCODERS) += simple_encoder.c simple_encoder.SRCS += ivfenc.h ivfenc.c +simple_encoder.SRCS += y4minput.c y4minput.h simple_encoder.SRCS += tools_common.h tools_common.c simple_encoder.SRCS += video_common.h simple_encoder.SRCS += video_writer.h video_writer.c @@ -176,6 +183,7 @@ simple_encoder.GUID = 4607D299-8A71-4D2C-9B1D-071899B6FBFD simple_encoder.DESCRIPTION = Simplified encoder loop EXAMPLES-$(CONFIG_VP9_ENCODER) += vp9_lossless_encoder.c vp9_lossless_encoder.SRCS += ivfenc.h ivfenc.c +vp9_lossless_encoder.SRCS += y4minput.c y4minput.h vp9_lossless_encoder.SRCS += tools_common.h tools_common.c vp9_lossless_encoder.SRCS += video_common.h vp9_lossless_encoder.SRCS += video_writer.h video_writer.c @@ -184,6 +192,7 @@ vp9_lossless_encoder.GUID = B63C7C88-5348-46DC-A5A6-CC151EF93366 vp9_lossless_encoder.DESCRIPTION = Simplified lossless VP9 encoder EXAMPLES-$(CONFIG_ENCODERS) += twopass_encoder.c twopass_encoder.SRCS += ivfenc.h ivfenc.c +twopass_encoder.SRCS += y4minput.c y4minput.h twopass_encoder.SRCS += tools_common.h tools_common.c twopass_encoder.SRCS += video_common.h twopass_encoder.SRCS += video_writer.h video_writer.c @@ -192,6 +201,7 @@ twopass_encoder.GUID = 73494FA6-4AF9-4763-8FBB-265C92402FD8 twopass_encoder.DESCRIPTION = Two-pass encoder loop EXAMPLES-$(CONFIG_DECODERS) += decode_with_drops.c decode_with_drops.SRCS += ivfdec.h ivfdec.c +decode_with_drops.SRCS += y4minput.c y4minput.h decode_with_drops.SRCS += tools_common.h tools_common.c decode_with_drops.SRCS += video_common.h decode_with_drops.SRCS += video_reader.h video_reader.c @@ -202,6 +212,7 @@ decode_with_drops.GUID = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26 decode_with_drops.DESCRIPTION = Drops frames while decoding EXAMPLES-$(CONFIG_ENCODERS) += set_maps.c set_maps.SRCS += ivfenc.h ivfenc.c +set_maps.SRCS += y4minput.c y4minput.h set_maps.SRCS += tools_common.h tools_common.c set_maps.SRCS += video_common.h set_maps.SRCS += video_writer.h video_writer.c @@ -210,6 +221,7 @@ set_maps.GUID = ECB2D24D-98B8-4015-A465-A4AF3DCC145F set_maps.DESCRIPTION = Set active and ROI maps EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8cx_set_ref.c vp8cx_set_ref.SRCS += ivfenc.h ivfenc.c +vp8cx_set_ref.SRCS += y4minput.c y4minput.h vp8cx_set_ref.SRCS += tools_common.h tools_common.c vp8cx_set_ref.SRCS += video_common.h vp8cx_set_ref.SRCS += video_writer.h video_writer.c @@ -221,6 +233,7 @@ ifeq ($(CONFIG_VP9_ENCODER),yes) ifeq ($(CONFIG_DECODERS),yes) EXAMPLES-yes += vp9cx_set_ref.c vp9cx_set_ref.SRCS += ivfenc.h ivfenc.c +vp9cx_set_ref.SRCS += y4minput.c y4minput.h vp9cx_set_ref.SRCS += tools_common.h tools_common.c vp9cx_set_ref.SRCS += video_common.h vp9cx_set_ref.SRCS += video_writer.h video_writer.c @@ -233,6 +246,7 @@ ifeq ($(CONFIG_MULTI_RES_ENCODING),yes) ifeq ($(CONFIG_LIBYUV),yes) EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8_multi_resolution_encoder.c vp8_multi_resolution_encoder.SRCS += ivfenc.h ivfenc.c +vp8_multi_resolution_encoder.SRCS += y4minput.c y4minput.h vp8_multi_resolution_encoder.SRCS += tools_common.h tools_common.c vp8_multi_resolution_encoder.SRCS += video_writer.h video_writer.c vp8_multi_resolution_encoder.SRCS += vpx_ports/msvc.h diff --git a/examples/vp8_multi_resolution_encoder.c b/examples/vp8_multi_resolution_encoder.c index b14b1ff39..e72f8a019 100644 --- a/examples/vp8_multi_resolution_encoder.c +++ b/examples/vp8_multi_resolution_encoder.c @@ -61,7 +61,7 @@ void usage_exit(void) { exit(EXIT_FAILURE); } int (*read_frame_p)(FILE *f, vpx_image_t *img); -static int read_frame(FILE *f, vpx_image_t *img) { +static int mulres_read_frame(FILE *f, vpx_image_t *img) { size_t nbytes, to_read; int res = 1; @@ -75,7 +75,7 @@ static int read_frame(FILE *f, vpx_image_t *img) { return res; } -static int read_frame_by_row(FILE *f, vpx_image_t *img) { +static int mulres_read_frame_by_row(FILE *f, vpx_image_t *img) { size_t nbytes, to_read; int res = 1; int plane; @@ -471,9 +471,9 @@ int main(int argc, char **argv) { die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h); if (raw[0].stride[VPX_PLANE_Y] == (int)raw[0].d_w) - read_frame_p = read_frame; + read_frame_p = mulres_read_frame; else - read_frame_p = read_frame_by_row; + read_frame_p = mulres_read_frame_by_row; for (i = 0; i < NUM_ENCODERS; i++) if (outfile[i]) write_ivf_file_header(outfile[i], &cfg[i], 0); diff --git a/examples/vp9_spatial_svc_encoder.c b/examples/vp9_spatial_svc_encoder.c index f8093e1bf..92b310684 100644 --- a/examples/vp9_spatial_svc_encoder.c +++ b/examples/vp9_spatial_svc_encoder.c @@ -30,6 +30,8 @@ #include "vpx/vpx_encoder.h" #include "../vpxstats.h" #include "vp9/encoder/vp9_encoder.h" +#include "./y4minput.h" + #define OUTPUT_RC_STATS 1 static const arg_def_t skip_frames_arg = @@ -161,7 +163,6 @@ static const int32_t default_speed = -1; // -1 means use library default. static const uint32_t default_threads = 0; // zero means use library default. typedef struct { - const char *input_filename; const char *output_filename; uint32_t frames_to_code; uint32_t frames_to_skip; @@ -393,10 +394,16 @@ static void parse_command_line(int argc, const char **argv_, if (argv[0] == NULL || argv[1] == 0) { usage_exit(); } - app_input->input_filename = argv[0]; + app_input->input_ctx.filename = argv[0]; app_input->output_filename = argv[1]; free(argv); + open_input_file(&app_input->input_ctx); + if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) { + enc_cfg->g_w = app_input->input_ctx.width; + enc_cfg->g_h = app_input->input_ctx.height; + } + if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 || enc_cfg->g_h % 2) die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h); @@ -752,7 +759,6 @@ int main(int argc, const char **argv) { vpx_codec_err_t res; int pts = 0; /* PTS starts at 0 */ int frame_duration = 1; /* 1 timebase tick per frame */ - FILE *infile = NULL; int end_of_stream = 0; int frames_received = 0; #if OUTPUT_RC_STATS @@ -773,6 +779,13 @@ int main(int argc, const char **argv) { memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t)); memset(&rc, 0, sizeof(struct RateControlStats)); exec_name = argv[0]; + + /* Setup default input stream settings */ + app_input.input_ctx.framerate.numerator = 30; + app_input.input_ctx.framerate.denominator = 1; + app_input.input_ctx.only_i420 = 1; + app_input.input_ctx.bit_depth = 0; + parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg); // Allocate image buffer @@ -789,9 +802,6 @@ int main(int argc, const char **argv) { } #endif // CONFIG_VP9_HIGHBITDEPTH - if (!(infile = fopen(app_input.input_filename, "rb"))) - die("Failed to open %s for reading\n", app_input.input_filename); - // Initialize codec if (vpx_svc_init(&svc_ctx, &codec, vpx_codec_vp9_cx(), &enc_cfg) != VPX_CODEC_OK) @@ -835,7 +845,8 @@ int main(int argc, const char **argv) { #endif // skip initial frames - for (i = 0; i < app_input.frames_to_skip; ++i) vpx_img_read(&raw, infile); + for (i = 0; i < app_input.frames_to_skip; ++i) + read_frame(&app_input.input_ctx, &raw); if (svc_ctx.speed != -1) vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed); @@ -875,7 +886,8 @@ int main(int argc, const char **argv) { // layers, with SL0 only has TL0, and SL1 has both TL0 and TL1. This example // uses the extended API. int example_pattern = 0; - if (frame_cnt >= app_input.frames_to_code || !vpx_img_read(&raw, infile)) { + if (frame_cnt >= app_input.frames_to_code || + !read_frame(&app_input.input_ctx, &raw)) { // We need one extra vpx_svc_encode call at end of stream to flush // encoder and get remaining data end_of_stream = 1; @@ -1091,7 +1103,9 @@ int main(int argc, const char **argv) { } printf("Processed %d frames\n", frame_cnt); - fclose(infile); + + close_input_file(&app_input.input_ctx); + #if OUTPUT_RC_STATS if (svc_ctx.output_rc_stat) { printout_rate_control_summary(&rc, &enc_cfg, frame_cnt); diff --git a/examples/vpx_temporal_svc_encoder.c b/examples/vpx_temporal_svc_encoder.c index f49ef7b1d..aa2213a5b 100644 --- a/examples/vpx_temporal_svc_encoder.c +++ b/examples/vpx_temporal_svc_encoder.c @@ -19,6 +19,7 @@ #include <string.h> #include "./vpx_config.h" +#include "./y4minput.h" #include "../vpx_ports/vpx_timer.h" #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" @@ -594,7 +595,7 @@ int main(int argc, char **argv) { #endif vpx_svc_layer_id_t layer_id; const VpxInterface *encoder = NULL; - FILE *infile = NULL; + struct VpxInputContext input_ctx; struct RateControlMetrics rc; int64_t cx_time = 0; const int min_args_base = 13; @@ -611,6 +612,13 @@ int main(int argc, char **argv) { zero(rc.layer_target_bitrate); memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t)); + memset(&input_ctx, 0, sizeof(input_ctx)); + /* Setup default input stream settings */ + input_ctx.framerate.numerator = 30; + input_ctx.framerate.denominator = 1; + input_ctx.only_i420 = 1; + input_ctx.bit_depth = 0; + exec_name = argv[0]; // Check usage and arguments. if (argc < min_args) { @@ -754,9 +762,18 @@ int main(int argc, char **argv) { // Set to layer_target_bitrate for highest layer (total bitrate). cfg.rc_target_bitrate = rc.layer_target_bitrate[cfg.ts_number_layers - 1]; - // Open input file. - if (!(infile = fopen(argv[1], "rb"))) { - die("Failed to open %s for reading", argv[1]); + input_ctx.filename = argv[1]; + open_input_file(&input_ctx); + + if (input_ctx.file_type == FILE_TYPE_Y4M) { + if (input_ctx.width != cfg.g_w || input_ctx.height != cfg.g_h) { + die("Incorrect width or height: %d x %d", cfg.g_w, cfg.g_h); + } + if (input_ctx.framerate.numerator != cfg.g_timebase.den || + input_ctx.framerate.denominator != cfg.g_timebase.num) { + die("Incorrect framerate: numerator %d denominator %d", + cfg.g_timebase.num, cfg.g_timebase.den); + } } framerate = cfg.g_timebase.den / cfg.g_timebase.num; @@ -865,7 +882,7 @@ int main(int argc, char **argv) { } flags = layer_flags[frame_cnt % flag_periodicity]; if (layering_mode == 0) flags = 0; - frame_avail = vpx_img_read(&raw, infile); + frame_avail = read_frame(&input_ctx, &raw); if (frame_avail) ++rc.layer_input_frames[layer_id.temporal_layer_id]; vpx_usec_timer_start(&timer); if (vpx_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags, @@ -933,7 +950,7 @@ int main(int argc, char **argv) { ++frame_cnt; pts += frame_duration; } - fclose(infile); + close_input_file(&input_ctx); printout_rate_control_summary(&rc, &cfg, frame_cnt); printf("\n"); printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n", diff --git a/test/tools_common.sh b/test/tools_common.sh index 7caad9f06..844a12534 100755 --- a/test/tools_common.sh +++ b/test/tools_common.sh @@ -412,6 +412,8 @@ YUV_RAW_INPUT_HEIGHT=288 Y4M_NOSQ_PAR_INPUT="${LIBVPX_TEST_DATA_PATH}/park_joy_90p_8_420_a10-1.y4m" Y4M_720P_INPUT="${LIBVPX_TEST_DATA_PATH}/niklas_1280_720_30.y4m" +Y4M_720P_INPUT_WIDTH=1280 +Y4M_720P_INPUT_HEIGHT=720 # Setup a trap function to clean up after tests complete. trap cleanup EXIT diff --git a/test/vpx_temporal_svc_encoder.sh b/test/vpx_temporal_svc_encoder.sh index fd1045e7f..5e5bac8fa 100755 --- a/test/vpx_temporal_svc_encoder.sh +++ b/test/vpx_temporal_svc_encoder.sh @@ -38,6 +38,7 @@ vpx_tsvc_encoder() { local output_file="${VPX_TEST_OUTPUT_DIR}/${output_file_base}" local timebase_num="1" local timebase_den="1000" + local timebase_den_y4m="30" local speed="6" local frame_drop_thresh="30" local max_threads="4" @@ -58,6 +59,12 @@ vpx_tsvc_encoder() { "${YUV_RAW_INPUT_HEIGHT}" "${timebase_num}" "${timebase_den}" \ "${speed}" "${frame_drop_thresh}" "${error_resilient}" "${threads}" \ "$@" ${devnull} + # Test for y4m input. + eval "${VPX_TEST_PREFIX}" "${encoder}" "${Y4M_720P_INPUT}" \ + "${output_file}" "${codec}" "${Y4M_720P_INPUT_WIDTH}" \ + "${Y4M_720P_INPUT_HEIGHT}" "${timebase_num}" "${timebase_den_y4m}" \ + "${speed}" "${frame_drop_thresh}" "${error_resilient}" "${threads}" \ + "$@" ${devnull} else eval "${VPX_TEST_PREFIX}" "${encoder}" "${YUV_RAW_INPUT}" \ "${output_file}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \ diff --git a/tools_common.c b/tools_common.c index b1c35e815..2cd8a0704 100644 --- a/tools_common.c +++ b/tools_common.c @@ -46,6 +46,14 @@ va_end(ap); \ } while (0) +#if CONFIG_ENCODERS +/* Swallow warnings about unused results of fread/fwrite */ +static size_t wrap_fread(void *ptr, size_t size, size_t nmemb, FILE *stream) { + return fread(ptr, size, nmemb, stream); +} +#define fread wrap_fread +#endif + FILE *set_binary_mode(FILE *stream) { (void)stream; #if defined(_WIN32) || defined(__OS2__) @@ -264,6 +272,88 @@ double sse_to_psnr(double samples, double peak, double sse) { } } +#if CONFIG_ENCODERS +int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img) { + FILE *f = input_ctx->file; + y4m_input *y4m = &input_ctx->y4m; + int shortread = 0; + + if (input_ctx->file_type == FILE_TYPE_Y4M) { + if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0; + } else { + shortread = read_yuv_frame(input_ctx, img); + } + + return !shortread; +} + +int file_is_y4m(const char detect[4]) { + if (memcmp(detect, "YUV4", 4) == 0) { + return 1; + } + return 0; +} + +int fourcc_is_ivf(const char detect[4]) { + if (memcmp(detect, "DKIF", 4) == 0) { + return 1; + } + return 0; +} + +void open_input_file(struct VpxInputContext *input) { + /* Parse certain options from the input file, if possible */ + input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb") + : set_binary_mode(stdin); + + if (!input->file) fatal("Failed to open input file"); + + if (!fseeko(input->file, 0, SEEK_END)) { + /* Input file is seekable. Figure out how long it is, so we can get + * progress info. + */ + input->length = ftello(input->file); + rewind(input->file); + } + + /* Default to 1:1 pixel aspect ratio. */ + input->pixel_aspect_ratio.numerator = 1; + input->pixel_aspect_ratio.denominator = 1; + + /* For RAW input sources, these bytes will applied on the first frame + * in read_frame(). + */ + input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file); + input->detect.position = 0; + + if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) { + if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, + input->only_i420) >= 0) { + input->file_type = FILE_TYPE_Y4M; + input->width = input->y4m.pic_w; + input->height = input->y4m.pic_h; + input->pixel_aspect_ratio.numerator = input->y4m.par_n; + input->pixel_aspect_ratio.denominator = input->y4m.par_d; + input->framerate.numerator = input->y4m.fps_n; + input->framerate.denominator = input->y4m.fps_d; + input->fmt = input->y4m.vpx_fmt; + input->bit_depth = input->y4m.bit_depth; + } else { + fatal("Unsupported Y4M stream."); + } + } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) { + fatal("IVF is not supported as input."); + } else { + input->file_type = FILE_TYPE_RAW; + } +} + +void close_input_file(struct VpxInputContext *input) { + fclose(input->file); + if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m); +} +#endif + // TODO(debargha): Consolidate the functions below into a separate file. #if CONFIG_VP9_HIGHBITDEPTH static void highbd_img_upshift(vpx_image_t *dst, vpx_image_t *src, diff --git a/tools_common.h b/tools_common.h index bb169351b..28695e4f1 100644 --- a/tools_common.h +++ b/tools_common.h @@ -152,6 +152,14 @@ int vpx_img_read(vpx_image_t *img, FILE *file); double sse_to_psnr(double samples, double peak, double mse); +#if CONFIG_ENCODERS +int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img); +int file_is_y4m(const char detect[4]); +int fourcc_is_ivf(const char detect[4]); +void open_input_file(struct VpxInputContext *input); +void close_input_file(struct VpxInputContext *input); +#endif + #if CONFIG_VP9_HIGHBITDEPTH void vpx_img_upshift(vpx_image_t *dst, vpx_image_t *src, int input_shift); void vpx_img_downshift(vpx_image_t *dst, vpx_image_t *src, int down_shift); diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index eaeb3d96e..b8c86ea43 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -2583,6 +2583,7 @@ void vp9_remove_compressor(VP9_COMP *cpi) { for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) { vpx_free(cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize]); } + vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]); } #endif vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr); @@ -6014,22 +6015,27 @@ static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, } #if CONFIG_NON_GREEDY_MV -static void get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture, - int frame_idx, int rf_idx, int mi_row, - int mi_col, struct buf_2d *src, - struct buf_2d *pre) { +static int get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture, + int frame_idx, int rf_idx, int mi_row, + int mi_col, struct buf_2d *src, + struct buf_2d *pre) { const int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; YV12_BUFFER_CONFIG *ref_frame = NULL; int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx]; if (ref_frame_idx != -1) { ref_frame = gf_picture[ref_frame_idx].frame; + src->buf = xd->cur_buf->y_buffer + mb_y_offset; + src->stride = xd->cur_buf->y_stride; + pre->buf = ref_frame->y_buffer + mb_y_offset; + pre->stride = ref_frame->y_stride; + assert(src->stride == pre->stride); + return 1; + } else { + printf("invalid ref_frame_idx"); + assert(ref_frame_idx != -1); + return 0; } - src->buf = xd->cur_buf->y_buffer + mb_y_offset; - src->stride = xd->cur_buf->y_stride; - pre->buf = ref_frame->y_buffer + mb_y_offset; - pre->stride = ref_frame->y_stride; - assert(src->stride == pre->stride); } #define kMvPreCheckLines 5 @@ -6135,18 +6141,60 @@ static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd, *mv = get_mv_from_mv_mode(mv_mode, cpi, tpl_frame, rf_idx, bsize, mi_row, mi_col); full_mv = get_full_mv(&mv->as_mv); - get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col, - &src, &pre); - // TODO(angiebird): Consider subpixel when computing the sse. - cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv), - pre.stride, &sse); - return (double)sse; + if (get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col, + &src, &pre)) { + // TODO(angiebird): Consider subpixel when computing the sse. + cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv), + pre.stride, &sse); + return (double)sse; + } else { + assert(0); + return 0; + } } -static double get_mv_cost(int mv_mode) { - // TODO(angiebird): Implement this function. - (void)mv_mode; - return 0; +static int get_mv_mode_cost(int mv_mode) { + // TODO(angiebird): The probabilities are roughly inferred from + // default_inter_mode_probs. Check if there is a better way to set the + // probabilities. + const int zero_mv_prob = 9; + const int new_mv_prob = 77; + const int ref_mv_prob = 170; + assert(zero_mv_prob + new_mv_prob + ref_mv_prob == 256); + switch (mv_mode) { + case ZERO_MV_MODE: return vp9_prob_cost[zero_mv_prob]; break; + case NEW_MV_MODE: return vp9_prob_cost[new_mv_prob]; break; + case NEAREST_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break; + case NEAR_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break; + default: assert(0); return -1; + } +} + +static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) { + double mv_diff_cost = log2(1 + abs(new_mv->row - ref_mv->row)) + + log2(1 + abs(new_mv->col - ref_mv->col)); + mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT); + return mv_diff_cost; +} +static double get_mv_cost(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame, + int rf_idx, BLOCK_SIZE bsize, int mi_row, + int mi_col) { + double mv_cost = get_mv_mode_cost(mv_mode); + if (mv_mode == NEW_MV_MODE) { + MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, tpl_frame, rf_idx, bsize, + mi_row, mi_col) + .as_mv; + MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, tpl_frame, rf_idx, + bsize, mi_row, mi_col) + .as_mv; + MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, tpl_frame, rf_idx, + bsize, mi_row, mi_col) + .as_mv; + double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv); + double near_cost = get_mv_diff_cost(&new_mv, &near_mv); + mv_cost += nearest_cost < near_cost ? nearest_cost : near_cost; + } + return mv_cost; } static double rd_cost(int rdmult, int rddiv, double rate, double dist) { @@ -6160,7 +6208,9 @@ static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd = &x->e_mbd; double mv_dist = get_mv_dist(mv_mode, cpi, xd, gf_picture, frame_idx, tpl_frame, rf_idx, bsize, mi_row, mi_col, mv); - double mv_cost = get_mv_cost(mv_mode); + double mv_cost = + get_mv_cost(mv_mode, cpi, tpl_frame, rf_idx, bsize, mi_row, mi_col); + return rd_cost(x->rdmult, x->rddiv, mv_cost, mv_dist); } @@ -6218,7 +6268,7 @@ static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x, // no new mv // diagnal scan order tmp_idx = 0; - for (idx = 0; idx < kMvPreCheckSize; ++idx) { + for (idx = 0; idx < kMvPreCheckLines; ++idx) { int r; for (r = 0; r <= idx; ++r) { int c = idx - r; @@ -6245,7 +6295,7 @@ static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x, &select_mv_arr[mi_row * stride + mi_col]); // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE // beforehand. - for (idx = 1; idx < kMvPreCheckSize; ++idx) { + for (idx = 1; idx < kMvPreCheckLines; ++idx) { int r; for (r = 0; r <= idx; ++r) { int c = idx - r; @@ -6266,7 +6316,7 @@ static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x, tmp_idx = 0; if (no_new_mv_rd < new_mv_rd) { *rd = no_new_mv_rd; - for (idx = 0; idx < kMvPreCheckSize; ++idx) { + for (idx = 0; idx < kMvPreCheckLines; ++idx) { int r; for (r = 0; r <= idx; ++r) { int c = idx - r; @@ -6783,6 +6833,11 @@ static void init_tpl_buffer(VP9_COMP *cpi) { sizeof( *cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize]))); } + vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]); + CHECK_MEM_ERROR( + cm, cpi->tpl_stats[frame].mv_mode_arr[rf_idx], + vpx_calloc(mi_rows * mi_cols * 4, + sizeof(*cpi->tpl_stats[frame].mv_mode_arr[rf_idx]))); } #endif vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr); diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 0fdc61649..8cd1e6e31 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -1943,8 +1943,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // For SVC with quality layers, when QP of lower layer is lower // than current layer: force check of GF-ZEROMV before early exit // due to skip flag. - if (svc->spatial_layer_id > 0 && usable_ref_frame == GOLDEN_FRAME && - no_scaling && cm->base_qindex > svc->lower_layer_qindex + 10) + if (svc->spatial_layer_id > 0 && no_scaling && + (cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) && + cm->base_qindex > svc->lower_layer_qindex + 10) force_test_gf_zeromv = 1; for (idx = 0; idx < num_inter_modes + comp_modes; ++idx) { @@ -2407,6 +2408,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // Perform intra prediction search, if the best SAD is above a certain // threshold. if (best_rdc.rdcost == INT64_MAX || + (cpi->oxcf.content == VP9E_CONTENT_SCREEN && x->source_variance == 0 && + !x->zero_temp_sad_source) || (scene_change_detected && perform_intra_pred) || ((!force_skip_low_temp_var || bsize < BLOCK_32X32 || x->content_state_sb == kVeryHighSad) && @@ -2449,8 +2452,11 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, const PREDICTION_MODE this_mode = intra_mode_list[i]; THR_MODES mode_index = mode_idx[INTRA_FRAME][mode_offset(this_mode)]; int mode_rd_thresh = rd_threshes[mode_index]; + // For spatially flat blocks, under short_circuit_flat_blocks flag: + // only check DC mode for stationary blocks, otherwise also check + // H and V mode. if (sf->short_circuit_flat_blocks && x->source_variance == 0 && - this_mode != DC_PRED) { + ((x->zero_temp_sad_source && this_mode != DC_PRED) || i > 2)) { continue; } @@ -265,8 +265,8 @@ static int raw_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read, return 1; } -static int read_frame(struct VpxDecInputContext *input, uint8_t **buf, - size_t *bytes_in_buffer, size_t *buffer_size) { +static int dec_read_frame(struct VpxDecInputContext *input, uint8_t **buf, + size_t *bytes_in_buffer, size_t *buffer_size) { switch (input->vpx_input_ctx->file_type) { #if CONFIG_WEBM_IO case FILE_TYPE_WEBM: @@ -806,7 +806,7 @@ static int main_loop(int argc, const char **argv_) { if (arg_skip) fprintf(stderr, "Skipping first %d frames.\n", arg_skip); while (arg_skip) { - if (read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) break; + if (dec_read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) break; arg_skip--; } @@ -837,7 +837,7 @@ static int main_loop(int argc, const char **argv_) { frame_avail = 0; if (!stop_after || frame_in < stop_after) { - if (!read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) { + if (!dec_read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) { frame_avail = 1; frame_in++; @@ -50,12 +50,6 @@ #endif #include "./y4minput.h" -/* Swallow warnings about unused results of fread/fwrite */ -static size_t wrap_fread(void *ptr, size_t size, size_t nmemb, FILE *stream) { - return fread(ptr, size, nmemb, stream); -} -#define fread wrap_fread - static size_t wrap_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) { return fwrite(ptr, size, nmemb, stream); @@ -95,34 +89,6 @@ static void warn_or_exit_on_error(vpx_codec_ctx_t *ctx, int fatal, va_end(ap); } -static int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img) { - FILE *f = input_ctx->file; - y4m_input *y4m = &input_ctx->y4m; - int shortread = 0; - - if (input_ctx->file_type == FILE_TYPE_Y4M) { - if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0; - } else { - shortread = read_yuv_frame(input_ctx, img); - } - - return !shortread; -} - -static int file_is_y4m(const char detect[4]) { - if (memcmp(detect, "YUV4", 4) == 0) { - return 1; - } - return 0; -} - -static int fourcc_is_ivf(const char detect[4]) { - if (memcmp(detect, "DKIF", 4) == 0) { - return 1; - } - return 0; -} - static const arg_def_t help = ARG_DEF(NULL, "help", 0, "Show usage options and exit"); static const arg_def_t debugmode = @@ -1020,57 +986,6 @@ static void parse_global_config(struct VpxEncoderConfig *global, char **argv) { } } -static void open_input_file(struct VpxInputContext *input) { - /* Parse certain options from the input file, if possible */ - input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb") - : set_binary_mode(stdin); - - if (!input->file) fatal("Failed to open input file"); - - if (!fseeko(input->file, 0, SEEK_END)) { - /* Input file is seekable. Figure out how long it is, so we can get - * progress info. - */ - input->length = ftello(input->file); - rewind(input->file); - } - - /* Default to 1:1 pixel aspect ratio. */ - input->pixel_aspect_ratio.numerator = 1; - input->pixel_aspect_ratio.denominator = 1; - - /* For RAW input sources, these bytes will applied on the first frame - * in read_frame(). - */ - input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file); - input->detect.position = 0; - - if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) { - if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, - input->only_i420) >= 0) { - input->file_type = FILE_TYPE_Y4M; - input->width = input->y4m.pic_w; - input->height = input->y4m.pic_h; - input->pixel_aspect_ratio.numerator = input->y4m.par_n; - input->pixel_aspect_ratio.denominator = input->y4m.par_d; - input->framerate.numerator = input->y4m.fps_n; - input->framerate.denominator = input->y4m.fps_d; - input->fmt = input->y4m.vpx_fmt; - input->bit_depth = input->y4m.bit_depth; - } else - fatal("Unsupported Y4M stream."); - } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) { - fatal("IVF is not supported as input."); - } else { - input->file_type = FILE_TYPE_RAW; - } -} - -static void close_input_file(struct VpxInputContext *input) { - fclose(input->file); - if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m); -} - static struct stream_state *new_stream(struct VpxEncoderConfig *global, struct stream_state *prev) { struct stream_state *stream; |