summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--build/make/obj_int_extract.c16
-rw-r--r--examples.mk11
-rw-r--r--examples/decode_to_md5.c23
-rw-r--r--examples/decode_with_drops.c185
-rw-r--r--examples/postproc.c191
-rw-r--r--examples/simple_decoder.c148
-rw-r--r--ivfenc.c43
-rw-r--r--ivfenc.h6
-rw-r--r--libs.mk5
-rw-r--r--rate_hist.c282
-rw-r--r--rate_hist.h40
-rw-r--r--test/svc_test.cc8
-rw-r--r--tools_common.c44
-rw-r--r--tools_common.h9
-rw-r--r--vp9/common/arm/neon/vp9_reconintra_neon.asm131
-rw-r--r--vp9/common/vp9_blockd.h2
-rw-r--r--vp9/common/vp9_filter.c1
-rw-r--r--vp9/common/vp9_filter.h5
-rw-r--r--vp9/common/vp9_idct.c68
-rw-r--r--vp9/common/vp9_onyx.h1
-rw-r--r--vp9/common/vp9_reconinter.c15
-rw-r--r--vp9/common/vp9_reconinter.h8
-rw-r--r--vp9/common/vp9_rtcd_defs.sh2
-rw-r--r--vp9/common/x86/vp9_asm_stubs.c529
-rw-r--r--vp9/common/x86/vp9_idct_intrin_sse2.c102
-rw-r--r--vp9/decoder/vp9_decodeframe.c3
-rw-r--r--vp9/encoder/vp9_dct.c12
-rw-r--r--vp9/encoder/vp9_encodeframe.c566
-rw-r--r--vp9/encoder/vp9_encodemb.c19
-rw-r--r--vp9/encoder/vp9_encodemb.h3
-rw-r--r--vp9/encoder/vp9_firstpass.c175
-rw-r--r--vp9/encoder/vp9_lookahead.c1
-rw-r--r--vp9/encoder/vp9_onyx_if.c68
-rw-r--r--vp9/encoder/vp9_onyx_int.h123
-rw-r--r--vp9/encoder/vp9_pickmode.c52
-rw-r--r--vp9/encoder/vp9_pickmode.h3
-rw-r--r--vp9/encoder/vp9_ratectrl.c44
-rw-r--r--vp9/encoder/vp9_ratectrl.h95
-rw-r--r--vp9/encoder/vp9_rdopt.c88
-rw-r--r--vp9/encoder/vp9_temporal_filter.c28
-rw-r--r--vp9/encoder/vp9_temporal_filter.h5
-rw-r--r--vp9/encoder/x86/vp9_dct_avx2.c74
-rw-r--r--vp9/encoder/x86/vp9_dct_sse2.c74
-rw-r--r--vp9/vp9_cx_iface.c19
-rw-r--r--vp9_spatial_scalable_encoder.c14
-rw-r--r--vpxdec.c33
-rw-r--r--vpxenc.c285
-rw-r--r--y4menc.c25
-rw-r--r--y4menc.h12
49 files changed, 1949 insertions, 1747 deletions
diff --git a/build/make/obj_int_extract.c b/build/make/obj_int_extract.c
index 495e9d7fa..8c169577c 100644
--- a/build/make/obj_int_extract.c
+++ b/build/make/obj_int_extract.c
@@ -21,6 +21,7 @@ typedef enum {
OUTPUT_FMT_PLAIN,
OUTPUT_FMT_RVDS,
OUTPUT_FMT_GAS,
+ OUTPUT_FMT_C_HEADER,
} output_fmt_t;
int log_msg(const char *fmt, ...) {
@@ -43,9 +44,12 @@ int print_macho_equ(output_fmt_t mode, uint8_t* name, int val) {
case OUTPUT_FMT_RVDS:
printf("%-40s EQU %5d\n", name, val);
return 0;
- case OUTPUT_FMT_GAS:
+ case OUTPUT_FMT_GAS:
printf(".set %-40s, %5d\n", name, val);
return 0;
+ case OUTPUT_FMT_C_HEADER:
+ printf("#define %-40s %5d\n", name, val);
+ return 0;
default:
log_msg("Unsupported mode: %d", mode);
return 1;
@@ -491,6 +495,13 @@ int parse_elf(uint8_t *buf, size_t sz, output_fmt_t mode) {
sym.st_name),
val);
break;
+ case OUTPUT_FMT_C_HEADER:
+ printf("#define %-40s %5d\n",
+ parse_elf_string_table(&elf,
+ shdr.sh_link,
+ sym.st_name),
+ val);
+ break;
default:
printf("%s = %d\n",
parse_elf_string_table(&elf,
@@ -762,6 +773,7 @@ int main(int argc, char **argv) {
fprintf(stderr, "Output Formats:\n");
fprintf(stderr, " gas - compatible with GNU assembler\n");
fprintf(stderr, " rvds - compatible with armasm\n");
+ fprintf(stderr, " cheader - c/c++ header file\n");
goto bail;
}
@@ -771,6 +783,8 @@ int main(int argc, char **argv) {
mode = OUTPUT_FMT_RVDS;
else if (!strcmp(argv[1], "gas"))
mode = OUTPUT_FMT_GAS;
+ else if (!strcmp(argv[1], "cheader"))
+ mode = OUTPUT_FMT_C_HEADER;
else
f = argv[1];
diff --git a/examples.mk b/examples.mk
index a10ee52d7..0b62df96a 100644
--- a/examples.mk
+++ b/examples.mk
@@ -41,6 +41,7 @@ UTILS-$(CONFIG_ENCODERS) += vpxenc.c
vpxenc.SRCS += args.c args.h y4minput.c y4minput.h vpxenc.h
vpxenc.SRCS += ivfdec.c ivfdec.h
vpxenc.SRCS += ivfenc.c ivfenc.h
+vpxenc.SRCS += rate_hist.c rate_hist.h
vpxenc.SRCS += tools_common.c tools_common.h
vpxenc.SRCS += warnings.c warnings.h
vpxenc.SRCS += webmenc.c webmenc.h
@@ -74,9 +75,13 @@ endif
#example_xma.DESCRIPTION = External Memory Allocation mode usage
GEN_EXAMPLES-$(CONFIG_VP8_DECODER) += simple_decoder.c
-simple_decoder.GUID = D3BBF1E9-2427-450D-BBFF-B2843C1D44CC
-simple_decoder.DESCRIPTION = Simplified decoder loop
+simple_decoder.GUID = D3BBF1E9-2427-450D-BBFF-B2843C1D44CC
+simple_decoder.SRCS += ivfdec.h ivfdec.c
+simple_decoder.SRCS += tools_common.h tools_common.c
+simple_decoder.DESCRIPTION = Simplified decoder loop
GEN_EXAMPLES-$(CONFIG_VP8_DECODER) += postproc.c
+postproc.SRCS += ivfdec.h ivfdec.c
+postproc.SRCS += tools_common.h tools_common.c
postproc.GUID = 65E33355-F35E-4088-884D-3FD4905881D7
postproc.DESCRIPTION = Decoder postprocessor control
GEN_EXAMPLES-$(CONFIG_VP8_DECODER) += decode_to_md5.c
@@ -97,6 +102,8 @@ force_keyframe.GUID = 3C67CADF-029F-4C86-81F5-D6D4F51177F0
force_keyframe.DESCRIPTION = Force generation of keyframes
ifeq ($(CONFIG_DECODERS),yes)
GEN_EXAMPLES-$(CONFIG_VP8_ENCODER) += decode_with_drops.c
+decode_with_drops.SRCS += ivfdec.h ivfdec.c
+decode_with_drops.SRCS += tools_common.h tools_common.c
endif
decode_with_drops.GUID = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26
decode_with_drops.DESCRIPTION = Drops frames while decoding
diff --git a/examples/decode_to_md5.c b/examples/decode_to_md5.c
index 67cbd6c63..bba218209 100644
--- a/examples/decode_to_md5.c
+++ b/examples/decode_to_md5.c
@@ -29,7 +29,6 @@
// is processed, then U, then V. It is important to honor the image's `stride`
// values.
-#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -44,28 +43,6 @@
#include "./tools_common.h"
#include "./vpx_config.h"
-#define VP8_FOURCC 0x30385056
-#define VP9_FOURCC 0x30395056
-
-static vpx_codec_iface_t *get_codec_interface(unsigned int fourcc) {
- switch (fourcc) {
- case VP8_FOURCC:
- return vpx_codec_vp8_dx();
- case VP9_FOURCC:
- return vpx_codec_vp9_dx();
- }
- return NULL;
-}
-
-static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
- const char *detail = vpx_codec_error_detail(ctx);
-
- printf("%s: %s\n", s, vpx_codec_error(ctx));
- if(detail)
- printf(" %s\n",detail);
- exit(EXIT_FAILURE);
-}
-
static void get_image_md5(const vpx_image_t *img, unsigned char digest[16]) {
int plane, y;
MD5Context md5;
diff --git a/examples/decode_with_drops.c b/examples/decode_with_drops.c
index bfb6d3a55..12686dedd 100644
--- a/examples/decode_with_drops.c
+++ b/examples/decode_with_drops.c
@@ -52,126 +52,103 @@
// The example decides whether to drop the frame based on the current
// frame number, immediately before decoding the frame.
-#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+
+#include "./ivfdec.h"
+
#define VPX_CODEC_DISABLE_COMPAT 1
-#include "./vpx_config.h"
+
#include "vpx/vp8dx.h"
#include "vpx/vpx_decoder.h"
-#define interface (vpx_codec_vp8_dx())
+#include "./tools_common.h"
+#include "./vpx_config.h"
-#define IVF_FILE_HDR_SZ (32)
-#define IVF_FRAME_HDR_SZ (12)
+static const char *exec_name;
-static unsigned int mem_get_le32(const unsigned char *mem) {
- return (mem[3] << 24)|(mem[2] << 16)|(mem[1] << 8)|(mem[0]);
+void usage_exit() {
+ fprintf(stderr, "Usage: %s <infile> <outfile> <N-M|N/M>\n", exec_name);
+ exit(EXIT_FAILURE);
}
-static void die(const char *fmt, ...) {
- va_list ap;
+int main(int argc, char **argv) {
+ FILE *infile, *outfile;
+ vpx_codec_ctx_t codec;
+ vpx_codec_iface_t *iface;
+ int flags = 0, frame_cnt = 0;
+ vpx_video_t *video;
+ int n, m, is_range;
+ char *nptr;
+
+ exec_name = argv[0];
- va_start(ap, fmt);
- vprintf(fmt, ap);
- if(fmt[strlen(fmt)-1] != '\n')
- printf("\n");
- exit(EXIT_FAILURE);
-}
+ if (argc != 4)
+ die("Invalid number of arguments");
-static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
- const char *detail = vpx_codec_error_detail(ctx);
+ if (!(infile = fopen(argv[1], "rb")))
+ die("Failed to open %s for reading", argv[1]);
- printf("%s: %s\n", s, vpx_codec_error(ctx));
- if(detail)
- printf(" %s\n",detail);
- exit(EXIT_FAILURE);
-}
+ if (!(outfile = fopen(argv[2], "wb")))
+ die("Failed to open %s for writing", argv[2]);
+ n = strtol(argv[3], &nptr, 0);
+ m = strtol(nptr + 1, NULL, 0);
+ is_range = (*nptr == '-');
+ if (!n || !m || (*nptr != '-' && *nptr != '/'))
+ die("Couldn't parse pattern %s\n", argv[3]);
-int main(int argc, char **argv) {
- FILE *infile, *outfile;
- vpx_codec_ctx_t codec;
- int flags = 0, frame_cnt = 0;
- unsigned char file_hdr[IVF_FILE_HDR_SZ];
- unsigned char frame_hdr[IVF_FRAME_HDR_SZ];
- unsigned char frame[256*1024];
- vpx_codec_err_t res;
- int n, m, is_range;
-
- (void)res;
- /* Open files */
- if(argc!=4)
- die("Usage: %s <infile> <outfile> <N-M|N/M>\n", argv[0]);
- {
- char *nptr;
- n = strtol(argv[3], &nptr, 0);
- m = strtol(nptr+1, NULL, 0);
- is_range = *nptr == '-';
- if(!n || !m || (*nptr != '-' && *nptr != '/'))
- die("Couldn't parse pattern %s\n", argv[3]);
- }
- if(!(infile = fopen(argv[1], "rb")))
- die("Failed to open %s for reading", argv[1]);
- if(!(outfile = fopen(argv[2], "wb")))
- die("Failed to open %s for writing", argv[2]);
-
- /* Read file header */
- if(!(fread(file_hdr, 1, IVF_FILE_HDR_SZ, infile) == IVF_FILE_HDR_SZ
- && file_hdr[0]=='D' && file_hdr[1]=='K' && file_hdr[2]=='I'
- && file_hdr[3]=='F'))
- die("%s is not an IVF file.", argv[1]);
-
- printf("Using %s\n",vpx_codec_iface_name(interface));
- /* Initialize codec */
- if(vpx_codec_dec_init(&codec, interface, NULL, flags))
- die_codec(&codec, "Failed to initialize decoder");
-
- /* Read each frame */
- while(fread(frame_hdr, 1, IVF_FRAME_HDR_SZ, infile) == IVF_FRAME_HDR_SZ) {
- int frame_sz = mem_get_le32(frame_hdr);
- vpx_codec_iter_t iter = NULL;
- vpx_image_t *img;
-
-
- frame_cnt++;
- if(frame_sz > sizeof(frame))
- die("Frame %d data too big for example code buffer", frame_sz);
- if(fread(frame, 1, frame_sz, infile) != frame_sz)
- die("Frame %d failed to read complete frame", frame_cnt);
-
- if((is_range && frame_cnt >= n && frame_cnt <= m)
- ||(!is_range && m - (frame_cnt-1)%m <= n)) {
- putc('X', stdout);
- continue;
- }
- putc('.', stdout);
- fflush(stdout);
- /* Decode the frame */
- if(vpx_codec_decode(&codec, frame, frame_sz, NULL, 0))
- die_codec(&codec, "Failed to decode frame");
-
- /* Write decoded data to disk */
- while((img = vpx_codec_get_frame(&codec, &iter))) {
- unsigned int plane, y;
-
- for(plane=0; plane < 3; plane++) {
- unsigned char *buf =img->planes[plane];
-
- for(y=0; y < (plane ? (img->d_h + 1) >> 1 : img->d_h); y++) {
- (void) fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w),
- outfile);
- buf += img->stride[plane];
- }
- }
- }
+ video = vpx_video_open_file(infile);
+ if (!video)
+ die("%s is not a supported input file.", argv[1]);
+
+ iface = get_codec_interface(vpx_video_get_fourcc(video));
+ if (!iface)
+ die("Unknown FOURCC code.");
+
+ printf("Using %s\n", vpx_codec_iface_name(iface));
+
+ if (vpx_codec_dec_init(&codec, iface, NULL, flags))
+ die_codec(&codec, "Failed to initialize decoder");
+
+ while (vpx_video_read_frame(video)) {
+ vpx_codec_iter_t iter = NULL;
+ vpx_image_t *img = NULL;
+ size_t frame_size = 0;
+ int skip;
+ const unsigned char *frame = vpx_video_get_frame(video, &frame_size);
+ if (vpx_codec_decode(&codec, frame, frame_size, NULL, 0))
+ die_codec(&codec, "Failed to decode frame");
+
+ ++frame_cnt;
+
+ skip = (is_range && frame_cnt >= n && frame_cnt <= m) ||
+ (!is_range && m - (frame_cnt - 1) % m <= n);
+
+ if (!skip) {
+ putc('.', stdout);
+
+ while ((img = vpx_codec_get_frame(&codec, &iter)) != NULL)
+ vpx_img_write(img, outfile);
+ } else {
+ putc('X', stdout);
}
- printf("Processed %d frames.\n",frame_cnt);
- if(vpx_codec_destroy(&codec))
- die_codec(&codec, "Failed to destroy codec");
- fclose(outfile);
- fclose(infile);
- return EXIT_SUCCESS;
+ fflush(stdout);
+ }
+
+ printf("Processed %d frames.\n", frame_cnt);
+ if (vpx_codec_destroy(&codec))
+ die_codec(&codec, "Failed to destroy codec");
+
+ printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n",
+ vpx_video_get_width(video), vpx_video_get_height(video), argv[2]);
+
+ vpx_video_close(video);
+
+ fclose(outfile);
+ fclose(infile);
+
+ return EXIT_SUCCESS;
}
diff --git a/examples/postproc.c b/examples/postproc.c
index 0d0d6b59a..4ec2d1f1c 100644
--- a/examples/postproc.c
+++ b/examples/postproc.c
@@ -39,130 +39,105 @@
// postprocessors. VP8 is one example. The following sample code toggles
// postprocessing on and off every 15 frames.
-#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+
+#include "./ivfdec.h"
+
#define VPX_CODEC_DISABLE_COMPAT 1
-#include "./vpx_config.h"
+
#include "vpx/vp8dx.h"
#include "vpx/vpx_decoder.h"
-#define interface (vpx_codec_vp8_dx())
+#include "./tools_common.h"
+#include "./vpx_config.h"
-#define IVF_FILE_HDR_SZ (32)
-#define IVF_FRAME_HDR_SZ (12)
+static const char *exec_name;
-static unsigned int mem_get_le32(const unsigned char *mem) {
- return (mem[3] << 24)|(mem[2] << 16)|(mem[1] << 8)|(mem[0]);
+void usage_exit() {
+ fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
+ exit(EXIT_FAILURE);
}
-static void die(const char *fmt, ...) {
- va_list ap;
+int main(int argc, char **argv) {
+ FILE *infile, *outfile;
+ vpx_codec_ctx_t codec;
+ vpx_codec_iface_t *iface;
+ int frame_cnt = 0;
+ vpx_video_t *video;
+ vpx_codec_err_t res;
+
+ exec_name = argv[0];
- va_start(ap, fmt);
- vprintf(fmt, ap);
- if(fmt[strlen(fmt)-1] != '\n')
- printf("\n");
- exit(EXIT_FAILURE);
-}
+ if (argc != 3)
+ die("Invalid number of arguments");
-static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
- const char *detail = vpx_codec_error_detail(ctx);
+ if (!(infile = fopen(argv[1], "rb")))
+ die("Failed to open %s for reading", argv[1]);
- printf("%s: %s\n", s, vpx_codec_error(ctx));
- if(detail)
- printf(" %s\n",detail);
- exit(EXIT_FAILURE);
-}
+ if (!(outfile = fopen(argv[2], "wb")))
+ die("Failed to open %s for writing", argv[2]);
+ video = vpx_video_open_file(infile);
+ if (!video)
+ die("%s is not a supported input file.", argv[1]);
-int main(int argc, char **argv) {
- FILE *infile, *outfile;
- vpx_codec_ctx_t codec;
- int flags = 0, frame_cnt = 0;
- unsigned char file_hdr[IVF_FILE_HDR_SZ];
- unsigned char frame_hdr[IVF_FRAME_HDR_SZ];
- unsigned char frame[256*1024];
- vpx_codec_err_t res;
-
- (void)res;
- /* Open files */
- if(argc!=3)
- die("Usage: %s <infile> <outfile>\n", argv[0]);
- if(!(infile = fopen(argv[1], "rb")))
- die("Failed to open %s for reading", argv[1]);
- if(!(outfile = fopen(argv[2], "wb")))
- die("Failed to open %s for writing", argv[2]);
-
- /* Read file header */
- if(!(fread(file_hdr, 1, IVF_FILE_HDR_SZ, infile) == IVF_FILE_HDR_SZ
- && file_hdr[0]=='D' && file_hdr[1]=='K' && file_hdr[2]=='I'
- && file_hdr[3]=='F'))
- die("%s is not an IVF file.", argv[1]);
-
- printf("Using %s\n",vpx_codec_iface_name(interface));
- /* Initialize codec */
- res = vpx_codec_dec_init(&codec, interface, NULL,
- VPX_CODEC_USE_POSTPROC);
- if(res == VPX_CODEC_INCAPABLE) {
- printf("NOTICE: Postproc not supported by %s\n",
- vpx_codec_iface_name(interface));
- res = vpx_codec_dec_init(&codec, interface, NULL, flags);
- }
- if(res)
- die_codec(&codec, "Failed to initialize decoder");
-
- /* Read each frame */
- while(fread(frame_hdr, 1, IVF_FRAME_HDR_SZ, infile) == IVF_FRAME_HDR_SZ) {
- int frame_sz = mem_get_le32(frame_hdr);
- vpx_codec_iter_t iter = NULL;
- vpx_image_t *img;
-
-
- frame_cnt++;
- if(frame_sz > sizeof(frame))
- die("Frame %d data too big for example code buffer", frame_sz);
- if(fread(frame, 1, frame_sz, infile) != frame_sz)
- die("Frame %d failed to read complete frame", frame_cnt);
-
- #if CONFIG_VP9_DECODER
- if(frame_cnt%30 == 1) {
- vp8_postproc_cfg_t pp = {0, 0, 0};
-
- if(vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp))
- die_codec(&codec, "Failed to turn off postproc");
- } else if(frame_cnt%30 == 16) {
- vp8_postproc_cfg_t pp = {VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE, 4, 0};
-
- if(vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp))
- die_codec(&codec, "Failed to turn on postproc");
- };
- #endif
- /* Decode the frame with 15ms deadline */
- if(vpx_codec_decode(&codec, frame, frame_sz, NULL, 15000))
- die_codec(&codec, "Failed to decode frame");
-
- /* Write decoded data to disk */
- while((img = vpx_codec_get_frame(&codec, &iter))) {
- unsigned int plane, y;
-
- for(plane=0; plane < 3; plane++) {
- unsigned char *buf =img->planes[plane];
-
- for(y=0; y < (plane ? (img->d_h + 1) >> 1 : img->d_h); y++) {
- (void) fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w),
- outfile);
- buf += img->stride[plane];
- }
- }
- }
+ iface = get_codec_interface(vpx_video_get_fourcc(video));
+ if (!iface)
+ die("Unknown FOURCC code.");
+
+ printf("Using %s\n", vpx_codec_iface_name(iface));
+
+
+ res = vpx_codec_dec_init(&codec, iface, NULL, VPX_CODEC_USE_POSTPROC);
+ if (res == VPX_CODEC_INCAPABLE) {
+ printf("NOTICE: Postproc not supported.\n");
+ res = vpx_codec_dec_init(&codec, iface, NULL, 0);
+ }
+
+ if (res)
+ die_codec(&codec, "Failed to initialize decoder");
+
+ while (vpx_video_read_frame(video)) {
+ vpx_codec_iter_t iter = NULL;
+ vpx_image_t *img = NULL;
+ size_t frame_size = 0;
+ const unsigned char *frame = vpx_video_get_frame(video, &frame_size);
+
+ ++frame_cnt;
+
+ if (frame_cnt % 30 == 1) {
+ vp8_postproc_cfg_t pp = {0, 0, 0};
+
+ if (vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp))
+ die_codec(&codec, "Failed to turn off postproc");
+ } else if (frame_cnt % 30 == 16) {
+ vp8_postproc_cfg_t pp = {VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE,
+ 4, 0};
+ if (vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp))
+ die_codec(&codec, "Failed to turn on postproc");
+ };
+
+ // Decode the frame with 15ms deadline
+ if (vpx_codec_decode(&codec, frame, frame_size, NULL, 15000))
+ die_codec(&codec, "Failed to decode frame");
+
+ while ((img = vpx_codec_get_frame(&codec, &iter)) != NULL) {
+ vpx_img_write(img, outfile);
}
- printf("Processed %d frames.\n",frame_cnt);
- if(vpx_codec_destroy(&codec))
- die_codec(&codec, "Failed to destroy codec");
+ }
+
+ printf("Processed %d frames.\n", frame_cnt);
+ if (vpx_codec_destroy(&codec))
+ die_codec(&codec, "Failed to destroy codec");
+
+ printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n",
+ vpx_video_get_width(video), vpx_video_get_height(video), argv[2]);
+
+ vpx_video_close(video);
- fclose(outfile);
- fclose(infile);
- return EXIT_SUCCESS;
+ fclose(outfile);
+ fclose(infile);
+ return EXIT_SUCCESS;
}
diff --git a/examples/simple_decoder.c b/examples/simple_decoder.c
index 17a598739..23399f44f 100644
--- a/examples/simple_decoder.c
+++ b/examples/simple_decoder.c
@@ -77,110 +77,82 @@
// few exeptions, vpx_codec functions return an enumerated error status,
// with the value `0` indicating success.
-#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+
#define VPX_CODEC_DISABLE_COMPAT 1
-#include "./vpx_config.h"
+
#include "vpx/vp8dx.h"
#include "vpx/vpx_decoder.h"
-#define interface (vpx_codec_vp8_dx())
+#include "./ivfdec.h"
+#include "./tools_common.h"
+#include "./vpx_config.h"
-#define IVF_FILE_HDR_SZ (32)
-#define IVF_FRAME_HDR_SZ (12)
+static const char *exec_name;
-static unsigned int mem_get_le32(const unsigned char *mem) {
- return (mem[3] << 24)|(mem[2] << 16)|(mem[1] << 8)|(mem[0]);
+void usage_exit() {
+ fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
+ exit(EXIT_FAILURE);
}
-static void die(const char *fmt, ...) {
- va_list ap;
+int main(int argc, char **argv) {
+ FILE *infile, *outfile;
+ vpx_codec_ctx_t codec;
+ vpx_codec_iface_t *iface;
+ int flags = 0, frame_cnt = 0;
+ vpx_video_t *video;
- va_start(ap, fmt);
- vprintf(fmt, ap);
- if(fmt[strlen(fmt)-1] != '\n')
- printf("\n");
- exit(EXIT_FAILURE);
-}
+ exec_name = argv[0];
-static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
- const char *detail = vpx_codec_error_detail(ctx);
+ if (argc != 3)
+ die("Invalid number of arguments");
- printf("%s: %s\n", s, vpx_codec_error(ctx));
- if(detail)
- printf(" %s\n",detail);
- exit(EXIT_FAILURE);
-}
+ if (!(infile = fopen(argv[1], "rb")))
+ die("Failed to open %s for reading", argv[1]);
+ if (!(outfile = fopen(argv[2], "wb")))
+ die("Failed to open %s for writing", argv[2]);
-int main(int argc, char **argv) {
- FILE *infile, *outfile;
- vpx_codec_ctx_t codec;
- int flags = 0, frame_cnt = 0;
- unsigned char file_hdr[IVF_FILE_HDR_SZ];
- unsigned char frame_hdr[IVF_FRAME_HDR_SZ];
- unsigned char frame[256*1024];
- vpx_codec_err_t res;
-
- (void)res;
- /* Open files */
- if(argc!=3)
- die("Usage: %s <infile> <outfile>\n", argv[0]);
- if(!(infile = fopen(argv[1], "rb")))
- die("Failed to open %s for reading", argv[1]);
- if(!(outfile = fopen(argv[2], "wb")))
- die("Failed to open %s for writing", argv[2]);
-
- /* Read file header */
- if(!(fread(file_hdr, 1, IVF_FILE_HDR_SZ, infile) == IVF_FILE_HDR_SZ
- && file_hdr[0]=='D' && file_hdr[1]=='K' && file_hdr[2]=='I'
- && file_hdr[3]=='F'))
- die("%s is not an IVF file.", argv[1]);
-
- printf("Using %s\n",vpx_codec_iface_name(interface));
- /* Initialize codec */
- if(vpx_codec_dec_init(&codec, interface, NULL, flags))
- die_codec(&codec, "Failed to initialize decoder");
-
- /* Read each frame */
- while(fread(frame_hdr, 1, IVF_FRAME_HDR_SZ, infile) == IVF_FRAME_HDR_SZ) {
- int frame_sz = mem_get_le32(frame_hdr);
- vpx_codec_iter_t iter = NULL;
- vpx_image_t *img;
-
-
- frame_cnt++;
- if(frame_sz > sizeof(frame))
- die("Frame %d data too big for example code buffer", frame_sz);
- if(fread(frame, 1, frame_sz, infile) != frame_sz)
- die("Frame %d failed to read complete frame", frame_cnt);
-
- /* Decode the frame */
- if(vpx_codec_decode(&codec, frame, frame_sz, NULL, 0))
- die_codec(&codec, "Failed to decode frame");
-
- /* Write decoded data to disk */
- while((img = vpx_codec_get_frame(&codec, &iter))) {
- unsigned int plane, y;
-
- for(plane=0; plane < 3; plane++) {
- unsigned char *buf =img->planes[plane];
-
- for(y=0; y < (plane ? (img->d_h + 1) >> 1 : img->d_h); y++) {
- (void) fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w),
- outfile);
- buf += img->stride[plane];
- }
- }
- }
+ video = vpx_video_open_file(infile);
+ if (!video)
+ die("%s is not an IVF file.", argv[1]);
+
+ iface = get_codec_interface(vpx_video_get_fourcc(video));
+ if (!iface)
+ die("Unknown FOURCC code.");
+
+ printf("Using %s\n", vpx_codec_iface_name(iface));
+
+ if (vpx_codec_dec_init(&codec, iface, NULL, flags))
+ die_codec(&codec, "Failed to initialize decoder");
+
+ while (vpx_video_read_frame(video)) {
+ vpx_codec_iter_t iter = NULL;
+ vpx_image_t *img = NULL;
+ size_t frame_size = 0;
+ const unsigned char *frame = vpx_video_get_frame(video, &frame_size);
+ if (vpx_codec_decode(&codec, frame, frame_size, NULL, 0))
+ die_codec(&codec, "Failed to decode frame");
+
+ while ((img = vpx_codec_get_frame(&codec, &iter)) != NULL) {
+ vpx_img_write(img, outfile);
+ ++frame_cnt;
}
- printf("Processed %d frames.\n",frame_cnt);
- if(vpx_codec_destroy(&codec))
- die_codec(&codec, "Failed to destroy codec");
+ }
+
+ printf("Processed %d frames.\n", frame_cnt);
+ if (vpx_codec_destroy(&codec))
+ die_codec(&codec, "Failed to destroy codec");
+
+ printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n",
+ vpx_video_get_width(video), vpx_video_get_height(video), argv[2]);
+
+ vpx_video_close(video);
+
+ fclose(outfile);
+ fclose(infile);
- fclose(outfile);
- fclose(infile);
- return EXIT_SUCCESS;
+ return EXIT_SUCCESS;
}
diff --git a/ivfenc.c b/ivfenc.c
index 0041ff044..4a97c4273 100644
--- a/ivfenc.c
+++ b/ivfenc.c
@@ -10,7 +10,6 @@
#include "./ivfenc.h"
-#include "./tools_common.h"
#include "vpx/vpx_encoder.h"
#include "vpx_ports/mem_ops.h"
@@ -24,33 +23,31 @@ void ivf_write_file_header(FILE *outfile,
header[1] = 'K';
header[2] = 'I';
header[3] = 'F';
- mem_put_le16(header + 4, 0); /* version */
- mem_put_le16(header + 6, 32); /* headersize */
- mem_put_le32(header + 8, fourcc); /* four CC */
- mem_put_le16(header + 12, cfg->g_w); /* width */
- mem_put_le16(header + 14, cfg->g_h); /* height */
- mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */
- mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */
- mem_put_le32(header + 24, frame_cnt); /* length */
- mem_put_le32(header + 28, 0); /* unused */
-
- (void) fwrite(header, 1, 32, outfile);
+ mem_put_le16(header + 4, 0); // version
+ mem_put_le16(header + 6, 32); // header size
+ mem_put_le32(header + 8, fourcc); // fourcc
+ mem_put_le16(header + 12, cfg->g_w); // width
+ mem_put_le16(header + 14, cfg->g_h); // height
+ mem_put_le32(header + 16, cfg->g_timebase.den); // rate
+ mem_put_le32(header + 20, cfg->g_timebase.num); // scale
+ mem_put_le32(header + 24, frame_cnt); // length
+ mem_put_le32(header + 28, 0); // unused
+
+ fwrite(header, 1, 32, outfile);
}
-void ivf_write_frame_header(FILE *outfile, const struct vpx_codec_cx_pkt *pkt) {
+void ivf_write_frame_header(FILE *outfile, int64_t pts, size_t frame_size) {
char header[12];
- vpx_codec_pts_t pts;
- pts = pkt->data.frame.pts;
- mem_put_le32(header, (int)pkt->data.frame.sz);
- mem_put_le32(header + 4, pts & 0xFFFFFFFF);
- mem_put_le32(header + 8, pts >> 32);
-
- (void) fwrite(header, 1, 12, outfile);
+ mem_put_le32(header, (int)frame_size);
+ mem_put_le32(header + 4, (int)(pts & 0xFFFFFFFF));
+ mem_put_le32(header + 8, (int)(pts >> 32));
+ fwrite(header, 1, 12, outfile);
}
-void ivf_write_frame_size(FILE *outfile, size_t size) {
+void ivf_write_frame_size(FILE *outfile, size_t frame_size) {
char header[4];
- mem_put_le32(header, (int)size);
- (void) fwrite(header, 1, 4, outfile);
+
+ mem_put_le32(header, (int)frame_size);
+ fwrite(header, 1, 4, outfile);
}
diff --git a/ivfenc.h b/ivfenc.h
index b486bc809..6623687e8 100644
--- a/ivfenc.h
+++ b/ivfenc.h
@@ -23,8 +23,10 @@ void ivf_write_file_header(FILE *outfile,
const struct vpx_codec_enc_cfg *cfg,
uint32_t fourcc,
int frame_cnt);
-void ivf_write_frame_header(FILE *outfile, const struct vpx_codec_cx_pkt *pkt);
-void ivf_write_frame_size(FILE *outfile, size_t size);
+
+void ivf_write_frame_header(FILE *outfile, int64_t pts, size_t frame_size);
+
+void ivf_write_frame_size(FILE *outfile, size_t frame_size);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/libs.mk b/libs.mk
index 470066a67..cc40451d4 100644
--- a/libs.mk
+++ b/libs.mk
@@ -214,8 +214,11 @@ CLEAN-OBJS += libvpx_srcs.txt
ifeq ($(CONFIG_EXTERNAL_BUILD),yes)
ifeq ($(CONFIG_MSVS),yes)
+obj_int_extract.bat: $(SRC_PATH_BARE)/build/$(MSVS_ARCH_DIR)/obj_int_extract.bat
+ @cp $^ $@
+
+obj_int_extract.$(VCPROJ_SFX): obj_int_extract.bat
obj_int_extract.$(VCPROJ_SFX): $(SRC_PATH_BARE)/build/make/obj_int_extract.c
- @cp $(SRC_PATH_BARE)/build/$(MSVS_ARCH_DIR)/obj_int_extract.bat .
@echo " [CREATE] $@"
$(qexec)$(GEN_VCPROJ) \
--exe \
diff --git a/rate_hist.c b/rate_hist.c
new file mode 100644
index 000000000..1cef19bdd
--- /dev/null
+++ b/rate_hist.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdio.h>
+#include <math.h>
+
+#include "./rate_hist.h"
+
+#define RATE_BINS 100
+#define HIST_BAR_MAX 40
+
+struct hist_bucket {
+ int low;
+ int high;
+ int count;
+};
+
+struct rate_hist {
+ int64_t *pts;
+ int *sz;
+ int samples;
+ int frames;
+ struct hist_bucket bucket[RATE_BINS];
+ int total;
+};
+
+struct rate_hist *init_rate_histogram(const vpx_codec_enc_cfg_t *cfg,
+ const vpx_rational_t *fps) {
+ int i;
+ struct rate_hist *hist = malloc(sizeof(*hist));
+
+ // Determine the number of samples in the buffer. Use the file's framerate
+ // to determine the number of frames in rc_buf_sz milliseconds, with an
+ // adjustment (5/4) to account for alt-refs
+ hist->samples = cfg->rc_buf_sz * 5 / 4 * fps->num / fps->den / 1000;
+
+ // prevent division by zero
+ if (hist->samples == 0)
+ hist->samples = 1;
+
+ hist->frames = 0;
+ hist->total = 0;
+
+ hist->pts = calloc(hist->samples, sizeof(*hist->pts));
+ hist->sz = calloc(hist->samples, sizeof(*hist->sz));
+ for (i = 0; i < RATE_BINS; i++) {
+ hist->bucket[i].low = INT_MAX;
+ hist->bucket[i].high = 0;
+ hist->bucket[i].count = 0;
+ }
+
+ return hist;
+}
+
+void destroy_rate_histogram(struct rate_hist *hist) {
+ if (hist) {
+ free(hist->pts);
+ free(hist->sz);
+ free(hist);
+ }
+}
+
+void update_rate_histogram(struct rate_hist *hist,
+ const vpx_codec_enc_cfg_t *cfg,
+ const vpx_codec_cx_pkt_t *pkt) {
+ int i;
+ int64_t then = 0;
+ int64_t avg_bitrate = 0;
+ int64_t sum_sz = 0;
+ const int64_t now = pkt->data.frame.pts * 1000 *
+ (uint64_t)cfg->g_timebase.num /
+ (uint64_t)cfg->g_timebase.den;
+
+ int idx = hist->frames++ % hist->samples;
+ hist->pts[idx] = now;
+ hist->sz[idx] = (int)pkt->data.frame.sz;
+
+ if (now < cfg->rc_buf_initial_sz)
+ return;
+
+ then = now;
+
+ /* Sum the size over the past rc_buf_sz ms */
+ for (i = hist->frames; i > 0 && hist->frames - i < hist->samples; i--) {
+ const int i_idx = (i - 1) % hist->samples;
+
+ then = hist->pts[i_idx];
+ if (now - then > cfg->rc_buf_sz)
+ break;
+ sum_sz += hist->sz[i_idx];
+ }
+
+ if (now == then)
+ return;
+
+ avg_bitrate = sum_sz * 8 * 1000 / (now - then);
+ idx = (int)(avg_bitrate * (RATE_BINS / 2) / (cfg->rc_target_bitrate * 1000));
+ if (idx < 0)
+ idx = 0;
+ if (idx > RATE_BINS - 1)
+ idx = RATE_BINS - 1;
+ if (hist->bucket[idx].low > avg_bitrate)
+ hist->bucket[idx].low = (int)avg_bitrate;
+ if (hist->bucket[idx].high < avg_bitrate)
+ hist->bucket[idx].high = (int)avg_bitrate;
+ hist->bucket[idx].count++;
+ hist->total++;
+}
+
+static int merge_hist_buckets(struct hist_bucket *bucket,
+ int max_buckets, int *num_buckets) {
+ int small_bucket = 0, merge_bucket = INT_MAX, big_bucket = 0;
+ int buckets = *num_buckets;
+ int i;
+
+ /* Find the extrema for this list of buckets */
+ big_bucket = small_bucket = 0;
+ for (i = 0; i < buckets; i++) {
+ if (bucket[i].count < bucket[small_bucket].count)
+ small_bucket = i;
+ if (bucket[i].count > bucket[big_bucket].count)
+ big_bucket = i;
+ }
+
+ /* If we have too many buckets, merge the smallest with an adjacent
+ * bucket.
+ */
+ while (buckets > max_buckets) {
+ int last_bucket = buckets - 1;
+
+ /* merge the small bucket with an adjacent one. */
+ if (small_bucket == 0)
+ merge_bucket = 1;
+ else if (small_bucket == last_bucket)
+ merge_bucket = last_bucket - 1;
+ else if (bucket[small_bucket - 1].count < bucket[small_bucket + 1].count)
+ merge_bucket = small_bucket - 1;
+ else
+ merge_bucket = small_bucket + 1;
+
+ assert(abs(merge_bucket - small_bucket) <= 1);
+ assert(small_bucket < buckets);
+ assert(big_bucket < buckets);
+ assert(merge_bucket < buckets);
+
+ if (merge_bucket < small_bucket) {
+ bucket[merge_bucket].high = bucket[small_bucket].high;
+ bucket[merge_bucket].count += bucket[small_bucket].count;
+ } else {
+ bucket[small_bucket].high = bucket[merge_bucket].high;
+ bucket[small_bucket].count += bucket[merge_bucket].count;
+ merge_bucket = small_bucket;
+ }
+
+ assert(bucket[merge_bucket].low != bucket[merge_bucket].high);
+
+ buckets--;
+
+ /* Remove the merge_bucket from the list, and find the new small
+ * and big buckets while we're at it
+ */
+ big_bucket = small_bucket = 0;
+ for (i = 0; i < buckets; i++) {
+ if (i > merge_bucket)
+ bucket[i] = bucket[i + 1];
+
+ if (bucket[i].count < bucket[small_bucket].count)
+ small_bucket = i;
+ if (bucket[i].count > bucket[big_bucket].count)
+ big_bucket = i;
+ }
+ }
+
+ *num_buckets = buckets;
+ return bucket[big_bucket].count;
+}
+
+static void show_histogram(const struct hist_bucket *bucket,
+ int buckets, int total, int scale) {
+ const char *pat1, *pat2;
+ int i;
+
+ switch ((int)(log(bucket[buckets - 1].high) / log(10)) + 1) {
+ case 1:
+ case 2:
+ pat1 = "%4d %2s: ";
+ pat2 = "%4d-%2d: ";
+ break;
+ case 3:
+ pat1 = "%5d %3s: ";
+ pat2 = "%5d-%3d: ";
+ break;
+ case 4:
+ pat1 = "%6d %4s: ";
+ pat2 = "%6d-%4d: ";
+ break;
+ case 5:
+ pat1 = "%7d %5s: ";
+ pat2 = "%7d-%5d: ";
+ break;
+ case 6:
+ pat1 = "%8d %6s: ";
+ pat2 = "%8d-%6d: ";
+ break;
+ case 7:
+ pat1 = "%9d %7s: ";
+ pat2 = "%9d-%7d: ";
+ break;
+ default:
+ pat1 = "%12d %10s: ";
+ pat2 = "%12d-%10d: ";
+ break;
+ }
+
+ for (i = 0; i < buckets; i++) {
+ int len;
+ int j;
+ float pct;
+
+ pct = (float)(100.0 * bucket[i].count / total);
+ len = HIST_BAR_MAX * bucket[i].count / scale;
+ if (len < 1)
+ len = 1;
+ assert(len <= HIST_BAR_MAX);
+
+ if (bucket[i].low == bucket[i].high)
+ fprintf(stderr, pat1, bucket[i].low, "");
+ else
+ fprintf(stderr, pat2, bucket[i].low, bucket[i].high);
+
+ for (j = 0; j < HIST_BAR_MAX; j++)
+ fprintf(stderr, j < len ? "=" : " ");
+ fprintf(stderr, "\t%5d (%6.2f%%)\n", bucket[i].count, pct);
+ }
+}
+
+void show_q_histogram(const int counts[64], int max_buckets) {
+ struct hist_bucket bucket[64];
+ int buckets = 0;
+ int total = 0;
+ int scale;
+ int i;
+
+ for (i = 0; i < 64; i++) {
+ if (counts[i]) {
+ bucket[buckets].low = bucket[buckets].high = i;
+ bucket[buckets].count = counts[i];
+ buckets++;
+ total += counts[i];
+ }
+ }
+
+ fprintf(stderr, "\nQuantizer Selection:\n");
+ scale = merge_hist_buckets(bucket, max_buckets, &buckets);
+ show_histogram(bucket, buckets, total, scale);
+}
+
+void show_rate_histogram(struct rate_hist *hist,
+ const vpx_codec_enc_cfg_t *cfg, int max_buckets) {
+ int i, scale;
+ int buckets = 0;
+
+ for (i = 0; i < RATE_BINS; i++) {
+ if (hist->bucket[i].low == INT_MAX)
+ continue;
+ hist->bucket[buckets++] = hist->bucket[i];
+ }
+
+ fprintf(stderr, "\nRate (over %dms window):\n", cfg->rc_buf_sz);
+ scale = merge_hist_buckets(hist->bucket, max_buckets, &buckets);
+ show_histogram(hist->bucket, buckets, hist->total, scale);
+}
diff --git a/rate_hist.h b/rate_hist.h
new file mode 100644
index 000000000..00a1676a6
--- /dev/null
+++ b/rate_hist.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef RATE_HIST_H_
+#define RATE_HIST_H_
+
+#include "vpx/vpx_encoder.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rate_hist;
+
+struct rate_hist *init_rate_histogram(const vpx_codec_enc_cfg_t *cfg,
+ const vpx_rational_t *fps);
+
+void destroy_rate_histogram(struct rate_hist *hist);
+
+void update_rate_histogram(struct rate_hist *hist,
+ const vpx_codec_enc_cfg_t *cfg,
+ const vpx_codec_cx_pkt_t *pkt);
+
+void show_q_histogram(const int counts[64], int max_buckets);
+
+void show_rate_histogram(struct rate_hist *hist, const vpx_codec_enc_cfg_t *cfg,
+ int max_buckets);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // RATE_HIST_H_
diff --git a/test/svc_test.cc b/test/svc_test.cc
index 3ddd9c145..75659d50d 100644
--- a/test/svc_test.cc
+++ b/test/svc_test.cc
@@ -234,7 +234,7 @@ TEST_F(SvcTest, FirstFrameHasLayers) {
video.Begin();
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
- video.duration(), VPX_DL_REALTIME);
+ video.duration(), VPX_DL_GOOD_QUALITY);
EXPECT_EQ(VPX_CODEC_OK, res);
const vpx_codec_err_t res_dec = decoder_->DecodeFrame(
@@ -262,7 +262,7 @@ TEST_F(SvcTest, EncodeThreeFrames) {
video.Begin();
// This frame is a keyframe.
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
- video.duration(), VPX_DL_REALTIME);
+ video.duration(), VPX_DL_GOOD_QUALITY);
ASSERT_EQ(VPX_CODEC_OK, res);
EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));
@@ -275,7 +275,7 @@ TEST_F(SvcTest, EncodeThreeFrames) {
video.Next();
// This is a P-frame.
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
- video.duration(), VPX_DL_REALTIME);
+ video.duration(), VPX_DL_GOOD_QUALITY);
ASSERT_EQ(VPX_CODEC_OK, res);
EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));
@@ -288,7 +288,7 @@ TEST_F(SvcTest, EncodeThreeFrames) {
video.Next();
// This is a P-frame.
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
- video.duration(), VPX_DL_REALTIME);
+ video.duration(), VPX_DL_GOOD_QUALITY);
ASSERT_EQ(VPX_CODEC_OK, res);
EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));
diff --git a/tools_common.c b/tools_common.c
index 9c24983de..85bedc99d 100644
--- a/tools_common.c
+++ b/tools_common.c
@@ -15,6 +15,10 @@
#include <stdlib.h>
#include <string.h>
+#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
+#include "vpx/vp8dx.h"
+#endif
+
#if defined(_WIN32) || defined(__OS2__)
#include <io.h>
#include <fcntl.h>
@@ -60,6 +64,15 @@ void warn(const char *fmt, ...) {
LOG_ERROR("Warning");
}
+void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
+ const char *detail = vpx_codec_error_detail(ctx);
+
+ printf("%s: %s\n", s, vpx_codec_error(ctx));
+ if (detail)
+ printf(" %s\n", detail);
+ exit(EXIT_FAILURE);
+}
+
uint16_t mem_get_le16(const void *data) {
uint16_t val;
const uint8_t *mem = (const uint8_t*)data;
@@ -130,3 +143,34 @@ int read_yuv_frame(struct VpxInputContext *input_ctx, vpx_image_t *yuv_frame) {
return shortread;
}
+
+vpx_codec_iface_t *get_codec_interface(unsigned int fourcc) {
+ switch (fourcc) {
+#if CONFIG_VP8_DECODER
+ case VP8_FOURCC:
+ return vpx_codec_vp8_dx();
+#endif
+#if CONFIG_VP9_DECODER
+ case VP9_FOURCC:
+ return vpx_codec_vp9_dx();
+#endif
+ default:
+ return NULL;
+ }
+ return NULL;
+}
+
+void vpx_img_write(const vpx_image_t *img, FILE *file) {
+ int plane, y;
+
+ for (plane = 0; plane < 3; ++plane) {
+ const unsigned char *buf = img->planes[plane];
+ const int stride = img->stride[plane];
+ const int w = plane ? (img->d_w + 1) >> 1 : img->d_w;
+ const int h = plane ? (img->d_h + 1) >> 1 : img->d_h;
+ for (y = 0; y < h; ++y) {
+ fwrite(buf, 1, w, file);
+ buf += stride;
+ }
+ }
+}
diff --git a/tools_common.h b/tools_common.h
index 1d70ab5e8..967b7a1fb 100644
--- a/tools_common.h
+++ b/tools_common.h
@@ -13,6 +13,7 @@
#include <stdio.h>
#include "./vpx_config.h"
+#include "vpx/vpx_codec.h"
#include "vpx/vpx_image.h"
#include "vpx/vpx_integer.h"
@@ -112,6 +113,8 @@ void die(const char *fmt, ...);
void fatal(const char *fmt, ...);
void warn(const char *fmt, ...);
+void die_codec(vpx_codec_ctx_t *ctx, const char *s);
+
/* The tool including this file must define usage_exit() */
void usage_exit();
@@ -120,6 +123,12 @@ uint32_t mem_get_le32(const void *data);
int read_yuv_frame(struct VpxInputContext *input_ctx, vpx_image_t *yuv_frame);
+vpx_codec_iface_t *get_codec_interface(unsigned int fourcc);
+
+// TODO(dkovalev): move this function to vpx_image.{c, h}, so it will be part
+// of vpx_image_t support
+void vpx_img_write(const vpx_image_t *img, FILE *file);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.asm b/vp9/common/arm/neon/vp9_reconintra_neon.asm
index baa943b9c..279f678b1 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon.asm
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.asm
@@ -19,6 +19,7 @@
EXPORT |vp9_tm_predictor_4x4_neon|
EXPORT |vp9_tm_predictor_8x8_neon|
EXPORT |vp9_tm_predictor_16x16_neon|
+ EXPORT |vp9_tm_predictor_32x32_neon|
ARM
REQUIRE8
PRESERVE8
@@ -502,4 +503,134 @@ loop_16x16_neon
bx lr
ENDP ; |vp9_tm_predictor_16x16_neon|
+;void vp9_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride,
+; const uint8_t *above,
+; const uint8_t *left)
+; r0 uint8_t *dst
+; r1 ptrdiff_t y_stride
+; r2 const uint8_t *above
+; r3 const uint8_t *left
+
+|vp9_tm_predictor_32x32_neon| PROC
+ ; Load ytop_left = above[-1];
+ sub r12, r2, #1
+ ldrb r12, [r12]
+ vdup.u8 q0, r12
+
+ ; Load above 32 pixels
+ vld1.8 q1, [r2]!
+ vld1.8 q2, [r2]
+
+ ; preload 8 left pixels
+ vld1.8 d26, [r3]!
+
+ ; Compute above - ytop_left
+ vsubl.u8 q8, d2, d0
+ vsubl.u8 q9, d3, d1
+ vsubl.u8 q10, d4, d0
+ vsubl.u8 q11, d5, d1
+
+ vmovl.u8 q3, d26
+
+ ; Load left row by row and compute left + (above - ytop_left)
+ ; Process 8 rows in each single loop and loop 4 times to process 32 rows.
+ mov r2, #4
+
+loop_32x32_neon
+ ; Process two rows.
+ vdup.16 q0, d6[0]
+ vdup.16 q2, d6[1]
+ vadd.s16 q12, q0, q8
+ vadd.s16 q13, q0, q9
+ vadd.s16 q14, q0, q10
+ vadd.s16 q15, q0, q11
+ vqshrun.s16 d0, q12, #0
+ vqshrun.s16 d1, q13, #0
+ vadd.s16 q12, q2, q8
+ vadd.s16 q13, q2, q9
+ vqshrun.s16 d2, q14, #0
+ vqshrun.s16 d3, q15, #0
+ vadd.s16 q14, q2, q10
+ vadd.s16 q15, q2, q11
+ vst1.64 {d0-d3}, [r0], r1
+ vqshrun.s16 d24, q12, #0
+ vqshrun.s16 d25, q13, #0
+ vqshrun.s16 d26, q14, #0
+ vqshrun.s16 d27, q15, #0
+ vdup.16 q1, d6[2]
+ vdup.16 q2, d6[3]
+ vst1.64 {d24-d27}, [r0], r1
+
+ ; Process two rows.
+ vadd.s16 q12, q1, q8
+ vadd.s16 q13, q1, q9
+ vadd.s16 q14, q1, q10
+ vadd.s16 q15, q1, q11
+ vqshrun.s16 d0, q12, #0
+ vqshrun.s16 d1, q13, #0
+ vadd.s16 q12, q2, q8
+ vadd.s16 q13, q2, q9
+ vqshrun.s16 d2, q14, #0
+ vqshrun.s16 d3, q15, #0
+ vadd.s16 q14, q2, q10
+ vadd.s16 q15, q2, q11
+ vst1.64 {d0-d3}, [r0], r1
+ vqshrun.s16 d24, q12, #0
+ vqshrun.s16 d25, q13, #0
+ vqshrun.s16 d26, q14, #0
+ vqshrun.s16 d27, q15, #0
+ vdup.16 q0, d7[0]
+ vdup.16 q2, d7[1]
+ vst1.64 {d24-d27}, [r0], r1
+
+ ; Process two rows.
+ vadd.s16 q12, q0, q8
+ vadd.s16 q13, q0, q9
+ vadd.s16 q14, q0, q10
+ vadd.s16 q15, q0, q11
+ vqshrun.s16 d0, q12, #0
+ vqshrun.s16 d1, q13, #0
+ vadd.s16 q12, q2, q8
+ vadd.s16 q13, q2, q9
+ vqshrun.s16 d2, q14, #0
+ vqshrun.s16 d3, q15, #0
+ vadd.s16 q14, q2, q10
+ vadd.s16 q15, q2, q11
+ vst1.64 {d0-d3}, [r0], r1
+ vqshrun.s16 d24, q12, #0
+ vqshrun.s16 d25, q13, #0
+ vqshrun.s16 d26, q14, #0
+ vqshrun.s16 d27, q15, #0
+ vdup.16 q0, d7[2]
+ vdup.16 q2, d7[3]
+ vst1.64 {d24-d27}, [r0], r1
+
+ ; Process two rows.
+ vadd.s16 q12, q0, q8
+ vadd.s16 q13, q0, q9
+ vadd.s16 q14, q0, q10
+ vadd.s16 q15, q0, q11
+ vqshrun.s16 d0, q12, #0
+ vqshrun.s16 d1, q13, #0
+ vadd.s16 q12, q2, q8
+ vadd.s16 q13, q2, q9
+ vqshrun.s16 d2, q14, #0
+ vqshrun.s16 d3, q15, #0
+ vadd.s16 q14, q2, q10
+ vadd.s16 q15, q2, q11
+ vst1.64 {d0-d3}, [r0], r1
+ vqshrun.s16 d24, q12, #0
+ vqshrun.s16 d25, q13, #0
+ vld1.8 d0, [r3]! ; preload 8 left pixels
+ vqshrun.s16 d26, q14, #0
+ vqshrun.s16 d27, q15, #0
+ vmovl.u8 q3, d0
+ vst1.64 {d24-d27}, [r0], r1
+
+ subs r2, r2, #1
+ bgt loop_32x32_neon
+
+ bx lr
+ ENDP ; |vp9_tm_predictor_32x32_neon|
+
END
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 39ea4922b..9e16d8f78 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -252,7 +252,7 @@ typedef struct macroblockd {
/* Inverse transform function pointers. */
void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob);
- struct subpix_fn_table subpix;
+ const interp_kernel *interp_kernel;
int corrupted;
diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c
index 9f400e9ad..dbde6d551 100644
--- a/vp9/common/vp9_filter.c
+++ b/vp9/common/vp9_filter.c
@@ -106,6 +106,7 @@ static const interp_kernel* vp9_filter_kernels[4] = {
};
const interp_kernel *vp9_get_interp_kernel(INTERP_FILTER filter) {
+ assert(filter != SWITCHABLE);
return vp9_filter_kernels[filter];
}
diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h
index c8a0b6130..b611e304c 100644
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -35,11 +35,6 @@ typedef enum {
typedef int16_t interp_kernel[SUBPEL_TAPS];
-struct subpix_fn_table {
- const interp_kernel *filter_x;
- const interp_kernel *filter_y;
-};
-
const interp_kernel *vp9_get_interp_kernel(INTERP_FILTER filter);
extern const interp_kernel vp9_bilinear_filters[SUBPEL_SHIFTS];
diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c
index 533f7f361..20b78bfed 100644
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -96,7 +96,7 @@ void vp9_iwht4x4_1_add_c(const int16_t *in, uint8_t *dest, int dest_stride) {
}
}
-static void idct4_1d(const int16_t *input, int16_t *output) {
+static void idct4(const int16_t *input, int16_t *output) {
int16_t step[4];
int temp1, temp2;
// stage 1
@@ -124,7 +124,7 @@ void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {
// Rows
for (i = 0; i < 4; ++i) {
- idct4_1d(input, outptr);
+ idct4(input, outptr);
input += 4;
outptr += 4;
}
@@ -133,7 +133,7 @@ void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
temp_in[j] = out[j * 4 + i];
- idct4_1d(temp_in, temp_out);
+ idct4(temp_in, temp_out);
for (j = 0; j < 4; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
+ dest[j * stride + i]);
@@ -156,7 +156,7 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride) {
}
}
-static void idct8_1d(const int16_t *input, int16_t *output) {
+static void idct8(const int16_t *input, int16_t *output) {
int16_t step1[8], step2[8];
int temp1, temp2;
// stage 1
@@ -174,7 +174,7 @@ static void idct8_1d(const int16_t *input, int16_t *output) {
step1[6] = dct_const_round_shift(temp2);
// stage 2 & stage 3 - even half
- idct4_1d(step1, step1);
+ idct4(step1, step1);
// stage 2 - odd half
step2[4] = step1[4] + step1[5];
@@ -209,7 +209,7 @@ void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) {
// First transform rows
for (i = 0; i < 8; ++i) {
- idct8_1d(input, outptr);
+ idct8(input, outptr);
input += 8;
outptr += 8;
}
@@ -218,7 +218,7 @@ void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
temp_in[j] = out[j * 8 + i];
- idct8_1d(temp_in, temp_out);
+ idct8(temp_in, temp_out);
for (j = 0; j < 8; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
+ dest[j * stride + i]);
@@ -238,7 +238,7 @@ void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
}
}
-static void iadst4_1d(const int16_t *input, int16_t *output) {
+static void iadst4(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
int x0 = input[0];
@@ -283,10 +283,10 @@ static void iadst4_1d(const int16_t *input, int16_t *output) {
void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride,
int tx_type) {
const transform_2d IHT_4[] = {
- { idct4_1d, idct4_1d }, // DCT_DCT = 0
- { iadst4_1d, idct4_1d }, // ADST_DCT = 1
- { idct4_1d, iadst4_1d }, // DCT_ADST = 2
- { iadst4_1d, iadst4_1d } // ADST_ADST = 3
+ { idct4, idct4 }, // DCT_DCT = 0
+ { iadst4, idct4 }, // ADST_DCT = 1
+ { idct4, iadst4 }, // DCT_ADST = 2
+ { iadst4, iadst4 } // ADST_ADST = 3
};
int i, j;
@@ -311,7 +311,7 @@ void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride,
+ dest[j * stride + i]);
}
}
-static void iadst8_1d(const int16_t *input, int16_t *output) {
+static void iadst8(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
int x0 = input[7];
@@ -389,10 +389,10 @@ static void iadst8_1d(const int16_t *input, int16_t *output) {
}
static const transform_2d IHT_8[] = {
- { idct8_1d, idct8_1d }, // DCT_DCT = 0
- { iadst8_1d, idct8_1d }, // ADST_DCT = 1
- { idct8_1d, iadst8_1d }, // DCT_ADST = 2
- { iadst8_1d, iadst8_1d } // ADST_ADST = 3
+ { idct8, idct8 }, // DCT_DCT = 0
+ { iadst8, idct8 }, // ADST_DCT = 1
+ { idct8, iadst8 }, // DCT_ADST = 2
+ { iadst8, iadst8 } // ADST_ADST = 3
};
void vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride,
@@ -430,7 +430,7 @@ void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
// First transform rows
// only first 4 row has non-zero coefs
for (i = 0; i < 4; ++i) {
- idct8_1d(input, outptr);
+ idct8(input, outptr);
input += 8;
outptr += 8;
}
@@ -439,14 +439,14 @@ void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
temp_in[j] = out[j * 8 + i];
- idct8_1d(temp_in, temp_out);
+ idct8(temp_in, temp_out);
for (j = 0; j < 8; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
+ dest[j * stride + i]);
}
}
-static void idct16_1d(const int16_t *input, int16_t *output) {
+static void idct16(const int16_t *input, int16_t *output) {
int16_t step1[16], step2[16];
int temp1, temp2;
@@ -619,7 +619,7 @@ void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) {
// First transform rows
for (i = 0; i < 16; ++i) {
- idct16_1d(input, outptr);
+ idct16(input, outptr);
input += 16;
outptr += 16;
}
@@ -628,14 +628,14 @@ void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = out[j * 16 + i];
- idct16_1d(temp_in, temp_out);
+ idct16(temp_in, temp_out);
for (j = 0; j < 16; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * stride + i]);
}
}
-static void iadst16_1d(const int16_t *input, int16_t *output) {
+static void iadst16(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
int x0 = input[15];
@@ -807,10 +807,10 @@ static void iadst16_1d(const int16_t *input, int16_t *output) {
}
static const transform_2d IHT_16[] = {
- { idct16_1d, idct16_1d }, // DCT_DCT = 0
- { iadst16_1d, idct16_1d }, // ADST_DCT = 1
- { idct16_1d, iadst16_1d }, // DCT_ADST = 2
- { iadst16_1d, iadst16_1d } // ADST_ADST = 3
+ { idct16, idct16 }, // DCT_DCT = 0
+ { iadst16, idct16 }, // ADST_DCT = 1
+ { idct16, iadst16 }, // DCT_ADST = 2
+ { iadst16, iadst16 } // ADST_ADST = 3
};
void vp9_iht16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride,
@@ -848,7 +848,7 @@ void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
// First transform rows. Since all non-zero dct coefficients are in
// upper-left 4x4 area, we only need to calculate first 4 rows here.
for (i = 0; i < 4; ++i) {
- idct16_1d(input, outptr);
+ idct16(input, outptr);
input += 16;
outptr += 16;
}
@@ -857,7 +857,7 @@ void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = out[j*16 + i];
- idct16_1d(temp_in, temp_out);
+ idct16(temp_in, temp_out);
for (j = 0; j < 16; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * stride + i]);
@@ -877,7 +877,7 @@ void vp9_idct16x16_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
}
}
-static void idct32_1d(const int16_t *input, int16_t *output) {
+static void idct32(const int16_t *input, int16_t *output) {
int16_t step1[32], step2[32];
int temp1, temp2;
@@ -1263,7 +1263,7 @@ void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) {
zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
if (zero_coeff[0] | zero_coeff[1])
- idct32_1d(input, outptr);
+ idct32(input, outptr);
else
vpx_memset(outptr, 0, sizeof(int16_t) * 32);
input += 32;
@@ -1274,7 +1274,7 @@ void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j)
temp_in[j] = out[j * 32 + i];
- idct32_1d(temp_in, temp_out);
+ idct32(temp_in, temp_out);
for (j = 0; j < 32; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * stride + i]);
@@ -1290,7 +1290,7 @@ void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) {
// Rows
// only upper-left 8x8 has non-zero coeff
for (i = 0; i < 8; ++i) {
- idct32_1d(input, outptr);
+ idct32(input, outptr);
input += 32;
outptr += 32;
}
@@ -1299,7 +1299,7 @@ void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j)
temp_in[j] = out[j * 32 + i];
- idct32_1d(temp_in, temp_out);
+ idct32(temp_in, temp_out);
for (j = 0; j < 32; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * stride + i]);
diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h
index 9df76ded3..564e4195f 100644
--- a/vp9/common/vp9_onyx.h
+++ b/vp9/common/vp9_onyx.h
@@ -55,6 +55,7 @@ extern "C" {
MODE_FIRSTPASS = 0x3,
MODE_SECONDPASS = 0x4,
MODE_SECONDPASS_BEST = 0x5,
+ MODE_REALTIME = 0x6,
} MODE;
typedef enum {
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index b5a9248c3..d554cc0ed 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -69,13 +69,11 @@ static void inter_predictor(const uint8_t *src, int src_stride,
const int subpel_y,
const struct scale_factors *sf,
int w, int h, int ref,
- const struct subpix_fn_table *subpix,
+ const interp_kernel *kernel,
int xs, int ys) {
sf->predict[subpel_x != 0][subpel_y != 0][ref](
src, src_stride, dst, dst_stride,
- subpix->filter_x[subpel_x], xs,
- subpix->filter_y[subpel_y], ys,
- w, h);
+ kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
}
void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
@@ -83,7 +81,7 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
const MV *src_mv,
const struct scale_factors *sf,
int w, int h, int ref,
- const struct subpix_fn_table *subpix,
+ const interp_kernel *kernel,
enum mv_precision precision,
int x, int y) {
const int is_q4 = precision == MV_PRECISION_Q4;
@@ -96,7 +94,7 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
- sf, w, h, ref, subpix, sf->x_step_q4, sf->y_step_q4);
+ sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4);
}
static INLINE int round_mv_comp_q4(int value) {
@@ -198,7 +196,8 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
+ (scaled_mv.col >> SUBPEL_BITS);
inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
- subpel_x, subpel_y, sf, w, h, ref, &xd->subpix, xs, ys);
+ subpel_x, subpel_y, sf, w, h, ref, xd->interp_kernel,
+ xs, ys);
}
}
@@ -367,7 +366,7 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
}
inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
- subpel_y, sf, w, h, ref, &xd->subpix, xs, ys);
+ subpel_y, sf, w, h, ref, xd->interp_kernel, xs, ys);
}
}
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h
index 58f4b417e..3345d83e8 100644
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -18,7 +18,6 @@
extern "C" {
#endif
-struct subpix_fn_table;
void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col,
BLOCK_SIZE bsize);
@@ -36,7 +35,7 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
const MV *mv_q3,
const struct scale_factors *sf,
int w, int h, int do_avg,
- const struct subpix_fn_table *subpix,
+ const interp_kernel *kernel,
enum mv_precision precision,
int x, int y);
@@ -94,11 +93,6 @@ static void setup_pre_planes(MACROBLOCKD *xd, int idx,
}
}
-static void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, int ref0, int ref1) {
- xd->block_refs[0] = &cm->frame_refs[ref0 >= 0 ? ref0 : 0];
- xd->block_refs[1] = &cm->frame_refs[ref1 >= 0 ? ref1 : 0];
-}
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 8fd6db8e9..66defb915 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -174,7 +174,7 @@ prototype void vp9_v_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const ui
specialize vp9_v_predictor_32x32 $sse2_x86inc neon
prototype void vp9_tm_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_tm_predictor_32x32 $sse2_x86_64
+specialize vp9_tm_predictor_32x32 $sse2_x86_64 neon
prototype void vp9_dc_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_dc_predictor_32x32 $sse2_x86inc
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
index f95423678..8a2297feb 100644
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@@ -23,6 +23,68 @@ typedef void filter8_1dfunction (
const short *filter
);
+#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
+void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
+ uint8_t *dst, ptrdiff_t dst_stride, \
+ const int16_t *filter_x, int x_step_q4, \
+ const int16_t *filter_y, int y_step_q4, \
+ int w, int h) { \
+ if (step_q4 == 16 && filter[3] != 128) { \
+ while (w >= 16) { \
+ vp9_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, \
+ dst, dst_stride, \
+ h, filter); \
+ src += 16; \
+ dst += 16; \
+ w -= 16; \
+ } \
+ while (w >= 8) { \
+ vp9_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, \
+ dst, dst_stride, \
+ h, filter); \
+ src += 8; \
+ dst += 8; \
+ w -= 8; \
+ } \
+ while (w >= 4) { \
+ vp9_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, \
+ dst, dst_stride, \
+ h, filter); \
+ src += 4; \
+ dst += 4; \
+ w -= 4; \
+ } \
+ } \
+ if (w) { \
+ vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
+ filter_x, x_step_q4, filter_y, y_step_q4, \
+ w, h); \
+ } \
+}
+
+#define FUN_CONV_2D(avg, opt) \
+void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
+ uint8_t *dst, ptrdiff_t dst_stride, \
+ const int16_t *filter_x, int x_step_q4, \
+ const int16_t *filter_y, int y_step_q4, \
+ int w, int h) { \
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71); \
+ \
+ assert(w <= 64); \
+ assert(h <= 64); \
+ if (x_step_q4 == 16 && y_step_q4 == 16) { \
+ vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
+ filter_x, x_step_q4, filter_y, y_step_q4, \
+ w, h + 7); \
+ vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
+ filter_x, x_step_q4, filter_y, y_step_q4, \
+ w, h); \
+ } else { \
+ vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
+ filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
+ } \
+}
+
#if HAVE_SSSE3
filter8_1dfunction vp9_filter_block1d16_v8_ssse3;
filter8_1dfunction vp9_filter_block1d16_h8_ssse3;
@@ -37,201 +99,44 @@ filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3;
-void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- /* Ensure the filter can be compressed to int16_t. */
- if (x_step_q4 == 16 && filter_x[3] != 128) {
- while (w >= 16) {
- vp9_filter_block1d16_h8_ssse3(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 16;
- dst += 16;
- w -= 16;
- }
- while (w >= 8) {
- vp9_filter_block1d8_h8_ssse3(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 8;
- dst += 8;
- w -= 8;
- }
- while (w >= 4) {
- vp9_filter_block1d4_h8_ssse3(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 4;
- dst += 4;
- w -= 4;
- }
- }
- if (w) {
- vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- }
-}
-
-void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- if (y_step_q4 == 16 && filter_y[3] != 128) {
- while (w >= 16) {
- vp9_filter_block1d16_v8_ssse3(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 16;
- dst += 16;
- w -= 16;
- }
- while (w >= 8) {
- vp9_filter_block1d8_v8_ssse3(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 8;
- dst += 8;
- w -= 8;
- }
- while (w >= 4) {
- vp9_filter_block1d4_v8_ssse3(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 4;
- dst += 4;
- w -= 4;
- }
- }
- if (w) {
- vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- }
-}
-
-void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- if (x_step_q4 == 16 && filter_x[3] != 128) {
- while (w >= 16) {
- vp9_filter_block1d16_h8_avg_ssse3(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 16;
- dst += 16;
- w -= 16;
- }
- while (w >= 8) {
- vp9_filter_block1d8_h8_avg_ssse3(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 8;
- dst += 8;
- w -= 8;
- }
- while (w >= 4) {
- vp9_filter_block1d4_h8_avg_ssse3(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 4;
- dst += 4;
- w -= 4;
- }
- }
- if (w) {
- vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- }
-}
-
-void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- if (y_step_q4 == 16 && filter_y[3] != 128) {
- while (w >= 16) {
- vp9_filter_block1d16_v8_avg_ssse3(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 16;
- dst += 16;
- w -= 16;
- }
- while (w >= 8) {
- vp9_filter_block1d8_v8_avg_ssse3(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 8;
- dst += 8;
- w -= 8;
- }
- while (w >= 4) {
- vp9_filter_block1d4_v8_avg_ssse3(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 4;
- dst += 4;
- w -= 4;
- }
- }
- if (w) {
- vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- }
-}
-
-void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
-
- assert(w <= 64);
- assert(h <= 64);
- if (x_step_q4 == 16 && y_step_q4 == 16) {
- vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h + 7);
- vp9_convolve8_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4, w, h);
- } else {
- vp9_convolve8_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4, w, h);
- }
-}
-
-void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
-
- assert(w <= 64);
- assert(h <= 64);
- if (x_step_q4 == 16 && y_step_q4 == 16) {
- vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h + 7);
- vp9_convolve8_avg_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- } else {
- vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4, w, h);
- }
-}
+// void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3);
+FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3);
+FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3);
+FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
+ ssse3);
+
+// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+FUN_CONV_2D(, ssse3);
+FUN_CONV_2D(avg_ , ssse3);
#endif
#if HAVE_SSE2
@@ -248,199 +153,41 @@ filter8_1dfunction vp9_filter_block1d8_h8_avg_sse2;
filter8_1dfunction vp9_filter_block1d4_v8_avg_sse2;
filter8_1dfunction vp9_filter_block1d4_h8_avg_sse2;
-void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- /* Ensure the filter can be compressed to int16_t. */
- if (x_step_q4 == 16 && filter_x[3] != 128) {
- while (w >= 16) {
- vp9_filter_block1d16_h8_sse2(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 16;
- dst += 16;
- w -= 16;
- }
- while (w >= 8) {
- vp9_filter_block1d8_h8_sse2(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 8;
- dst += 8;
- w -= 8;
- }
- while (w >= 4) {
- vp9_filter_block1d4_h8_sse2(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 4;
- dst += 4;
- w -= 4;
- }
- }
- if (w) {
- vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- }
-}
-
-void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- if (y_step_q4 == 16 && filter_y[3] != 128) {
- while (w >= 16) {
- vp9_filter_block1d16_v8_sse2(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 16;
- dst += 16;
- w -= 16;
- }
- while (w >= 8) {
- vp9_filter_block1d8_v8_sse2(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 8;
- dst += 8;
- w -= 8;
- }
- while (w >= 4) {
- vp9_filter_block1d4_v8_sse2(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 4;
- dst += 4;
- w -= 4;
- }
- }
- if (w) {
- vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- }
-}
-
-void vp9_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- if (x_step_q4 == 16 && filter_x[3] != 128) {
- while (w >= 16) {
- vp9_filter_block1d16_h8_avg_sse2(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 16;
- dst += 16;
- w -= 16;
- }
- while (w >= 8) {
- vp9_filter_block1d8_h8_avg_sse2(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 8;
- dst += 8;
- w -= 8;
- }
- while (w >= 4) {
- vp9_filter_block1d4_h8_avg_sse2(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 4;
- dst += 4;
- w -= 4;
- }
- }
- if (w) {
- vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- }
-}
-
-void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- if (y_step_q4 == 16 && filter_y[3] != 128) {
- while (w >= 16) {
- vp9_filter_block1d16_v8_avg_sse2(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 16;
- dst += 16;
- w -= 16;
- }
- while (w >= 8) {
- vp9_filter_block1d8_v8_avg_sse2(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 8;
- dst += 8;
- w -= 8;
- }
- while (w >= 4) {
- vp9_filter_block1d4_v8_avg_sse2(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 4;
- dst += 4;
- w -= 4;
- }
- }
- if (w) {
- vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- }
-}
-
-void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
-
- assert(w <= 64);
- assert(h <= 64);
- if (x_step_q4 == 16 && y_step_q4 == 16) {
- vp9_convolve8_horiz_sse2(src - 3 * src_stride, src_stride, fdata2, 64,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h + 7);
- vp9_convolve8_vert_sse2(fdata2 + 3 * 64, 64, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4, w, h);
- } else {
- vp9_convolve8_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4, w, h);
- }
-}
-
-void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
-
- assert(w <= 64);
- assert(h <= 64);
- if (x_step_q4 == 16 && y_step_q4 == 16) {
- vp9_convolve8_horiz_sse2(src - 3 * src_stride, src_stride, fdata2, 64,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h + 7);
- vp9_convolve8_avg_vert_sse2(fdata2 + 3 * 64, 64, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- } else {
- vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4, w, h);
- }
-}
+// void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
+FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
+FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
+FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2);
+
+// void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+FUN_CONV_2D(, sse2);
+FUN_CONV_2D(avg_ , sse2);
#endif
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index 2f6149464..13a5b5a82 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -180,7 +180,7 @@ static INLINE void transpose_4x4(__m128i *res) {
res[1] = _mm_unpackhi_epi16(tr0_0, tr0_1);
}
-static void idct4_1d_sse2(__m128i *in) {
+static void idct4_sse2(__m128i *in) {
const __m128i k__cospi_p16_p16 = pair_set_epi16(cospi_16_64, cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
@@ -216,7 +216,7 @@ static void idct4_1d_sse2(__m128i *in) {
in[1] = _mm_shuffle_epi32(in[1], 0x4E);
}
-static void iadst4_1d_sse2(__m128i *in) {
+static void iadst4_sse2(__m128i *in) {
const __m128i k__sinpi_p01_p04 = pair_set_epi16(sinpi_1_9, sinpi_4_9);
const __m128i k__sinpi_p03_p02 = pair_set_epi16(sinpi_3_9, sinpi_2_9);
const __m128i k__sinpi_p02_m01 = pair_set_epi16(sinpi_2_9, -sinpi_1_9);
@@ -276,20 +276,20 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
switch (tx_type) {
case 0: // DCT_DCT
- idct4_1d_sse2(in);
- idct4_1d_sse2(in);
+ idct4_sse2(in);
+ idct4_sse2(in);
break;
case 1: // ADST_DCT
- idct4_1d_sse2(in);
- iadst4_1d_sse2(in);
+ idct4_sse2(in);
+ iadst4_sse2(in);
break;
case 2: // DCT_ADST
- iadst4_1d_sse2(in);
- idct4_1d_sse2(in);
+ iadst4_sse2(in);
+ idct4_sse2(in);
break;
case 3: // ADST_ADST
- iadst4_1d_sse2(in);
- iadst4_1d_sse2(in);
+ iadst4_sse2(in);
+ iadst4_sse2(in);
break;
default:
assert(0);
@@ -455,7 +455,7 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
res1 = _mm_packs_epi32(tmp2, tmp3); \
}
-#define IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7, \
+#define IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, \
out0, out1, out2, out3, out4, out5, out6, out7) \
{ \
/* Stage1 */ \
@@ -573,7 +573,7 @@ void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
in0, in1, in2, in3, in4, in5, in6, in7);
// 4-stage 1D idct8x8
- IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+ IDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
}
@@ -674,7 +674,7 @@ static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) {
out[3] = _mm_unpackhi_epi64(tr1_2, tr1_6);
}
-static void idct8_1d_sse2(__m128i *in) {
+static void idct8_sse2(__m128i *in) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
@@ -695,11 +695,11 @@ static void idct8_1d_sse2(__m128i *in) {
in0, in1, in2, in3, in4, in5, in6, in7);
// 4-stage 1D idct8x8
- IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+ IDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7]);
}
-static void iadst8_1d_sse2(__m128i *in) {
+static void iadst8_sse2(__m128i *in) {
const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64);
@@ -946,20 +946,20 @@ void vp9_iht8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride,
switch (tx_type) {
case 0: // DCT_DCT
- idct8_1d_sse2(in);
- idct8_1d_sse2(in);
+ idct8_sse2(in);
+ idct8_sse2(in);
break;
case 1: // ADST_DCT
- idct8_1d_sse2(in);
- iadst8_1d_sse2(in);
+ idct8_sse2(in);
+ iadst8_sse2(in);
break;
case 2: // DCT_ADST
- iadst8_1d_sse2(in);
- idct8_1d_sse2(in);
+ iadst8_sse2(in);
+ idct8_sse2(in);
break;
case 3: // ADST_ADST
- iadst8_1d_sse2(in);
- iadst8_1d_sse2(in);
+ iadst8_sse2(in);
+ iadst8_sse2(in);
break;
default:
assert(0);
@@ -1104,7 +1104,7 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, in0, in1, in2, in3)
- IDCT8_1D(in0, in1, in2, in3, zero, zero, zero, zero,
+ IDCT8(in0, in1, in2, in3, zero, zero, zero, zero,
in0, in1, in2, in3, in4, in5, in6, in7);
// Final rounding and shift
in0 = _mm_adds_epi16(in0, final_rounding);
@@ -1135,7 +1135,7 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
RECON_AND_STORE(dest, in7);
}
-#define IDCT16_1D \
+#define IDCT16 \
/* Stage2 */ \
{ \
const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], in[15]); \
@@ -1264,7 +1264,7 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
stp2_10, stp2_13, stp2_11, stp2_12) \
}
-#define IDCT16_10_1D \
+#define IDCT16_10 \
/* Stage2 */ \
{ \
const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], zero); \
@@ -1437,7 +1437,7 @@ void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,
array_transpose_8x8(in, in);
array_transpose_8x8(in+8, in+8);
- IDCT16_1D
+ IDCT16
// Stage7
curr1[0] = _mm_add_epi16(stp2_0, stp1_15);
@@ -1465,7 +1465,7 @@ void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,
array_transpose_8x8(l+i*8, in);
array_transpose_8x8(r+i*8, in+8);
- IDCT16_1D
+ IDCT16
// 2-D
in[0] = _mm_add_epi16(stp2_0, stp1_15);
@@ -1590,7 +1590,7 @@ static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) {
res0[15] = tbuf[7];
}
-static void iadst16_1d_8col(__m128i *in) {
+static void iadst16_8col(__m128i *in) {
// perform 16x16 1-D ADST for 8 columns
__m128i s[16], x[16], u[32], v[32];
const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64);
@@ -2060,7 +2060,7 @@ static void iadst16_1d_8col(__m128i *in) {
in[15] = _mm_sub_epi16(kZero, s[1]);
}
-static void idct16_1d_8col(__m128i *in) {
+static void idct16_8col(__m128i *in) {
const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64);
@@ -2404,16 +2404,16 @@ static void idct16_1d_8col(__m128i *in) {
in[15] = _mm_sub_epi16(s[0], s[15]);
}
-static void idct16_1d_sse2(__m128i *in0, __m128i *in1) {
+static void idct16_sse2(__m128i *in0, __m128i *in1) {
array_transpose_16x16(in0, in1);
- idct16_1d_8col(in0);
- idct16_1d_8col(in1);
+ idct16_8col(in0);
+ idct16_8col(in1);
}
-static void iadst16_1d_sse2(__m128i *in0, __m128i *in1) {
+static void iadst16_sse2(__m128i *in0, __m128i *in1) {
array_transpose_16x16(in0, in1);
- iadst16_1d_8col(in0);
- iadst16_1d_8col(in1);
+ iadst16_8col(in0);
+ iadst16_8col(in1);
}
static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) {
@@ -2502,20 +2502,20 @@ void vp9_iht16x16_256_add_sse2(const int16_t *input, uint8_t *dest, int stride,
switch (tx_type) {
case 0: // DCT_DCT
- idct16_1d_sse2(in0, in1);
- idct16_1d_sse2(in0, in1);
+ idct16_sse2(in0, in1);
+ idct16_sse2(in0, in1);
break;
case 1: // ADST_DCT
- idct16_1d_sse2(in0, in1);
- iadst16_1d_sse2(in0, in1);
+ idct16_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
break;
case 2: // DCT_ADST
- iadst16_1d_sse2(in0, in1);
- idct16_1d_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
+ idct16_sse2(in0, in1);
break;
case 3: // ADST_ADST
- iadst16_1d_sse2(in0, in1);
- iadst16_1d_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
break;
default:
assert(0);
@@ -2732,7 +2732,7 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
for (i = 0; i < 2; i++) {
array_transpose_4X8(l + 8*i, in);
- IDCT16_10_1D
+ IDCT16_10
// Stage7
in[0] = _mm_add_epi16(stp2_0, stp1_15);
@@ -2814,7 +2814,7 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
input += 8; \
} \
-#define IDCT32_1D_34 \
+#define IDCT32_34 \
/* Stage1 */ \
{ \
const __m128i zero = _mm_setzero_si128();\
@@ -3115,7 +3115,7 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
}
-#define IDCT32_1D \
+#define IDCT32 \
/* Stage1 */ \
{ \
const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], in[31]); \
@@ -3554,7 +3554,7 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
array_transpose_8x8(in+16, in+16);
array_transpose_8x8(in+24, in+24);
- IDCT32_1D
+ IDCT32
// 1_D: Store 32 intermediate results for each 8x32 block.
col[0] = _mm_add_epi16(stp1_0, stp1_31);
@@ -3593,7 +3593,7 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
const __m128i zero = _mm_setzero_si128();
// Transpose 32x8 block to 8x32 block
array_transpose_8x8(col+i*8, in);
- IDCT32_1D_34
+ IDCT32_34
// 2_D: Calculate the results and store them to destination.
in[0] = _mm_add_epi16(stp1_0, stp1_31);
@@ -3922,7 +3922,7 @@ void vp9_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,
array_transpose_8x8(in+16, in+16);
array_transpose_8x8(in+24, in+24);
- IDCT32_1D
+ IDCT32
// 1_D: Store 32 intermediate results for each 8x32 block.
col[i32 + 0] = _mm_add_epi16(stp1_0, stp1_31);
@@ -3969,7 +3969,7 @@ void vp9_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,
array_transpose_8x8(col+j+64, in+16);
array_transpose_8x8(col+j+96, in+24);
- IDCT32_1D
+ IDCT32
// 2_D: Calculate the results and store them to destination.
in[0] = _mm_add_epi16(stp1_0, stp1_31);
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 466b1808f..1d9be5322 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -421,8 +421,7 @@ static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd,
if (has_second_ref(mbmi))
set_ref(cm, xd, 1, mi_row, mi_col);
- xd->subpix.filter_x = xd->subpix.filter_y =
- vp9_get_interp_kernel(mbmi->interp_filter);
+ xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
// Prediction
vp9_dec_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index 0f4a6bb63..a840b480a 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -997,7 +997,7 @@ static INLINE int half_round_shift(int input) {
return rv;
}
-static void dct32_1d(const int *input, int *output, int round) {
+static void fdct32(const int *input, int *output, int round) {
int step[32];
// Stage 1
step[0] = input[0] + input[(32 - 1)];
@@ -1329,7 +1329,7 @@ void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) {
int temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = input[j * stride + i] * 4;
- dct32_1d(temp_in, temp_out, 0);
+ fdct32(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
}
@@ -1339,13 +1339,13 @@ void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) {
int temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = output[j + i * 32];
- dct32_1d(temp_in, temp_out, 0);
+ fdct32(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
}
}
-// Note that although we use dct_32_round in dct32_1d computation flow,
+// Note that although we use dct_32_round in dct32 computation flow,
// this 2d fdct32x32 for rate-distortion optimization loop is operating
// within 16 bits precision.
void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
@@ -1357,7 +1357,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
int temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = input[j * stride + i] * 4;
- dct32_1d(temp_in, temp_out, 0);
+ fdct32(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
// TODO(cd): see quality impact of only doing
// output[j * 32 + i] = (temp_out[j] + 1) >> 2;
@@ -1370,7 +1370,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
int temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = output[j + i * 32];
- dct32_1d(temp_in, temp_out, 1);
+ fdct32(temp_in, temp_out, 1);
for (j = 0; j < 32; ++j)
out[j + i * 32] = temp_out[j];
}
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 317ac9815..b97fd0293 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -380,8 +380,10 @@ static void select_in_frame_q_segment(VP9_COMP *cpi,
segment = 0;
}
- complexity_metric =
- clamp((int)((projected_rate * 64) / target_rate), 16, 255);
+ if (target_rate > 0) {
+ complexity_metric =
+ clamp((int)((projected_rate * 64) / target_rate), 16, 255);
+ }
}
// Fill in the entires in the segment map corresponding to this SB64
@@ -1029,131 +1031,171 @@ static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) {
}
return 0;
}
-
-// TODO(jingning) This currently serves as a test framework for non-RD mode
-// decision. To be continued on optimizing the partition type decisions.
-static void pick_partition_type(VP9_COMP *cpi,
- const TileInfo *const tile,
- MODE_INFO **mi_8x8, TOKENEXTRA **tp,
- int mi_row, int mi_col,
- BLOCK_SIZE bsize, int *rate, int64_t *dist,
- int do_recon) {
+static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
+ BLOCK_SIZE bsize, int output_enabled) {
+ int i;
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
- const int mi_stride = cm->mode_info_stride;
- const int num_8x8_subsize = (num_8x8_blocks_wide_lookup[bsize] >> 1);
- int i;
- PARTITION_TYPE partition = PARTITION_NONE;
- BLOCK_SIZE subsize;
- BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type;
- int sub_rate[4] = {0};
- int64_t sub_dist[4] = {0};
- int mi_offset;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ struct macroblock_plane *const p = x->plane;
+ struct macroblockd_plane *const pd = xd->plane;
+ MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
- if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
- return;
+ const int mb_mode_index = ctx->best_mode_index;
+ int max_plane;
- partition = partition_lookup[b_width_log2(bsize)][bs_type];
- subsize = get_subsize(bsize, partition);
+ max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1;
+ for (i = 0; i < max_plane; ++i) {
+ p[i].coeff = ctx->coeff_pbuf[i][1];
+ p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
+ pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
+ p[i].eobs = ctx->eobs_pbuf[i][1];
+ }
+
+ for (i = max_plane; i < MAX_MB_PLANE; ++i) {
+ p[i].coeff = ctx->coeff_pbuf[i][2];
+ p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
+ pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
+ p[i].eobs = ctx->eobs_pbuf[i][2];
+ }
+
+ x->skip = ctx->skip;
+
+ if (frame_is_intra_only(cm)) {
+#if CONFIG_INTERNAL_STATS
+ static const int kf_mode_index[] = {
+ THR_DC /*DC_PRED*/,
+ THR_V_PRED /*V_PRED*/,
+ THR_H_PRED /*H_PRED*/,
+ THR_D45_PRED /*D45_PRED*/,
+ THR_D135_PRED /*D135_PRED*/,
+ THR_D117_PRED /*D117_PRED*/,
+ THR_D153_PRED /*D153_PRED*/,
+ THR_D207_PRED /*D207_PRED*/,
+ THR_D63_PRED /*D63_PRED*/,
+ THR_TM /*TM_PRED*/,
+ };
+ ++cpi->mode_chosen_counts[kf_mode_index[mbmi->mode]];
+#endif
+ } else {
+ // Note how often each mode chosen as best
+ cpi->mode_chosen_counts[mb_mode_index]++;
+ if (is_inter_block(mbmi) &&
+ (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) {
+ int_mv best_mv[2];
+ for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
+ best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
+ vp9_update_mv_count(cpi, x, best_mv);
+ }
+
+ if (cm->interp_filter == SWITCHABLE && is_inter_mode(mbmi->mode)) {
+ const int ctx = vp9_get_pred_context_switchable_interp(xd);
+ ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
+ }
+ }
+}
+
+static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile,
+ TOKENEXTRA **tp, int mi_row, int mi_col,
+ int output_enabled, BLOCK_SIZE bsize) {
+ MACROBLOCK *const x = &cpi->mb;
if (bsize < BLOCK_8X8) {
// When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
// there is nothing to be done.
- if (x->ab_index != 0) {
- *rate = 0;
- *dist = 0;
+ if (x->ab_index > 0)
return;
- }
+ }
+ set_offsets(cpi, tile, mi_row, mi_col, bsize);
+ update_state_rt(cpi, get_block_context(x, bsize), bsize, output_enabled);
+
+ encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
+ update_stats(cpi);
+
+ (*tp)->token = EOSB_TOKEN;
+ (*tp)++;
+}
+
+static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile,
+ TOKENEXTRA **tp, int mi_row, int mi_col,
+ int output_enabled, BLOCK_SIZE bsize) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->mb;
+ const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
+ int ctx;
+ PARTITION_TYPE partition;
+ BLOCK_SIZE subsize;
+
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+ return;
+
+ if (bsize >= BLOCK_8X8) {
+ MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+ const int idx_str = xd->mode_info_stride * mi_row + mi_col;
+ MODE_INFO ** mi_8x8 = cm->mi_grid_visible + idx_str;
+ ctx = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
+ mi_row, mi_col, bsize);
+ subsize = mi_8x8[0]->mbmi.sb_type;
+
} else {
- *(get_sb_partitioning(x, bsize)) = subsize;
+ ctx = 0;
+ subsize = BLOCK_4X4;
}
+ partition = partition_lookup[bsl][subsize];
+
switch (partition) {
case PARTITION_NONE:
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, rate, dist,
- bsize, get_block_context(x, bsize), INT64_MAX);
+ if (output_enabled && bsize >= BLOCK_8X8)
+ cm->counts.partition[ctx][PARTITION_NONE]++;
+ encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
break;
- case PARTITION_HORZ:
+ case PARTITION_VERT:
+ if (output_enabled)
+ cm->counts.partition[ctx][PARTITION_VERT]++;
*get_sb_index(x, subsize) = 0;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sub_rate[0], &sub_dist[0],
- subsize, get_block_context(x, subsize), INT64_MAX);
- if (bsize >= BLOCK_8X8 && mi_row + num_8x8_subsize < cm->mi_rows) {
- update_state(cpi, get_block_context(x, subsize), subsize, 0);
- encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
+ encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
+ if (mi_col + hbs < cm->mi_cols) {
*get_sb_index(x, subsize) = 1;
- rd_pick_sb_modes(cpi, tile, mi_row + num_8x8_subsize, mi_col,
- &sub_rate[1], &sub_dist[1], subsize,
- get_block_context(x, subsize), INT64_MAX);
+ encode_b_rt(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled,
+ subsize);
}
- *rate = sub_rate[0] + sub_rate[1];
- *dist = sub_dist[0] + sub_dist[1];
break;
- case PARTITION_VERT:
+ case PARTITION_HORZ:
+ if (output_enabled)
+ cm->counts.partition[ctx][PARTITION_HORZ]++;
*get_sb_index(x, subsize) = 0;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sub_rate[0], &sub_dist[0],
- subsize, get_block_context(x, subsize), INT64_MAX);
- if (bsize >= BLOCK_8X8 && mi_col + num_8x8_subsize < cm->mi_cols) {
- update_state(cpi, get_block_context(x, subsize), subsize, 0);
- encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
+ encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
+ if (mi_row + hbs < cm->mi_rows) {
*get_sb_index(x, subsize) = 1;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col + num_8x8_subsize,
- &sub_rate[1], &sub_dist[1], subsize,
- get_block_context(x, subsize), INT64_MAX);
+ encode_b_rt(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled,
+ subsize);
}
- *rate = sub_rate[0] + sub_rate[1];
- *dist = sub_dist[1] + sub_dist[1];
break;
case PARTITION_SPLIT:
- *get_sb_index(x, subsize) = 0;
- pick_partition_type(cpi, tile, mi_8x8, tp, mi_row, mi_col, subsize,
- &sub_rate[0], &sub_dist[0], 0);
-
- if ((mi_col + num_8x8_subsize) < cm->mi_cols) {
- *get_sb_index(x, subsize) = 1;
- pick_partition_type(cpi, tile, mi_8x8 + num_8x8_subsize, tp,
- mi_row, mi_col + num_8x8_subsize, subsize,
- &sub_rate[1], &sub_dist[1], 0);
- }
-
- if ((mi_row + num_8x8_subsize) < cm->mi_rows) {
- *get_sb_index(x, subsize) = 2;
- pick_partition_type(cpi, tile, mi_8x8 + num_8x8_subsize * mi_stride, tp,
- mi_row + num_8x8_subsize, mi_col, subsize,
- &sub_rate[2], &sub_dist[2], 0);
- }
-
- if ((mi_col + num_8x8_subsize) < cm->mi_cols &&
- (mi_row + num_8x8_subsize) < cm->mi_rows) {
- *get_sb_index(x, subsize) = 3;
- mi_offset = num_8x8_subsize * mi_stride + num_8x8_subsize;
- pick_partition_type(cpi, tile, mi_8x8 + mi_offset, tp,
- mi_row + num_8x8_subsize, mi_col + num_8x8_subsize,
- subsize, &sub_rate[3], &sub_dist[3], 0);
- }
-
- for (i = 0; i < 4; ++i) {
- *rate += sub_rate[i];
- *dist += sub_dist[i];
- }
+ subsize = get_subsize(bsize, PARTITION_SPLIT);
+ if (output_enabled)
+ cm->counts.partition[ctx][PARTITION_SPLIT]++;
+ *get_sb_index(x, subsize) = 0;
+ encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
+ *get_sb_index(x, subsize) = 1;
+ encode_sb_rt(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled,
+ subsize);
+ *get_sb_index(x, subsize) = 2;
+ encode_sb_rt(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled,
+ subsize);
+ *get_sb_index(x, subsize) = 3;
+ encode_sb_rt(cpi, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled,
+ subsize);
break;
default:
- assert(0);
+ assert("Invalid partition type.");
}
- if (do_recon) {
- int output_enabled = (bsize == BLOCK_64X64);
-
- // Check the projected output rate for this SB against it's target
- // and and if necessary apply a Q delta using segmentation to get
- // closer to the target.
- if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) {
- select_in_frame_q_segment(cpi, mi_row, mi_col,
- output_enabled, *rate);
- }
-
- encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize);
- }
+ if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
+ update_partition_context(cpi->above_seg_context, cpi->left_seg_context,
+ mi_row, mi_col, subsize, bsize);
}
static void rd_use_partition(VP9_COMP *cpi,
@@ -1444,15 +1486,19 @@ static void rd_use_partition(VP9_COMP *cpi,
}
static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
- BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
- BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8,
- BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16
+ BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
+ BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
+ BLOCK_8X8, BLOCK_8X8, BLOCK_8X8,
+ BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,
+ BLOCK_16X16
};
static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
- BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,
- BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, BLOCK_64X64,
- BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64
+ BLOCK_8X8, BLOCK_16X16, BLOCK_16X16,
+ BLOCK_16X16, BLOCK_32X32, BLOCK_32X32,
+ BLOCK_32X32, BLOCK_64X64, BLOCK_64X64,
+ BLOCK_64X64, BLOCK_64X64, BLOCK_64X64,
+ BLOCK_64X64
};
// Look at all the mode_info entries for blocks that are part of this
@@ -1538,9 +1584,11 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
}
}
- // Give a bit of leaway either side of the observed min and max
- *min_block_size = min_partition_size[*min_block_size];
- *max_block_size = max_partition_size[*max_block_size];
+ // adjust observed min and max
+ if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
+ *min_block_size = min_partition_size[*min_block_size];
+ *max_block_size = max_partition_size[*max_block_size];
+ }
// Check border cases where max and min from neighbours may not be legal.
*max_block_size = find_partition_size(*max_block_size,
@@ -1996,34 +2044,6 @@ static void rd_pick_reference_frame(VP9_COMP *cpi, const TileInfo *const tile,
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, BLOCK_64X64);
}
-static void encode_sb_row_rt(VP9_COMP *cpi, const TileInfo *const tile,
- int mi_row, TOKENEXTRA **tp) {
- VP9_COMMON *const cm = &cpi->common;
- int mi_col;
-
- cpi->sf.always_this_block_size = BLOCK_8X8;
-
- // Initialize the left context for the new SB row
- vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context));
- vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context));
-
- // Code each SB in the row
- for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
- mi_col += MI_BLOCK_SIZE) {
- int dummy_rate;
- int64_t dummy_dist;
- const int idx_str = cm->mode_info_stride * mi_row + mi_col;
- MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
-
- vp9_zero(cpi->mb.pred_mv);
-
- set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
- set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col);
- pick_partition_type(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1);
- }
-}
-
static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
int mi_row, TOKENEXTRA **tp) {
VP9_COMMON *const cm = &cpi->common;
@@ -2250,11 +2270,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vp9_tile_init(&tile, cm, tile_row, tile_col);
for (mi_row = tile.mi_row_start;
mi_row < tile.mi_row_end; mi_row += 8)
-#if 1
encode_sb_row(cpi, &tile, mi_row, &tp);
-#else
- encode_sb_row_rt(cpi, &tile, mi_row, &tp);
-#endif
cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
@@ -2433,6 +2449,264 @@ static void select_tx_mode(VP9_COMP *cpi) {
}
}
}
+// Start RTC Exploration
+typedef enum {
+ BOTH_ZERO = 0,
+ ZERO_PLUS_PREDICTED = 1,
+ BOTH_PREDICTED = 2,
+ NEW_PLUS_NON_INTRA = 3,
+ BOTH_NEW = 4,
+ INTRA_PLUS_NON_INTRA = 5,
+ BOTH_INTRA = 6,
+ INVALID_CASE = 9
+} motion_vector_context;
+
+static void set_mode_info(MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,
+ MB_PREDICTION_MODE mode, int mi_row, int mi_col) {
+ mbmi->interp_filter = EIGHTTAP;
+ mbmi->mode = mode;
+ mbmi->mv[0].as_int = 0;
+ mbmi->mv[1].as_int = 0;
+ if (mode < NEARESTMV) {
+ mbmi->ref_frame[0] = INTRA_FRAME;
+ } else {
+ mbmi->ref_frame[0] = LAST_FRAME;
+ }
+
+ mbmi->ref_frame[1] = INTRA_FRAME;
+ mbmi->tx_size = max_txsize_lookup[bsize];
+ mbmi->uv_mode = mode;
+ mbmi->skip_coeff = 0;
+ mbmi->sb_type = bsize;
+ mbmi->segment_id = 0;
+}
+static inline int get_block_row(int b32i, int b16i, int b8i) {
+ return ((b32i >> 1) << 2) + ((b16i >> 1) << 1) + (b8i >> 1);
+}
+static inline int get_block_col(int b32i, int b16i, int b8i) {
+ return ((b32i & 1) << 2) + ((b16i & 1) << 1) + (b8i & 1);
+}
+static void rtc_use_partition(VP9_COMP *cpi,
+ const TileInfo *const tile,
+ MODE_INFO **mi_8x8,
+ TOKENEXTRA **tp, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int *rate, int64_t *dist,
+ int do_recon) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->mb;
+ MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+ const int mis = cm->mode_info_stride;
+ int mi_width = num_8x8_blocks_wide_lookup[cpi->sf.always_this_block_size];
+ int mi_height = num_8x8_blocks_high_lookup[cpi->sf.always_this_block_size];
+ int i, j;
+ int chosen_rate = INT_MAX;
+ int64_t chosen_dist = INT_MAX;
+ MB_PREDICTION_MODE mode = DC_PRED;
+ int row8x8_remaining = tile->mi_row_end - mi_row;
+ int col8x8_remaining = tile->mi_col_end - mi_col;
+ int b32i;
+ x->fast_ms = 0;
+ x->subblock_ref = 0;
+ for (b32i = 0; b32i < 4; b32i++) {
+ int b16i;
+ for (b16i = 0; b16i < 4; b16i++) {
+ int b8i;
+ int block_row = get_block_row(b32i, b16i, 0);
+ int block_col = get_block_col(b32i, b16i, 0);
+ int index = block_row * mis + block_col;
+ int rate;
+ int64_t dist;
+
+ int_mv frame_nearest_mv[MAX_REF_FRAMES];
+ int_mv frame_near_mv[MAX_REF_FRAMES];
+ struct buf_2d yv12_mb[MAX_REF_FRAMES][MAX_MB_PLANE];
+
+ // Find a partition size that fits
+ bsize = find_partition_size(cpi->sf.always_this_block_size,
+ (row8x8_remaining - block_row),
+ (col8x8_remaining - block_col),
+ &mi_height, &mi_width);
+ mi_8x8[index] = mi_8x8[0] + index;
+
+ set_mi_row_col(xd, tile, mi_row + block_row, mi_height,
+ mi_col + block_col, mi_width, cm->mi_rows, cm->mi_cols);
+
+ xd->mi_8x8 = mi_8x8 + index;
+
+ if (cm->frame_type != KEY_FRAME) {
+ set_offsets(cpi, tile, mi_row + block_row, mi_col + block_col, bsize);
+
+ vp9_pick_inter_mode(cpi, x, tile,
+ mi_row + block_row, mi_col + block_col,
+ &rate, &dist, cpi->sf.always_this_block_size);
+ } else {
+ set_mode_info(&mi_8x8[index]->mbmi, bsize, mode,
+ mi_row + block_row, mi_col + block_col);
+ vp9_setup_buffer_inter(cpi, x, tile,
+ LAST_FRAME, cpi->sf.always_this_block_size,
+ mi_row + block_row, mi_col + block_col,
+ frame_nearest_mv, frame_near_mv, yv12_mb);
+ }
+
+ for (j = 0; j < mi_height; j++)
+ for (i = 0; i < mi_width; i++)
+ if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > i
+ && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > j) {
+ mi_8x8[index+ i + j * mis] = mi_8x8[index];
+ }
+
+ for (b8i = 0; b8i < 4; b8i++) {
+ }
+ }
+ }
+ encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, BLOCK_64X64);
+
+ *rate = chosen_rate;
+ *dist = chosen_dist;
+}
+
+static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
+ int mi_row, TOKENEXTRA **tp) {
+ VP9_COMMON * const cm = &cpi->common;
+ int mi_col;
+
+ // Initialize the left context for the new SB row
+ vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context));
+ vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context));
+
+ // Code each SB in the row
+ for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
+ mi_col += MI_BLOCK_SIZE) {
+ int dummy_rate;
+ int64_t dummy_dist;
+
+ const int idx_str = cm->mode_info_stride * mi_row + mi_col;
+ MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
+
+ cpi->mb.source_variance = UINT_MAX;
+ set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
+ set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col);
+ rtc_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
+ &dummy_rate, &dummy_dist, 1);
+ }
+}
+
+
+static void encode_rtc_frame_internal(VP9_COMP *cpi) {
+ int mi_row;
+ MACROBLOCK * const x = &cpi->mb;
+ VP9_COMMON * const cm = &cpi->common;
+ MACROBLOCKD * const xd = &x->e_mbd;
+
+// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
+// cpi->common.current_video_frame, cpi->common.show_frame,
+// cm->frame_type);
+
+// debug output
+#if DBG_PRNT_SEGMAP
+ {
+ FILE *statsfile;
+ statsfile = fopen("segmap2.stt", "a");
+ fprintf(statsfile, "\n");
+ fclose(statsfile);
+ }
+#endif
+
+ vp9_zero(cm->counts.switchable_interp);
+ vp9_zero(cpi->tx_stepdown_count);
+
+ xd->mi_8x8 = cm->mi_grid_visible;
+ // required for vp9_frame_init_quantizer
+ xd->mi_8x8[0] = cm->mi;
+
+ xd->last_mi = cm->prev_mi;
+
+ vp9_zero(cpi->common.counts.mv);
+ vp9_zero(cpi->coef_counts);
+ vp9_zero(cm->counts.eob_branch);
+
+ cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0
+ && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
+ switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless);
+
+ vp9_frame_init_quantizer(cpi);
+
+ vp9_initialize_rd_consts(cpi);
+ vp9_initialize_me_consts(cpi, cm->base_qindex);
+ switch_tx_mode(cpi);
+ cpi->sf.always_this_block_size = BLOCK_16X16;
+
+ if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
+ // Initialize encode frame context.
+ init_encode_frame_mb_context(cpi);
+
+ // Build a frame level activity map
+ build_activity_map(cpi);
+ }
+
+ // Re-initialize encode frame context.
+ init_encode_frame_mb_context(cpi);
+
+ vp9_zero(cpi->rd_comp_pred_diff);
+ vp9_zero(cpi->rd_filter_diff);
+ vp9_zero(cpi->rd_tx_select_diff);
+ vp9_zero(cpi->rd_tx_select_threshes);
+
+ set_prev_mi(cm);
+
+ {
+ struct vpx_usec_timer emr_timer;
+ vpx_usec_timer_start(&emr_timer);
+
+ {
+ // Take tiles into account and give start/end MB
+ int tile_col, tile_row;
+ TOKENEXTRA *tp = cpi->tok;
+ const int tile_cols = 1 << cm->log2_tile_cols;
+ const int tile_rows = 1 << cm->log2_tile_rows;
+
+ for (tile_row = 0; tile_row < tile_rows; tile_row++) {
+ for (tile_col = 0; tile_col < tile_cols; tile_col++) {
+ TileInfo tile;
+ TOKENEXTRA *tp_old = tp;
+
+ // For each row of SBs in the frame
+ vp9_tile_init(&tile, cm, tile_row, tile_col);
+ for (mi_row = tile.mi_row_start;
+ mi_row < tile.mi_row_end; mi_row += 8)
+ encode_rtc_sb_row(cpi, &tile, mi_row, &tp);
+
+ cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
+ assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
+ }
+ }
+ }
+
+ vpx_usec_timer_mark(&emr_timer);
+ cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
+ }
+
+ if (cpi->sf.skip_encode_sb) {
+ int j;
+ unsigned int intra_count = 0, inter_count = 0;
+ for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
+ intra_count += cm->counts.intra_inter[j][0];
+ inter_count += cm->counts.intra_inter[j][1];
+ }
+ cpi->sf.skip_encode_frame = ((intra_count << 2) < inter_count);
+ cpi->sf.skip_encode_frame &= (cm->frame_type != KEY_FRAME);
+ cpi->sf.skip_encode_frame &= cm->show_frame;
+ } else {
+ cpi->sf.skip_encode_frame = 0;
+ }
+
+#if 0
+ // Keep record of the total distortion this time around for future use
+ cpi->last_frame_distortion = cpi->frame_distortion;
+#endif
+}
+// end RTC play code
+
void vp9_encode_frame(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
@@ -2512,7 +2786,11 @@ void vp9_encode_frame(VP9_COMP *cpi) {
select_tx_mode(cpi);
cm->reference_mode = reference_mode;
cm->interp_filter = interp_filter;
- encode_frame_internal(cpi);
+
+ if (cpi->compressor_speed == 3)
+ encode_rtc_frame_internal(cpi);
+ else
+ encode_frame_internal(cpi);
for (i = 0; i < REFERENCE_MODES; ++i) {
const int diff = (int) (cpi->rd_comp_pred_diff[i] / cm->MBs);
@@ -2590,7 +2868,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
}
}
} else {
- encode_frame_internal(cpi);
+ encode_rtc_frame_internal(cpi);
}
}
@@ -2666,7 +2944,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 &&
- (cpi->oxcf.aq_mode != COMPLEXITY_AQ);
+ (cpi->oxcf.aq_mode != COMPLEXITY_AQ) &&
+ cpi->compressor_speed != 3;
x->skip_optimize = ctx->is_coded;
ctx->is_coded = 1;
x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
@@ -2681,7 +2960,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
vp9_update_zbin_extra(cpi, x);
}
} else {
- vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
+ set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+ xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
// Adjust the zbin based on this MB rate.
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index d1485307d..8ff23c79a 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -25,24 +25,6 @@
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_tokenize.h"
-void vp9_setup_interp_filters(MACROBLOCKD *xd, INTERP_FILTER filter,
- VP9_COMMON *cm) {
- if (xd->mi_8x8 && xd->mi_8x8[0]) {
- MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
-
- set_ref_ptrs(cm, xd, mbmi->ref_frame[0] - LAST_FRAME,
- mbmi->ref_frame[1] - LAST_FRAME);
-
- } else {
- set_ref_ptrs(cm, xd, -1, -1);
- }
-
- xd->subpix.filter_x = xd->subpix.filter_y =
- vp9_get_interp_kernel(filter == SWITCHABLE ? EIGHTTAP : filter);
-
- assert(((intptr_t)xd->subpix.filter_x & 0xff) == 0);
-}
-
void vp9_subtract_block_c(int rows, int cols,
int16_t *diff_ptr, ptrdiff_t diff_stride,
const uint8_t *src_ptr, ptrdiff_t src_stride,
@@ -356,7 +338,6 @@ static void optimize_init_b(int plane, BLOCK_SIZE bsize,
pd->above_context, pd->left_context,
num_4x4_w, num_4x4_h);
}
-
void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct encode_b_args* const args = arg;
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index c728efd49..9f6c9f069 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -48,8 +48,7 @@ void vp9_encode_intra_block_y(MACROBLOCK *x, BLOCK_SIZE bsize);
void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize);
int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred);
-void vp9_setup_interp_filters(MACROBLOCKD *xd, INTERP_FILTER filter,
- VP9_COMMON *cm);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 395ce2008..af9fa1ba7 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -447,6 +447,16 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
}
}
+static BLOCK_SIZE get_bsize(const VP9_COMMON *cm, int mb_row, int mb_col) {
+ if (2 * mb_col + 1 < cm->mi_cols) {
+ return 2 * mb_row + 1 < cm->mi_rows ? BLOCK_16X16
+ : BLOCK_16X8;
+ } else {
+ return 2 * mb_row + 1 < cm->mi_rows ? BLOCK_8X16
+ : BLOCK_8X8;
+ }
+}
+
void vp9_first_pass(VP9_COMP *cpi) {
int mb_row, mb_col;
MACROBLOCK *const x = &cpi->mb;
@@ -481,10 +491,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
int sum_in_vectors = 0;
uint32_t lastmv_as_int = 0;
struct twopass_rc *const twopass = &cpi->twopass;
-
- int_mv zero_ref_mv;
-
- zero_ref_mv.as_int = 0;
+ const MV zero_mv = {0, 0};
vp9_clear_system_state(); // __asm emms;
@@ -493,8 +500,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
setup_dst_planes(xd, new_yv12, 0, 0);
xd->mi_8x8 = cm->mi_grid_visible;
- // required for vp9_frame_init_quantizer
- xd->mi_8x8[0] = cm->mi;
+ xd->mi_8x8[0] = cm->mi; // required for vp9_frame_init_quantizer
setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
@@ -508,14 +514,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
}
x->skip_recode = 0;
-
- // Initialise the MV cost table to the defaults
- // if( cm->current_video_frame == 0)
- // if ( 0 )
- {
- vp9_init_mv_probs(cm);
- vp9_initialize_rd_consts(cpi);
- }
+ vp9_init_mv_probs(cm);
+ vp9_initialize_rd_consts(cpi);
// tiling is ignored in the first pass
vp9_tile_init(&tile, cm, 0, 0);
@@ -540,8 +540,9 @@ void vp9_first_pass(VP9_COMP *cpi) {
// for each macroblock col in image
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
int this_error;
- int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
+ const int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
double error_weight = 1.0;
+ const BLOCK_SIZE bsize = get_bsize(cm, mb_row, mb_col);
vp9_clear_system_state(); // __asm emms;
@@ -549,30 +550,15 @@ void vp9_first_pass(VP9_COMP *cpi) {
xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset;
xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset;
xd->left_available = (mb_col != 0);
-
- if (mb_col * 2 + 1 < cm->mi_cols) {
- if (mb_row * 2 + 1 < cm->mi_rows) {
- xd->mi_8x8[0]->mbmi.sb_type = BLOCK_16X16;
- } else {
- xd->mi_8x8[0]->mbmi.sb_type = BLOCK_16X8;
- }
- } else {
- if (mb_row * 2 + 1 < cm->mi_rows) {
- xd->mi_8x8[0]->mbmi.sb_type = BLOCK_8X16;
- } else {
- xd->mi_8x8[0]->mbmi.sb_type = BLOCK_8X8;
- }
- }
+ xd->mi_8x8[0]->mbmi.sb_type = bsize;
xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME;
set_mi_row_col(xd, &tile,
- mb_row << 1,
- num_8x8_blocks_high_lookup[xd->mi_8x8[0]->mbmi.sb_type],
- mb_col << 1,
- num_8x8_blocks_wide_lookup[xd->mi_8x8[0]->mbmi.sb_type],
+ mb_row << 1, num_8x8_blocks_high_lookup[bsize],
+ mb_col << 1, num_8x8_blocks_wide_lookup[bsize],
cm->mi_rows, cm->mi_cols);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
- int energy = vp9_block_energy(cpi, x, xd->mi_8x8[0]->mbmi.sb_type);
+ const int energy = vp9_block_energy(cpi, x, bsize);
error_weight = vp9_vaq_inv_q_ratio(energy);
}
@@ -598,8 +584,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
// Set up limit values for motion vectors to prevent them extending
// outside the UMV borders.
x->mv_col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16);
- x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
- + BORDER_MV_PIXELS_B16;
+ x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16;
// Other than for the first frame do a motion search
if (cm->current_video_frame > 0) {
@@ -624,7 +609,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
// based search as well.
if (best_ref_mv.as_int) {
tmp_err = INT_MAX;
- first_pass_motion_search(cpi, x, &zero_ref_mv.as_mv, &tmp_mv.as_mv,
+ first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
&tmp_err);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_clear_system_state(); // __asm emms;
@@ -645,17 +630,15 @@ void vp9_first_pass(VP9_COMP *cpi) {
xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
gf_motion_error = zz_motion_search(cpi, x);
- first_pass_motion_search(cpi, x, &zero_ref_mv.as_mv, &tmp_mv.as_mv,
+ first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
&gf_motion_error);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_clear_system_state(); // __asm emms;
gf_motion_error *= error_weight;
}
- if ((gf_motion_error < motion_error) &&
- (gf_motion_error < this_error)) {
+ if (gf_motion_error < motion_error && gf_motion_error < this_error)
second_ref_count++;
- }
// Reset to last frame as reference buffer
xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset;
@@ -692,9 +675,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;
xd->mi_8x8[0]->mbmi.ref_frame[0] = LAST_FRAME;
xd->mi_8x8[0]->mbmi.ref_frame[1] = NONE;
- vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1,
- xd->mi_8x8[0]->mbmi.sb_type);
- vp9_encode_sby(x, xd->mi_8x8[0]->mbmi.sb_type);
+ vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize);
+ vp9_encode_sby(x, bsize);
sum_mvr += mv.as_mv.row;
sum_mvr_abs += abs(mv.as_mv.row);
sum_mvc += mv.as_mv.col;
@@ -784,13 +766,11 @@ void vp9_first_pass(VP9_COMP *cpi) {
fps.mvr_abs = (double)sum_mvr_abs / mvcount;
fps.MVc = (double)sum_mvc / mvcount;
fps.mvc_abs = (double)sum_mvc_abs / mvcount;
- fps.MVrv = ((double)sum_mvrs - (fps.MVr * fps.MVr / mvcount)) /
- mvcount;
- fps.MVcv = ((double)sum_mvcs - (fps.MVc * fps.MVc / mvcount)) /
- mvcount;
+ fps.MVrv = ((double)sum_mvrs - (fps.MVr * fps.MVr / mvcount)) / mvcount;
+ fps.MVcv = ((double)sum_mvcs - (fps.MVc * fps.MVc / mvcount)) / mvcount;
fps.mv_in_out_count = (double)sum_in_vectors / (mvcount * 2);
fps.new_mv_count = new_mv_count;
- fps.pcnt_motion = (double)mvcount / cpi->common.MBs;
+ fps.pcnt_motion = (double)mvcount / cm->MBs;
} else {
fps.MVr = 0.0;
fps.mvr_abs = 0.0;
@@ -918,8 +898,7 @@ static double calc_correction_factor(double err_per_mb,
return fclamp(pow(error_term, power_term), 0.05, 5.0);
}
-static int estimate_max_q(VP9_COMP *cpi,
- FIRSTPASS_STATS *fpstats,
+static int estimate_max_q(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
int section_target_bandwitdh) {
int q;
const int num_mbs = cpi->common.MBs;
@@ -1093,12 +1072,12 @@ void vp9_end_second_pass(VP9_COMP *cpi) {
// This function gives and estimate of how badly we believe
// the prediction quality is decaying from frame to frame.
-static double get_prediction_decay_rate(VP9_COMP *cpi,
- FIRSTPASS_STATS *next_frame) {
+static double get_prediction_decay_rate(const VP9_COMMON *cm,
+ const FIRSTPASS_STATS *next_frame) {
// Look at the observed drop in prediction quality between the last frame
// and the GF buffer (which contains an older frame).
const double mb_sr_err_diff = (next_frame->sr_coded_error -
- next_frame->coded_error) / cpi->common.MBs;
+ next_frame->coded_error) / cm->MBs;
const double second_ref_decay = mb_sr_err_diff <= 512.0
? fclamp(pow(1.0 - (mb_sr_err_diff / 512.0), 0.5), 0.85, 1.0)
: 0.85;
@@ -1126,7 +1105,6 @@ static int detect_transition_to_still(
int j;
FIRSTPASS_STATS *position = cpi->twopass.stats_in;
FIRSTPASS_STATS tmp_next_frame;
- double zz_inter;
// Look ahead a few frames to see if static condition
// persists...
@@ -1134,11 +1112,10 @@ static int detect_transition_to_still(
if (EOF == input_stats(&cpi->twopass, &tmp_next_frame))
break;
- zz_inter = (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion);
- if (zz_inter < 0.999)
+ if (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion < 0.999)
break;
}
- // Reset file position
+
reset_fpf_position(&cpi->twopass, position);
// Only if it does do we signal a transition to still
@@ -1152,14 +1129,14 @@ static int detect_transition_to_still(
// This function detects a flash through the high relative pcnt_second_ref
// score in the frame following a flash frame. The offset passed in should
// reflect this
-static int detect_flash(VP9_COMP *cpi, int offset) {
+static int detect_flash(const struct twopass_rc *twopass, int offset) {
FIRSTPASS_STATS next_frame;
int flash_detected = 0;
// Read the frame data.
// The return is FALSE (no flash detected) if not a valid frame
- if (read_frame_stats(&cpi->twopass, &next_frame, offset) != EOF) {
+ if (read_frame_stats(twopass, &next_frame, offset) != EOF) {
// What we are looking for here is a situation where there is a
// brief break in prediction (such as a flash) but subsequent frames
// are reasonably well predicted by an earlier (pre flash) frame.
@@ -1188,16 +1165,15 @@ static void accumulate_frame_motion_stats(
// Accumulate Motion In/Out of frame stats
*this_frame_mv_in_out = this_frame->mv_in_out_count * motion_pct;
*mv_in_out_accumulator += this_frame->mv_in_out_count * motion_pct;
- *abs_mv_in_out_accumulator +=
- fabs(this_frame->mv_in_out_count * motion_pct);
+ *abs_mv_in_out_accumulator += fabs(this_frame->mv_in_out_count * motion_pct);
// Accumulate a measure of how uniform (or conversely how random)
// the motion field is. (A ratio of absmv / mv)
if (motion_pct > 0.05) {
- double this_frame_mvr_ratio = fabs(this_frame->mvr_abs) /
+ const double this_frame_mvr_ratio = fabs(this_frame->mvr_abs) /
DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVr));
- double this_frame_mvc_ratio = fabs(this_frame->mvc_abs) /
+ const double this_frame_mvc_ratio = fabs(this_frame->mvc_abs) /
DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVc));
*mv_ratio_accumulator += (this_frame_mvr_ratio < this_frame->mvr_abs)
@@ -1240,7 +1216,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
int f_frames, int b_frames,
int *f_boost, int *b_boost) {
FIRSTPASS_STATS this_frame;
-
+ struct twopass_rc *const twopass = &cpi->twopass;
int i;
double boost_score = 0.0;
double mv_ratio_accumulator = 0.0;
@@ -1253,7 +1229,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
// Search forward from the proposed arf/next gf position
for (i = 0; i < f_frames; i++) {
- if (read_frame_stats(&cpi->twopass, &this_frame, (i + offset)) == EOF)
+ if (read_frame_stats(twopass, &this_frame, (i + offset)) == EOF)
break;
// Update the motion related elements to the boost calculation
@@ -1264,12 +1240,12 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
// We want to discount the flash frame itself and the recovery
// frame that follows as both will have poor scores.
- flash_detected = detect_flash(cpi, (i + offset)) ||
- detect_flash(cpi, (i + offset + 1));
+ flash_detected = detect_flash(twopass, i + offset) ||
+ detect_flash(twopass, i + offset + 1);
// Cumulative effect of prediction quality decay
if (!flash_detected) {
- decay_accumulator *= get_prediction_decay_rate(cpi, &this_frame);
+ decay_accumulator *= get_prediction_decay_rate(&cpi->common, &this_frame);
decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
? MIN_DECAY_FACTOR : decay_accumulator;
}
@@ -1290,7 +1266,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
// Search backward towards last gf position
for (i = -1; i >= -b_frames; i--) {
- if (read_frame_stats(&cpi->twopass, &this_frame, (i + offset)) == EOF)
+ if (read_frame_stats(twopass, &this_frame, (i + offset)) == EOF)
break;
// Update the motion related elements to the boost calculation
@@ -1301,12 +1277,12 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
// We want to discount the the flash frame itself and the recovery
// frame that follows as both will have poor scores.
- flash_detected = detect_flash(cpi, (i + offset)) ||
- detect_flash(cpi, (i + offset + 1));
+ flash_detected = detect_flash(twopass, i + offset) ||
+ detect_flash(twopass, i + offset + 1);
// Cumulative effect of prediction quality decay
if (!flash_detected) {
- decay_accumulator *= get_prediction_decay_rate(cpi, &this_frame);
+ decay_accumulator *= get_prediction_decay_rate(&cpi->common, &this_frame);
decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
? MIN_DECAY_FACTOR : decay_accumulator;
}
@@ -1466,6 +1442,7 @@ void define_fixed_arf_period(VP9_COMP *cpi) {
static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
FIRSTPASS_STATS next_frame = { 0 };
FIRSTPASS_STATS *start_pos;
+ struct twopass_rc *const twopass = &cpi->twopass;
int i;
double boost_score = 0.0;
double old_boost_score = 0.0;
@@ -1486,8 +1463,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double mv_ratio_accumulator_thresh;
int max_bits = frame_max_bits(cpi); // Max for a single frame
- unsigned int allow_alt_ref =
- cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames;
+ unsigned int allow_alt_ref = cpi->oxcf.play_alternate &&
+ cpi->oxcf.lag_in_frames;
int f_boost = 0;
int b_boost = 0;
@@ -1495,11 +1472,11 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
int active_max_gf_interval;
RATE_CONTROL *const rc = &cpi->rc;
- cpi->twopass.gf_group_bits = 0;
+ twopass->gf_group_bits = 0;
vp9_clear_system_state(); // __asm emms;
- start_pos = cpi->twopass.stats_in;
+ start_pos = twopass->stats_in;
// Load stats for the current frame.
mod_frame_err = calculate_modified_err(cpi, this_frame);
@@ -1530,20 +1507,19 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
active_max_gf_interval = rc->max_gf_interval;
i = 0;
- while ((i < cpi->twopass.static_scene_max_gf_interval) &&
- (i < rc->frames_to_key)) {
+ while (i < twopass->static_scene_max_gf_interval && i < rc->frames_to_key) {
i++; // Increment the loop counter
// Accumulate error score of frames in this gf group
mod_frame_err = calculate_modified_err(cpi, this_frame);
gf_group_err += mod_frame_err;
- if (EOF == input_stats(&cpi->twopass, &next_frame))
+ if (EOF == input_stats(twopass, &next_frame))
break;
// Test for the case where there is a brief flash but the prediction
// quality back to an earlier frame is then restored.
- flash_detected = detect_flash(cpi, 0);
+ flash_detected = detect_flash(twopass, 0);
// Update the motion related elements to the boost calculation
accumulate_frame_motion_stats(&next_frame,
@@ -1554,14 +1530,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Cumulative effect of prediction quality decay
if (!flash_detected) {
last_loop_decay_rate = loop_decay_rate;
- loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
+ loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame);
decay_accumulator = decay_accumulator * loop_decay_rate;
// Monitor for static sections.
if ((next_frame.pcnt_inter - next_frame.pcnt_motion) <
zero_motion_accumulator) {
- zero_motion_accumulator =
- (next_frame.pcnt_inter - next_frame.pcnt_motion);
+ zero_motion_accumulator = next_frame.pcnt_inter -
+ next_frame.pcnt_motion;
}
// Break clause to detect very still sections after motion
@@ -1599,14 +1575,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
old_boost_score = boost_score;
}
- cpi->twopass.gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0);
+ twopass->gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0);
// Don't allow a gf too near the next kf
if ((rc->frames_to_key - i) < MIN_GF_INTERVAL) {
while (i < (rc->frames_to_key + !rc->next_key_frame_forced)) {
i++;
- if (EOF == input_stats(&cpi->twopass, this_frame))
+ if (EOF == input_stats(twopass, this_frame))
break;
if (i < rc->frames_to_key) {
@@ -2069,7 +2045,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// How fast is prediction quality decaying
- loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
+ loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame);
// We want to know something about the recent past... rather than
// as used elsewhere where we are concened with decay in prediction
@@ -2203,8 +2179,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
r = RMAX;
// How fast is prediction quality decaying
- if (!detect_flash(cpi, 0)) {
- loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
+ if (!detect_flash(twopass, 0)) {
+ loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame);
decay_accumulator *= loop_decay_rate;
decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
? MIN_DECAY_FACTOR : decay_accumulator;
@@ -2333,6 +2309,7 @@ void vp9_get_svc_params(VP9_COMP *cpi) {
(cpi->oxcf.auto_key && (cpi->rc.frames_since_key %
cpi->key_frame_frequency == 0))) {
cm->frame_type = KEY_FRAME;
+ cpi->rc.source_alt_ref_active = 0;
} else {
cm->frame_type = INTER_FRAME;
}
@@ -2340,6 +2317,9 @@ void vp9_get_svc_params(VP9_COMP *cpi) {
cpi->rc.baseline_gf_interval = INT_MAX;
}
+// Use this macro to turn on/off use of alt-refs in one-pass mode.
+#define USE_ALTREF_FOR_ONE_PASS 1
+
void vp9_get_one_pass_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
if (!cpi->refresh_alt_ref_frame &&
@@ -2351,13 +2331,20 @@ void vp9_get_one_pass_params(VP9_COMP *cpi) {
cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 &&
cpi->rc.frames_to_key == 0;
cpi->rc.frames_to_key = cpi->key_frame_frequency;
- cpi->rc.kf_boost = 300;
+ cpi->rc.kf_boost = 2000;
+ cpi->rc.source_alt_ref_active = 0;
} else {
cm->frame_type = INTER_FRAME;
}
if (cpi->rc.frames_till_gf_update_due == 0) {
+ cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL;
cpi->rc.frames_till_gf_update_due = cpi->rc.baseline_gf_interval;
+ // NOTE: frames_till_gf_update_due must be <= frames_to_key.
+ if (cpi->rc.frames_till_gf_update_due > cpi->rc.frames_to_key)
+ cpi->rc.frames_till_gf_update_due = cpi->rc.frames_to_key;
cpi->refresh_golden_frame = 1;
+ cpi->rc.source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
+ cpi->rc.gfu_boost = 1000;
}
}
@@ -2371,7 +2358,8 @@ void vp9_get_one_pass_cbr_params(VP9_COMP *cpi) {
cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 &&
cpi->rc.frames_to_key == 0;
cpi->rc.frames_to_key = cpi->key_frame_frequency;
- cpi->rc.kf_boost = 300;
+ cpi->rc.kf_boost = 2000;
+ cpi->rc.source_alt_ref_active = 0;
} else {
cm->frame_type = INTER_FRAME;
}
@@ -2405,12 +2393,13 @@ void vp9_get_second_pass_params(VP9_COMP *cpi) {
double this_frame_intra_error;
double this_frame_coded_error;
+ if (!cpi->twopass.stats_in)
+ return;
if (cpi->refresh_alt_ref_frame) {
cpi->common.frame_type = INTER_FRAME;
+ rc->per_frame_bandwidth = cpi->twopass.gf_bits;
return;
}
- if (!cpi->twopass.stats_in)
- return;
vp9_clear_system_state();
diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c
index ee73ff15a..e6e59c05a 100644
--- a/vp9/encoder/vp9_lookahead.c
+++ b/vp9/encoder/vp9_lookahead.c
@@ -173,7 +173,6 @@ struct lookahead_entry * vp9_lookahead_peek(struct lookahead_ctx *ctx,
int index) {
struct lookahead_entry *buf = NULL;
- assert(index < (int)ctx->max_sz);
if (index < (int)ctx->sz) {
index += ctx->read_idx;
if (index >= (int)ctx->max_sz)
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 88023513a..6bc88ec44 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -626,7 +626,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
sf->disable_filter_search_var_thresh = 50;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
- sf->auto_min_max_partition_size = 1;
+ sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
sf->adjust_partitioning_from_last_frame = 1;
sf->last_partitioning_redo_frequency = 3;
@@ -663,7 +663,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
sf->disable_filter_search_var_thresh = 100;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
- sf->auto_min_max_partition_size = 1;
+ sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
sf->adjust_partitioning_from_last_frame = 1;
sf->last_partitioning_redo_frequency = 3;
@@ -698,7 +698,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
sf->disable_filter_search_var_thresh = 200;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
- sf->auto_min_max_partition_size = 1;
+ sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
sf->adjust_partitioning_from_last_frame = 1;
sf->last_partitioning_redo_frequency = 3;
@@ -797,7 +797,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
sf->disable_filter_search_var_thresh = 50;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
- sf->auto_min_max_partition_size = 1;
+ sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
sf->adjust_partitioning_from_last_frame = 1;
sf->last_partitioning_redo_frequency = 3;
@@ -839,6 +839,9 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
if (speed >= 5) {
int i;
sf->disable_split_mask = DISABLE_ALL_SPLIT;
+ sf->auto_min_max_partition_size = frame_is_intra_only(cm) ?
+ RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX;
+ sf->subpel_force_stop = 1;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY;
@@ -867,6 +870,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->recode_loop = 1;
sf->subpel_search_method = SUBPEL_TREE;
sf->subpel_iters_per_step = 2;
+ sf->subpel_force_stop = 0;
sf->optimize_coefficients = !cpi->oxcf.lossless;
sf->reduce_first_step_size = 0;
sf->auto_mv_step_size = 0;
@@ -882,7 +886,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->use_one_partition_size_always = 0;
sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0;
- sf->auto_min_max_partition_size = 0;
+ sf->auto_min_max_partition_size = NOT_IN_USE;
sf->max_partition_size = BLOCK_64X64;
sf->min_partition_size = BLOCK_4X4;
sf->adjust_partitioning_from_last_frame = 0;
@@ -1258,6 +1262,11 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cpi->pass = 2;
cpi->compressor_speed = 0;
break;
+
+ case MODE_REALTIME:
+ cpi->pass = 0;
+ cpi->compressor_speed = 3;
+ break;
}
cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q];
@@ -2541,7 +2550,10 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
vpx_usec_timer_start(&timer);
- vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.use_fast_lpf_pick);
+ if (cpi->compressor_speed == 3)
+ lf->filter_level = 4;
+ else
+ vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.use_fast_lpf_pick);
vpx_usec_timer_mark(&timer);
cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
@@ -2730,7 +2742,9 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
if (cpi->sf.recode_loop != 0) {
vp9_save_coding_context(cpi);
cpi->dummy_packing = 1;
- vp9_pack_bitstream(cpi, dest, size);
+ if (cpi->compressor_speed != 3)
+ vp9_pack_bitstream(cpi, dest, size);
+
cpi->rc.projected_frame_size = (*size) << 3;
vp9_restore_coding_context(cpi);
@@ -2953,15 +2967,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// Clear down mmx registers to allow floating point in what follows.
vp9_clear_system_state();
- // For an alt ref frame in 2 pass we skip the call to the second
- // pass function that sets the target bandwidth so we must set it here.
- if (cpi->refresh_alt_ref_frame) {
- // Set a per frame bit target for the alt ref frame.
- cpi->rc.per_frame_bandwidth = cpi->twopass.gf_bits;
- // Set a per second target bitrate.
- cpi->target_bandwidth = (int)(cpi->twopass.gf_bits * cpi->output_framerate);
- }
-
// Clear zbin over-quant value and mode boost values.
cpi->zbin_mode_boost = 0;
@@ -3088,11 +3093,22 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
&frame_under_shoot_limit,
&frame_over_shoot_limit);
- // Decide q and q bounds
+ // Decide q and q bounds.
q = vp9_rc_pick_q_and_adjust_q_bounds(cpi,
&bottom_index,
&top_index);
+ // JBB : This is realtime mode. In real time mode the first frame
+ // should be larger. Q of 0 is disabled because we force tx size to be
+ // 16x16...
+ if (cpi->compressor_speed == 3) {
+ if (cpi->common.current_video_frame == 0)
+ q /= 3;
+
+ if (q == 0)
+ q++;
+ }
+
if (!frame_is_intra_only(cm)) {
cm->interp_filter = DEFAULT_INTERP_FILTER;
/* TODO: Decide this more intelligently */
@@ -3293,7 +3309,6 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size,
vp9_get_second_pass_params(cpi);
encode_frame_to_data_rate(cpi, size, dest, frame_flags);
- // vp9_print_modes_and_motion_vectors(&cpi->common, "encode.stt");
vp9_twopass_postencode_update(cpi, *size);
}
@@ -3402,6 +3417,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
int64_t *time_stamp, int64_t *time_end, int flush) {
VP9_COMP *cpi = (VP9_COMP *) ptr;
VP9_COMMON *cm = &cpi->common;
+ MACROBLOCKD *xd = &cpi->mb.e_mbd;
struct vpx_usec_timer cmptimer;
YV12_BUFFER_CONFIG *force_src_buffer = NULL;
MV_REFERENCE_FRAME ref_frame;
@@ -3449,8 +3465,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
if (cpi->oxcf.arnr_max_frames > 0) {
// Produce the filtered ARF frame.
// TODO(agrange) merge these two functions.
- configure_arnr_filter(cpi, cm->current_video_frame + frames_to_arf,
- cpi->rc.gfu_boost);
+ vp9_configure_arnr_filter(cpi, frames_to_arf, cpi->rc.gfu_boost);
vp9_temporal_filter_prepare(cpi, frames_to_arf);
vp9_extend_frame_borders(&cpi->alt_ref_buffer,
cm->subsampling_x, cm->subsampling_y);
@@ -3466,7 +3481,9 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
#if CONFIG_MULTIPLE_ARF
if (!cpi->multi_arf_enabled)
#endif
- cpi->rc.source_alt_ref_pending = 0; // Clear Pending altf Ref flag.
+ cpi->rc.source_alt_ref_pending = 0;
+ } else {
+ cpi->rc.source_alt_ref_pending = 0;
}
}
@@ -3585,11 +3602,12 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
vp9_extend_frame_borders(buf, cm->subsampling_x, cm->subsampling_y);
}
- vp9_setup_interp_filters(&cpi->mb.e_mbd, DEFAULT_INTERP_FILTER, cm);
+ set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
+ xd->interp_kernel = vp9_get_interp_kernel(
+ DEFAULT_INTERP_FILTER == SWITCHABLE ? EIGHTTAP : DEFAULT_INTERP_FILTER);
- if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
- vp9_vaq_init();
- }
+ if (cpi->oxcf.aq_mode == VARIANCE_AQ)
+ vp9_vaq_init();
if (cpi->use_svc) {
SvcEncode(cpi, size, dest, frame_flags);
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 9cf3f62d6..d2f42dd3e 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -8,25 +8,28 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
#ifndef VP9_ENCODER_VP9_ONYX_INT_H_
#define VP9_ENCODER_VP9_ONYX_INT_H_
#include <stdio.h>
+
#include "./vpx_config.h"
+#include "vpx_ports/mem.h"
+#include "vpx/internal/vpx_codec_internal.h"
+
+#include "vp9/common/vp9_entropy.h"
+#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_onyx.h"
-#include "vp9/encoder/vp9_treewriter.h"
-#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/common/vp9_onyxc_int.h"
-#include "vp9/encoder/vp9_variance.h"
+
#include "vp9/encoder/vp9_encodemb.h"
-#include "vp9/encoder/vp9_quantize.h"
-#include "vp9/common/vp9_entropy.h"
-#include "vp9/common/vp9_entropymode.h"
-#include "vpx_ports/mem.h"
-#include "vpx/internal/vpx_codec_internal.h"
-#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_lookahead.h"
+#include "vp9/encoder/vp9_mcomp.h"
+#include "vp9/encoder/vp9_quantize.h"
+#include "vp9/encoder/vp9_ratectrl.h"
+#include "vp9/encoder/vp9_tokenize.h"
+#include "vp9/encoder/vp9_treewriter.h"
+#include "vp9/encoder/vp9_variance.h"
#ifdef __cplusplus
extern "C" {
@@ -98,18 +101,6 @@ typedef struct {
} FIRSTPASS_STATS;
typedef struct {
- int frames_so_far;
- double frame_intra_error;
- double frame_coded_error;
- double frame_pcnt_inter;
- double frame_pcnt_motion;
- double frame_mvr;
- double frame_mvr_abs;
- double frame_mvc;
- double frame_mvc_abs;
-} ONEPASS_FRAMESTATS;
-
-typedef struct {
struct {
int err;
union {
@@ -190,6 +181,12 @@ typedef enum {
} TX_SIZE_SEARCH_METHOD;
typedef enum {
+ NOT_IN_USE = 0,
+ RELAXED_NEIGHBORING_MIN_MAX = 1,
+ STRICT_NEIGHBORING_MIN_MAX = 2
+} AUTO_MIN_MAX_MODE;
+
+typedef enum {
// Values should be powers of 2 so that they can be selected as bits of
// an integer flags field
@@ -256,6 +253,9 @@ typedef struct {
// Maximum number of steps in logarithmic subpel search before giving up.
int subpel_iters_per_step;
+ // Control when to stop subpel search
+ int subpel_force_stop;
+
// Thresh_mult is used to set a threshold for the rd score. A higher value
// means that we will accept the best mode so far more often. This number
// is used in combination with the current block size, and thresh_freq_fact
@@ -343,9 +343,8 @@ typedef struct {
BLOCK_SIZE always_this_block_size;
// Sets min and max partition sizes for this 64x64 region based on the
- // same superblock in last encoded frame, and the left and above neighbor
- // in this block.
- int auto_min_max_partition_size;
+ // same 64x64 in last encoded frame, and the left and above neighbor.
+ AUTO_MIN_MAX_MODE auto_min_max_partition_size;
// Min and max partition size we enable (block_size) as per auto
// min max, but also used by adjust partitioning, and pick_partitioning.
@@ -419,67 +418,6 @@ typedef struct {
int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced
} SPEED_FEATURES;
-typedef struct {
- // Rate targetting variables
- int this_frame_target;
- int projected_frame_size;
- int sb64_target_rate;
- int last_q[3]; // Separate values for Intra/Inter/ARF-GF
- int last_boosted_qindex; // Last boosted GF/KF/ARF q
-
- int gfu_boost;
- int last_boost;
- int kf_boost;
-
- double rate_correction_factor;
- double key_frame_rate_correction_factor;
- double gf_rate_correction_factor;
-
- unsigned int frames_since_golden;
- unsigned int frames_till_gf_update_due; // Count down till next GF
- unsigned int max_gf_interval;
- unsigned int baseline_gf_interval;
- unsigned int frames_to_key;
- unsigned int frames_since_key;
- unsigned int this_key_frame_forced;
- unsigned int next_key_frame_forced;
- unsigned int source_alt_ref_pending;
- unsigned int source_alt_ref_active;
- unsigned int is_src_frame_alt_ref;
-
- int per_frame_bandwidth; // Current section per frame bandwidth target
- int av_per_frame_bandwidth; // Average frame size target for clip
- int min_frame_bandwidth; // Minimum allocation used for any frame
- int max_frame_bandwidth; // Maximum burst rate allowed for a frame.
-
- int ni_av_qi;
- int ni_tot_qi;
- int ni_frames;
- int avg_frame_qindex[3]; // 0 - KEY, 1 - INTER, 2 - ARF/GF
- double tot_q;
- double avg_q;
-
- int buffer_level;
- int bits_off_target;
-
- int decimation_factor;
- int decimation_count;
-
- int rolling_target_bits;
- int rolling_actual_bits;
-
- int long_rolling_target_bits;
- int long_rolling_actual_bits;
-
- int64_t total_actual_bits;
- int total_target_vs_actual; // debug stats
-
- int worst_quality;
- int active_worst_quality;
- int best_quality;
- // int active_best_quality;
-} RATE_CONTROL;
-
typedef struct VP9_COMP {
DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]);
@@ -606,11 +544,6 @@ typedef struct VP9_COMP {
int64_t target_bandwidth;
struct vpx_codec_pkt_list *output_pkt_list;
-#if 0
- // Experimental code for lagged and one pass
- ONEPASS_FRAMESTATS one_pass_frame_stats[MAX_LAG_BUFFERS];
- int one_pass_frame_index;
-#endif
MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS];
int mbgraph_n_frames; // number of frames filled in the above
int static_mb_pct; // % forced skip mbs by segmentation
@@ -821,6 +754,14 @@ static int get_token_alloc(int mb_rows, int mb_cols) {
return mb_rows * mb_cols * (48 * 16 + 4);
}
+static void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
+ MV_REFERENCE_FRAME ref0, MV_REFERENCE_FRAME ref1) {
+ xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME
+ : 0];
+ xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME
+ : 0];
+}
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 210d15f0d..2b9e31f08 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -80,7 +80,7 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
step_param = 6;
further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
- for (i = LAST_FRAME; i <= ALTREF_FRAME && cpi->common.show_frame; ++i) {
+ for (i = LAST_FRAME; i <= LAST_FRAME && cpi->common.show_frame; ++i) {
if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
tmp_mv->as_int = INVALID_MV;
@@ -142,8 +142,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int mi_row, int mi_col,
int *returnrate,
int64_t *returndistortion,
- BLOCK_SIZE bsize,
- PICK_MODE_CONTEXT *ctx) {
+ BLOCK_SIZE bsize) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
@@ -155,6 +154,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
VP9_ALT_FLAG };
int64_t best_rd = INT64_MAX;
int64_t this_rd;
+ int64_t cost[4]= { 0, 100, 150, 205 };
x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
@@ -171,7 +171,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->tx_size = MIN(max_txsize_lookup[bsize],
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
vp9_setup_buffer_inter(cpi, x, tile,
@@ -182,7 +182,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
frame_mv[ZEROMV][ref_frame].as_int = 0;
}
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) {
int rate_mv = 0;
if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
@@ -191,29 +191,42 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// Select prediction reference frames.
xd->plane[0].pre[0] = yv12_mb[ref_frame][0];
-
- x->mode_sad[ref_frame][INTER_OFFSET(NEWMV)] =
- full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
- &frame_mv[NEWMV][ref_frame], &rate_mv);
-
- if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV)
- continue;
-
clamp_mv2(&frame_mv[NEARESTMV][ref_frame].as_mv, xd);
clamp_mv2(&frame_mv[NEARMV][ref_frame].as_mv, xd);
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
- int rate = x->inter_mode_cost[mbmi->mode_context[ref_frame]]
- [INTER_OFFSET(this_mode)];
- int64_t dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)] *
- x->mode_sad[ref_frame][INTER_OFFSET(this_mode)];
- this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
+ int rate = cost[this_mode - NEARESTMV];
+ int64_t dist;
+
+ if (this_mode == NEWMV) {
+ if (this_rd < 300)
+ continue;
+
+ x->mode_sad[ref_frame][INTER_OFFSET(NEWMV)] =
+ full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame], &rate_mv);
+
+ if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV)
+ continue;
+ }
+
+ dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)];
+ this_rd = rate + dist;
if (this_rd < best_rd) {
best_rd = this_rd;
mbmi->mode = this_mode;
mbmi->ref_frame[0] = ref_frame;
mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
+ xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
+ mbmi->interp_filter = EIGHTTAP;
+
+ mbmi->ref_frame[1] = INTRA_FRAME;
+ mbmi->tx_size = max_txsize_lookup[bsize];
+ mbmi->uv_mode = this_mode;
+ mbmi->skip_coeff = 0;
+ mbmi->sb_type = bsize;
+ mbmi->segment_id = 0;
}
}
}
@@ -223,8 +236,5 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// TODO(jingning) intra prediction search, if the best SAD is above a certain
// threshold.
- // store mode decisions
- ctx->mic = *xd->mi_8x8[0];
-
return INT64_MAX;
}
diff --git a/vp9/encoder/vp9_pickmode.h b/vp9/encoder/vp9_pickmode.h
index 82904ae8a..05ff18762 100644
--- a/vp9/encoder/vp9_pickmode.h
+++ b/vp9/encoder/vp9_pickmode.h
@@ -22,8 +22,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int mi_row, int mi_col,
int *returnrate,
int64_t *returndistortion,
- BLOCK_SIZE bsize,
- PICK_MODE_CONTEXT *ctx);
+ BLOCK_SIZE bsize);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 3c816a3d0..74eb98fb0 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -218,7 +218,7 @@ static void calc_iframe_target_size(VP9_COMP *cpi) {
vp9_clear_system_state(); // __asm emms;
// For 1-pass.
- if (cpi->pass == 0) {
+ if (cpi->pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
if (cpi->common.current_video_frame == 0) {
target = oxcf->starting_buffer_level / 2;
} else {
@@ -246,7 +246,7 @@ static void calc_iframe_target_size(VP9_COMP *cpi) {
if (oxcf->rc_max_intra_bitrate_pct) {
const int max_rate = rc->per_frame_bandwidth *
- oxcf->rc_max_intra_bitrate_pct / 100;
+ oxcf->rc_max_intra_bitrate_pct / 100;
target = MIN(target, max_rate);
}
rc->this_frame_target = target;
@@ -375,27 +375,22 @@ static int target_size_from_buffer_level(const VP9_CONFIG *oxcf,
static void calc_pframe_target_size(VP9_COMP *const cpi) {
RATE_CONTROL *const rc = &cpi->rc;
const VP9_CONFIG *const oxcf = &cpi->oxcf;
- int min_frame_target = MAX(rc->min_frame_bandwidth,
- rc->av_per_frame_bandwidth >> 5);
- if (cpi->refresh_alt_ref_frame) {
- // Special alt reference frame case
- // Per frame bit target for the alt ref frame
- rc->per_frame_bandwidth = cpi->twopass.gf_bits;
- rc->this_frame_target = rc->per_frame_bandwidth;
- } else {
- // Normal frames (gf and inter).
- rc->this_frame_target = rc->per_frame_bandwidth;
- // Set target frame size based on buffer level, for 1 pass CBR.
- if (cpi->pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
- // Need to decide how low min_frame_target should be for 1-pass CBR.
- // For now, use: cpi->rc.av_per_frame_bandwidth / 16:
- min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4,
- FRAME_OVERHEAD_BITS);
- rc->this_frame_target = target_size_from_buffer_level(oxcf, rc);
- // Adjust qp-max based on buffer level.
- rc->active_worst_quality =
- adjust_active_worst_quality_from_buffer_level(oxcf, rc);
- }
+ int min_frame_target;
+ rc->this_frame_target = rc->per_frame_bandwidth;
+
+ if (cpi->pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
+ // Need to decide how low min_frame_target should be for 1-pass CBR.
+ // For now, use: cpi->rc.av_per_frame_bandwidth / 16:
+ min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4,
+ FRAME_OVERHEAD_BITS);
+ rc->this_frame_target = target_size_from_buffer_level(oxcf, rc);
+ // Adjust qp-max based on buffer level.
+ rc->active_worst_quality =
+ adjust_active_worst_quality_from_buffer_level(oxcf, rc);
+
+ if (rc->this_frame_target < min_frame_target)
+ rc->this_frame_target = min_frame_target;
+ return;
}
// Check that the total sum of adjustments is not above the maximum allowed.
@@ -404,6 +399,9 @@ static void calc_pframe_target_size(VP9_COMP *const cpi) {
// not capable of recovering all the extra bits we have spent in the KF or GF,
// then the remainder will have to be recovered over a longer time span via
// other buffer / rate control mechanisms.
+ min_frame_target = MAX(rc->min_frame_bandwidth,
+ rc->av_per_frame_bandwidth >> 5);
+
if (rc->this_frame_target < min_frame_target)
rc->this_frame_target = min_frame_target;
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index 65ddead11..eba4b7a92 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -12,66 +12,127 @@
#ifndef VP9_ENCODER_VP9_RATECTRL_H_
#define VP9_ENCODER_VP9_RATECTRL_H_
-#include "vp9/encoder/vp9_onyx_int.h"
-
#ifdef __cplusplus
extern "C" {
#endif
#define FRAME_OVERHEAD_BITS 200
-void vp9_save_coding_context(VP9_COMP *cpi);
-void vp9_restore_coding_context(VP9_COMP *cpi);
-
-void vp9_setup_key_frame(VP9_COMP *cpi);
-void vp9_setup_inter_frame(VP9_COMP *cpi);
+typedef struct {
+ // Rate targetting variables
+ int this_frame_target;
+ int projected_frame_size;
+ int sb64_target_rate;
+ int last_q[3]; // Separate values for Intra/Inter/ARF-GF
+ int last_boosted_qindex; // Last boosted GF/KF/ARF q
+
+ int gfu_boost;
+ int last_boost;
+ int kf_boost;
+
+ double rate_correction_factor;
+ double key_frame_rate_correction_factor;
+ double gf_rate_correction_factor;
+
+ unsigned int frames_since_golden;
+ unsigned int frames_till_gf_update_due; // Count down till next GF
+ unsigned int max_gf_interval;
+ unsigned int baseline_gf_interval;
+ unsigned int frames_to_key;
+ unsigned int frames_since_key;
+ unsigned int this_key_frame_forced;
+ unsigned int next_key_frame_forced;
+ unsigned int source_alt_ref_pending;
+ unsigned int source_alt_ref_active;
+ unsigned int is_src_frame_alt_ref;
+
+ int per_frame_bandwidth; // Current section per frame bandwidth target
+ int av_per_frame_bandwidth; // Average frame size target for clip
+ int min_frame_bandwidth; // Minimum allocation used for any frame
+ int max_frame_bandwidth; // Maximum burst rate allowed for a frame.
+
+ int ni_av_qi;
+ int ni_tot_qi;
+ int ni_frames;
+ int avg_frame_qindex[3]; // 0 - KEY, 1 - INTER, 2 - ARF/GF
+ double tot_q;
+ double avg_q;
+
+ int buffer_level;
+ int bits_off_target;
+
+ int decimation_factor;
+ int decimation_count;
+
+ int rolling_target_bits;
+ int rolling_actual_bits;
+
+ int long_rolling_target_bits;
+ int long_rolling_actual_bits;
+
+ int64_t total_actual_bits;
+ int total_target_vs_actual; // debug stats
+
+ int worst_quality;
+ int active_worst_quality;
+ int best_quality;
+ // int active_best_quality;
+} RATE_CONTROL;
+
+struct VP9_COMP;
+
+void vp9_save_coding_context(struct VP9_COMP *cpi);
+void vp9_restore_coding_context(struct VP9_COMP *cpi);
+
+void vp9_setup_key_frame(struct VP9_COMP *cpi);
+void vp9_setup_inter_frame(struct VP9_COMP *cpi);
double vp9_convert_qindex_to_q(int qindex);
// Updates rate correction factors
-void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var);
+void vp9_rc_update_rate_correction_factors(struct VP9_COMP *cpi, int damp_var);
// initialize luts for minq
void vp9_rc_init_minq_luts(void);
// return of 0 means drop frame
// Changes only rc.this_frame_target and rc.sb64_rate_target
-int vp9_rc_pick_frame_size_target(VP9_COMP *cpi);
+int vp9_rc_pick_frame_size_target(struct VP9_COMP *cpi);
-void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi,
+void vp9_rc_compute_frame_size_bounds(const struct VP9_COMP *cpi,
int this_frame_target,
int *frame_under_shoot_limit,
int *frame_over_shoot_limit);
// Picks q and q bounds given the target for bits
-int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
+int vp9_rc_pick_q_and_adjust_q_bounds(const struct VP9_COMP *cpi,
int *bottom_index,
int *top_index);
// Estimates q to achieve a target bits per frame
-int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
+int vp9_rc_regulate_q(const struct VP9_COMP *cpi, int target_bits_per_frame,
int active_best_quality, int active_worst_quality);
// Post encode update of the rate control parameters based
// on bytes used
-void vp9_rc_postencode_update(VP9_COMP *cpi,
+void vp9_rc_postencode_update(struct VP9_COMP *cpi,
uint64_t bytes_used);
// for dropped frames
-void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi);
+void vp9_rc_postencode_update_drop_frame(struct VP9_COMP *cpi);
// estimates bits per mb for a given qindex and correction factor
int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex,
double correction_factor);
// Post encode update of the rate control parameters for 2-pass
-void vp9_twopass_postencode_update(VP9_COMP *cpi,
+void vp9_twopass_postencode_update(struct VP9_COMP *cpi,
uint64_t bytes_used);
// Decide if we should drop this frame: For 1-pass CBR.
-int vp9_drop_frame(VP9_COMP *cpi);
+int vp9_drop_frame(struct VP9_COMP *cpi);
// Update the buffer level.
-void vp9_update_buffer_level(VP9_COMP *cpi, int encoded_frame_size);
+void vp9_update_buffer_level(struct VP9_COMP *cpi, int encoded_frame_size);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 3be79f46b..f375a88ff 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -280,22 +280,24 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
fill_token_costs(x->token_costs, cm->fc.coef_probs);
- for (i = 0; i < PARTITION_CONTEXTS; i++)
- vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
- vp9_partition_tree);
-
- fill_mode_costs(cpi);
-
- if (!frame_is_intra_only(cm)) {
- vp9_build_nmv_cost_table(x->nmvjointcost,
- cm->allow_high_precision_mv ? x->nmvcost_hp
- : x->nmvcost,
- &cm->fc.nmvc,
- cm->allow_high_precision_mv, 1, 1);
-
- for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
- vp9_cost_tokens((int *)x->inter_mode_cost[i],
- cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
+ if (cpi->compressor_speed != 3) {
+ for (i = 0; i < PARTITION_CONTEXTS; i++)
+ vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
+ vp9_partition_tree);
+
+ fill_mode_costs(cpi);
+
+ if (!frame_is_intra_only(cm)) {
+ vp9_build_nmv_cost_table(x->nmvjointcost,
+ cm->allow_high_precision_mv ? x->nmvcost_hp
+ : x->nmvcost,
+ &cm->fc.nmvc,
+ cm->allow_high_precision_mv, 1, 1);
+
+ for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
+ vp9_cost_tokens((int *)x->inter_mode_cost[i],
+ cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
+ }
}
}
@@ -419,12 +421,22 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
struct macroblock_plane *const p = &x->plane[i];
struct macroblockd_plane *const pd = &xd->plane[i];
const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
+
(void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
pd->dst.buf, pd->dst.stride, &sse);
+
if (i == 0)
x->pred_sse[ref] = sse;
-
- dist_sum += (int)sse;
+ if (cpi->compressor_speed > 2) {
+ dist_sum += (int)sse;
+ } else {
+ int rate;
+ int64_t dist;
+ model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
+ pd->dequant[1] >> 3, &rate, &dist);
+ rate_sum += rate;
+ dist_sum += (int)dist;
+ }
}
*out_rate_sum = rate_sum;
@@ -1517,8 +1529,8 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
vp9_build_inter_predictor(pre, pd->pre[ref].stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
- &xd->block_refs[ref]->sf,
- width, height, ref, &xd->subpix, MV_PRECISION_Q3,
+ &xd->block_refs[ref]->sf, width, height, ref,
+ xd->interp_kernel, MV_PRECISION_Q3,
mi_col * MI_SIZE + 4 * (i % 2),
mi_row * MI_SIZE + 4 * (i / 2));
}
@@ -1840,7 +1852,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
&bsi->ref_mv->as_mv,
cm->allow_high_precision_mv,
x->errorperbit, v_fn_ptr,
- 0, cpi->sf.subpel_iters_per_step,
+ cpi->sf.subpel_force_stop,
+ cpi->sf.subpel_iters_per_step,
x->nmvjointcost, x->mvcost,
&distortion,
&x->pred_sse[mbmi->ref_frame[0]]);
@@ -2451,7 +2464,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
cm->allow_high_precision_mv,
x->errorperbit,
&cpi->fn_ptr[bsize],
- 0, cpi->sf.subpel_iters_per_step,
+ cpi->sf.subpel_force_stop,
+ cpi->sf.subpel_iters_per_step,
x->nmvjointcost, x->mvcost,
&dis, &x->pred_sse[ref]);
}
@@ -2466,6 +2480,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[0] = backup_yv12[i];
}
+ return;
}
static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
@@ -2536,7 +2551,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
&frame_mv[refs[!id]].as_mv,
&xd->block_refs[!id]->sf,
pw, ph, 0,
- &xd->subpix, MV_PRECISION_Q3,
+ xd->interp_kernel, MV_PRECISION_Q3,
mi_col * MI_SIZE, mi_row * MI_SIZE);
// Compound motion search on first ref frame.
@@ -2783,7 +2798,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int j;
int64_t rs_rd;
mbmi->interp_filter = i;
- vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
+ xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
rs = get_switchable_rate(x);
rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
@@ -2854,7 +2869,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// Set the appropriate filter
mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
cm->interp_filter : *best_filter;
- vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
+ xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
rs = cm->interp_filter == SWITCHABLE ? get_switchable_rate(x) : 0;
if (pred_exists) {
@@ -3277,13 +3292,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
continue;
}
- set_ref_ptrs(cm, xd, ref_frame - 1, second_ref_frame - 1);
+ set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
mbmi->uv_mode = DC_PRED;
// Evaluate all sub-pel filters irrespective of whether we can use
// them for this frame.
- mbmi->interp_filter = cm->interp_filter;
- vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
+ mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
+ : cm->interp_filter;
+ xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
if (comp_pred) {
if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
@@ -3709,7 +3725,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
vp9_zero(best_tx_diff);
}
- set_ref_ptrs(cm, xd, mbmi->ref_frame[0] - 1, mbmi->ref_frame[1] - 1);
+ set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
store_coding_context(x, ctx, best_mode_index,
&mbmi->ref_mvs[mbmi->ref_frame[0]][0],
&mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
@@ -3902,13 +3918,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
continue;
- set_ref_ptrs(cm, xd, ref_frame - 1, second_ref_frame - 1);
+ set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
mbmi->uv_mode = DC_PRED;
// Evaluate all sub-pel filters irrespective of whether we can use
// them for this frame.
- mbmi->interp_filter = cm->interp_filter;
- vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
+ mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
+ : cm->interp_filter;
+ xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
if (comp_pred) {
if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
@@ -4031,8 +4048,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
int newbest, rs;
int64_t rs_rd;
mbmi->interp_filter = switchable_filter_index;
- vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
-
+ xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
&mbmi->ref_mvs[ref_frame][0],
second_ref,
@@ -4097,7 +4113,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
tmp_best_filter : cm->interp_filter);
- vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
+ xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
if (!pred_exists) {
// Handles the special case when a filter that is not in the
// switchable list (bilinear, 6-tap) is indicated at the frame level
@@ -4442,7 +4458,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
vp9_zero(best_tx_diff);
}
- set_ref_ptrs(cm, xd, mbmi->ref_frame[0] - 1, mbmi->ref_frame[1] - 1);
+ set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
store_coding_context(x, ctx, best_mode_index,
&mbmi->ref_mvs[mbmi->ref_frame[0]][0],
&mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index c2eea0aaa..e822e4c64 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -60,7 +60,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
scale,
16, 16,
which_mv,
- &xd->subpix, MV_PRECISION_Q3, x, y);
+ xd->interp_kernel, MV_PRECISION_Q3, x, y);
vp9_build_inter_predictor(u_mb_ptr, uv_stride,
&pred[256], uv_block_size,
@@ -68,7 +68,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
scale,
uv_block_size, uv_block_size,
which_mv,
- &xd->subpix, mv_precision_uv, x, y);
+ xd->interp_kernel, mv_precision_uv, x, y);
vp9_build_inter_predictor(v_mb_ptr, uv_stride,
&pred[512], uv_block_size,
@@ -76,7 +76,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
scale,
uv_block_size, uv_block_size,
which_mv,
- &xd->subpix, mv_precision_uv, x, y);
+ xd->interp_kernel, mv_precision_uv, x, y);
}
void vp9_temporal_filter_apply_c(uint8_t *frame1,
@@ -392,7 +392,6 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
const int num_frames_backward = distance;
const int num_frames_forward = vp9_lookahead_depth(cpi->lookahead)
- (num_frames_backward + 1);
-
struct scale_factors sf;
switch (blur_type) {
@@ -408,7 +407,6 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
case 2:
// Forward Blur
-
frames_to_blur_forward = num_frames_forward;
if (frames_to_blur_forward >= max_frames)
@@ -471,22 +469,24 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
strength, &sf);
}
-void configure_arnr_filter(VP9_COMP *cpi, const unsigned int this_frame,
- const int group_boost) {
+void vp9_configure_arnr_filter(VP9_COMP *cpi,
+ const unsigned int frames_to_arnr,
+ const int group_boost) {
int half_gf_int;
int frames_after_arf;
int frames_bwd = cpi->oxcf.arnr_max_frames - 1;
int frames_fwd = cpi->oxcf.arnr_max_frames - 1;
int q;
- // Define the arnr filter width for this group of frames:
- // We only filter frames that lie within a distance of half
- // the GF interval from the ARF frame. We also have to trap
- // cases where the filter extends beyond the end of clip.
- // Note: this_frame->frame has been updated in the loop
- // so it now points at the ARF frame.
+ // Define the arnr filter width for this group of frames. We only
+ // filter frames that lie within a distance of half the GF interval
+ // from the ARF frame. We also have to trap cases where the filter
+ // extends beyond the end of the lookahead buffer.
+ // Note: frames_to_arnr parameter is the offset of the arnr
+ // frame from the current frame.
half_gf_int = cpi->rc.baseline_gf_interval >> 1;
- frames_after_arf = (int)(cpi->twopass.total_stats.count - this_frame - 1);
+ frames_after_arf = vp9_lookahead_depth(cpi->lookahead)
+ - frames_to_arnr - 1;
switch (cpi->oxcf.arnr_type) {
case 1: // Backward filter
diff --git a/vp9/encoder/vp9_temporal_filter.h b/vp9/encoder/vp9_temporal_filter.h
index eea2f1018..3028d7884 100644
--- a/vp9/encoder/vp9_temporal_filter.h
+++ b/vp9/encoder/vp9_temporal_filter.h
@@ -16,8 +16,9 @@ extern "C" {
#endif
void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance);
-void configure_arnr_filter(VP9_COMP *cpi, const unsigned int this_frame,
- const int group_boost);
+void vp9_configure_arnr_filter(VP9_COMP *cpi,
+ const unsigned int frames_to_arnr,
+ const int group_boost);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/encoder/x86/vp9_dct_avx2.c b/vp9/encoder/x86/vp9_dct_avx2.c
index d81b72bba..ea031fb07 100644
--- a/vp9/encoder/x86/vp9_dct_avx2.c
+++ b/vp9/encoder/x86/vp9_dct_avx2.c
@@ -163,7 +163,7 @@ static INLINE void transpose_4x4_avx2(__m128i *res) {
res[3] = _mm_unpackhi_epi64(res[2], res[2]);
}
-void fdct4_1d_avx2(__m128i *in) {
+void fdct4_avx2(__m128i *in) {
const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
@@ -196,7 +196,7 @@ void fdct4_1d_avx2(__m128i *in) {
transpose_4x4_avx2(in);
}
-void fadst4_1d_avx2(__m128i *in) {
+void fadst4_avx2(__m128i *in) {
const __m128i k__sinpi_p01_p02 = pair_set_epi16(sinpi_1_9, sinpi_2_9);
const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9);
const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9);
@@ -250,20 +250,20 @@ void vp9_short_fht4x4_avx2(const int16_t *input, int16_t *output,
load_buffer_4x4_avx2(input, in, stride);
switch (tx_type) {
case 0: // DCT_DCT
- fdct4_1d_avx2(in);
- fdct4_1d_avx2(in);
+ fdct4_avx2(in);
+ fdct4_avx2(in);
break;
case 1: // ADST_DCT
- fadst4_1d_avx2(in);
- fdct4_1d_avx2(in);
+ fadst4_avx2(in);
+ fdct4_avx2(in);
break;
case 2: // DCT_ADST
- fdct4_1d_avx2(in);
- fadst4_1d_avx2(in);
+ fdct4_avx2(in);
+ fadst4_avx2(in);
break;
case 3: // ADST_ADST
- fadst4_1d_avx2(in);
- fadst4_1d_avx2(in);
+ fadst4_avx2(in);
+ fadst4_avx2(in);
break;
default:
assert(0);
@@ -658,7 +658,7 @@ static INLINE void array_transpose_8x8_avx2(__m128i *in, __m128i *res) {
// 07 17 27 37 47 57 67 77
}
-void fdct8_1d_avx2(__m128i *in) {
+void fdct8_avx2(__m128i *in) {
// constants
const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
@@ -798,7 +798,7 @@ void fdct8_1d_avx2(__m128i *in) {
array_transpose_8x8_avx2(in, in);
}
-void fadst8_1d_avx2(__m128i *in) {
+void fadst8_avx2(__m128i *in) {
// Constants
const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
@@ -1034,20 +1034,20 @@ void vp9_short_fht8x8_avx2(const int16_t *input, int16_t *output,
load_buffer_8x8_avx2(input, in, stride);
switch (tx_type) {
case 0: // DCT_DCT
- fdct8_1d_avx2(in);
- fdct8_1d_avx2(in);
+ fdct8_avx2(in);
+ fdct8_avx2(in);
break;
case 1: // ADST_DCT
- fadst8_1d_avx2(in);
- fdct8_1d_avx2(in);
+ fadst8_avx2(in);
+ fdct8_avx2(in);
break;
case 2: // DCT_ADST
- fdct8_1d_avx2(in);
- fadst8_1d_avx2(in);
+ fdct8_avx2(in);
+ fadst8_avx2(in);
break;
case 3: // ADST_ADST
- fadst8_1d_avx2(in);
- fadst8_1d_avx2(in);
+ fadst8_avx2(in);
+ fadst8_avx2(in);
break;
default:
assert(0);
@@ -1216,7 +1216,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
step1_6 = _mm_sub_epi16(in01, in14);
step1_7 = _mm_sub_epi16(in00, in15);
}
- // Work on the first eight values; fdct8_1d(input, even_results);
+ // Work on the first eight values; fdct8(input, even_results);
{
// Add/substract
const __m128i q0 = _mm_add_epi16(input0, input7);
@@ -1730,7 +1730,7 @@ static INLINE void right_shift_16x16_avx2(__m128i *res0, __m128i *res1) {
right_shift_8x8_avx2(res1 + 8, 2);
}
-void fdct16_1d_8col_avx2(__m128i *in) {
+void fdct16_8col_avx2(__m128i *in) {
// perform 16x16 1-D DCT for 8 columns
__m128i i[8], s[8], p[8], t[8], u[16], v[16];
const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
@@ -2052,7 +2052,7 @@ void fdct16_1d_8col_avx2(__m128i *in) {
in[15] = _mm_packs_epi32(v[14], v[15]);
}
-void fadst16_1d_8col_avx2(__m128i *in) {
+void fadst16_8col_avx2(__m128i *in) {
// perform 16x16 1-D ADST for 8 columns
__m128i s[16], x[16], u[32], v[32];
const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64);
@@ -2522,15 +2522,15 @@ void fadst16_1d_8col_avx2(__m128i *in) {
in[15] = _mm_sub_epi16(kZero, s[1]);
}
-void fdct16_1d_avx2(__m128i *in0, __m128i *in1) {
- fdct16_1d_8col_avx2(in0);
- fdct16_1d_8col_avx2(in1);
+void fdct16_avx2(__m128i *in0, __m128i *in1) {
+ fdct16_8col_avx2(in0);
+ fdct16_8col_avx2(in1);
array_transpose_16x16_avx2(in0, in1);
}
-void fadst16_1d_avx2(__m128i *in0, __m128i *in1) {
- fadst16_1d_8col_avx2(in0);
- fadst16_1d_8col_avx2(in1);
+void fadst16_avx2(__m128i *in0, __m128i *in1) {
+ fadst16_8col_avx2(in0);
+ fadst16_8col_avx2(in1);
array_transpose_16x16_avx2(in0, in1);
}
@@ -2540,24 +2540,24 @@ void vp9_short_fht16x16_avx2(const int16_t *input, int16_t *output,
load_buffer_16x16_avx2(input, in0, in1, stride);
switch (tx_type) {
case 0: // DCT_DCT
- fdct16_1d_avx2(in0, in1);
+ fdct16_avx2(in0, in1);
right_shift_16x16_avx2(in0, in1);
- fdct16_1d_avx2(in0, in1);
+ fdct16_avx2(in0, in1);
break;
case 1: // ADST_DCT
- fadst16_1d_avx2(in0, in1);
+ fadst16_avx2(in0, in1);
right_shift_16x16_avx2(in0, in1);
- fdct16_1d_avx2(in0, in1);
+ fdct16_avx2(in0, in1);
break;
case 2: // DCT_ADST
- fdct16_1d_avx2(in0, in1);
+ fdct16_avx2(in0, in1);
right_shift_16x16_avx2(in0, in1);
- fadst16_1d_avx2(in0, in1);
+ fadst16_avx2(in0, in1);
break;
case 3: // ADST_ADST
- fadst16_1d_avx2(in0, in1);
+ fadst16_avx2(in0, in1);
right_shift_16x16_avx2(in0, in1);
- fadst16_1d_avx2(in0, in1);
+ fadst16_avx2(in0, in1);
break;
default:
assert(0);
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index 65431bdbf..c876cc273 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -161,7 +161,7 @@ static INLINE void transpose_4x4(__m128i *res) {
res[3] = _mm_unpackhi_epi64(res[2], res[2]);
}
-void fdct4_1d_sse2(__m128i *in) {
+void fdct4_sse2(__m128i *in) {
const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
@@ -194,7 +194,7 @@ void fdct4_1d_sse2(__m128i *in) {
transpose_4x4(in);
}
-void fadst4_1d_sse2(__m128i *in) {
+void fadst4_sse2(__m128i *in) {
const __m128i k__sinpi_p01_p02 = pair_set_epi16(sinpi_1_9, sinpi_2_9);
const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9);
const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9);
@@ -248,20 +248,20 @@ void vp9_short_fht4x4_sse2(const int16_t *input, int16_t *output,
load_buffer_4x4(input, in, stride);
switch (tx_type) {
case 0: // DCT_DCT
- fdct4_1d_sse2(in);
- fdct4_1d_sse2(in);
+ fdct4_sse2(in);
+ fdct4_sse2(in);
break;
case 1: // ADST_DCT
- fadst4_1d_sse2(in);
- fdct4_1d_sse2(in);
+ fadst4_sse2(in);
+ fdct4_sse2(in);
break;
case 2: // DCT_ADST
- fdct4_1d_sse2(in);
- fadst4_1d_sse2(in);
+ fdct4_sse2(in);
+ fadst4_sse2(in);
break;
case 3: // ADST_ADST
- fadst4_1d_sse2(in);
- fadst4_1d_sse2(in);
+ fadst4_sse2(in);
+ fadst4_sse2(in);
break;
default:
assert(0);
@@ -656,7 +656,7 @@ static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
// 07 17 27 37 47 57 67 77
}
-void fdct8_1d_sse2(__m128i *in) {
+void fdct8_sse2(__m128i *in) {
// constants
const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
@@ -796,7 +796,7 @@ void fdct8_1d_sse2(__m128i *in) {
array_transpose_8x8(in, in);
}
-void fadst8_1d_sse2(__m128i *in) {
+void fadst8_sse2(__m128i *in) {
// Constants
const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
@@ -1032,20 +1032,20 @@ void vp9_short_fht8x8_sse2(const int16_t *input, int16_t *output,
load_buffer_8x8(input, in, stride);
switch (tx_type) {
case 0: // DCT_DCT
- fdct8_1d_sse2(in);
- fdct8_1d_sse2(in);
+ fdct8_sse2(in);
+ fdct8_sse2(in);
break;
case 1: // ADST_DCT
- fadst8_1d_sse2(in);
- fdct8_1d_sse2(in);
+ fadst8_sse2(in);
+ fdct8_sse2(in);
break;
case 2: // DCT_ADST
- fdct8_1d_sse2(in);
- fadst8_1d_sse2(in);
+ fdct8_sse2(in);
+ fadst8_sse2(in);
break;
case 3: // ADST_ADST
- fadst8_1d_sse2(in);
- fadst8_1d_sse2(in);
+ fadst8_sse2(in);
+ fadst8_sse2(in);
break;
default:
assert(0);
@@ -1214,7 +1214,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
step1_6 = _mm_sub_epi16(in01, in14);
step1_7 = _mm_sub_epi16(in00, in15);
}
- // Work on the first eight values; fdct8_1d(input, even_results);
+ // Work on the first eight values; fdct8(input, even_results);
{
// Add/substract
const __m128i q0 = _mm_add_epi16(input0, input7);
@@ -1728,7 +1728,7 @@ static INLINE void right_shift_16x16(__m128i *res0, __m128i *res1) {
right_shift_8x8(res1 + 8, 2);
}
-void fdct16_1d_8col(__m128i *in) {
+void fdct16_8col(__m128i *in) {
// perform 16x16 1-D DCT for 8 columns
__m128i i[8], s[8], p[8], t[8], u[16], v[16];
const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
@@ -2050,7 +2050,7 @@ void fdct16_1d_8col(__m128i *in) {
in[15] = _mm_packs_epi32(v[14], v[15]);
}
-void fadst16_1d_8col(__m128i *in) {
+void fadst16_8col(__m128i *in) {
// perform 16x16 1-D ADST for 8 columns
__m128i s[16], x[16], u[32], v[32];
const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64);
@@ -2520,15 +2520,15 @@ void fadst16_1d_8col(__m128i *in) {
in[15] = _mm_sub_epi16(kZero, s[1]);
}
-void fdct16_1d_sse2(__m128i *in0, __m128i *in1) {
- fdct16_1d_8col(in0);
- fdct16_1d_8col(in1);
+void fdct16_sse2(__m128i *in0, __m128i *in1) {
+ fdct16_8col(in0);
+ fdct16_8col(in1);
array_transpose_16x16(in0, in1);
}
-void fadst16_1d_sse2(__m128i *in0, __m128i *in1) {
- fadst16_1d_8col(in0);
- fadst16_1d_8col(in1);
+void fadst16_sse2(__m128i *in0, __m128i *in1) {
+ fadst16_8col(in0);
+ fadst16_8col(in1);
array_transpose_16x16(in0, in1);
}
@@ -2538,24 +2538,24 @@ void vp9_short_fht16x16_sse2(const int16_t *input, int16_t *output,
load_buffer_16x16(input, in0, in1, stride);
switch (tx_type) {
case 0: // DCT_DCT
- fdct16_1d_sse2(in0, in1);
+ fdct16_sse2(in0, in1);
right_shift_16x16(in0, in1);
- fdct16_1d_sse2(in0, in1);
+ fdct16_sse2(in0, in1);
break;
case 1: // ADST_DCT
- fadst16_1d_sse2(in0, in1);
+ fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1);
- fdct16_1d_sse2(in0, in1);
+ fdct16_sse2(in0, in1);
break;
case 2: // DCT_ADST
- fdct16_1d_sse2(in0, in1);
+ fdct16_sse2(in0, in1);
right_shift_16x16(in0, in1);
- fadst16_1d_sse2(in0, in1);
+ fadst16_sse2(in0, in1);
break;
case 3: // ADST_ADST
- fadst16_1d_sse2(in0, in1);
+ fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1);
- fadst16_1d_sse2(in0, in1);
+ fadst16_sse2(in0, in1);
break;
default:
assert(0);
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 897ecd702..6b181710e 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -563,10 +563,21 @@ static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx,
unsigned int new_qc;
/* Use best quality mode if no deadline is given. */
- if (deadline)
- new_qc = MODE_GOODQUALITY;
- else
- new_qc = MODE_BESTQUALITY;
+ new_qc = MODE_BESTQUALITY;
+
+ if (deadline) {
+ uint64_t duration_us;
+
+ /* Convert duration parameter from stream timebase to microseconds */
+ duration_us = (uint64_t)duration * 1000000
+ * (uint64_t)ctx->cfg.g_timebase.num
+ / (uint64_t)ctx->cfg.g_timebase.den;
+
+ /* If the deadline is more that the duration this frame is to be shown,
+ * use good quality mode. Otherwise use realtime mode.
+ */
+ new_qc = (deadline > duration_us) ? MODE_GOODQUALITY : MODE_REALTIME;
+ }
if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS)
new_qc = MODE_FIRSTPASS;
diff --git a/vp9_spatial_scalable_encoder.c b/vp9_spatial_scalable_encoder.c
index b637331c3..50f45c200 100644
--- a/vp9_spatial_scalable_encoder.c
+++ b/vp9_spatial_scalable_encoder.c
@@ -89,14 +89,6 @@ void usage_exit() {
exit(EXIT_FAILURE);
}
-static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
- const char *detail = vpx_codec_error_detail(ctx);
-
- printf("%s: %s\n", s, vpx_codec_error(ctx));
- if (detail) printf(" %s\n", detail);
- exit(EXIT_FAILURE);
-}
-
static void parse_command_line(int argc, const char **argv_,
AppInput *app_input, SvcContext *svc_ctx,
vpx_codec_enc_cfg_t *enc_cfg) {
@@ -201,8 +193,6 @@ int main(int argc, const char **argv) {
vpx_codec_err_t res;
int pts = 0; /* PTS starts at 0 */
int frame_duration = 1; /* 1 timebase tick per frame */
- vpx_codec_cx_pkt_t packet = {0};
- packet.kind = VPX_CODEC_CX_FRAME_PKT;
memset(&svc_ctx, 0, sizeof(svc_ctx));
svc_ctx.log_print = 1;
@@ -242,9 +232,7 @@ int main(int argc, const char **argv) {
die_codec(&codec, "Failed to encode frame");
}
if (vpx_svc_get_frame_size(&svc_ctx) > 0) {
- packet.data.frame.pts = pts;
- packet.data.frame.sz = vpx_svc_get_frame_size(&svc_ctx);
- ivf_write_frame_header(outfile, &packet);
+ ivf_write_frame_header(outfile, pts, vpx_svc_get_frame_size(&svc_ctx));
(void)fwrite(vpx_svc_get_buffer(&svc_ctx), 1,
vpx_svc_get_frame_size(&svc_ctx), outfile);
}
diff --git a/vpxdec.c b/vpxdec.c
index 2a79665f6..cde46e9a7 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -816,17 +816,34 @@ int main_loop(int argc, const char **argv_) {
}
if (single_file) {
+ if (use_y4m) {
+ char buf[Y4M_BUFFER_SIZE] = {0};
+ size_t len = 0;
+ if (frame_out == 1) {
+ // Y4M file header
+ len = y4m_write_file_header(buf, sizeof(buf),
+ vpx_input_ctx.width,
+ vpx_input_ctx.height,
+ &vpx_input_ctx.framerate, img->fmt);
+ if (do_md5) {
+ MD5Update(&md5_ctx, (md5byte *)buf, len);
+ } else {
+ fputs(buf, outfile);
+ }
+ }
+
+ // Y4M frame header
+ len = y4m_write_frame_header(buf, sizeof(buf));
+ if (do_md5) {
+ MD5Update(&md5_ctx, (md5byte *)buf, len);
+ } else {
+ fputs(buf, outfile);
+ }
+ }
+
if (do_md5) {
update_image_md5(img, planes, &md5_ctx);
} else {
- if (use_y4m) {
- if (frame_out == 1) {
- y4m_write_file_header(outfile,
- vpx_input_ctx.width, vpx_input_ctx.height,
- &vpx_input_ctx.framerate, img->fmt);
- }
- y4m_write_frame_header(outfile);
- }
write_image_file(img, planes, outfile);
}
} else {
diff --git a/vpxenc.c b/vpxenc.c
index f19300acf..f772432ad 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -38,6 +38,7 @@
#include "./tools_common.h"
#include "vpx_ports/mem_ops.h"
#include "vpx_ports/vpx_timer.h"
+#include "./rate_hist.h"
#include "./vpxstats.h"
#include "./warnings.h"
#include "./webmenc.h"
@@ -472,274 +473,6 @@ void usage_exit() {
exit(EXIT_FAILURE);
}
-
-#define HIST_BAR_MAX 40
-struct hist_bucket {
- int low, high, count;
-};
-
-
-static int merge_hist_buckets(struct hist_bucket *bucket,
- int *buckets_,
- int max_buckets) {
- int small_bucket = 0, merge_bucket = INT_MAX, big_bucket = 0;
- int buckets = *buckets_;
- int i;
-
- /* Find the extrema for this list of buckets */
- big_bucket = small_bucket = 0;
- for (i = 0; i < buckets; i++) {
- if (bucket[i].count < bucket[small_bucket].count)
- small_bucket = i;
- if (bucket[i].count > bucket[big_bucket].count)
- big_bucket = i;
- }
-
- /* If we have too many buckets, merge the smallest with an adjacent
- * bucket.
- */
- while (buckets > max_buckets) {
- int last_bucket = buckets - 1;
-
- /* merge the small bucket with an adjacent one. */
- if (small_bucket == 0)
- merge_bucket = 1;
- else if (small_bucket == last_bucket)
- merge_bucket = last_bucket - 1;
- else if (bucket[small_bucket - 1].count < bucket[small_bucket + 1].count)
- merge_bucket = small_bucket - 1;
- else
- merge_bucket = small_bucket + 1;
-
- assert(abs(merge_bucket - small_bucket) <= 1);
- assert(small_bucket < buckets);
- assert(big_bucket < buckets);
- assert(merge_bucket < buckets);
-
- if (merge_bucket < small_bucket) {
- bucket[merge_bucket].high = bucket[small_bucket].high;
- bucket[merge_bucket].count += bucket[small_bucket].count;
- } else {
- bucket[small_bucket].high = bucket[merge_bucket].high;
- bucket[small_bucket].count += bucket[merge_bucket].count;
- merge_bucket = small_bucket;
- }
-
- assert(bucket[merge_bucket].low != bucket[merge_bucket].high);
-
- buckets--;
-
- /* Remove the merge_bucket from the list, and find the new small
- * and big buckets while we're at it
- */
- big_bucket = small_bucket = 0;
- for (i = 0; i < buckets; i++) {
- if (i > merge_bucket)
- bucket[i] = bucket[i + 1];
-
- if (bucket[i].count < bucket[small_bucket].count)
- small_bucket = i;
- if (bucket[i].count > bucket[big_bucket].count)
- big_bucket = i;
- }
-
- }
-
- *buckets_ = buckets;
- return bucket[big_bucket].count;
-}
-
-
-static void show_histogram(const struct hist_bucket *bucket,
- int buckets,
- int total,
- int scale) {
- const char *pat1, *pat2;
- int i;
-
- switch ((int)(log(bucket[buckets - 1].high) / log(10)) + 1) {
- case 1:
- case 2:
- pat1 = "%4d %2s: ";
- pat2 = "%4d-%2d: ";
- break;
- case 3:
- pat1 = "%5d %3s: ";
- pat2 = "%5d-%3d: ";
- break;
- case 4:
- pat1 = "%6d %4s: ";
- pat2 = "%6d-%4d: ";
- break;
- case 5:
- pat1 = "%7d %5s: ";
- pat2 = "%7d-%5d: ";
- break;
- case 6:
- pat1 = "%8d %6s: ";
- pat2 = "%8d-%6d: ";
- break;
- case 7:
- pat1 = "%9d %7s: ";
- pat2 = "%9d-%7d: ";
- break;
- default:
- pat1 = "%12d %10s: ";
- pat2 = "%12d-%10d: ";
- break;
- }
-
- for (i = 0; i < buckets; i++) {
- int len;
- int j;
- float pct;
-
- pct = (float)(100.0 * bucket[i].count / total);
- len = HIST_BAR_MAX * bucket[i].count / scale;
- if (len < 1)
- len = 1;
- assert(len <= HIST_BAR_MAX);
-
- if (bucket[i].low == bucket[i].high)
- fprintf(stderr, pat1, bucket[i].low, "");
- else
- fprintf(stderr, pat2, bucket[i].low, bucket[i].high);
-
- for (j = 0; j < HIST_BAR_MAX; j++)
- fprintf(stderr, j < len ? "=" : " ");
- fprintf(stderr, "\t%5d (%6.2f%%)\n", bucket[i].count, pct);
- }
-}
-
-
-static void show_q_histogram(const int counts[64], int max_buckets) {
- struct hist_bucket bucket[64];
- int buckets = 0;
- int total = 0;
- int scale;
- int i;
-
-
- for (i = 0; i < 64; i++) {
- if (counts[i]) {
- bucket[buckets].low = bucket[buckets].high = i;
- bucket[buckets].count = counts[i];
- buckets++;
- total += counts[i];
- }
- }
-
- fprintf(stderr, "\nQuantizer Selection:\n");
- scale = merge_hist_buckets(bucket, &buckets, max_buckets);
- show_histogram(bucket, buckets, total, scale);
-}
-
-
-#define RATE_BINS (100)
-struct rate_hist {
- int64_t *pts;
- int *sz;
- int samples;
- int frames;
- struct hist_bucket bucket[RATE_BINS];
- int total;
-};
-
-
-static void init_rate_histogram(struct rate_hist *hist,
- const vpx_codec_enc_cfg_t *cfg,
- const vpx_rational_t *fps) {
- int i;
-
- /* Determine the number of samples in the buffer. Use the file's framerate
- * to determine the number of frames in rc_buf_sz milliseconds, with an
- * adjustment (5/4) to account for alt-refs
- */
- hist->samples = cfg->rc_buf_sz * 5 / 4 * fps->num / fps->den / 1000;
-
- /* prevent division by zero */
- if (hist->samples == 0)
- hist->samples = 1;
-
- hist->pts = calloc(hist->samples, sizeof(*hist->pts));
- hist->sz = calloc(hist->samples, sizeof(*hist->sz));
- for (i = 0; i < RATE_BINS; i++) {
- hist->bucket[i].low = INT_MAX;
- hist->bucket[i].high = 0;
- hist->bucket[i].count = 0;
- }
-}
-
-
-static void destroy_rate_histogram(struct rate_hist *hist) {
- free(hist->pts);
- free(hist->sz);
-}
-
-
-static void update_rate_histogram(struct rate_hist *hist,
- const vpx_codec_enc_cfg_t *cfg,
- const vpx_codec_cx_pkt_t *pkt) {
- int i, idx;
- int64_t now, then, sum_sz = 0, avg_bitrate;
-
- now = pkt->data.frame.pts * 1000
- * (uint64_t)cfg->g_timebase.num / (uint64_t)cfg->g_timebase.den;
-
- idx = hist->frames++ % hist->samples;
- hist->pts[idx] = now;
- hist->sz[idx] = (int)pkt->data.frame.sz;
-
- if (now < cfg->rc_buf_initial_sz)
- return;
-
- then = now;
-
- /* Sum the size over the past rc_buf_sz ms */
- for (i = hist->frames; i > 0 && hist->frames - i < hist->samples; i--) {
- int i_idx = (i - 1) % hist->samples;
-
- then = hist->pts[i_idx];
- if (now - then > cfg->rc_buf_sz)
- break;
- sum_sz += hist->sz[i_idx];
- }
-
- if (now == then)
- return;
-
- avg_bitrate = sum_sz * 8 * 1000 / (now - then);
- idx = (int)(avg_bitrate * (RATE_BINS / 2) / (cfg->rc_target_bitrate * 1000));
- if (idx < 0)
- idx = 0;
- if (idx > RATE_BINS - 1)
- idx = RATE_BINS - 1;
- if (hist->bucket[idx].low > avg_bitrate)
- hist->bucket[idx].low = (int)avg_bitrate;
- if (hist->bucket[idx].high < avg_bitrate)
- hist->bucket[idx].high = (int)avg_bitrate;
- hist->bucket[idx].count++;
- hist->total++;
-}
-
-
-static void show_rate_histogram(struct rate_hist *hist,
- const vpx_codec_enc_cfg_t *cfg,
- int max_buckets) {
- int i, scale;
- int buckets = 0;
-
- for (i = 0; i < RATE_BINS; i++) {
- if (hist->bucket[i].low == INT_MAX)
- continue;
- hist->bucket[buckets++] = hist->bucket[i];
- }
-
- fprintf(stderr, "\nRate (over %dms window):\n", cfg->rc_buf_sz);
- scale = merge_hist_buckets(hist->bucket, &buckets, max_buckets);
- show_histogram(hist->bucket, buckets, hist->total, scale);
-}
-
#define mmin(a, b) ((a) < (b) ? (a) : (b))
static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2,
int yloc[4], int uloc[4], int vloc[4]) {
@@ -884,7 +617,7 @@ struct stream_state {
struct stream_state *next;
struct stream_config config;
FILE *file;
- struct rate_hist rate_hist;
+ struct rate_hist *rate_hist;
struct EbmlGlobal ebml;
uint32_t hash;
uint64_t psnr_sse_total;
@@ -1576,7 +1309,7 @@ static void get_cx_data(struct stream_state *stream,
if (!global->quiet)
fprintf(stderr, " %6luF", (unsigned long)pkt->data.frame.sz);
- update_rate_histogram(&stream->rate_hist, cfg, pkt);
+ update_rate_histogram(stream->rate_hist, cfg, pkt);
if (stream->config.write_webm) {
/* Update the hash */
if (!stream->ebml.debug)
@@ -1590,7 +1323,7 @@ static void get_cx_data(struct stream_state *stream,
ivf_header_pos = ftello(stream->file);
fsize = pkt->data.frame.sz;
- ivf_write_frame_header(stream->file, pkt);
+ ivf_write_frame_header(stream->file, pkt->data.frame.pts, fsize);
} else {
fsize += pkt->data.frame.sz;
@@ -1881,9 +1614,9 @@ int main(int argc, const char **argv_) {
: VPX_IMG_FMT_YV12,
input.width, input.height, 32);
- FOREACH_STREAM(init_rate_histogram(&stream->rate_hist,
- &stream->config.cfg,
- &global.framerate));
+ FOREACH_STREAM(stream->rate_hist =
+ init_rate_histogram(&stream->config.cfg,
+ &global.framerate));
}
FOREACH_STREAM(setup_pass(stream, &global, pass));
@@ -2020,10 +1753,10 @@ int main(int argc, const char **argv_) {
global.show_q_hist_buckets));
if (global.show_rate_hist_buckets)
- FOREACH_STREAM(show_rate_histogram(&stream->rate_hist,
+ FOREACH_STREAM(show_rate_histogram(stream->rate_hist,
&stream->config.cfg,
global.show_rate_hist_buckets));
- FOREACH_STREAM(destroy_rate_histogram(&stream->rate_hist));
+ FOREACH_STREAM(destroy_rate_histogram(stream->rate_hist));
#if CONFIG_INTERNAL_STATS
/* TODO(jkoleszar): This doesn't belong in this executable. Do it for now,
diff --git a/y4menc.c b/y4menc.c
index 8321b432e..8b1c95e2b 100644
--- a/y4menc.c
+++ b/y4menc.c
@@ -10,21 +10,18 @@
#include "./y4menc.h"
-void y4m_write_file_header(FILE *file, int width, int height,
- const struct VpxRational *framerate,
- vpx_img_fmt_t fmt) {
- const char *color = fmt == VPX_IMG_FMT_444A ? "C444alpha\n" :
- fmt == VPX_IMG_FMT_I444 ? "C444\n" :
- fmt == VPX_IMG_FMT_I422 ? "C422\n" :
- "C420jpeg\n";
+int y4m_write_file_header(char *buf, size_t len, int width, int height,
+ const struct VpxRational *framerate,
+ vpx_img_fmt_t fmt) {
+ const char *const color = fmt == VPX_IMG_FMT_444A ? "C444alpha\n" :
+ fmt == VPX_IMG_FMT_I444 ? "C444\n" :
+ fmt == VPX_IMG_FMT_I422 ? "C422\n" :
+ "C420jpeg\n";
- // Note: We can't output an aspect ratio here because IVF doesn't
- // store one, and neither does VP8.
- // That will have to wait until these tools support WebM natively.*/
- fprintf(file, "YUV4MPEG2 W%u H%u F%u:%u I%c %s", width, height,
- framerate->numerator, framerate->denominator, 'p', color);
+ return snprintf(buf, len, "YUV4MPEG2 W%u H%u F%u:%u I%c %s", width, height,
+ framerate->numerator, framerate->denominator, 'p', color);
}
-void y4m_write_frame_header(FILE *file) {
- fprintf(file, "FRAME\n");
+int y4m_write_frame_header(char *buf, size_t len) {
+ return snprintf(buf, len, "FRAME\n");
}
diff --git a/y4menc.h b/y4menc.h
index 95f7909b1..0fabf56eb 100644
--- a/y4menc.h
+++ b/y4menc.h
@@ -11,8 +11,6 @@
#ifndef Y4MENC_H_
#define Y4MENC_H_
-#include <stdio.h>
-
#include "./tools_common.h"
#include "vpx/vpx_decoder.h"
@@ -21,12 +19,12 @@
extern "C" {
#endif
-void y4m_write_file_header(FILE *file, int width, int height,
- const struct VpxRational *framerate,
- vpx_img_fmt_t fmt);
-
-void y4m_write_frame_header(FILE *file);
+#define Y4M_BUFFER_SIZE 128
+int y4m_write_file_header(char *buf, size_t len, int width, int height,
+ const struct VpxRational *framerate,
+ vpx_img_fmt_t fmt);
+int y4m_write_frame_header(char *buf, size_t len);
#ifdef __cplusplus
} // extern "C"