summaryrefslogtreecommitdiff
path: root/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'vp9')
-rw-r--r--vp9/common/vp9_alloccommon.c189
-rw-r--r--vp9/common/vp9_alloccommon.h17
-rw-r--r--vp9/common/vp9_blockd.h10
-rw-r--r--vp9/common/vp9_entropymv.c51
-rw-r--r--vp9/common/vp9_enums.h6
-rw-r--r--vp9/common/vp9_loopfilter.c77
-rw-r--r--vp9/common/vp9_mvref_common.c11
-rw-r--r--vp9/common/vp9_onyxc_int.h30
-rw-r--r--vp9/common/vp9_postproc.h1
-rw-r--r--vp9/common/vp9_reconinter.c6
-rw-r--r--vp9/common/vp9_reconintra.c4
-rw-r--r--vp9/common/vp9_rtcd_defs.pl4
-rw-r--r--vp9/common/vp9_scale.c4
-rw-r--r--vp9/decoder/vp9_decodeframe.c165
-rw-r--r--vp9/decoder/vp9_decodeframe.h5
-rw-r--r--vp9/decoder/vp9_decodemv.c14
-rw-r--r--vp9/decoder/vp9_decoder.c98
-rw-r--r--vp9/decoder/vp9_decoder.h20
-rw-r--r--vp9/decoder/vp9_dthread.h1
-rw-r--r--vp9/encoder/vp9_aq_complexity.c104
-rw-r--r--vp9/encoder/vp9_aq_complexity.h34
-rw-r--r--vp9/encoder/vp9_aq_cyclicrefresh.c (renamed from vp9/encoder/vp9_craq.c)247
-rw-r--r--vp9/encoder/vp9_aq_cyclicrefresh.h50
-rw-r--r--vp9/encoder/vp9_aq_variance.c (renamed from vp9/encoder/vp9_vaq.c)2
-rw-r--r--vp9/encoder/vp9_aq_variance.h (renamed from vp9/encoder/vp9_vaq.h)6
-rw-r--r--vp9/encoder/vp9_bitstream.c28
-rw-r--r--vp9/encoder/vp9_block.h5
-rw-r--r--vp9/encoder/vp9_craq.h48
-rw-r--r--vp9/encoder/vp9_encodeframe.c876
-rw-r--r--vp9/encoder/vp9_encodemv.c22
-rw-r--r--vp9/encoder/vp9_encodemv.h3
-rw-r--r--vp9/encoder/vp9_firstpass.c174
-rw-r--r--vp9/encoder/vp9_firstpass.h1
-rw-r--r--vp9/encoder/vp9_mcomp.c160
-rw-r--r--vp9/encoder/vp9_onyx_if.c993
-rw-r--r--vp9/encoder/vp9_onyx_int.h375
-rw-r--r--vp9/encoder/vp9_picklpf.c38
-rw-r--r--vp9/encoder/vp9_picklpf.h4
-rw-r--r--vp9/encoder/vp9_pickmode.c66
-rw-r--r--vp9/encoder/vp9_ratectrl.c78
-rw-r--r--vp9/encoder/vp9_ratectrl.h6
-rw-r--r--vp9/encoder/vp9_rdopt.c166
-rw-r--r--vp9/encoder/vp9_sad.c2
-rw-r--r--vp9/encoder/vp9_speed_features.c369
-rw-r--r--vp9/encoder/vp9_speed_features.h319
-rw-r--r--vp9/encoder/vp9_svc_layercontext.c161
-rw-r--r--vp9/encoder/vp9_svc_layercontext.h21
-rw-r--r--vp9/encoder/vp9_tokenize.c11
-rw-r--r--vp9/encoder/vp9_variance.c43
-rw-r--r--vp9/encoder/vp9_variance.h18
-rw-r--r--vp9/encoder/vp9_write_bit_buffer.c34
-rw-r--r--vp9/encoder/vp9_write_bit_buffer.h28
-rw-r--r--vp9/encoder/x86/vp9_sad4d_intrin_avx2.c167
-rw-r--r--vp9/vp9_cx_iface.c118
-rw-r--r--vp9/vp9_dx_iface.c44
-rw-r--r--vp9/vp9cx.mk14
56 files changed, 3070 insertions, 2478 deletions
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 920aefa79..0f26f6e76 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
#include "./vpx_config.h"
#include "vpx_mem/vpx_mem.h"
@@ -18,44 +17,15 @@
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_systemdependent.h"
-void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi) {
- const int stride = cm->mode_info_stride;
- int i;
-
- // Clear down top border row
- vpx_memset(mi, 0, sizeof(MODE_INFO) * stride);
-
- // Clear left border column
- for (i = 1; i < cm->mi_rows + 1; i++)
- vpx_memset(&mi[i * stride], 0, sizeof(MODE_INFO));
-}
-
-void vp9_free_frame_buffers(VP9_COMMON *cm) {
+static void clear_mi_border(const VP9_COMMON *cm, MODE_INFO *mi) {
int i;
- for (i = 0; i < FRAME_BUFFERS; i++) {
- vp9_free_frame_buffer(&cm->frame_bufs[i].buf);
-
- if (cm->frame_bufs[i].ref_count > 0 &&
- cm->frame_bufs[i].raw_frame_buffer.data != NULL) {
- cm->release_fb_cb(cm->cb_priv, &cm->frame_bufs[i].raw_frame_buffer);
- cm->frame_bufs[i].ref_count = 0;
- }
- }
-
- vp9_free_frame_buffer(&cm->post_proc_buffer);
-
- vpx_free(cm->mip);
- vpx_free(cm->prev_mip);
- vpx_free(cm->last_frame_seg_map);
- vpx_free(cm->mi_grid_base);
- vpx_free(cm->prev_mi_grid_base);
+ // Top border row
+ vpx_memset(mi, 0, sizeof(*mi) * cm->mode_info_stride);
- cm->mip = NULL;
- cm->prev_mip = NULL;
- cm->last_frame_seg_map = NULL;
- cm->mi_grid_base = NULL;
- cm->prev_mi_grid_base = NULL;
+ // Left border column
+ for (i = 1; i < cm->mi_rows + 1; ++i)
+ vpx_memset(&mi[i * cm->mode_info_stride], 0, sizeof(*mi));
}
static void set_mb_mi(VP9_COMMON *cm, int aligned_width, int aligned_height) {
@@ -75,13 +45,74 @@ static void setup_mi(VP9_COMMON *cm) {
cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1;
vpx_memset(cm->mip, 0,
- cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO));
+ cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));
vpx_memset(cm->mi_grid_base, 0,
cm->mode_info_stride * (cm->mi_rows + 1) *
sizeof(*cm->mi_grid_base));
- vp9_update_mode_info_border(cm, cm->prev_mip);
+ clear_mi_border(cm, cm->prev_mip);
+}
+
+static int alloc_mi(VP9_COMMON *cm, int mi_size) {
+ cm->mip = (MODE_INFO *)vpx_calloc(mi_size, sizeof(*cm->mip));
+ if (cm->mip == NULL)
+ return 1;
+
+ cm->prev_mip = (MODE_INFO *)vpx_calloc(mi_size, sizeof(*cm->prev_mip));
+ if (cm->prev_mip == NULL)
+ return 1;
+
+ cm->mi_grid_base =
+ (MODE_INFO **)vpx_calloc(mi_size, sizeof(*cm->mi_grid_base));
+ if (cm->mi_grid_base == NULL)
+ return 1;
+
+ cm->prev_mi_grid_base =
+ (MODE_INFO **)vpx_calloc(mi_size, sizeof(*cm->prev_mi_grid_base));
+ if (cm->prev_mi_grid_base == NULL)
+ return 1;
+
+ return 0;
+}
+
+static void free_mi(VP9_COMMON *cm) {
+ vpx_free(cm->mip);
+ vpx_free(cm->prev_mip);
+ vpx_free(cm->mi_grid_base);
+ vpx_free(cm->prev_mi_grid_base);
+
+ cm->mip = NULL;
+ cm->prev_mip = NULL;
+ cm->mi_grid_base = NULL;
+ cm->prev_mi_grid_base = NULL;
+}
+
+void vp9_free_frame_buffers(VP9_COMMON *cm) {
+ int i;
+
+ for (i = 0; i < FRAME_BUFFERS; ++i) {
+ vp9_free_frame_buffer(&cm->frame_bufs[i].buf);
+
+ if (cm->frame_bufs[i].ref_count > 0 &&
+ cm->frame_bufs[i].raw_frame_buffer.data != NULL) {
+ cm->release_fb_cb(cm->cb_priv, &cm->frame_bufs[i].raw_frame_buffer);
+ cm->frame_bufs[i].ref_count = 0;
+ }
+ }
+
+ vp9_free_frame_buffer(&cm->post_proc_buffer);
+
+ free_mi(cm);
+
+ vpx_free(cm->last_frame_seg_map);
+ cm->last_frame_seg_map = NULL;
+
+ vpx_free(cm->above_context);
+ cm->above_context = NULL;
+
+ vpx_free(cm->above_seg_context);
+ cm->above_seg_context = NULL;
}
int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) {
@@ -89,7 +120,6 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) {
const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
const int ss_x = cm->subsampling_x;
const int ss_y = cm->subsampling_y;
- int mi_size;
if (vp9_realloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
VP9_DEC_BORDER_IN_PIXELS, NULL, NULL, NULL) < 0)
@@ -97,29 +127,8 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) {
set_mb_mi(cm, aligned_width, aligned_height);
- // Allocation
- mi_size = cm->mode_info_stride * (cm->mi_rows + MI_BLOCK_SIZE);
-
- vpx_free(cm->mip);
- cm->mip = (MODE_INFO *)vpx_calloc(mi_size, sizeof(MODE_INFO));
- if (!cm->mip)
- goto fail;
-
- vpx_free(cm->prev_mip);
- cm->prev_mip = (MODE_INFO *)vpx_calloc(mi_size, sizeof(MODE_INFO));
- if (!cm->prev_mip)
- goto fail;
-
- vpx_free(cm->mi_grid_base);
- cm->mi_grid_base =
- (MODE_INFO **)vpx_calloc(mi_size, sizeof(*cm->mi_grid_base));
- if (!cm->mi_grid_base)
- goto fail;
-
- vpx_free(cm->prev_mi_grid_base);
- cm->prev_mi_grid_base =
- (MODE_INFO **)vpx_calloc(mi_size, sizeof(*cm->prev_mi_grid_base));
- if (!cm->prev_mi_grid_base)
+ free_mi(cm);
+ if (alloc_mi(cm, cm->mode_info_stride * (cm->mi_rows + MI_BLOCK_SIZE)))
goto fail;
setup_mi(cm);
@@ -130,6 +139,21 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) {
if (!cm->last_frame_seg_map)
goto fail;
+ vpx_free(cm->above_context);
+ cm->above_context =
+ (ENTROPY_CONTEXT *)vpx_calloc(2 * mi_cols_aligned_to_sb(cm->mi_cols) *
+ MAX_MB_PLANE,
+ sizeof(*cm->above_context));
+ if (!cm->above_context)
+ goto fail;
+
+ vpx_free(cm->above_seg_context);
+ cm->above_seg_context =
+ (PARTITION_CONTEXT *)vpx_calloc(mi_cols_aligned_to_sb(cm->mi_cols),
+ sizeof(*cm->above_seg_context));
+ if (!cm->above_seg_context)
+ goto fail;
+
return 0;
fail:
@@ -138,13 +162,11 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) {
}
int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
- int i;
-
const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
const int ss_x = cm->subsampling_x;
const int ss_y = cm->subsampling_y;
- int mi_size;
+ int i;
vp9_free_frame_buffers(cm);
@@ -169,25 +191,7 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
set_mb_mi(cm, aligned_width, aligned_height);
- // Allocation
- mi_size = cm->mode_info_stride * (cm->mi_rows + MI_BLOCK_SIZE);
-
- cm->mip = (MODE_INFO *)vpx_calloc(mi_size, sizeof(MODE_INFO));
- if (!cm->mip)
- goto fail;
-
- cm->prev_mip = (MODE_INFO *)vpx_calloc(mi_size, sizeof(MODE_INFO));
- if (!cm->prev_mip)
- goto fail;
-
- cm->mi_grid_base =
- (MODE_INFO **)vpx_calloc(mi_size, sizeof(*cm->mi_grid_base));
- if (!cm->mi_grid_base)
- goto fail;
-
- cm->prev_mi_grid_base =
- (MODE_INFO **)vpx_calloc(mi_size, sizeof(*cm->prev_mi_grid_base));
- if (!cm->prev_mi_grid_base)
+ if (alloc_mi(cm, cm->mode_info_stride * (cm->mi_rows + MI_BLOCK_SIZE)))
goto fail;
setup_mi(cm);
@@ -197,6 +201,19 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
if (!cm->last_frame_seg_map)
goto fail;
+ cm->above_context =
+ (ENTROPY_CONTEXT *)vpx_calloc(2 * mi_cols_aligned_to_sb(cm->mi_cols) *
+ MAX_MB_PLANE,
+ sizeof(*cm->above_context));
+ if (!cm->above_context)
+ goto fail;
+
+ cm->above_seg_context =
+ (PARTITION_CONTEXT *)vpx_calloc(mi_cols_aligned_to_sb(cm->mi_cols),
+ sizeof(*cm->above_seg_context));
+ if (!cm->above_seg_context)
+ goto fail;
+
return 0;
fail:
@@ -209,10 +226,6 @@ void vp9_remove_common(VP9_COMMON *cm) {
vp9_free_internal_frame_buffers(&cm->int_frame_buffers);
}
-void vp9_initialize_common() {
- vp9_init_neighbors();
-}
-
void vp9_update_frame_size(VP9_COMMON *cm) {
const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, MI_SIZE_LOG2);
const int aligned_height = ALIGN_POWER_OF_TWO(cm->height, MI_SIZE_LOG2);
diff --git a/vp9/common/vp9_alloccommon.h b/vp9/common/vp9_alloccommon.h
index fca693541..06636a905 100644
--- a/vp9/common/vp9_alloccommon.h
+++ b/vp9/common/vp9_alloccommon.h
@@ -12,26 +12,23 @@
#ifndef VP9_COMMON_VP9_ALLOCCOMMON_H_
#define VP9_COMMON_VP9_ALLOCCOMMON_H_
-#include "vp9/common/vp9_onyxc_int.h"
-
#ifdef __cplusplus
extern "C" {
#endif
-void vp9_initialize_common();
+struct VP9Common;
-void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi);
+void vp9_remove_common(struct VP9Common *cm);
-void vp9_remove_common(VP9_COMMON *cm);
+int vp9_resize_frame_buffers(struct VP9Common *cm, int width, int height);
-int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height);
-int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height);
-void vp9_free_frame_buffers(VP9_COMMON *cm);
+int vp9_alloc_frame_buffers(struct VP9Common *cm, int width, int height);
+void vp9_free_frame_buffers(struct VP9Common *cm);
-void vp9_update_frame_size(VP9_COMMON *cm);
+void vp9_update_frame_size(struct VP9Common *cm);
-void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm);
+void vp9_swap_mi_and_prev_mi(struct VP9Common *cm);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index b9e30fe9a..556be6aba 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -208,8 +208,6 @@ typedef struct macroblockd {
// A NULL indicates that the 8x8 is not part of the image
MODE_INFO **mi_8x8;
- MODE_INFO **prev_mi_8x8;
- MODE_INFO *mi_stream;
int up_available;
int left_available;
@@ -237,7 +235,8 @@ typedef struct macroblockd {
int corrupted;
- /* Y,U,V,(A) */
+ DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
+
ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16];
@@ -245,7 +244,12 @@ typedef struct macroblockd {
PARTITION_CONTEXT left_seg_context[8];
} MACROBLOCKD;
+static INLINE void init_macroblockd(MACROBLOCKD *xd) {
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ xd->plane[i].dqcoeff = xd->dqcoeff[i];
+}
static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize,
PARTITION_TYPE partition) {
diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c
index 197b7c05e..5bb048202 100644
--- a/vp9/common/vp9_entropymv.c
+++ b/vp9/common/vp9_entropymv.c
@@ -8,14 +8,13 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_entropymv.h"
#define MV_COUNT_SAT 20
#define MV_MAX_UPDATE_FACTOR 128
-/* Integer pel reference mv threshold for use of high-precision 1/8 mv */
+// Integer pel reference mv threshold for use of high-precision 1/8 mv
#define COMPANDED_MVREF_THRESH 8
const vp9_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = {
@@ -49,32 +48,30 @@ const vp9_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = {
static const nmv_context default_nmv_context = {
{32, 64, 96},
- { // NOLINT
- { /* vert component */ // NOLINT
- 128, /* sign */
- {224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, /* class */
- {216}, /* class0 */
- {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, /* bits */
- {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */
- {64, 96, 64}, /* fp */
- 160, /* class0_hp bit */
- 128, /* hp */
+ {
+ { // Vertical component
+ 128, // sign
+ {224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, // class
+ {216}, // class0
+ {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, // bits
+ {{128, 128, 64}, {96, 112, 64}}, // class0_fp
+ {64, 96, 64}, // fp
+ 160, // class0_hp bit
+ 128, // hp
},
- { /* hor component */ // NOLINT
- 128, /* sign */
- {216, 128, 176, 160, 176, 176, 192, 198, 198, 208}, /* class */
- {208}, /* class0 */
- {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, /* bits */
- {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */
- {64, 96, 64}, /* fp */
- 160, /* class0_hp bit */
- 128, /* hp */
+ { // Horizontal component
+ 128, // sign
+ {216, 128, 176, 160, 176, 176, 192, 198, 198, 208}, // class
+ {208}, // class0
+ {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, // bits
+ {{128, 128, 64}, {96, 112, 64}}, // class0_fp
+ {64, 96, 64}, // fp
+ 160, // class0_hp bit
+ 128, // hp
}
},
};
-#define mv_class_base(c) ((c) ? (CLASS0_SIZE << (c + 2)) : 0)
-
static const uint8_t log_in_base_2[] = {
0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
@@ -121,9 +118,13 @@ static const uint8_t log_in_base_2[] = {
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10
};
+static INLINE int mv_class_base(MV_CLASS_TYPE c) {
+ return c ? CLASS0_SIZE << (c + 2) : 0;
+}
+
MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) {
- const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096) ? MV_CLASS_10 :
- (MV_CLASS_TYPE)log_in_base_2[z >> 3];
+ const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096) ?
+ MV_CLASS_10 : (MV_CLASS_TYPE)log_in_base_2[z >> 3];
if (offset)
*offset = z - mv_class_base(c);
return c;
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
index e96e76947..779dce017 100644
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -94,6 +94,12 @@ typedef enum {
SRGB = 7 // RGB
} COLOR_SPACE;
+typedef enum {
+ VP9_LAST_FLAG = 1 << 0,
+ VP9_GOLD_FLAG = 1 << 1,
+ VP9_ALT_FLAG = 1 << 2,
+} VP9_REFFRAME;
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index af8afed84..e48d4178b 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -228,6 +228,12 @@ static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
}
}
+static uint8_t get_filter_level(const loop_filter_info_n *lfi_n,
+ const MB_MODE_INFO *mbmi) {
+ return lfi_n->lvl[mbmi->segment_id][mbmi->ref_frame[0]]
+ [mode_lf_lut[mbmi->mode]];
+}
+
void vp9_loop_filter_init(VP9_COMMON *cm) {
loop_filter_info_n *lfi = &cm->lf_info;
struct loopfilter *lf = &cm->lf;
@@ -493,27 +499,25 @@ static void build_masks(const loop_filter_info_n *const lfi_n,
const MODE_INFO *mi, const int shift_y,
const int shift_uv,
LOOP_FILTER_MASK *lfm) {
- const BLOCK_SIZE block_size = mi->mbmi.sb_type;
- const TX_SIZE tx_size_y = mi->mbmi.tx_size;
- const TX_SIZE tx_size_uv = get_uv_tx_size(&mi->mbmi);
- const int skip = mi->mbmi.skip;
- const int seg = mi->mbmi.segment_id;
- const int ref = mi->mbmi.ref_frame[0];
- const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]];
- uint64_t *left_y = &lfm->left_y[tx_size_y];
- uint64_t *above_y = &lfm->above_y[tx_size_y];
- uint64_t *int_4x4_y = &lfm->int_4x4_y;
- uint16_t *left_uv = &lfm->left_uv[tx_size_uv];
- uint16_t *above_uv = &lfm->above_uv[tx_size_uv];
- uint16_t *int_4x4_uv = &lfm->int_4x4_uv;
+ const MB_MODE_INFO *mbmi = &mi->mbmi;
+ const BLOCK_SIZE block_size = mbmi->sb_type;
+ const TX_SIZE tx_size_y = mbmi->tx_size;
+ const TX_SIZE tx_size_uv = get_uv_tx_size(mbmi);
+ const int filter_level = get_filter_level(lfi_n, mbmi);
+ uint64_t *const left_y = &lfm->left_y[tx_size_y];
+ uint64_t *const above_y = &lfm->above_y[tx_size_y];
+ uint64_t *const int_4x4_y = &lfm->int_4x4_y;
+ uint16_t *const left_uv = &lfm->left_uv[tx_size_uv];
+ uint16_t *const above_uv = &lfm->above_uv[tx_size_uv];
+ uint16_t *const int_4x4_uv = &lfm->int_4x4_uv;
int i;
- int w = num_8x8_blocks_wide_lookup[block_size];
- int h = num_8x8_blocks_high_lookup[block_size];
// If filter level is 0 we don't loop filter.
if (!filter_level) {
return;
} else {
+ const int w = num_8x8_blocks_wide_lookup[block_size];
+ const int h = num_8x8_blocks_high_lookup[block_size];
int index = shift_y;
for (i = 0; i < h; i++) {
vpx_memset(&lfm->lfl_y[index], filter_level, w);
@@ -540,7 +544,7 @@ static void build_masks(const loop_filter_info_n *const lfi_n,
// If the block has no coefficients and is not intra we skip applying
// the loop filter on block edges.
- if (skip && ref > INTRA_FRAME)
+ if (mbmi->skip && is_inter_block(mbmi))
return;
// Here we are adding a mask for the transform size. The transform
@@ -561,12 +565,11 @@ static void build_masks(const loop_filter_info_n *const lfi_n,
// boundaries. These differ from the 4x4 boundaries on the outside edge of
// an 8x8 in that the internal ones can be skipped and don't depend on
// the prediction block size.
- if (tx_size_y == TX_4X4) {
+ if (tx_size_y == TX_4X4)
*int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y;
- }
- if (tx_size_uv == TX_4X4) {
+
+ if (tx_size_uv == TX_4X4)
*int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
- }
}
// This function does the same thing as the one above with the exception that
@@ -575,22 +578,20 @@ static void build_masks(const loop_filter_info_n *const lfi_n,
static void build_y_mask(const loop_filter_info_n *const lfi_n,
const MODE_INFO *mi, const int shift_y,
LOOP_FILTER_MASK *lfm) {
- const BLOCK_SIZE block_size = mi->mbmi.sb_type;
- const TX_SIZE tx_size_y = mi->mbmi.tx_size;
- const int skip = mi->mbmi.skip;
- const int seg = mi->mbmi.segment_id;
- const int ref = mi->mbmi.ref_frame[0];
- const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]];
- uint64_t *left_y = &lfm->left_y[tx_size_y];
- uint64_t *above_y = &lfm->above_y[tx_size_y];
- uint64_t *int_4x4_y = &lfm->int_4x4_y;
+ const MB_MODE_INFO *mbmi = &mi->mbmi;
+ const BLOCK_SIZE block_size = mbmi->sb_type;
+ const TX_SIZE tx_size_y = mbmi->tx_size;
+ const int filter_level = get_filter_level(lfi_n, mbmi);
+ uint64_t *const left_y = &lfm->left_y[tx_size_y];
+ uint64_t *const above_y = &lfm->above_y[tx_size_y];
+ uint64_t *const int_4x4_y = &lfm->int_4x4_y;
int i;
- int w = num_8x8_blocks_wide_lookup[block_size];
- int h = num_8x8_blocks_high_lookup[block_size];
if (!filter_level) {
return;
} else {
+ const int w = num_8x8_blocks_wide_lookup[block_size];
+ const int h = num_8x8_blocks_high_lookup[block_size];
int index = shift_y;
for (i = 0; i < h; i++) {
vpx_memset(&lfm->lfl_y[index], filter_level, w);
@@ -601,7 +602,7 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n,
*above_y |= above_prediction_mask[block_size] << shift_y;
*left_y |= left_prediction_mask[block_size] << shift_y;
- if (skip && ref > INTRA_FRAME)
+ if (mbmi->skip && is_inter_block(mbmi))
return;
*above_y |= (size_mask[block_size] &
@@ -610,9 +611,8 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n,
*left_y |= (size_mask[block_size] &
left_64x64_txform_mask[tx_size_y]) << shift_y;
- if (tx_size_y == TX_4X4) {
+ if (tx_size_y == TX_4X4)
*int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y;
- }
}
// This function sets up the bit masks for the entire 64x64 region represented
@@ -868,13 +868,6 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16]));
}
-static uint8_t build_lfi(const loop_filter_info_n *lfi_n,
- const MB_MODE_INFO *mbmi) {
- const int seg = mbmi->segment_id;
- const int ref = mbmi->ref_frame[0];
- return lfi_n->lvl[seg][ref][mode_lf_lut[mbmi->mode]];
-}
-
static void filter_selectively_vert(uint8_t *s, int pitch,
unsigned int mask_16x16,
unsigned int mask_8x8,
@@ -953,7 +946,7 @@ static void filter_block_plane_non420(VP9_COMMON *cm,
// Filter level can vary per MI
if (!(lfl[(r << 3) + (c >> ss_x)] =
- build_lfi(&cm->lf_info, &mi[0].mbmi)))
+ get_filter_level(&cm->lf_info, &mi[0].mbmi)))
continue;
// Build masks based on the transform size of each block
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index 9f2c2dfac..c043e6c5b 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -193,11 +193,14 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
int block, int mi_row, int mi_col) {
const int *ref_sign_bias = cm->ref_frame_sign_bias;
int i, refmv_count = 0;
- const MODE_INFO *prev_mi = cm->coding_use_prev_mi && cm->prev_mi ?
- xd->prev_mi_8x8[0] : NULL;
+ const MODE_INFO *prev_mi = cm->coding_use_prev_mi && cm->prev_mi
+ ? cm->prev_mi_grid_visible[mi_row * xd->mode_info_stride + mi_col]
+ : NULL;
+ const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL;
+
+
const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
- const MB_MODE_INFO *const prev_mbmi = cm->coding_use_prev_mi && prev_mi ?
- &prev_mi->mbmi : NULL;
+
int different_ref_found = 0;
int context_counter = 0;
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 024b197a1..f564afb9e 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -153,10 +153,6 @@ typedef struct VP9Common {
MODE_INFO **prev_mi_grid_base;
MODE_INFO **prev_mi_grid_visible;
- // Each tile column has its own MODE_INFO stream. This array indexes them by
- // tile column index.
- MODE_INFO **mi_streams;
-
// Persistent mb segment id map used in prediction.
unsigned char *last_frame_seg_map;
@@ -206,6 +202,9 @@ typedef struct VP9Common {
// Handles memory for the codec.
InternalFrameBufferList int_frame_buffers;
+
+ PARTITION_CONTEXT *above_seg_context;
+ ENTROPY_CONTEXT *above_context;
} VP9_COMMON;
static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) {
@@ -288,12 +287,12 @@ static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) {
return cm->frame_type == KEY_FRAME || cm->intra_only;
}
-static INLINE void update_partition_context(
- PARTITION_CONTEXT *above_seg_context,
- PARTITION_CONTEXT left_seg_context[8],
- int mi_row, int mi_col, BLOCK_SIZE subsize, BLOCK_SIZE bsize) {
- PARTITION_CONTEXT *const above_ctx = above_seg_context + mi_col;
- PARTITION_CONTEXT *const left_ctx = left_seg_context + (mi_row & MI_MASK);
+static INLINE void update_partition_context(MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ BLOCK_SIZE subsize,
+ BLOCK_SIZE bsize) {
+ PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col;
+ PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK);
// num_4x4_blocks_wide_lookup[bsize] / 2
const int bs = num_8x8_blocks_wide_lookup[bsize];
@@ -305,12 +304,11 @@ static INLINE void update_partition_context(
vpx_memset(left_ctx, partition_context_lookup[subsize].left, bs);
}
-static INLINE int partition_plane_context(
- const PARTITION_CONTEXT *above_seg_context,
- const PARTITION_CONTEXT left_seg_context[8],
- int mi_row, int mi_col, BLOCK_SIZE bsize) {
- const PARTITION_CONTEXT *above_ctx = above_seg_context + mi_col;
- const PARTITION_CONTEXT *left_ctx = left_seg_context + (mi_row & MI_MASK);
+static INLINE int partition_plane_context(const MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
+ const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col;
+ const PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK);
const int bsl = mi_width_log2(bsize);
const int bs = 1 << bsl;
diff --git a/vp9/common/vp9_postproc.h b/vp9/common/vp9_postproc.h
index b07d5d045..ebebc1ae3 100644
--- a/vp9/common/vp9_postproc.h
+++ b/vp9/common/vp9_postproc.h
@@ -13,6 +13,7 @@
#define VP9_COMMON_VP9_POSTPROC_H_
#include "vpx_ports/mem.h"
+#include "vpx_scale/yv12config.h"
#include "vp9/common/vp9_ppflags.h"
#ifdef __cplusplus
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index 005f370a0..0a4c33962 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -308,10 +308,8 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
y0_16 = sf->scale_value_y(y0_16, sf);
// Map the top left corner of the block into the reference frame.
- // NOTE: This must be done in this way instead of
- // sf->scale_value_x(x_start + x, sf).
- x0 = sf->scale_value_x(x_start, sf) + sf->scale_value_x(x, sf);
- y0 = sf->scale_value_y(y_start, sf) + sf->scale_value_y(y, sf);
+ x0 = sf->scale_value_x(x_start + x, sf);
+ y0 = sf->scale_value_y(y_start + y, sf);
// Scale the MV and incorporate the sub-pixel offset of the block
// in the reference frame.
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index 915c1c155..44951b54d 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -347,6 +347,8 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
+ vpx_memset(left_col, 129, 64);
+
// left
if (left_available) {
if (xd->mb_to_bottom_edge < 0) {
@@ -366,8 +368,6 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
for (i = 0; i < bs; ++i)
left_col[i] = ref[i * ref_stride - 1];
}
- } else {
- vpx_memset(left_col, 129, bs);
}
// TODO(hkuang) do not extend 2*bs pixels for all modes.
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index e4cd9d4a0..b874ef3ba 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -636,7 +636,7 @@ add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const
specialize qw/vp9_sad4x4x8 sse4/;
add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad64x64x4d sse2/;
+specialize qw/vp9_sad64x64x4d sse2 avx2/;
add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad32x64x4d sse2/;
@@ -651,7 +651,7 @@ add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, co
specialize qw/vp9_sad16x32x4d sse2/;
add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad32x32x4d sse2/;
+specialize qw/vp9_sad32x32x4d sse2 avx2/;
add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad16x16x4d sse2/;
diff --git a/vp9/common/vp9_scale.c b/vp9/common/vp9_scale.c
index e0f1e3410..d3405fcdb 100644
--- a/vp9/common/vp9_scale.c
+++ b/vp9/common/vp9_scale.c
@@ -13,11 +13,11 @@
#include "vp9/common/vp9_scale.h"
static INLINE int scaled_x(int val, const struct scale_factors *sf) {
- return val * sf->x_scale_fp >> REF_SCALE_SHIFT;
+ return (int)((int64_t)val * sf->x_scale_fp >> REF_SCALE_SHIFT);
}
static INLINE int scaled_y(int val, const struct scale_factors *sf) {
- return val * sf->y_scale_fp >> REF_SCALE_SHIFT;
+ return (int)((int64_t)val * sf->y_scale_fp >> REF_SCALE_SHIFT);
}
static int unscaled_value(int val, const struct scale_factors *sf) {
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 728d92dfb..2f15bfb52 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -187,53 +187,13 @@ static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) {
xd->plane[i].dequant = cm->uv_dequant[q_index];
}
-// Allocate storage for each tile column.
-// TODO(jzern): when max_threads <= 1 the same storage could be used for each
-// tile.
-static void alloc_tile_storage(VP9D_COMP *pbi, int tile_rows, int tile_cols) {
- VP9_COMMON *const cm = &pbi->common;
- const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
- int i, tile_row, tile_col;
-
- CHECK_MEM_ERROR(cm, cm->mi_streams,
- vpx_realloc(cm->mi_streams, tile_rows * tile_cols *
- sizeof(*cm->mi_streams)));
- for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
- for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
- TileInfo tile;
- vp9_tile_init(&tile, cm, tile_row, tile_col);
- cm->mi_streams[tile_row * tile_cols + tile_col] =
- &cm->mi[tile.mi_row_start * cm->mode_info_stride + tile.mi_col_start];
- }
- }
-
- // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm
- // block where mi unit size is 8x8.
- CHECK_MEM_ERROR(cm, pbi->above_context[0],
- vpx_realloc(pbi->above_context[0],
- sizeof(*pbi->above_context[0]) * MAX_MB_PLANE *
- 2 * aligned_mi_cols));
- for (i = 1; i < MAX_MB_PLANE; ++i) {
- pbi->above_context[i] = pbi->above_context[0] +
- i * sizeof(*pbi->above_context[0]) *
- 2 * aligned_mi_cols;
- }
-
- // This is sized based on the entire frame. Each tile operates within its
- // column bounds.
- CHECK_MEM_ERROR(cm, pbi->above_seg_context,
- vpx_realloc(pbi->above_seg_context,
- sizeof(*pbi->above_seg_context) *
- aligned_mi_cols));
-}
-
static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
TX_SIZE tx_size, uint8_t *dst, int stride,
int eob) {
struct macroblockd_plane *const pd = &xd->plane[plane];
if (eob > 0) {
TX_TYPE tx_type;
- const int plane_type = pd->plane_type;
+ const PLANE_TYPE plane_type = pd->plane_type;
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
switch (tx_size) {
case TX_4X4:
@@ -281,7 +241,7 @@ struct intra_args {
static void predict_and_reconstruct_intra_block(int plane, int block,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
- struct intra_args *const args = arg;
+ struct intra_args *const args = (struct intra_args *)arg;
VP9_COMMON *const cm = args->cm;
MACROBLOCKD *const xd = args->xd;
struct macroblockd_plane *const pd = &xd->plane[plane];
@@ -317,7 +277,7 @@ struct inter_args {
static void reconstruct_inter_block(int plane, int block,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
- struct inter_args *args = arg;
+ struct inter_args *args = (struct inter_args *)arg;
VP9_COMMON *const cm = args->cm;
MACROBLOCKD *const xd = args->xd;
struct macroblockd_plane *const pd = &xd->plane[plane];
@@ -339,13 +299,10 @@ static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
const int x_mis = MIN(bw, cm->mi_cols - mi_col);
const int y_mis = MIN(bh, cm->mi_rows - mi_row);
const int offset = mi_row * cm->mode_info_stride + mi_col;
- const int tile_offset = tile->mi_row_start * cm->mode_info_stride +
- tile->mi_col_start;
int x, y;
xd->mi_8x8 = cm->mi_grid_visible + offset;
- xd->prev_mi_8x8 = cm->prev_mi_grid_visible + offset;
- xd->mi_8x8[0] = xd->mi_stream + offset - tile_offset;
+ xd->mi_8x8[0] = &cm->mi[offset];
xd->mi_8x8[0]->mbmi.sb_type = bsize;
for (y = 0; y < y_mis; ++y)
for (x = !y; x < x_mis; ++x)
@@ -428,16 +385,14 @@ static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs,
int mi_row, int mi_col, BLOCK_SIZE bsize,
vp9_reader *r) {
- const int ctx = partition_plane_context(xd->above_seg_context,
- xd->left_seg_context,
- mi_row, mi_col, bsize);
+ const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
const vp9_prob *const probs = get_partition_probs(cm, ctx);
const int has_rows = (mi_row + hbs) < cm->mi_rows;
const int has_cols = (mi_col + hbs) < cm->mi_cols;
PARTITION_TYPE p;
if (has_rows && has_cols)
- p = vp9_read_tree(r, vp9_partition_tree, probs);
+ p = (PARTITION_TYPE)vp9_read_tree(r, vp9_partition_tree, probs);
else if (!has_rows && has_cols)
p = vp9_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ;
else if (has_rows && !has_cols)
@@ -495,8 +450,7 @@ static void decode_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
// update partition context
if (bsize >= BLOCK_8X8 &&
(bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
- update_partition_context(xd->above_seg_context, xd->left_seg_context,
- mi_row, mi_col, subsize, bsize);
+ update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
static void setup_token_decoder(const uint8_t *data,
@@ -664,9 +618,7 @@ static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
read_frame_size(rb, &cm->display_width, &cm->display_height);
}
-static void apply_frame_size(VP9D_COMP *pbi, int width, int height) {
- VP9_COMMON *cm = &pbi->common;
-
+static void apply_frame_size(VP9_COMMON *cm, int width, int height) {
if (cm->width != width || cm->height != height) {
// Change in frame size.
// TODO(agrange) Don't test width/height, check overall size.
@@ -693,18 +645,15 @@ static void apply_frame_size(VP9D_COMP *pbi, int width, int height) {
}
}
-static void setup_frame_size(VP9D_COMP *pbi,
- struct vp9_read_bit_buffer *rb) {
+static void setup_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
int width, height;
read_frame_size(rb, &width, &height);
- apply_frame_size(pbi, width, height);
- setup_display_size(&pbi->common, rb);
+ apply_frame_size(cm, width, height);
+ setup_display_size(cm, rb);
}
-static void setup_frame_size_with_refs(VP9D_COMP *pbi,
+static void setup_frame_size_with_refs(VP9_COMMON *cm,
struct vp9_read_bit_buffer *rb) {
- VP9_COMMON *const cm = &pbi->common;
-
int width, height;
int found = 0, i;
for (i = 0; i < REFS_PER_FRAME; ++i) {
@@ -724,23 +673,19 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi,
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Referenced frame with invalid size");
- apply_frame_size(pbi, width, height);
+ apply_frame_size(cm, width, height);
setup_display_size(cm, rb);
}
-static void setup_tile_context(VP9D_COMP *const pbi, MACROBLOCKD *const xd,
+static void setup_tile_context(VP9_COMMON *cm, MACROBLOCKD *const xd,
int tile_row, int tile_col) {
- VP9_COMMON *const cm = &pbi->common;
- const int tile_cols = 1 << cm->log2_tile_cols;
int i;
- xd->mi_stream = cm->mi_streams[tile_row * tile_cols + tile_col];
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ xd->above_context[i] = cm->above_context +
+ i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols);
- for (i = 0; i < MAX_MB_PLANE; ++i) {
- xd->above_context[i] = pbi->above_context[i];
- }
- // see note in alloc_tile_storage().
- xd->above_seg_context = pbi->above_seg_context;
+ xd->above_seg_context = cm->above_seg_context;
}
static void decode_tile(VP9D_COMP *pbi, const TileInfo *const tile,
@@ -848,7 +793,9 @@ typedef struct TileBuffer {
int col; // only used with multi-threaded decoding
} TileBuffer;
-static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) {
+static const uint8_t *decode_tiles(VP9D_COMP *pbi,
+ const uint8_t *data,
+ const uint8_t *data_end) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols);
@@ -856,7 +803,6 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) {
const int tile_rows = 1 << cm->log2_tile_rows;
TileBuffer tile_buffers[4][1 << 6];
int tile_row, tile_col;
- const uint8_t *const data_end = pbi->source + pbi->source_sz;
const uint8_t *end = NULL;
vp9_reader r;
@@ -865,11 +811,11 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) {
// Note: this memset assumes above_context[0], [1] and [2]
// are allocated as part of the same buffer.
- vpx_memset(pbi->above_context[0], 0,
- sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * 2 * aligned_cols);
+ vpx_memset(cm->above_context, 0,
+ sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols);
- vpx_memset(pbi->above_seg_context, 0,
- sizeof(*pbi->above_seg_context) * aligned_cols);
+ vpx_memset(cm->above_seg_context, 0,
+ sizeof(*cm->above_seg_context) * aligned_cols);
// Load tile data into tile_buffers
for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
@@ -896,7 +842,7 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) {
vp9_tile_init(&tile, cm, tile_row, col);
setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &r);
- setup_tile_context(pbi, xd, tile_row, col);
+ setup_tile_context(cm, xd, tile_row, col);
decode_tile(pbi, &tile, &r);
if (last_tile)
@@ -907,17 +853,6 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) {
return end;
}
-static void setup_tile_macroblockd(TileWorkerData *const tile_data) {
- MACROBLOCKD *xd = &tile_data->xd;
- struct macroblockd_plane *const pd = xd->plane;
- int i;
-
- for (i = 0; i < MAX_MB_PLANE; ++i) {
- pd[i].dqcoeff = tile_data->dqcoeff[i];
- vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t));
- }
-}
-
static int tile_worker_hook(void *arg1, void *arg2) {
TileWorkerData *const tile_data = (TileWorkerData*)arg1;
const TileInfo *const tile = (TileInfo*)arg2;
@@ -949,10 +884,11 @@ static int compare_tile_buffers(const void *a, const void *b) {
}
}
-static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) {
+static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi,
+ const uint8_t *data,
+ const uint8_t *data_end) {
VP9_COMMON *const cm = &pbi->common;
const uint8_t *bit_reader_end = NULL;
- const uint8_t *const data_end = pbi->source + pbi->source_sz;
const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
@@ -992,11 +928,10 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) {
// Note: this memset assumes above_context[0], [1] and [2]
// are allocated as part of the same buffer.
- vpx_memset(pbi->above_context[0], 0,
- sizeof(*pbi->above_context[0]) * MAX_MB_PLANE *
- 2 * aligned_mi_cols);
- vpx_memset(pbi->above_seg_context, 0,
- sizeof(*pbi->above_seg_context) * aligned_mi_cols);
+ vpx_memset(cm->above_context, 0,
+ sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols);
+ vpx_memset(cm->above_seg_context, 0,
+ sizeof(*cm->above_seg_context) * aligned_mi_cols);
// Load tile data into tile_buffers
for (n = 0; n < tile_cols; ++n) {
@@ -1044,8 +979,10 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) {
setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
&tile_data->bit_reader);
- setup_tile_context(pbi, &tile_data->xd, 0, buf->col);
- setup_tile_macroblockd(tile_data);
+
+ setup_tile_context(cm, &tile_data->xd, 0, buf->col);
+ init_macroblockd(&tile_data->xd);
+ vp9_zero(tile_data->xd.dqcoeff);
worker->had_error = 0;
if (i == num_workers - 1 || n == tile_cols - 1) {
@@ -1134,7 +1071,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
if (cm->frame_type == KEY_FRAME) {
check_sync_code(cm, rb);
- cm->color_space = vp9_rb_read_literal(rb, 3); // colorspace
+ cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3);
if (cm->color_space != SRGB) {
vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range
if (cm->version == 1) {
@@ -1161,7 +1098,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
cm->frame_refs[i].buf = get_frame_new_buffer(cm);
}
- setup_frame_size(pbi, rb);
+ setup_frame_size(cm, rb);
} else {
cm->intra_only = cm->show_frame ? 0 : vp9_rb_read_bit(rb);
@@ -1172,7 +1109,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
check_sync_code(cm, rb);
pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
- setup_frame_size(pbi, rb);
+ setup_frame_size(cm, rb);
} else {
pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
@@ -1184,7 +1121,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb);
}
- setup_frame_size_with_refs(pbi, rb);
+ setup_frame_size_with_refs(cm, rb);
cm->allow_high_precision_mv = vp9_rb_read_bit(rb);
cm->interp_filter = read_interp_filter(rb);
@@ -1332,14 +1269,12 @@ static void debug_check_frame_counts(const VP9_COMMON *const cm) {
}
#endif // NDEBUG
-int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
- int i;
+int vp9_decode_frame(VP9D_COMP *pbi,
+ const uint8_t *data, const uint8_t *data_end,
+ const uint8_t **p_data_end) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
- const uint8_t *data = pbi->source;
- const uint8_t *const data_end = pbi->source + pbi->source_sz;
-
struct vp9_read_bit_buffer rb = { data, data_end, 0, cm, error_handler };
const size_t first_partition_size = read_uncompressed_header(pbi, &rb);
const int keyframe = cm->frame_type == KEY_FRAME;
@@ -1365,7 +1300,8 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
pbi->do_loopfilter_inline =
(cm->log2_tile_rows | cm->log2_tile_cols) == 0 && cm->lf.filter_level;
if (pbi->do_loopfilter_inline && pbi->lf_worker.data1 == NULL) {
- CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, vpx_malloc(sizeof(LFWorkerData)));
+ CHECK_MEM_ERROR(cm, pbi->lf_worker.data1,
+ vpx_memalign(32, sizeof(LFWorkerData)));
pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
if (pbi->oxcf.max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
@@ -1373,8 +1309,6 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
}
}
- alloc_tile_storage(pbi, tile_rows, tile_cols);
-
xd->mode_info_stride = cm->mode_info_stride;
if (cm->coding_use_prev_mi)
set_prev_mi(cm);
@@ -1386,8 +1320,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
cm->fc = cm->frame_contexts[cm->frame_context_idx];
vp9_zero(cm->counts);
- for (i = 0; i < MAX_MB_PLANE; ++i)
- vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t));
+ vp9_zero(xd->dqcoeff);
xd->corrupted = 0;
new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size);
@@ -1396,9 +1329,9 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
// single-frame tile decoding.
if (pbi->oxcf.max_threads > 1 && tile_rows == 1 && tile_cols > 1 &&
cm->frame_parallel_decoding_mode) {
- *p_data_end = decode_tiles_mt(pbi, data + first_partition_size);
+ *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
} else {
- *p_data_end = decode_tiles(pbi, data + first_partition_size);
+ *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end);
}
new_fb->corrupted |= xd->corrupted;
diff --git a/vp9/decoder/vp9_decodeframe.h b/vp9/decoder/vp9_decodeframe.h
index 4537bc824..e474db495 100644
--- a/vp9/decoder/vp9_decodeframe.h
+++ b/vp9/decoder/vp9_decodeframe.h
@@ -20,7 +20,10 @@ struct VP9Common;
struct VP9Decompressor;
void vp9_init_dequantizer(struct VP9Common *cm);
-int vp9_decode_frame(struct VP9Decompressor *cpi, const uint8_t **p_data_end);
+
+int vp9_decode_frame(struct VP9Decompressor *pbi,
+ const uint8_t *data, const uint8_t *data_end,
+ const uint8_t **p_data_end);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 06a21ea7b..bade2b17b 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -63,7 +63,7 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
TX_SIZE max_tx_size, vp9_reader *r) {
const int ctx = vp9_get_tx_size_context(xd);
const vp9_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc.tx_probs);
- TX_SIZE tx_size = (TX_SIZE)vp9_read(r, tx_probs[0]);
+ int tx_size = vp9_read(r, tx_probs[0]);
if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
tx_size += vp9_read(r, tx_probs[1]);
if (tx_size != TX_8X8 && max_tx_size >= TX_32X32)
@@ -72,7 +72,7 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
if (!cm->frame_parallel_decoding_mode)
++get_tx_counts(max_tx_size, ctx, &cm->counts.tx)[tx_size];
- return tx_size;
+ return (TX_SIZE)tx_size;
}
static TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, TX_MODE tx_mode,
@@ -237,14 +237,15 @@ static int read_mv_component(vp9_reader *r,
static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref,
const nmv_context *ctx,
nmv_context_counts *counts, int allow_hp) {
- const MV_JOINT_TYPE j = vp9_read_tree(r, vp9_mv_joint_tree, ctx->joints);
+ const MV_JOINT_TYPE joint_type =
+ (MV_JOINT_TYPE)vp9_read_tree(r, vp9_mv_joint_tree, ctx->joints);
const int use_hp = allow_hp && vp9_use_mv_hp(ref);
MV diff = {0, 0};
- if (mv_joint_vertical(j))
+ if (mv_joint_vertical(joint_type))
diff.row = read_mv_component(r, &ctx->comps[0], use_hp);
- if (mv_joint_horizontal(j))
+ if (mv_joint_horizontal(joint_type))
diff.col = read_mv_component(r, &ctx->comps[1], use_hp);
vp9_inc_mv(&diff, counts);
@@ -276,7 +277,8 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd,
FRAME_COUNTS *const counts = &cm->counts;
if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
- ref_frame[0] = vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
+ ref_frame[0] = (MV_REFERENCE_FRAME)vp9_get_segdata(&cm->seg, segment_id,
+ SEG_LVL_REF_FRAME);
ref_frame[1] = NONE;
} else {
const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r);
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 3aedbef37..17b8c98c8 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -104,22 +104,13 @@ void vp9_initialize_dec() {
static int init_done = 0;
if (!init_done) {
- vp9_initialize_common();
+ vp9_init_neighbors();
vp9_init_quant_tables();
init_done = 1;
}
}
-static void init_macroblockd(VP9D_COMP *const pbi) {
- MACROBLOCKD *xd = &pbi->mb;
- struct macroblockd_plane *const pd = xd->plane;
- int i;
-
- for (i = 0; i < MAX_MB_PLANE; ++i)
- pd[i].dqcoeff = pbi->dqcoeff[i];
-}
-
-VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) {
+VP9D_COMP *vp9_create_decompressor(const VP9D_CONFIG *oxcf) {
VP9D_COMP *const pbi = vpx_memalign(32, sizeof(VP9D_COMP));
VP9_COMMON *const cm = pbi ? &pbi->common : NULL;
@@ -156,7 +147,7 @@ VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) {
cm->error.setjmp = 0;
pbi->decoded_key_frame = 0;
- init_macroblockd(pbi);
+ init_macroblockd(&pbi->mb);
vp9_worker_init(&pbi->lf_worker);
@@ -164,12 +155,10 @@ VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) {
}
void vp9_remove_decompressor(VP9D_COMP *pbi) {
+ VP9_COMMON *const cm = &pbi->common;
int i;
- if (!pbi)
- return;
-
- vp9_remove_common(&pbi->common);
+ vp9_remove_common(cm);
vp9_worker_end(&pbi->lf_worker);
vpx_free(pbi->lf_worker.data1);
for (i = 0; i < pbi->num_tile_workers; ++i) {
@@ -181,17 +170,11 @@ void vp9_remove_decompressor(VP9D_COMP *pbi) {
vpx_free(pbi->tile_workers);
if (pbi->num_tile_workers) {
- VP9_COMMON *const cm = &pbi->common;
const int sb_rows =
mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
- VP9LfSync *const lf_sync = &pbi->lf_row_sync;
-
- vp9_loop_filter_dealloc(lf_sync, sb_rows);
+ vp9_loop_filter_dealloc(&pbi->lf_row_sync, sb_rows);
}
- vpx_free(pbi->common.mi_streams);
- vpx_free(pbi->above_context[0]);
- vpx_free(pbi->above_seg_context);
vpx_free(pbi);
}
@@ -228,17 +211,15 @@ vpx_codec_err_t vp9_copy_reference_dec(VP9D_COMP *pbi,
}
-vpx_codec_err_t vp9_set_reference_dec(VP9D_COMP *pbi,
+vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,
VP9_REFFRAME ref_frame_flag,
YV12_BUFFER_CONFIG *sd) {
- VP9_COMMON *cm = &pbi->common;
RefBuffer *ref_buf = NULL;
- /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the
- * encoder is using the frame buffers for. This is just a stub to keep the
- * vpxenc --test-decode functionality working, and will be replaced in a
- * later commit that adds VP9-specific controls for this functionality.
- */
+ // TODO(jkoleszar): The decoder doesn't have any real knowledge of what the
+ // encoder is using the frame buffers for. This is just a stub to keep the
+ // vpxenc --test-decode functionality working, and will be replaced in a
+ // later commit that adds VP9-specific controls for this functionality.
if (ref_frame_flag == VP9_LAST_FLAG) {
ref_buf = &cm->frame_refs[0];
} else if (ref_frame_flag == VP9_GOLD_FLAG) {
@@ -246,13 +227,13 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_COMP *pbi,
} else if (ref_frame_flag == VP9_ALT_FLAG) {
ref_buf = &cm->frame_refs[2];
} else {
- vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR,
+ vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"Invalid reference frame");
- return pbi->common.error.error_code;
+ return cm->error.error_code;
}
if (!equal_dimensions(ref_buf->buf, sd)) {
- vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR,
+ vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"Incorrect buffer dimensions");
} else {
int *ref_fb_ptr = &ref_buf->idx;
@@ -269,7 +250,7 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_COMP *pbi,
vp8_yv12_copy_frame(sd, ref_buf->buf);
}
- return pbi->common.error.error_code;
+ return cm->error.error_code;
}
@@ -311,32 +292,21 @@ static void swap_frame_buffers(VP9D_COMP *pbi) {
int vp9_receive_compressed_data(VP9D_COMP *pbi,
size_t size, const uint8_t **psource,
int64_t time_stamp) {
- VP9_COMMON *cm = NULL;
+ VP9_COMMON *const cm = &pbi->common;
const uint8_t *source = *psource;
int retcode = 0;
- /*if(pbi->ready_for_new_data == 0)
- return -1;*/
-
- if (!pbi)
- return -1;
-
- cm = &pbi->common;
cm->error.error_code = VPX_CODEC_OK;
- pbi->source = source;
- pbi->source_sz = size;
-
- if (pbi->source_sz == 0) {
- /* This is used to signal that we are missing frames.
- * We do not know if the missing frame(s) was supposed to update
- * any of the reference buffers, but we act conservative and
- * mark only the last buffer as corrupted.
- *
- * TODO(jkoleszar): Error concealment is undefined and non-normative
- * at this point, but if it becomes so, [0] may not always be the correct
- * thing to do here.
- */
+ if (size == 0) {
+ // This is used to signal that we are missing frames.
+ // We do not know if the missing frame(s) was supposed to update
+ // any of the reference buffers, but we act conservative and
+ // mark only the last buffer as corrupted.
+ //
+ // TODO(jkoleszar): Error concealment is undefined and non-normative
+ // at this point, but if it becomes so, [0] may not always be the correct
+ // thing to do here.
if (cm->frame_refs[0].idx != INT_MAX)
cm->frame_refs[0].buf->corrupted = 1;
}
@@ -350,14 +320,13 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi,
if (setjmp(cm->error.jmp)) {
cm->error.setjmp = 0;
- /* We do not know if the missing frame(s) was supposed to update
- * any of the reference buffers, but we act conservative and
- * mark only the last buffer as corrupted.
- *
- * TODO(jkoleszar): Error concealment is undefined and non-normative
- * at this point, but if it becomes so, [0] may not always be the correct
- * thing to do here.
- */
+ // We do not know if the missing frame(s) was supposed to update
+ // any of the reference buffers, but we act conservative and
+ // mark only the last buffer as corrupted.
+ //
+ // TODO(jkoleszar): Error concealment is undefined and non-normative
+ // at this point, but if it becomes so, [0] may not always be the correct
+ // thing to do here.
if (cm->frame_refs[0].idx != INT_MAX)
cm->frame_refs[0].buf->corrupted = 1;
@@ -369,7 +338,7 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi,
cm->error.setjmp = 1;
- retcode = vp9_decode_frame(pbi, psource);
+ retcode = vp9_decode_frame(pbi, source, source + size, psource);
if (retcode < 0) {
cm->error.error_code = VPX_CODEC_ERROR;
@@ -431,7 +400,6 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi,
pbi->ready_for_new_data = 0;
pbi->last_time_stamp = time_stamp;
- pbi->source_sz = 0;
cm->error.setjmp = 0;
return retcode;
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index 4cbff4516..f220ccd02 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -31,30 +31,17 @@ typedef struct {
int width;
int height;
int version;
- int postprocess;
int max_threads;
int inv_tile_order;
- int input_partition;
} VP9D_CONFIG;
-typedef enum {
- VP9_LAST_FLAG = 1,
- VP9_GOLD_FLAG = 2,
- VP9_ALT_FLAG = 4
-} VP9_REFFRAME;
-
typedef struct VP9Decompressor {
DECLARE_ALIGNED(16, MACROBLOCKD, mb);
DECLARE_ALIGNED(16, VP9_COMMON, common);
- DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
-
VP9D_CONFIG oxcf;
- const uint8_t *source;
- size_t source_sz;
-
int64_t last_time_stamp;
int ready_for_new_data;
@@ -72,9 +59,6 @@ typedef struct VP9Decompressor {
int num_tile_workers;
VP9LfSync lf_row_sync;
-
- ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
- PARTITION_CONTEXT *above_seg_context;
} VP9D_COMP;
void vp9_initialize_dec();
@@ -92,7 +76,7 @@ vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decompressor *pbi,
VP9_REFFRAME ref_frame_flag,
YV12_BUFFER_CONFIG *sd);
-vpx_codec_err_t vp9_set_reference_dec(struct VP9Decompressor *pbi,
+vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,
VP9_REFFRAME ref_frame_flag,
YV12_BUFFER_CONFIG *sd);
@@ -100,7 +84,7 @@ int vp9_get_reference_dec(struct VP9Decompressor *pbi,
int index, YV12_BUFFER_CONFIG **fb);
-struct VP9Decompressor *vp9_create_decompressor(VP9D_CONFIG *oxcf);
+struct VP9Decompressor *vp9_create_decompressor(const VP9D_CONFIG *oxcf);
void vp9_remove_decompressor(struct VP9Decompressor *pbi);
diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h
index 6d4450fd2..2f65e1e30 100644
--- a/vp9/decoder/vp9_dthread.h
+++ b/vp9/decoder/vp9_dthread.h
@@ -24,7 +24,6 @@ typedef struct TileWorkerData {
struct VP9Common *cm;
vp9_reader bit_reader;
DECLARE_ALIGNED(16, struct macroblockd, xd);
- DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
// Row-based parallel loopfilter data
LFWorkerData lfdata;
diff --git a/vp9/encoder/vp9_aq_complexity.c b/vp9/encoder/vp9_aq_complexity.c
new file mode 100644
index 000000000..83892e872
--- /dev/null
+++ b/vp9/encoder/vp9_aq_complexity.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits.h>
+#include <math.h>
+
+#include "vp9/common/vp9_seg_common.h"
+
+#include "vp9/encoder/vp9_segmentation.h"
+
+static const double in_frame_q_adj_ratio[MAX_SEGMENTS] =
+ {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+
+void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ struct segmentation *const seg = &cm->seg;
+
+ // Make SURE use of floating point in this function is safe.
+ vp9_clear_system_state();
+
+ if (cm->frame_type == KEY_FRAME ||
+ cpi->refresh_alt_ref_frame ||
+ (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
+ int segment;
+
+ // Clear down the segment map.
+ vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
+
+ // Clear down the complexity map used for rd.
+ vpx_memset(cpi->complexity_map, 0, cm->mi_rows * cm->mi_cols);
+
+ vp9_enable_segmentation(seg);
+ vp9_clearall_segfeatures(seg);
+
+ // Select delta coding method.
+ seg->abs_delta = SEGMENT_DELTADATA;
+
+ // Segment 0 "Q" feature is disabled so it defaults to the baseline Q.
+ vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q);
+
+ // Use some of the segments for in frame Q adjustment.
+ for (segment = 1; segment < 2; segment++) {
+ const int qindex_delta =
+ vp9_compute_qdelta_by_rate(cpi,
+ cm->base_qindex,
+ in_frame_q_adj_ratio[segment]);
+ vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q);
+ vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta);
+ }
+ }
+}
+
+// Select a segment for the current SB64
+void vp9_select_in_frame_q_segment(VP9_COMP *cpi,
+ int mi_row, int mi_col,
+ int output_enabled, int projected_rate) {
+ VP9_COMMON *const cm = &cpi->common;
+
+ const int mi_offset = mi_row * cm->mi_cols + mi_col;
+ const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
+ const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
+ const int xmis = MIN(cm->mi_cols - mi_col, bw);
+ const int ymis = MIN(cm->mi_rows - mi_row, bh);
+ int complexity_metric = 64;
+ int x, y;
+
+ unsigned char segment;
+
+ if (!output_enabled) {
+ segment = 0;
+ } else {
+ // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh).
+ // It is converted to bits * 256 units.
+ const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) /
+ (bw * bh);
+
+ if (projected_rate < (target_rate / 4)) {
+ segment = 1;
+ } else {
+ segment = 0;
+ }
+
+ if (target_rate > 0) {
+ complexity_metric =
+ clamp((int)((projected_rate * 64) / target_rate), 16, 255);
+ }
+ }
+
+  // Fill in the entries in the segment map corresponding to this SB64.
+ for (y = 0; y < ymis; y++) {
+ for (x = 0; x < xmis; x++) {
+ cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment;
+ cpi->complexity_map[mi_offset + y * cm->mi_cols + x] =
+ (unsigned char)complexity_metric;
+ }
+ }
+}
diff --git a/vp9/encoder/vp9_aq_complexity.h b/vp9/encoder/vp9_aq_complexity.h
new file mode 100644
index 000000000..af031a46c
--- /dev/null
+++ b/vp9/encoder/vp9_aq_complexity.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_AQ_COMPLEXITY_H_
+#define VP9_ENCODER_VP9_AQ_COMPLEXITY_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP9_COMP;
+
+// Select a segment for the current SB64.
+void vp9_select_in_frame_q_segment(struct VP9_COMP *cpi, int mi_row, int mi_col,
+ int output_enabled, int projected_rate);
+
+
+// This function sets up a set of segments with delta Q values around
+// the baseline frame quantizer.
+void vp9_setup_in_frame_q_adj(struct VP9_COMP *cpi);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP9_ENCODER_VP9_AQ_COMPLEXITY_H_
diff --git a/vp9/encoder/vp9_craq.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 40437c77f..1d272e1d9 100644
--- a/vp9/encoder/vp9_craq.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -11,7 +11,7 @@
#include <limits.h>
#include <math.h>
-#include "vp9/encoder/vp9_craq.h"
+#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
#include "vp9/common/vp9_seg_common.h"
@@ -19,19 +19,69 @@
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_segmentation.h"
+struct CYCLIC_REFRESH {
+ // Percentage of super-blocks per frame that are targeted as candidates
+ // for cyclic refresh.
+ int max_sbs_perframe;
+ // Maximum q-delta as percentage of base q.
+ int max_qdelta_perc;
+ // Block size below which we don't apply cyclic refresh.
+ BLOCK_SIZE min_block_size;
+ // Superblock starting index for cycling through the frame.
+ int sb_index;
+ // Controls how long a block will need to wait to be refreshed again.
+ int time_for_refresh;
+ // Actual number of (8x8) blocks that were applied delta-q (segment 1).
+ int num_seg_blocks;
+ // Actual encoding bits for segment 1.
+ int actual_seg_bits;
+ // RD mult. parameters for segment 1.
+ int rdmult;
+ // Cyclic refresh map.
+ signed char *map;
+ // Projected rate and distortion for the current superblock.
+ int64_t projected_rate_sb;
+ int64_t projected_dist_sb;
+ // Thresholds applied to projected rate/distortion of the superblock.
+ int64_t thresh_rate_sb;
+ int64_t thresh_dist_sb;
+};
+
+CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
+ CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr));
+ if (cr == NULL)
+ return NULL;
+
+ cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map));
+ if (cr->map == NULL) {
+ vpx_free(cr);
+ return NULL;
+ }
+
+ return cr;
+}
+
+void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
+ vpx_free(cr->map);
+ vpx_free(cr);
+}
// Check if we should turn off cyclic refresh based on bitrate condition.
-static int apply_cyclic_refresh_bitrate(VP9_COMP *const cpi) {
+static int apply_cyclic_refresh_bitrate(const VP9_COMMON *cm,
+ const RATE_CONTROL *rc) {
// Turn off cyclic refresh if bits available per frame is not sufficiently
// larger than bit cost of segmentation. Segment map bit cost should scale
// with number of seg blocks, so compare available bits to number of blocks.
// Average bits available per frame = av_per_frame_bandwidth
// Number of (8x8) blocks in frame = mi_rows * mi_cols;
- float factor = 0.5;
- int number_blocks = cpi->common.mi_rows * cpi->common.mi_cols;
+ const float factor = 0.5;
+ const int number_blocks = cm->mi_rows * cm->mi_cols;
// The condition below corresponds to turning off at target bitrates:
// ~24kbps for CIF, 72kbps for VGA (at 30fps).
- if (cpi->rc.av_per_frame_bandwidth < factor * number_blocks)
+  // Also turn off at very small frame sizes, to avoid too large a fraction of
+ // superblocks to be refreshed per frame. Threshold below is less than QCIF.
+ if (rc->av_per_frame_bandwidth < factor * number_blocks ||
+ number_blocks / 64 < 5)
return 0;
else
return 1;
@@ -41,11 +91,9 @@ static int apply_cyclic_refresh_bitrate(VP9_COMP *const cpi) {
// (lower-qp coding). Decision can be based on various factors, such as
// size of the coding block (i.e., below min_block size rejected), coding
// mode, and rate/distortion.
-static int candidate_refresh_aq(VP9_COMP *const cpi,
- MODE_INFO *const mi,
- int bsize,
- int use_rd) {
- CYCLIC_REFRESH *const cr = &cpi->cyclic_refresh;
+static int candidate_refresh_aq(const CYCLIC_REFRESH *cr,
+ const MB_MODE_INFO *mbmi,
+ BLOCK_SIZE bsize, int use_rd) {
if (use_rd) {
// If projected rate is below the thresh_rate (well below target,
// so undershoot expected), accept it for lower-qp coding.
@@ -56,18 +104,18 @@ static int candidate_refresh_aq(VP9_COMP *const cpi,
// 2) mode is non-zero mv and projected distortion is above thresh_dist
// 3) mode is an intra-mode (we may want to allow some of this under
// another thresh_dist)
- else if ((bsize < cr->min_block_size) ||
- (mi->mbmi.mv[0].as_int != 0 &&
- cr->projected_dist_sb > cr->thresh_dist_sb) ||
- !is_inter_block(&mi->mbmi))
+ else if (bsize < cr->min_block_size ||
+ (mbmi->mv[0].as_int != 0 &&
+ cr->projected_dist_sb > cr->thresh_dist_sb) ||
+ !is_inter_block(mbmi))
return 0;
else
return 1;
} else {
// Rate/distortion not used for update.
- if ((bsize < cr->min_block_size) ||
- (mi->mbmi.mv[0].as_int != 0) ||
- !is_inter_block(&mi->mbmi))
+ if (bsize < cr->min_block_size ||
+ mbmi->mv[0].as_int != 0 ||
+ !is_inter_block(mbmi))
return 0;
else
return 1;
@@ -77,33 +125,31 @@ static int candidate_refresh_aq(VP9_COMP *const cpi,
// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col),
// check if we should reset the segment_id, and update the cyclic_refresh map
// and segmentation map.
-void vp9_update_segment_aq(VP9_COMP *const cpi,
- MODE_INFO *const mi,
- int mi_row,
- int mi_col,
- int bsize,
- int use_rd) {
- CYCLIC_REFRESH *const cr = &cpi->cyclic_refresh;
- VP9_COMMON *const cm = &cpi->common;
+void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
+ MB_MODE_INFO *const mbmi,
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int use_rd) {
+ const VP9_COMMON *const cm = &cpi->common;
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
const int bw = num_8x8_blocks_wide_lookup[bsize];
const int bh = num_8x8_blocks_high_lookup[bsize];
const int xmis = MIN(cm->mi_cols - mi_col, bw);
const int ymis = MIN(cm->mi_rows - mi_row, bh);
const int block_index = mi_row * cm->mi_cols + mi_col;
+ const int refresh_this_block = candidate_refresh_aq(cr, mbmi, bsize, use_rd);
// Default is to not update the refresh map.
int new_map_value = cr->map[block_index];
int x = 0; int y = 0;
- int current_segment = mi->mbmi.segment_id;
- int refresh_this_block = candidate_refresh_aq(cpi, mi, bsize, use_rd);
+
// Check if we should reset the segment_id for this block.
- if (current_segment && !refresh_this_block)
- mi->mbmi.segment_id = 0;
+ if (mbmi->segment_id > 0 && !refresh_this_block)
+ mbmi->segment_id = 0;
// Update the cyclic refresh map, to be used for setting segmentation map
// for the next frame. If the block will be refreshed this frame, mark it
// as clean. The magnitude of the -ve influences how long before we consider
// it for refresh again.
- if (mi->mbmi.segment_id == 1) {
+ if (mbmi->segment_id == 1) {
new_map_value = -cr->time_for_refresh;
} else if (refresh_this_block) {
// Else if it is accepted as candidate for refresh, and has not already
@@ -121,54 +167,54 @@ void vp9_update_segment_aq(VP9_COMP *const cpi,
for (x = 0; x < xmis; x++) {
cr->map[block_index + y * cm->mi_cols + x] = new_map_value;
cpi->segmentation_map[block_index + y * cm->mi_cols + x] =
- mi->mbmi.segment_id;
+ mbmi->segment_id;
}
// Keep track of actual number (in units of 8x8) of blocks in segment 1 used
// for encoding this frame.
- if (mi->mbmi.segment_id)
+ if (mbmi->segment_id)
cr->num_seg_blocks += xmis * ymis;
}
// Setup cyclic background refresh: set delta q and segmentation map.
-void vp9_setup_cyclic_refresh_aq(VP9_COMP *const cpi) {
+void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
VP9_COMMON *const cm = &cpi->common;
- CYCLIC_REFRESH *const cr = &cpi->cyclic_refresh;
+ const RATE_CONTROL *const rc = &cpi->rc;
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
struct segmentation *const seg = &cm->seg;
- unsigned char *seg_map = cpi->segmentation_map;
- int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cpi);
+ unsigned char *const seg_map = cpi->segmentation_map;
+ const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc);
// Don't apply refresh on key frame or enhancement layer frames.
if (!apply_cyclic_refresh ||
- (cpi->common.frame_type == KEY_FRAME) ||
+ (cm->frame_type == KEY_FRAME) ||
(cpi->svc.temporal_layer_id > 0)) {
// Set segmentation map to 0 and disable.
vpx_memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
vp9_disable_segmentation(&cm->seg);
- if (cpi->common.frame_type == KEY_FRAME)
- cr->mb_index = 0;
+ if (cm->frame_type == KEY_FRAME)
+ cr->sb_index = 0;
return;
} else {
int qindex_delta = 0;
- int mbs_in_frame = cm->mi_rows * cm->mi_cols;
- int i, x, y, block_count, bl_index, bl_index2;
- int sum_map, new_value, mi_row, mi_col, xmis, ymis, qindex2;
+ int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame;
+ int xmis, ymis, x, y, qindex2;
// Rate target ratio to set q delta.
- float rate_ratio_qdelta = 2.0;
+ const float rate_ratio_qdelta = 2.0;
vp9_clear_system_state();
// Some of these parameters may be set via codec-control function later.
- cr->max_mbs_perframe = 10;
+ cr->max_sbs_perframe = 10;
cr->max_qdelta_perc = 50;
cr->min_block_size = BLOCK_16X16;
cr->time_for_refresh = 1;
// Set rate threshold to some fraction of target (and scaled by 256).
- cr->thresh_rate_sb = (cpi->rc.sb64_target_rate * 256) >> 2;
+ cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 2;
// Distortion threshold, quadratic in Q, scale factor to be adjusted.
cr->thresh_dist_sb = 8 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) *
vp9_convert_qindex_to_q(cm->base_qindex));
if (cpi->sf.use_nonrd_pick_mode) {
// May want to be more conservative with thresholds in non-rd mode for now
// as rate/distortion are derived from model based on prediction residual.
- cr->thresh_rate_sb = (cpi->rc.sb64_target_rate * 256) >> 3;
+ cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 3;
cr->thresh_dist_sb = 4 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) *
vp9_convert_qindex_to_q(cm->base_qindex));
}
@@ -200,68 +246,79 @@ void vp9_setup_cyclic_refresh_aq(VP9_COMP *const cpi) {
rate_ratio_qdelta);
// TODO(marpan): Incorporate the actual-vs-target rate over/undershoot from
// previous encoded frame.
- if ((-qindex_delta) > cr->max_qdelta_perc * cm->base_qindex / 100) {
+ if (-qindex_delta > cr->max_qdelta_perc * cm->base_qindex / 100)
qindex_delta = -cr->max_qdelta_perc * cm->base_qindex / 100;
- }
// Compute rd-mult for segment 1.
qindex2 = clamp(cm->base_qindex + cm->y_dc_delta_q + qindex_delta, 0, MAXQ);
cr->rdmult = vp9_compute_rd_mult(cpi, qindex2);
vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qindex_delta);
- // Number of target macroblocks to get the q delta (segment 1).
- block_count = cr->max_mbs_perframe * mbs_in_frame / 100;
- // Set the segmentation map: cycle through the macroblocks, starting at
+
+ sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
+ sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
+ sbs_in_frame = sb_cols * sb_rows;
+ // Number of target superblocks to get the q delta (segment 1).
+ block_count = cr->max_sbs_perframe * sbs_in_frame / 100;
+ // Set the segmentation map: cycle through the superblocks, starting at
// cr->mb_index, and stopping when either block_count blocks have been found
// to be refreshed, or we have passed through whole frame.
- // Note the setting of seg_map below is done in two steps (one over 8x8)
- // and then another over SB, in order to keep the value constant over SB.
- // TODO(marpan): Do this in one pass in SB order.
- assert(cr->mb_index < mbs_in_frame);
- i = cr->mb_index;
+ assert(cr->sb_index < sbs_in_frame);
+ i = cr->sb_index;
do {
- // If the macroblock is as a candidate for clean up then mark it
- // for possible boost/refresh (segment 1). The segment id may get reset to
- // 0 later if the macroblock gets coded anything other than ZEROMV.
- if (cr->map[i] == 0) {
- seg_map[i] = 1;
- block_count--;
- } else if (cr->map[i] < 0) {
- cr->map[i]++;
+ int sum_map = 0;
+ // Get the mi_row/mi_col corresponding to superblock index i.
+ int sb_row_index = (i / sb_cols);
+ int sb_col_index = i - sb_row_index * sb_cols;
+ int mi_row = sb_row_index * MI_BLOCK_SIZE;
+ int mi_col = sb_col_index * MI_BLOCK_SIZE;
+ assert(mi_row >= 0 && mi_row < cm->mi_rows);
+ assert(mi_col >= 0 && mi_col < cm->mi_cols);
+ bl_index = mi_row * cm->mi_cols + mi_col;
+ // Loop through all 8x8 blocks in superblock and update map.
+ xmis = MIN(cm->mi_cols - mi_col,
+ num_8x8_blocks_wide_lookup[BLOCK_64X64]);
+ ymis = MIN(cm->mi_rows - mi_row,
+ num_8x8_blocks_high_lookup[BLOCK_64X64]);
+ for (y = 0; y < ymis; y++) {
+ for (x = 0; x < xmis; x++) {
+ const int bl_index2 = bl_index + y * cm->mi_cols + x;
+          // If the block is a candidate for cleanup then mark it
+          // for possible boost/refresh (segment 1). The segment id may get
+          // reset to 0 later if block gets coded anything other than ZEROMV.
+ if (cr->map[bl_index2] == 0) {
+ seg_map[bl_index2] = 1;
+ sum_map++;
+ } else if (cr->map[bl_index2] < 0) {
+ cr->map[bl_index2]++;
+ }
+ }
+ }
+ // Enforce constant segment over superblock.
+ // If segment is partial over superblock, reset to either all 1 or 0.
+ if (sum_map > 0 && sum_map < xmis * ymis) {
+ const int new_value = (sum_map >= xmis * ymis / 2);
+ for (y = 0; y < ymis; y++)
+ for (x = 0; x < xmis; x++)
+ seg_map[bl_index + y * cm->mi_cols + x] = new_value;
}
i++;
- if (i == mbs_in_frame) {
+ if (i == sbs_in_frame) {
i = 0;
}
- } while (block_count && i != cr->mb_index);
- cr->mb_index = i;
- // Enforce constant segment map over superblock.
- for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE)
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
- bl_index = mi_row * cm->mi_cols + mi_col;
- xmis = num_8x8_blocks_wide_lookup[BLOCK_64X64];
- ymis = num_8x8_blocks_high_lookup[BLOCK_64X64];
- xmis = MIN(cm->mi_cols - mi_col, xmis);
- ymis = MIN(cm->mi_rows - mi_row, ymis);
- sum_map = 0;
- for (y = 0; y < ymis; y++)
- for (x = 0; x < xmis; x++) {
- bl_index2 = bl_index + y * cm->mi_cols + x;
- sum_map += seg_map[bl_index2];
- }
- new_value = 0;
- // If segment is partial over superblock, reset.
- if (sum_map > 0 && sum_map < xmis * ymis) {
- if (sum_map < xmis * ymis / 2)
- new_value = 0;
- else
- new_value = 1;
- for (y = 0; y < ymis; y++)
- for (x = 0; x < xmis; x++) {
- bl_index2 = bl_index + y * cm->mi_cols + x;
- seg_map[bl_index2] = new_value;
- }
- }
- }
+ if (sum_map >= xmis * ymis /2)
+ block_count--;
+ } while (block_count && i != cr->sb_index);
+ cr->sb_index = i;
}
}
+
+void vp9_cyclic_refresh_set_rate_and_dist_sb(CYCLIC_REFRESH *cr,
+ int64_t rate_sb, int64_t dist_sb) {
+ cr->projected_rate_sb = rate_sb;
+ cr->projected_dist_sb = dist_sb;
+}
+
+int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr) {
+ return cr->rdmult;
+}
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.h b/vp9/encoder/vp9_aq_cyclicrefresh.h
new file mode 100644
index 000000000..f556d658b
--- /dev/null
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_
+#define VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_
+
+#include "vp9/common/vp9_blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP9_COMP;
+
+struct CYCLIC_REFRESH;
+typedef struct CYCLIC_REFRESH CYCLIC_REFRESH;
+
+CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols);
+
+void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr);
+
+// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col),
+// check if we should reset the segment_id, and update the cyclic_refresh map
+// and segmentation map.
+void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi,
+ MB_MODE_INFO *const mbmi,
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int use_rd);
+
+// Setup cyclic background refresh: set delta q and segmentation map.
+void vp9_cyclic_refresh_setup(struct VP9_COMP *const cpi);
+
+void vp9_cyclic_refresh_set_rate_and_dist_sb(CYCLIC_REFRESH *cr,
+ int64_t rate_sb, int64_t dist_sb);
+
+int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_
diff --git a/vp9/encoder/vp9_vaq.c b/vp9/encoder/vp9_aq_variance.c
index c71c171bb..c25eb95c7 100644
--- a/vp9/encoder/vp9_vaq.c
+++ b/vp9/encoder/vp9_aq_variance.c
@@ -10,7 +10,7 @@
#include <math.h>
-#include "vp9/encoder/vp9_vaq.h"
+#include "vp9/encoder/vp9_aq_variance.h"
#include "vp9/common/vp9_seg_common.h"
diff --git a/vp9/encoder/vp9_vaq.h b/vp9/encoder/vp9_aq_variance.h
index c73114aeb..381fe50cf 100644
--- a/vp9/encoder/vp9_vaq.h
+++ b/vp9/encoder/vp9_aq_variance.h
@@ -9,8 +9,8 @@
*/
-#ifndef VP9_ENCODER_VP9_VAQ_H_
-#define VP9_ENCODER_VP9_VAQ_H_
+#ifndef VP9_ENCODER_VP9_AQ_VARIANCE_H_
+#define VP9_ENCODER_VP9_AQ_VARIANCE_H_
#include "vp9/encoder/vp9_onyx_int.h"
@@ -31,4 +31,4 @@ int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_VAQ_H_
+#endif // VP9_ENCODER_VP9_AQ_VARIANCE_H_
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 1b4a6cc9b..a1db097bd 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -392,12 +392,10 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
pack_mb_tokens(w, tok, tok_end);
}
-static void write_partition(VP9_COMP *cpi, int hbs, int mi_row, int mi_col,
+static void write_partition(VP9_COMMON *cm, MACROBLOCKD *xd,
+ int hbs, int mi_row, int mi_col,
PARTITION_TYPE p, BLOCK_SIZE bsize, vp9_writer *w) {
- VP9_COMMON *const cm = &cpi->common;
- const int ctx = partition_plane_context(cpi->above_seg_context,
- cpi->left_seg_context,
- mi_row, mi_col, bsize);
+ const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
const vp9_prob *const probs = get_partition_probs(cm, ctx);
const int has_rows = (mi_row + hbs) < cm->mi_rows;
const int has_cols = (mi_col + hbs) < cm->mi_cols;
@@ -415,10 +413,13 @@ static void write_partition(VP9_COMP *cpi, int hbs, int mi_row, int mi_col,
}
}
-static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile,
+static void write_modes_sb(VP9_COMP *cpi,
+ const TileInfo *const tile,
vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end,
int mi_row, int mi_col, BLOCK_SIZE bsize) {
VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+
const int bsl = b_width_log2(bsize);
const int bs = (1 << bsl) / 4;
PARTITION_TYPE partition;
@@ -429,7 +430,7 @@ static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile,
return;
partition = partition_lookup[bsl][m->mbmi.sb_type];
- write_partition(cpi, bs, mi_row, mi_col, partition, bsize, w);
+ write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w);
subsize = get_subsize(bsize, partition);
if (subsize < BLOCK_8X8) {
write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
@@ -465,20 +466,21 @@ static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile,
// update partition context
if (bsize >= BLOCK_8X8 &&
(bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
- update_partition_context(cpi->above_seg_context, cpi->left_seg_context,
- mi_row, mi_col, subsize, bsize);
+ update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
-static void write_modes(VP9_COMP *cpi, const TileInfo *const tile,
+static void write_modes(VP9_COMP *cpi,
+ const TileInfo *const tile,
vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end) {
int mi_row, mi_col;
for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
- vp9_zero(cpi->left_seg_context);
+ vp9_zero(cpi->mb.e_mbd.left_seg_context);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE)
- write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, BLOCK_64X64);
+ write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col,
+ BLOCK_64X64);
}
}
@@ -930,7 +932,7 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
- vpx_memset(cpi->above_seg_context, 0, sizeof(*cpi->above_seg_context) *
+ vpx_memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) *
mi_cols_aligned_to_sb(cm->mi_cols));
tok[0][0] = cpi->tok;
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 168702e90..7729d84b3 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -115,7 +115,6 @@ struct macroblock {
unsigned int source_variance;
unsigned int pred_sse[MAX_REF_FRAMES];
int pred_mv_sad[MAX_REF_FRAMES];
- int mode_sad[MAX_REF_FRAMES][INTER_MODES + 1];
int nmvjointcost[MV_JOINTS];
int nmvcosts[2][MV_VALS];
@@ -158,7 +157,6 @@ struct macroblock {
// note that token_costs is the cost when eob node is skipped
vp9_coeff_cost token_costs[TX_SIZES];
- DECLARE_ALIGNED(16, uint8_t, token_cache[1024]);
int optimize;
@@ -198,7 +196,8 @@ struct macroblock {
// TODO(jingning): the variables used here are little complicated. need further
// refactoring on organizing the temporary buffers, when recursive
// partition down to 4x4 block size is enabled.
-static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE bsize) {
+static INLINE PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x,
+ BLOCK_SIZE bsize) {
switch (bsize) {
case BLOCK_64X64:
return &x->sb64_context;
diff --git a/vp9/encoder/vp9_craq.h b/vp9/encoder/vp9_craq.h
deleted file mode 100644
index 1f81f3e77..000000000
--- a/vp9/encoder/vp9_craq.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VP9_ENCODER_VP9_CRAQ_H_
-#define VP9_ENCODER_VP9_CRAQ_H_
-
-#include "vp9/encoder/vp9_onyx_int.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Check if we should turn off cyclic refresh based on bitrate condition.
-static int apply_cyclic_refresh_bitrate(VP9_COMP *const cpi);
-
-// Check if this coding block, of size bsize, should be considered for refresh
-// (lower-qp coding).
-static int candidate_refresh_aq(VP9_COMP *const cpi,
- MODE_INFO *const mi,
- int bsize,
- int use_rd);
-
-// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col),
-// check if we should reset the segment_id, and update the cyclic_refresh map
-// and segmentation map.
-void vp9_update_segment_aq(VP9_COMP *const cpi,
- MODE_INFO *const mi,
- int mi_row,
- int mi_col,
- int bsize,
- int use_rd);
-
-// Setup cyclic background refresh: set delta q and segmentation map.
-void vp9_setup_cyclic_refresh_aq(VP9_COMP *const cpi);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // VP9_ENCODER_VP9_CRAQ_H_
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index a38592240..80abb1fa5 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -30,6 +30,9 @@
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/common/vp9_tile_common.h"
+#include "vp9/encoder/vp9_aq_complexity.h"
+#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
+#include "vp9/encoder/vp9_aq_variance.h"
#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
@@ -38,8 +41,6 @@
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/encoder/vp9_tokenize.h"
-#include "vp9/encoder/vp9_vaq.h"
-#include "vp9/encoder/vp9_craq.h"
#define GF_ZEROMV_ZBIN_BOOST 0
#define LF_ZEROMV_ZBIN_BOOST 0
@@ -164,13 +165,12 @@ static INLINE void set_modeinfo_offsets(VP9_COMMON *const cm,
int mi_col) {
const int idx_str = xd->mode_info_stride * mi_row + mi_col;
xd->mi_8x8 = cm->mi_grid_visible + idx_str;
- xd->prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
xd->mi_8x8[0] = cm->mi + idx_str;
}
-static int is_block_in_mb_map(VP9_COMP *cpi, int mi_row, int mi_col,
+static int is_block_in_mb_map(const VP9_COMP *cpi, int mi_row, int mi_col,
BLOCK_SIZE bsize) {
- VP9_COMMON *const cm = &cpi->common;
+ const VP9_COMMON *const cm = &cpi->common;
const int mb_rows = cm->mb_rows;
const int mb_cols = cm->mb_cols;
const int mb_row = mi_row >> 1;
@@ -194,6 +194,16 @@ static int is_block_in_mb_map(VP9_COMP *cpi, int mi_row, int mi_col,
return 0;
}
+static int check_active_map(const VP9_COMP *cpi, const MACROBLOCK *x,
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
+ if (cpi->active_map_enabled && !x->e_mbd.lossless) {
+ return is_block_in_mb_map(cpi, mi_row, mi_col, bsize);
+ } else {
+ return 1;
+ }
+}
+
static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
int mi_row, int mi_col, BLOCK_SIZE bsize) {
MACROBLOCK *const x = &cpi->mb;
@@ -207,16 +217,11 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
const int idx_map = mb_row * cm->mb_cols + mb_col;
const struct segmentation *const seg = &cm->seg;
- set_skip_context(xd, cpi->above_context, cpi->left_context, mi_row, mi_col);
+ set_skip_context(xd, xd->above_context, xd->left_context, mi_row, mi_col);
// Activity map pointer
x->mb_activity_ptr = &cpi->mb_activity_map[idx_map];
-
- if (cpi->active_map_enabled && !x->e_mbd.lossless) {
- x->in_active_map = is_block_in_mb_map(cpi, mi_row, mi_col, bsize);
- } else {
- x->in_active_map = 1;
- }
+ x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize);
set_modeinfo_offsets(cm, xd, mi_row, mi_col);
@@ -276,7 +281,7 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
}
}
-static void duplicate_modeinfo_in_sb(VP9_COMMON * const cm,
+static void duplicate_mode_info_in_sb(VP9_COMMON * const cm,
MACROBLOCKD *const xd,
int mi_row,
int mi_col,
@@ -300,7 +305,7 @@ static void set_block_size(VP9_COMP * const cpi,
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
set_modeinfo_offsets(&cpi->common, xd, mi_row, mi_col);
xd->mi_8x8[0]->mbmi.sb_type = bsize;
- duplicate_modeinfo_in_sb(&cpi->common, xd, mi_row, mi_col, bsize);
+ duplicate_mode_info_in_sb(&cpi->common, xd, mi_row, mi_col, bsize);
}
}
@@ -829,52 +834,6 @@ static void activity_masking(VP9_COMP *cpi, MACROBLOCK *x) {
adjust_act_zbin(cpi, x);
}
-// Select a segment for the current SB64
-static void select_in_frame_q_segment(VP9_COMP *cpi,
- int mi_row, int mi_col,
- int output_enabled, int projected_rate) {
- VP9_COMMON *const cm = &cpi->common;
-
- const int mi_offset = mi_row * cm->mi_cols + mi_col;
- const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
- const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
- const int xmis = MIN(cm->mi_cols - mi_col, bw);
- const int ymis = MIN(cm->mi_rows - mi_row, bh);
- int complexity_metric = 64;
- int x, y;
-
- unsigned char segment;
-
- if (!output_enabled) {
- segment = 0;
- } else {
- // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh).
- // It is converted to bits * 256 units
- const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) /
- (bw * bh);
-
- if (projected_rate < (target_rate / 4)) {
- segment = 1;
- } else {
- segment = 0;
- }
-
- if (target_rate > 0) {
- complexity_metric =
- clamp((int)((projected_rate * 64) / target_rate), 16, 255);
- }
- }
-
- // Fill in the entires in the segment map corresponding to this SB64
- for (y = 0; y < ymis; y++) {
- for (x = 0; x < xmis; x++) {
- cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment;
- cpi->complexity_map[mi_offset + y * cm->mi_cols + x] =
- (unsigned char)complexity_metric;
- }
- }
-}
-
static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
int mi_row, int mi_col, BLOCK_SIZE bsize,
int output_enabled) {
@@ -896,21 +855,17 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
assert(mi->mbmi.sb_type == bsize);
- // For in frame adaptive Q copy over the chosen segment id into the
- // mode innfo context for the chosen mode / partition.
- if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ ||
- cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) &&
+ *mi_addr = *mi;
+
+  // For in-frame adaptive Q, check for resetting the segment_id and updating
+  // the cyclic refresh map.
+ if ((cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) && seg->enabled &&
output_enabled) {
- // Check for reseting segment_id and update cyclic map.
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && seg->enabled) {
- vp9_update_segment_aq(cpi, xd->mi_8x8[0], mi_row, mi_col, bsize, 1);
- vp9_init_plane_quantizers(cpi, x);
- }
- mi->mbmi.segment_id = xd->mi_8x8[0]->mbmi.segment_id;
+ vp9_cyclic_refresh_update_segment(cpi, &xd->mi_8x8[0]->mbmi,
+ mi_row, mi_col, bsize, 1);
+ vp9_init_plane_quantizers(cpi, x);
}
- *mi_addr = *mi;
-
max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1;
for (i = 0; i < max_plane; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][1];
@@ -985,12 +940,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
#endif
if (!frame_is_intra_only(cm)) {
if (is_inter_block(mbmi)) {
- if (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV) {
- MV best_mv[2];
- for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
- best_mv[i] = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_mv;
- vp9_update_mv_count(cm, xd, best_mv);
- }
+ vp9_update_mv_count(cm, xd);
if (cm->interp_filter == SWITCHABLE) {
const int ctx = vp9_get_pred_context_switchable_interp(xd);
@@ -1111,7 +1061,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
: cm->last_frame_seg_map;
// If segment 1, use rdmult for that segment.
if (vp9_get_segment_id(cm, map, bsize, mi_row, mi_col))
- x->rdmult = cpi->cyclic_refresh.rdmult;
+ x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
}
// Find best coding mode & reconstruct the MB so it is available
@@ -1211,21 +1161,21 @@ static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col,
int mi_height = num_8x8_blocks_high_lookup[bsize];
for (p = 0; p < MAX_MB_PLANE; p++) {
vpx_memcpy(
- cpi->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
+ xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
a + num_4x4_blocks_wide * p,
(sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
xd->plane[p].subsampling_x);
vpx_memcpy(
- cpi->left_context[p]
+ xd->left_context[p]
+ ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
l + num_4x4_blocks_high * p,
(sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
xd->plane[p].subsampling_y);
}
- vpx_memcpy(cpi->above_seg_context + mi_col, sa,
- sizeof(*cpi->above_seg_context) * mi_width);
- vpx_memcpy(cpi->left_seg_context + (mi_row & MI_MASK), sl,
- sizeof(cpi->left_seg_context[0]) * mi_height);
+ vpx_memcpy(xd->above_seg_context + mi_col, sa,
+ sizeof(*xd->above_seg_context) * mi_width);
+ vpx_memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl,
+ sizeof(xd->left_seg_context[0]) * mi_height);
}
static void save_context(VP9_COMP *cpi, int mi_row, int mi_col,
ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
@@ -1244,20 +1194,20 @@ static void save_context(VP9_COMP *cpi, int mi_row, int mi_col,
for (p = 0; p < MAX_MB_PLANE; ++p) {
vpx_memcpy(
a + num_4x4_blocks_wide * p,
- cpi->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
+ xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
(sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
xd->plane[p].subsampling_x);
vpx_memcpy(
l + num_4x4_blocks_high * p,
- cpi->left_context[p]
+ xd->left_context[p]
+ ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
(sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
xd->plane[p].subsampling_y);
}
- vpx_memcpy(sa, cpi->above_seg_context + mi_col,
- sizeof(*cpi->above_seg_context) * mi_width);
- vpx_memcpy(sl, cpi->left_seg_context + (mi_row & MI_MASK),
- sizeof(cpi->left_seg_context[0]) * mi_height);
+ vpx_memcpy(sa, xd->above_seg_context + mi_col,
+ sizeof(*xd->above_seg_context) * mi_width);
+ vpx_memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK),
+ sizeof(xd->left_seg_context[0]) * mi_height);
}
static void encode_b(VP9_COMP *cpi, const TileInfo *const tile,
@@ -1289,6 +1239,8 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile,
int output_enabled, BLOCK_SIZE bsize) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+
const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
int ctx;
PARTITION_TYPE partition;
@@ -1298,8 +1250,7 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile,
return;
if (bsize >= BLOCK_8X8) {
- ctx = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
- mi_row, mi_col, bsize);
+ ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
subsize = *get_sb_partitioning(x, bsize);
} else {
ctx = 0;
@@ -1354,8 +1305,7 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile,
}
if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
- update_partition_context(cpi->above_seg_context, cpi->left_seg_context,
- mi_row, mi_col, subsize, bsize);
+ update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
// Check to see if the given partition size is allowed for a specified number
@@ -1432,6 +1382,7 @@ static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8,
for (block_col = 0; block_col < 8; ++block_col) {
MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col];
const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0;
+
if (prev_mi) {
const ptrdiff_t offset = prev_mi - cm->prev_mi;
mi_8x8[block_row * mis + block_col] = cm->mi + offset;
@@ -1460,45 +1411,39 @@ static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) {
return 0;
}
-static void update_state_rt(VP9_COMP *cpi, const PICK_MODE_CONTEXT *ctx,
+static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
int mi_row, int mi_col, int bsize) {
- int i;
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
const struct segmentation *const seg = &cm->seg;
- // TODO(jingning) We might need PICK_MODE_CONTEXT to buffer coding modes
- // associated with variable block sizes. Otherwise, remove this ctx
- // from argument list.
- (void)ctx;
+ *(xd->mi_8x8[0]) = ctx->mic;
- // Check for reseting segment_id and update cyclic map.
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && seg->enabled) {
- vp9_update_segment_aq(cpi, xd->mi_8x8[0], mi_row, mi_col, bsize, 1);
+  // For in-frame adaptive Q, check for resetting the segment_id and updating
+  // the cyclic refresh map.
+ if ((cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) && seg->enabled) {
+ vp9_cyclic_refresh_update_segment(cpi, &xd->mi_8x8[0]->mbmi,
+ mi_row, mi_col, bsize, 1);
vp9_init_plane_quantizers(cpi, x);
}
-
if (is_inter_block(mbmi)) {
- if (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV) {
- MV best_mv[2];
- for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
- best_mv[i] = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_mv;
- vp9_update_mv_count(cm, xd, best_mv);
- }
+ vp9_update_mv_count(cm, xd);
if (cm->interp_filter == SWITCHABLE) {
const int pred_ctx = vp9_get_pred_context_switchable_interp(xd);
++cm->counts.switchable_interp[pred_ctx][mbmi->interp_filter];
}
}
+
+ x->skip = ctx->skip;
}
static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile,
- TOKENEXTRA **tp, int mi_row, int mi_col,
- int output_enabled, BLOCK_SIZE bsize) {
+ TOKENEXTRA **tp, int mi_row, int mi_col,
+ int output_enabled, BLOCK_SIZE bsize) {
MACROBLOCK *const x = &cpi->mb;
if (bsize < BLOCK_8X8) {
@@ -1507,6 +1452,7 @@ static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile,
if (x->ab_index > 0)
return;
}
+
set_offsets(cpi, tile, mi_row, mi_col, bsize);
update_state_rt(cpi, get_block_context(x, bsize), mi_row, mi_col, bsize);
@@ -1522,6 +1468,8 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile,
int output_enabled, BLOCK_SIZE bsize) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+
const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
int ctx;
PARTITION_TYPE partition;
@@ -1534,8 +1482,7 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile,
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
const int idx_str = xd->mode_info_stride * mi_row + mi_col;
MODE_INFO ** mi_8x8 = cm->mi_grid_visible + idx_str;
- ctx = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
- mi_row, mi_col, bsize);
+ ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
subsize = mi_8x8[0]->mbmi.sb_type;
} else {
ctx = 0;
@@ -1594,8 +1541,7 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile,
}
if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
- update_partition_context(cpi->above_seg_context, cpi->left_seg_context,
- mi_row, mi_col, subsize, bsize);
+ update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
static void rd_use_partition(VP9_COMP *cpi,
@@ -1606,12 +1552,10 @@ static void rd_use_partition(VP9_COMP *cpi,
int do_recon) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
const int mis = cm->mode_info_stride;
const int bsl = b_width_log2(bsize);
- const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
- const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
- const int ms = num_4x4_blocks_wide / 2;
- const int mh = num_4x4_blocks_high / 2;
+ const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2;
const int bss = (1 << bsl) / 4;
int i, pl;
PARTITION_TYPE partition = PARTITION_NONE;
@@ -1630,10 +1574,14 @@ static void rd_use_partition(VP9_COMP *cpi,
BLOCK_SIZE sub_subsize = BLOCK_4X4;
int splits_below = 0;
BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type;
+ int do_partition_search = 1;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
+ assert(num_4x4_blocks_wide_lookup[bsize] ==
+ num_4x4_blocks_high_lookup[bsize]);
+
partition = partition_lookup[bsl][bs_type];
subsize = get_subsize(bsize, partition);
@@ -1653,9 +1601,22 @@ static void rd_use_partition(VP9_COMP *cpi,
if (bsize == BLOCK_16X16) {
set_offsets(cpi, tile, mi_row, mi_col, bsize);
x->mb_energy = vp9_block_energy(cpi, x, bsize);
+ } else {
+ x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize);
}
- if (cpi->sf.partition_search_type == SEARCH_PARTITION &&
+ if (!x->in_active_map) {
+ do_partition_search = 0;
+ if (mi_row + (mi_step >> 1) < cm->mi_rows &&
+ mi_col + (mi_step >> 1) < cm->mi_cols) {
+ *(get_sb_partitioning(x, bsize)) = bsize;
+ bs_type = mi_8x8[0]->mbmi.sb_type = bsize;
+ subsize = bsize;
+ partition = PARTITION_NONE;
+ }
+ }
+ if (do_partition_search &&
+ cpi->sf.partition_search_type == SEARCH_PARTITION &&
cpi->sf.adjust_partitioning_from_last_frame) {
// Check if any of the sub blocks are further split.
if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
@@ -1673,15 +1634,13 @@ static void rd_use_partition(VP9_COMP *cpi,
// If partition is not none try none unless each of the 4 splits are split
// even further..
if (partition != PARTITION_NONE && !splits_below &&
- mi_row + (ms >> 1) < cm->mi_rows &&
- mi_col + (ms >> 1) < cm->mi_cols) {
+ mi_row + (mi_step >> 1) < cm->mi_rows &&
+ mi_col + (mi_step >> 1) < cm->mi_cols) {
*(get_sb_partitioning(x, bsize)) = bsize;
rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, &none_dist, bsize,
get_block_context(x, bsize), INT64_MAX);
- pl = partition_plane_context(cpi->above_seg_context,
- cpi->left_seg_context,
- mi_row, mi_col, bsize);
+ pl = partition_plane_context(xd, mi_row, mi_col, bsize);
if (none_rate < INT_MAX) {
none_rate += x->partition_cost[pl][PARTITION_NONE];
@@ -1706,14 +1665,14 @@ static void rd_use_partition(VP9_COMP *cpi,
&last_part_dist, subsize,
get_block_context(x, subsize), INT64_MAX);
if (last_part_rate != INT_MAX &&
- bsize >= BLOCK_8X8 && mi_row + (mh >> 1) < cm->mi_rows) {
+ bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) {
int rt = 0;
int64_t dt = 0;
update_state(cpi, get_block_context(x, subsize), mi_row, mi_col,
subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*get_sb_index(x, subsize) = 1;
- rd_pick_sb_modes(cpi, tile, mi_row + (ms >> 1), mi_col, &rt, &dt,
+ rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &rt, &dt,
subsize, get_block_context(x, subsize), INT64_MAX);
if (rt == INT_MAX || dt == INT64_MAX) {
last_part_rate = INT_MAX;
@@ -1731,14 +1690,14 @@ static void rd_use_partition(VP9_COMP *cpi,
&last_part_dist, subsize,
get_block_context(x, subsize), INT64_MAX);
if (last_part_rate != INT_MAX &&
- bsize >= BLOCK_8X8 && mi_col + (ms >> 1) < cm->mi_cols) {
+ bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) {
int rt = 0;
int64_t dt = 0;
update_state(cpi, get_block_context(x, subsize), mi_row, mi_col,
subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*get_sb_index(x, subsize) = 1;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (ms >> 1), &rt, &dt,
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &rt, &dt,
subsize, get_block_context(x, subsize), INT64_MAX);
if (rt == INT_MAX || dt == INT64_MAX) {
last_part_rate = INT_MAX;
@@ -1754,8 +1713,8 @@ static void rd_use_partition(VP9_COMP *cpi,
last_part_rate = 0;
last_part_dist = 0;
for (i = 0; i < 4; i++) {
- int x_idx = (i & 1) * (ms >> 1);
- int y_idx = (i >> 1) * (ms >> 1);
+ int x_idx = (i & 1) * (mi_step >> 1);
+ int y_idx = (i >> 1) * (mi_step >> 1);
int jj = i >> 1, ii = i & 0x01;
int rt;
int64_t dt;
@@ -1781,18 +1740,20 @@ static void rd_use_partition(VP9_COMP *cpi,
assert(0);
}
- pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
- mi_row, mi_col, bsize);
+ pl = partition_plane_context(xd, mi_row, mi_col, bsize);
if (last_part_rate < INT_MAX) {
last_part_rate += x->partition_cost[pl][partition];
last_part_rd = RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist);
}
- if (cpi->sf.adjust_partitioning_from_last_frame
+ if (do_partition_search
+ && cpi->sf.adjust_partitioning_from_last_frame
&& cpi->sf.partition_search_type == SEARCH_PARTITION
&& partition != PARTITION_SPLIT && bsize > BLOCK_8X8
- && (mi_row + ms < cm->mi_rows || mi_row + (ms >> 1) == cm->mi_rows)
- && (mi_col + ms < cm->mi_cols || mi_col + (ms >> 1) == cm->mi_cols)) {
+ && (mi_row + mi_step < cm->mi_rows ||
+ mi_row + (mi_step >> 1) == cm->mi_rows)
+ && (mi_col + mi_step < cm->mi_cols ||
+ mi_col + (mi_step >> 1) == cm->mi_cols)) {
BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
chosen_rate = 0;
chosen_dist = 0;
@@ -1800,8 +1761,8 @@ static void rd_use_partition(VP9_COMP *cpi,
// Split partition.
for (i = 0; i < 4; i++) {
- int x_idx = (i & 1) * (num_4x4_blocks_wide >> 2);
- int y_idx = (i >> 1) * (num_4x4_blocks_wide >> 2);
+ int x_idx = (i & 1) * (mi_step >> 1);
+ int y_idx = (i >> 1) * (mi_step >> 1);
int rt = 0;
int64_t dt = 0;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
@@ -1835,14 +1796,11 @@ static void rd_use_partition(VP9_COMP *cpi,
encode_sb(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, 0,
split_subsize);
- pl = partition_plane_context(cpi->above_seg_context,
- cpi->left_seg_context,
- mi_row + y_idx, mi_col + x_idx,
+ pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx,
split_subsize);
chosen_rate += x->partition_cost[pl][PARTITION_NONE];
}
- pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
- mi_row, mi_col, bsize);
+ pl = partition_plane_context(xd, mi_row, mi_col, bsize);
if (chosen_rate < INT_MAX) {
chosen_rate += x->partition_cost[pl][PARTITION_SPLIT];
chosen_rd = RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist);
@@ -1880,14 +1838,14 @@ static void rd_use_partition(VP9_COMP *cpi,
// and and if necessary apply a Q delta using segmentation to get
// closer to the target.
if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) {
- select_in_frame_q_segment(cpi, mi_row, mi_col,
- output_enabled, chosen_rate);
- }
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
- cpi->cyclic_refresh.projected_rate_sb = chosen_rate;
- cpi->cyclic_refresh.projected_dist_sb = chosen_dist;
+ vp9_select_in_frame_q_segment(cpi, mi_row, mi_col,
+ output_enabled, chosen_rate);
}
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+ vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
+ chosen_rate, chosen_dist);
+
encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize);
}
@@ -1951,77 +1909,71 @@ static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = {
// Look at neighboring blocks and set a min and max partition size based on
// what they chose.
static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
- int row, int col,
+ int mi_row, int mi_col,
BLOCK_SIZE *min_block_size,
BLOCK_SIZE *max_block_size) {
- VP9_COMMON * const cm = &cpi->common;
+ VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
- MODE_INFO ** mi_8x8 = xd->mi_8x8;
- MODE_INFO ** prev_mi_8x8 = xd->prev_mi_8x8;
-
+ MODE_INFO **mi_8x8 = xd->mi_8x8;
const int left_in_image = xd->left_available && mi_8x8[-1];
const int above_in_image = xd->up_available &&
mi_8x8[-xd->mode_info_stride];
- MODE_INFO ** above_sb64_mi_8x8;
- MODE_INFO ** left_sb64_mi_8x8;
+ MODE_INFO **above_sb64_mi_8x8;
+ MODE_INFO **left_sb64_mi_8x8;
- int row8x8_remaining = tile->mi_row_end - row;
- int col8x8_remaining = tile->mi_col_end - col;
+ int row8x8_remaining = tile->mi_row_end - mi_row;
+ int col8x8_remaining = tile->mi_col_end - mi_col;
int bh, bw;
-
+ BLOCK_SIZE min_size = BLOCK_4X4;
+ BLOCK_SIZE max_size = BLOCK_64X64;
// Trap case where we do not have a prediction.
- if (!left_in_image && !above_in_image &&
- ((cm->frame_type == KEY_FRAME) || !cm->prev_mi)) {
- *min_block_size = BLOCK_4X4;
- *max_block_size = BLOCK_64X64;
- } else {
+ if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) {
// Default "min to max" and "max to min"
- *min_block_size = BLOCK_64X64;
- *max_block_size = BLOCK_4X4;
+ min_size = BLOCK_64X64;
+ max_size = BLOCK_4X4;
// NOTE: each call to get_sb_partition_size_range() uses the previous
// passed in values for min and max as a starting point.
- //
// Find the min and max partition used in previous frame at this location
- if (cm->prev_mi && (cm->frame_type != KEY_FRAME)) {
- get_sb_partition_size_range(cpi, prev_mi_8x8,
- min_block_size, max_block_size);
+ if (cm->frame_type != KEY_FRAME) {
+ MODE_INFO **const prev_mi =
+ &cm->prev_mi_grid_visible[mi_row * xd->mode_info_stride + mi_col];
+ get_sb_partition_size_range(cpi, prev_mi, &min_size, &max_size);
}
-
// Find the min and max partition sizes used in the left SB64
if (left_in_image) {
left_sb64_mi_8x8 = &mi_8x8[-MI_BLOCK_SIZE];
get_sb_partition_size_range(cpi, left_sb64_mi_8x8,
- min_block_size, max_block_size);
+ &min_size, &max_size);
}
-
// Find the min and max partition sizes used in the above SB64.
if (above_in_image) {
above_sb64_mi_8x8 = &mi_8x8[-xd->mode_info_stride * MI_BLOCK_SIZE];
get_sb_partition_size_range(cpi, above_sb64_mi_8x8,
- min_block_size, max_block_size);
+ &min_size, &max_size);
+ }
+ // adjust observed min and max
+ if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
+ min_size = min_partition_size[min_size];
+ max_size = max_partition_size[max_size];
}
}
- // adjust observed min and max
- if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
- *min_block_size = min_partition_size[*min_block_size];
- *max_block_size = max_partition_size[*max_block_size];
- }
-
- // Check border cases where max and min from neighbours may not be legal.
- *max_block_size = find_partition_size(*max_block_size,
- row8x8_remaining, col8x8_remaining,
- &bh, &bw);
- *min_block_size = MIN(*min_block_size, *max_block_size);
+ // Check border cases where max and min from neighbors may not be legal.
+ max_size = find_partition_size(max_size,
+ row8x8_remaining, col8x8_remaining,
+ &bh, &bw);
+ min_size = MIN(min_size, max_size);
// When use_square_partition_only is true, make sure at least one square
// partition is allowed by selecting the next smaller square size as
// *min_block_size.
if (cpi->sf.use_square_partition_only &&
- next_square_size[*max_block_size] < *min_block_size) {
- *min_block_size = next_square_size[*max_block_size];
+ next_square_size[max_size] < min_size) {
+ min_size = next_square_size[max_size];
}
+ *min_block_size = min_size;
+ *max_block_size = max_size;
}
static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
@@ -2041,7 +1993,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
int64_t *dist, int do_recon, int64_t best_rd) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
- const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8];
TOKENEXTRA *tp_orig = *tp;
@@ -2054,8 +2007,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
int do_split = bsize >= BLOCK_8X8;
int do_rect = 1;
// Override skipping rectangular partition operations for edge blocks
- const int force_horz_split = (mi_row + ms >= cm->mi_rows);
- const int force_vert_split = (mi_col + ms >= cm->mi_cols);
+ const int force_horz_split = (mi_row + mi_step >= cm->mi_rows);
+ const int force_vert_split = (mi_col + mi_step >= cm->mi_cols);
const int xss = x->e_mbd.plane[1].subsampling_x;
const int yss = x->e_mbd.plane[1].subsampling_y;
@@ -2081,6 +2034,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
if (bsize == BLOCK_16X16) {
set_offsets(cpi, tile, mi_row, mi_col, bsize);
x->mb_energy = vp9_block_energy(cpi, x, bsize);
+ } else {
+ x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize);
}
// Determine partition types in search according to the speed features.
@@ -2122,9 +2077,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
ctx, best_rd);
if (this_rate != INT_MAX) {
if (bsize >= BLOCK_8X8) {
- pl = partition_plane_context(cpi->above_seg_context,
- cpi->left_seg_context,
- mi_row, mi_col, bsize);
+ pl = partition_plane_context(xd, mi_row, mi_col, bsize);
this_rate += x->partition_cost[pl][PARTITION_NONE];
}
sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
@@ -2169,8 +2122,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
if (do_split) {
subsize = get_subsize(bsize, PARTITION_SPLIT);
for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
- const int x_idx = (i & 1) * ms;
- const int y_idx = (i >> 1) * ms;
+ const int x_idx = (i & 1) * mi_step;
+ const int y_idx = (i >> 1) * mi_step;
if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
continue;
@@ -2194,9 +2147,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
}
}
if (sum_rd < best_rd && i == 4) {
- pl = partition_plane_context(cpi->above_seg_context,
- cpi->left_seg_context,
- mi_row, mi_col, bsize);
+ pl = partition_plane_context(xd, mi_row, mi_col, bsize);
sum_rate += x->partition_cost[pl][PARTITION_SPLIT];
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
if (sum_rd < best_rd) {
@@ -2228,7 +2179,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
get_block_context(x, subsize), best_rd);
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) {
+ if (sum_rd < best_rd && mi_row + mi_step < cm->mi_rows) {
update_state(cpi, get_block_context(x, subsize), mi_row, mi_col,
subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
@@ -2240,7 +2191,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
partition_none_allowed)
get_block_context(x, subsize)->pred_interp_filter =
ctx->mic.mbmi.interp_filter;
- rd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rate,
+ rd_pick_sb_modes(cpi, tile, mi_row + mi_step, mi_col, &this_rate,
&this_dist, subsize, get_block_context(x, subsize),
best_rd - sum_rd);
if (this_rate == INT_MAX) {
@@ -2252,9 +2203,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
}
}
if (sum_rd < best_rd) {
- pl = partition_plane_context(cpi->above_seg_context,
- cpi->left_seg_context,
- mi_row, mi_col, bsize);
+ pl = partition_plane_context(xd, mi_row, mi_col, bsize);
sum_rate += x->partition_cost[pl][PARTITION_HORZ];
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
if (sum_rd < best_rd) {
@@ -2281,7 +2230,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
get_block_context(x, subsize), best_rd);
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) {
+ if (sum_rd < best_rd && mi_col + mi_step < cm->mi_cols) {
update_state(cpi, get_block_context(x, subsize), mi_row, mi_col,
subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
@@ -2293,7 +2242,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
partition_none_allowed)
get_block_context(x, subsize)->pred_interp_filter =
ctx->mic.mbmi.interp_filter;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rate,
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col + mi_step, &this_rate,
&this_dist, subsize, get_block_context(x, subsize),
best_rd - sum_rd);
if (this_rate == INT_MAX) {
@@ -2305,9 +2254,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
}
}
if (sum_rd < best_rd) {
- pl = partition_plane_context(cpi->above_seg_context,
- cpi->left_seg_context,
- mi_row, mi_col, bsize);
+ pl = partition_plane_context(xd, mi_row, mi_col, bsize);
sum_rate += x->partition_cost[pl][PARTITION_VERT];
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
if (sum_rd < best_rd) {
@@ -2335,13 +2282,14 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
// and and if necessary apply a Q delta using segmentation to get
// closer to the target.
if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) {
- select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, best_rate);
- }
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
- cpi->cyclic_refresh.projected_rate_sb = best_rate;
- cpi->cyclic_refresh.projected_dist_sb = best_dist;
+ vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled,
+ best_rate);
}
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+ vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
+ best_rate, best_dist);
+
encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize);
}
if (bsize == BLOCK_64X64) {
@@ -2356,11 +2304,12 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
int mi_row, TOKENEXTRA **tp) {
VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &cpi->mb.e_mbd;
int mi_col;
// Initialize the left context for the new SB row
- vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context));
- vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context));
+ vpx_memset(&xd->left_context, 0, sizeof(xd->left_context));
+ vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
// Code each SB in the row
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
@@ -2486,11 +2435,11 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
// Note: this memset assumes above_context[0], [1] and [2]
// are allocated as part of the same buffer.
- vpx_memset(cpi->above_context[0], 0,
- sizeof(*cpi->above_context[0]) *
+ vpx_memset(xd->above_context[0], 0,
+ sizeof(*xd->above_context[0]) *
2 * aligned_mi_cols * MAX_MB_PLANE);
- vpx_memset(cpi->above_seg_context, 0,
- sizeof(*cpi->above_seg_context) * aligned_mi_cols);
+ vpx_memset(xd->above_seg_context, 0,
+ sizeof(*xd->above_seg_context) * aligned_mi_cols);
}
static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
@@ -2533,87 +2482,15 @@ static int get_skip_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs) {
return 1;
}
-static void set_txfm_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs,
- TX_SIZE tx_size) {
- int x, y;
-
- for (y = 0; y < ymbs; y++) {
- for (x = 0; x < xmbs; x++)
- mi_8x8[y * mis + x]->mbmi.tx_size = tx_size;
- }
-}
-
-static void reset_skip_txfm_size_b(const VP9_COMMON *cm, int mis,
- TX_SIZE max_tx_size, int bw, int bh,
- int mi_row, int mi_col,
- MODE_INFO **mi_8x8) {
- if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) {
- return;
- } else {
- const MB_MODE_INFO *const mbmi = &mi_8x8[0]->mbmi;
- if (mbmi->tx_size > max_tx_size) {
- const int ymbs = MIN(bh, cm->mi_rows - mi_row);
- const int xmbs = MIN(bw, cm->mi_cols - mi_col);
-
- assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) ||
- get_skip_flag(mi_8x8, mis, ymbs, xmbs));
- set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size);
- }
- }
-}
-
-static void reset_skip_txfm_size_sb(VP9_COMMON *cm, MODE_INFO **mi_8x8,
- TX_SIZE max_tx_size, int mi_row, int mi_col,
- BLOCK_SIZE bsize) {
- const int mis = cm->mode_info_stride;
- int bw, bh;
- const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2;
-
- if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
- return;
-
- bw = num_8x8_blocks_wide_lookup[mi_8x8[0]->mbmi.sb_type];
- bh = num_8x8_blocks_high_lookup[mi_8x8[0]->mbmi.sb_type];
-
- if (bw == bs && bh == bs) {
- reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, bs, mi_row, mi_col,
- mi_8x8);
- } else if (bw == bs && bh < bs) {
- reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, hbs, mi_row, mi_col,
- mi_8x8);
- reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, hbs, mi_row + hbs,
- mi_col, mi_8x8 + hbs * mis);
- } else if (bw < bs && bh == bs) {
- reset_skip_txfm_size_b(cm, mis, max_tx_size, hbs, bs, mi_row, mi_col,
- mi_8x8);
- reset_skip_txfm_size_b(cm, mis, max_tx_size, hbs, bs, mi_row,
- mi_col + hbs, mi_8x8 + hbs);
- } else {
- const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize];
- int n;
-
- assert(bw < bs && bh < bs);
-
- for (n = 0; n < 4; n++) {
- const int mi_dc = hbs * (n & 1);
- const int mi_dr = hbs * (n >> 1);
-
- reset_skip_txfm_size_sb(cm, &mi_8x8[mi_dr * mis + mi_dc], max_tx_size,
- mi_row + mi_dr, mi_col + mi_dc, subsize);
- }
- }
-}
-
static void reset_skip_txfm_size(VP9_COMMON *cm, TX_SIZE txfm_max) {
int mi_row, mi_col;
const int mis = cm->mode_info_stride;
- MODE_INFO **mi_8x8, **mi_ptr = cm->mi_grid_visible;
+ MODE_INFO **mi_ptr = cm->mi_grid_visible;
- for (mi_row = 0; mi_row < cm->mi_rows; mi_row += 8, mi_ptr += 8 * mis) {
- mi_8x8 = mi_ptr;
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += 8, mi_8x8 += 8) {
- reset_skip_txfm_size_sb(cm, mi_8x8, txfm_max, mi_row, mi_col,
- BLOCK_64X64);
+ for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) {
+ for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
+ if (mi_ptr[mi_col]->mbmi.tx_size > txfm_max)
+ mi_ptr[mi_col]->mbmi.tx_size = txfm_max;
}
}
}
@@ -2693,6 +2570,7 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
MACROBLOCKD *const xd = &x->e_mbd;
set_offsets(cpi, tile, mi_row, mi_col, bsize);
xd->mi_8x8[0]->mbmi.sb_type = bsize;
+
if (!frame_is_intra_only(cm)) {
vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col,
rate, dist, bsize);
@@ -2700,7 +2578,338 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
MB_PREDICTION_MODE intramode = DC_PRED;
set_mode_info(&xd->mi_8x8[0]->mbmi, bsize, intramode);
}
- duplicate_modeinfo_in_sb(cm, xd, mi_row, mi_col, bsize);
+ duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
+}
+
+static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x,
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize, BLOCK_SIZE subsize) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
+ PARTITION_TYPE partition = partition_lookup[bsl][subsize];
+
+ assert(bsize >= BLOCK_8X8);
+
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+ return;
+
+ switch (partition) {
+ case PARTITION_NONE:
+ set_modeinfo_offsets(cm, xd, mi_row, mi_col);
+ *(xd->mi_8x8[0]) = (get_block_context(x, subsize))->mic;
+ duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
+ break;
+ case PARTITION_VERT:
+ *get_sb_index(x, subsize) = 0;
+ set_modeinfo_offsets(cm, xd, mi_row, mi_col);
+ *(xd->mi_8x8[0]) = (get_block_context(x, subsize))->mic;
+ duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
+
+ if (mi_col + hbs < cm->mi_cols) {
+ *get_sb_index(x, subsize) = 1;
+ set_modeinfo_offsets(cm, xd, mi_row, mi_col + hbs);
+ *(xd->mi_8x8[0]) = (get_block_context(x, subsize))->mic;
+ duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, bsize);
+ }
+ break;
+ case PARTITION_HORZ:
+ *get_sb_index(x, subsize) = 0;
+ set_modeinfo_offsets(cm, xd, mi_row, mi_col);
+ *(xd->mi_8x8[0]) = (get_block_context(x, subsize))->mic;
+ duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
+ if (mi_row + hbs < cm->mi_rows) {
+ *get_sb_index(x, subsize) = 1;
+ set_modeinfo_offsets(cm, xd, mi_row + hbs, mi_col);
+ *(xd->mi_8x8[0]) = (get_block_context(x, subsize))->mic;
+ duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, bsize);
+ }
+ break;
+ case PARTITION_SPLIT:
+ *get_sb_index(x, subsize) = 0;
+ fill_mode_info_sb(cm, x, mi_row, mi_col, subsize,
+ *(get_sb_partitioning(x, subsize)));
+ *get_sb_index(x, subsize) = 1;
+ fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize,
+ *(get_sb_partitioning(x, subsize)));
+ *get_sb_index(x, subsize) = 2;
+ fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize,
+ *(get_sb_partitioning(x, subsize)));
+ *get_sb_index(x, subsize) = 3;
+ fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize,
+ *(get_sb_partitioning(x, subsize)));
+ break;
+ default:
+ break;
+ }
+}
+
+static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
+ TOKENEXTRA **tp, int mi_row,
+ int mi_col, BLOCK_SIZE bsize, int *rate,
+ int64_t *dist, int do_recon, int64_t best_rd) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
+ TOKENEXTRA *tp_orig = *tp;
+ PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize);
+ int i;
+ BLOCK_SIZE subsize;
+ int this_rate, sum_rate = 0, best_rate = INT_MAX;
+ int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX;
+ int64_t sum_rd = 0;
+ int do_split = bsize >= BLOCK_8X8;
+ int do_rect = 1;
+ // Override skipping rectangular partition operations for edge blocks
+ const int force_horz_split = (mi_row + ms >= cm->mi_rows);
+ const int force_vert_split = (mi_col + ms >= cm->mi_cols);
+ const int xss = x->e_mbd.plane[1].subsampling_x;
+ const int yss = x->e_mbd.plane[1].subsampling_y;
+
+ int partition_none_allowed = !force_horz_split && !force_vert_split;
+ int partition_horz_allowed = !force_vert_split && yss <= xss &&
+ bsize >= BLOCK_8X8;
+ int partition_vert_allowed = !force_horz_split && xss <= yss &&
+ bsize >= BLOCK_8X8;
+ (void) *tp_orig;
+
+ if (bsize < BLOCK_8X8) {
+ // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
+ // there is nothing to be done.
+ if (x->ab_index != 0) {
+ *rate = 0;
+ *dist = 0;
+ return;
+ }
+ }
+
+ assert(num_8x8_blocks_wide_lookup[bsize] ==
+ num_8x8_blocks_high_lookup[bsize]);
+
+ x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize);
+
+ // Determine partition types in search according to the speed features.
+ // The threshold set here has to be of square block size.
+ if (cpi->sf.auto_min_max_partition_size) {
+ partition_none_allowed &= (bsize <= cpi->sf.max_partition_size &&
+ bsize >= cpi->sf.min_partition_size);
+ partition_horz_allowed &= ((bsize <= cpi->sf.max_partition_size &&
+ bsize > cpi->sf.min_partition_size) ||
+ force_horz_split);
+ partition_vert_allowed &= ((bsize <= cpi->sf.max_partition_size &&
+ bsize > cpi->sf.min_partition_size) ||
+ force_vert_split);
+ do_split &= bsize > cpi->sf.min_partition_size;
+ }
+ if (cpi->sf.use_square_partition_only) {
+ partition_horz_allowed &= force_horz_split;
+ partition_vert_allowed &= force_vert_split;
+ }
+
+ if (!x->in_active_map && (partition_horz_allowed || partition_vert_allowed))
+ do_split = 0;
+
+ // PARTITION_NONE
+ if (partition_none_allowed) {
+ nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
+ &this_rate, &this_dist, bsize);
+ ctx->mic.mbmi = xd->mi_8x8[0]->mbmi;
+
+ if (this_rate != INT_MAX) {
+ int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
+ this_rate += x->partition_cost[pl][PARTITION_NONE];
+ sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
+ if (sum_rd < best_rd) {
+ int64_t stop_thresh = 4096;
+ int64_t stop_thresh_rd;
+
+ best_rate = this_rate;
+ best_dist = this_dist;
+ best_rd = sum_rd;
+ if (bsize >= BLOCK_8X8)
+ *(get_sb_partitioning(x, bsize)) = bsize;
+
+ // Adjust threshold according to partition size.
+ stop_thresh >>= 8 - (b_width_log2_lookup[bsize] +
+ b_height_log2_lookup[bsize]);
+
+ stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh);
+ // If obtained distortion is very small, choose current partition
+ // and stop splitting.
+ if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) {
+ do_split = 0;
+ do_rect = 0;
+ }
+ }
+ }
+ if (!x->in_active_map) {
+ do_split = 0;
+ do_rect = 0;
+ }
+ }
+
+ // store estimated motion vector
+ store_pred_mv(x, ctx);
+
+ // PARTITION_SPLIT
+ sum_rd = 0;
+ if (do_split) {
+ int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
+ sum_rate += x->partition_cost[pl][PARTITION_SPLIT];
+ subsize = get_subsize(bsize, PARTITION_SPLIT);
+ for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
+ const int x_idx = (i & 1) * ms;
+ const int y_idx = (i >> 1) * ms;
+
+ if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
+ continue;
+
+ *get_sb_index(x, subsize) = i;
+ load_pred_mv(x, ctx);
+
+ nonrd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx,
+ subsize, &this_rate, &this_dist, 0,
+ best_rd - sum_rd);
+
+ if (this_rate == INT_MAX) {
+ sum_rd = INT64_MAX;
+ } else {
+ sum_rate += this_rate;
+ sum_dist += this_dist;
+ sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ }
+ }
+
+ if (sum_rd < best_rd) {
+ best_rate = sum_rate;
+ best_dist = sum_dist;
+ best_rd = sum_rd;
+ *(get_sb_partitioning(x, bsize)) = subsize;
+ } else {
+ // skip rectangular partition test when larger block size
+ // gives better rd cost
+ if (cpi->sf.less_rectangular_check)
+ do_rect &= !partition_none_allowed;
+ }
+ }
+
+ // PARTITION_HORZ
+ if (partition_horz_allowed && do_rect) {
+ subsize = get_subsize(bsize, PARTITION_HORZ);
+ *get_sb_index(x, subsize) = 0;
+ if (cpi->sf.adaptive_motion_search)
+ load_pred_mv(x, ctx);
+
+ nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
+ &this_rate, &this_dist, subsize);
+
+ (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi;
+
+ sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+
+ if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) {
+ *get_sb_index(x, subsize) = 1;
+
+ load_pred_mv(x, ctx);
+
+ nonrd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col,
+ &this_rate, &this_dist, subsize);
+
+ (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi;
+
+ if (this_rate == INT_MAX) {
+ sum_rd = INT64_MAX;
+ } else {
+ int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
+ this_rate += x->partition_cost[pl][PARTITION_HORZ];
+ sum_rate += this_rate;
+ sum_dist += this_dist;
+ sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ }
+ }
+ if (sum_rd < best_rd) {
+ best_rd = sum_rd;
+ best_rate = sum_rate;
+ best_dist = sum_dist;
+ *(get_sb_partitioning(x, bsize)) = subsize;
+ }
+ }
+
+ // PARTITION_VERT
+ if (partition_vert_allowed && do_rect) {
+ subsize = get_subsize(bsize, PARTITION_VERT);
+
+ *get_sb_index(x, subsize) = 0;
+ if (cpi->sf.adaptive_motion_search)
+ load_pred_mv(x, ctx);
+
+ nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
+ &this_rate, &this_dist, subsize);
+ (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi;
+ sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) {
+ *get_sb_index(x, subsize) = 1;
+
+ load_pred_mv(x, ctx);
+
+ nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms,
+ &this_rate, &this_dist, subsize);
+
+ (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi;
+
+ if (this_rate == INT_MAX) {
+ sum_rd = INT64_MAX;
+ } else {
+ int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
+ this_rate += x->partition_cost[pl][PARTITION_VERT];
+ sum_rate += this_rate;
+ sum_dist += this_dist;
+ sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ }
+ }
+ if (sum_rd < best_rd) {
+ best_rate = sum_rate;
+ best_dist = sum_dist;
+ best_rd = sum_rd;
+ *(get_sb_partitioning(x, bsize)) = subsize;
+ }
+ }
+
+ *rate = best_rate;
+ *dist = best_dist;
+
+ if (best_rate == INT_MAX)
+ return;
+
+ // update mode info array
+ fill_mode_info_sb(cm, x, mi_row, mi_col, bsize,
+ *(get_sb_partitioning(x, bsize)));
+
+ if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon) {
+ int output_enabled = (bsize == BLOCK_64X64);
+
+ // Check the projected output rate for this SB against it's target
+ // and and if necessary apply a Q delta using segmentation to get
+ // closer to the target.
+ if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) {
+ vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled,
+ best_rate);
+ }
+
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+ vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
+ best_rate, best_dist);
+
+ encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize);
+ }
+
+ if (bsize == BLOCK_64X64) {
+ assert(tp_orig < *tp);
+ assert(best_rate < INT_MAX);
+ assert(best_dist < INT64_MAX);
+ } else {
+ assert(tp_orig == *tp);
+ }
}
static void nonrd_use_partition(VP9_COMP *cpi,
@@ -2712,12 +2921,13 @@ static void nonrd_use_partition(VP9_COMP *cpi,
int *totrate, int64_t *totdist) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
const int mis = cm->mode_info_stride;
PARTITION_TYPE partition;
BLOCK_SIZE subsize;
- int rate;
- int64_t dist;
+ int rate = INT_MAX;
+ int64_t dist = INT64_MAX;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
@@ -2733,14 +2943,17 @@ static void nonrd_use_partition(VP9_COMP *cpi,
switch (partition) {
case PARTITION_NONE:
nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
+ (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi;
break;
case PARTITION_VERT:
*get_sb_index(x, subsize) = 0;
nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
+ (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi;
if (mi_col + hbs < cm->mi_cols) {
*get_sb_index(x, subsize) = 1;
nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs,
&rate, &dist, subsize);
+ (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi;
if (rate != INT_MAX && dist != INT64_MAX &&
*totrate != INT_MAX && *totdist != INT64_MAX) {
*totrate += rate;
@@ -2751,10 +2964,12 @@ static void nonrd_use_partition(VP9_COMP *cpi,
case PARTITION_HORZ:
*get_sb_index(x, subsize) = 0;
nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
+ (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi;
if (mi_row + hbs < cm->mi_rows) {
*get_sb_index(x, subsize) = 1;
nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col,
&rate, &dist, subsize);
+ (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi;
if (rate != INT_MAX && dist != INT64_MAX &&
*totrate != INT_MAX && *totdist != INT64_MAX) {
*totrate += rate;
@@ -2764,7 +2979,6 @@ static void nonrd_use_partition(VP9_COMP *cpi,
break;
case PARTITION_SPLIT:
subsize = get_subsize(bsize, PARTITION_SPLIT);
-
*get_sb_index(x, subsize) = 0;
nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
subsize, output_enabled, totrate, totdist);
@@ -2801,10 +3015,9 @@ static void nonrd_use_partition(VP9_COMP *cpi,
}
if (bsize == BLOCK_64X64 && output_enabled) {
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
- cpi->cyclic_refresh.projected_rate_sb = *totrate;
- cpi->cyclic_refresh.projected_dist_sb = *totdist;
- }
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+ vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
+ *totrate, *totdist);
encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, bsize);
}
}
@@ -2812,33 +3025,55 @@ static void nonrd_use_partition(VP9_COMP *cpi,
static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
int mi_row, TOKENEXTRA **tp) {
VP9_COMMON *cm = &cpi->common;
+ MACROBLOCKD *xd = &cpi->mb.e_mbd;
int mi_col;
// Initialize the left context for the new SB row
- vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context));
- vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context));
+ vpx_memset(&xd->left_context, 0, sizeof(xd->left_context));
+ vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
// Code each SB in the row
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
- int dummy_rate;
- int64_t dummy_dist;
+ int dummy_rate = 0;
+ int64_t dummy_dist = 0;
const int idx_str = cm->mode_info_stride * mi_row + mi_col;
MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
+ MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
+
BLOCK_SIZE bsize = cpi->sf.partition_search_type == FIXED_PARTITION ?
cpi->sf.always_this_block_size :
get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col);
cpi->mb.source_variance = UINT_MAX;
+ vp9_zero(cpi->mb.pred_mv);
// Set the partition type of the 64X64 block
- if (cpi->sf.partition_search_type == VAR_BASED_PARTITION)
- choose_partitioning(cpi, tile, mi_row, mi_col);
- else
- set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
-
- nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, 1,
- &dummy_rate, &dummy_dist);
+ switch (cpi->sf.partition_search_type) {
+ case VAR_BASED_PARTITION:
+ choose_partitioning(cpi, tile, mi_row, mi_col);
+ nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
+ 1, &dummy_rate, &dummy_dist);
+ break;
+ case VAR_BASED_FIXED_PARTITION:
+ case FIXED_PARTITION:
+ set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
+ nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
+ 1, &dummy_rate, &dummy_dist);
+ break;
+ case REFERENCE_PARTITION:
+ if (cpi->sf.partition_check || sb_has_motion(cm, prev_mi_8x8)) {
+ nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
+ &dummy_rate, &dummy_dist, 1, INT64_MAX);
+ } else {
+ copy_partitioning(cm, mi_8x8, prev_mi_8x8);
+ nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
+ BLOCK_64X64, 1, &dummy_rate, &dummy_dist);
+ }
+ break;
+ default:
+ assert(0);
+ }
}
}
// end RTC play code
@@ -3192,9 +3427,10 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
const int mi_height = num_8x8_blocks_high_lookup[bsize];
x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 &&
- (cpi->oxcf.aq_mode != COMPLEXITY_AQ &&
- cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ) &&
- !cpi->sf.use_nonrd_pick_mode;
+ cpi->oxcf.aq_mode != COMPLEXITY_AQ &&
+ cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ &&
+ cpi->sf.allow_skip_recode;
+
x->skip_optimize = ctx->is_coded;
ctx->is_coded = 1;
x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index 703dde323..2a10bbfde 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -229,22 +229,21 @@ void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], usehp);
}
-static void inc_mvs(const int_mv mv[2], const MV ref[2], int is_compound,
+static void inc_mvs(const MB_MODE_INFO *mbmi, const int_mv mvs[2],
nmv_context_counts *counts) {
int i;
- for (i = 0; i < 1 + is_compound; ++i) {
- const MV diff = { mv[i].as_mv.row - ref[i].row,
- mv[i].as_mv.col - ref[i].col };
+
+ for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
+ const MV *ref = &mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_mv;
+ const MV diff = {mvs[i].as_mv.row - ref->row,
+ mvs[i].as_mv.col - ref->col};
vp9_inc_mv(&diff, counts);
}
}
-void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd,
- const MV best_ref_mv[2]) {
+void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd) {
const MODE_INFO *mi = xd->mi_8x8[0];
const MB_MODE_INFO *const mbmi = &mi->mbmi;
- const int is_compound = has_second_ref(mbmi);
- nmv_context_counts *counts = &cm->counts.mv;
if (mbmi->sb_type < BLOCK_8X8) {
const int num_4x4_w = num_4x4_blocks_wide_lookup[mbmi->sb_type];
@@ -255,11 +254,12 @@ void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd,
for (idx = 0; idx < 2; idx += num_4x4_w) {
const int i = idy * 2 + idx;
if (mi->bmi[i].as_mode == NEWMV)
- inc_mvs(mi->bmi[i].as_mv, best_ref_mv, is_compound, counts);
+ inc_mvs(mbmi, mi->bmi[i].as_mv, &cm->counts.mv);
}
}
- } else if (mbmi->mode == NEWMV) {
- inc_mvs(mbmi->mv, best_ref_mv, is_compound, counts);
+ } else {
+ if (mbmi->mode == NEWMV)
+ inc_mvs(mbmi, mbmi->mv, &cm->counts.mv);
}
}
diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h
index f16b2c17c..50cb9611b 100644
--- a/vp9/encoder/vp9_encodemv.h
+++ b/vp9/encoder/vp9_encodemv.h
@@ -28,8 +28,7 @@ void vp9_encode_mv(VP9_COMP *cpi, vp9_writer* w, const MV* mv, const MV* ref,
void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
const nmv_context* mvctx, int usehp);
-void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd,
- const MV best_ref_mv[2]);
+void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 8167d43a8..6d36b7cf5 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -23,6 +23,7 @@
#include "vp9/common/vp9_reconinter.h" // vp9_setup_dst_planes()
#include "vp9/common/vp9_systemdependent.h"
+#include "vp9/encoder/vp9_aq_variance.h"
#include "vp9/encoder/vp9_block.h"
#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_encodemb.h"
@@ -34,7 +35,6 @@
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rdopt.h"
-#include "vp9/encoder/vp9_vaq.h"
#include "vp9/encoder/vp9_variance.h"
#define OUTPUT_FPF 0
@@ -181,11 +181,13 @@ static void zero_stats(FIRSTPASS_STATS *section) {
section->new_mv_count = 0.0;
section->count = 0.0;
section->duration = 1.0;
+ section->spatial_layer_id = 0;
}
static void accumulate_stats(FIRSTPASS_STATS *section,
const FIRSTPASS_STATS *frame) {
section->frame += frame->frame;
+ section->spatial_layer_id = frame->spatial_layer_id;
section->intra_error += frame->intra_error;
section->coded_error += frame->coded_error;
section->sr_coded_error += frame->sr_coded_error;
@@ -255,12 +257,21 @@ static void avg_stats(FIRSTPASS_STATS *section) {
// harder frames.
static double calculate_modified_err(const VP9_COMP *cpi,
const FIRSTPASS_STATS *this_frame) {
- const struct twopass_rc *const twopass = &cpi->twopass;
- const FIRSTPASS_STATS *const stats = &twopass->total_stats;
- const double av_err = stats->ssim_weighted_pred_err / stats->count;
- double modified_error = av_err * pow(this_frame->ssim_weighted_pred_err /
- DOUBLE_DIVIDE_CHECK(av_err),
- cpi->oxcf.two_pass_vbrbias / 100.0);
+ const struct twopass_rc *twopass = &cpi->twopass;
+ const FIRSTPASS_STATS *stats;
+ double av_err;
+ double modified_error;
+
+ if (cpi->svc.number_spatial_layers > 1 &&
+ cpi->svc.number_temporal_layers == 1) {
+ twopass = &cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass;
+ }
+
+ stats = &twopass->total_stats;
+ av_err = stats->ssim_weighted_pred_err / stats->count;
+ modified_error = av_err * pow(this_frame->ssim_weighted_pred_err /
+ DOUBLE_DIVIDE_CHECK(av_err),
+ cpi->oxcf.two_pass_vbrbias / 100.0);
return fclamp(modified_error,
twopass->modified_error_min, twopass->modified_error_max);
@@ -342,7 +353,15 @@ void vp9_init_first_pass(VP9_COMP *cpi) {
}
void vp9_end_first_pass(VP9_COMP *cpi) {
- output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list);
+ if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+ int i;
+ for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
+ output_stats(&cpi->svc.layer_context[i].twopass.total_stats,
+ cpi->output_pkt_list);
+ }
+ } else {
+ output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list);
+ }
}
static vp9_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) {
@@ -464,11 +483,11 @@ void vp9_first_pass(VP9_COMP *cpi) {
int recon_yoffset, recon_uvoffset;
YV12_BUFFER_CONFIG *const lst_yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
- YV12_BUFFER_CONFIG *const gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+ YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm);
- const int recon_y_stride = lst_yv12->y_stride;
- const int recon_uv_stride = lst_yv12->uv_stride;
- const int uv_mb_height = 16 >> (lst_yv12->y_height > lst_yv12->uv_height);
+ int recon_y_stride = lst_yv12->y_stride;
+ int recon_uv_stride = lst_yv12->uv_stride;
+ int uv_mb_height = 16 >> (lst_yv12->y_height > lst_yv12->uv_height);
int64_t intra_error = 0;
int64_t coded_error = 0;
int64_t sr_coded_error = 0;
@@ -484,13 +503,43 @@ void vp9_first_pass(VP9_COMP *cpi) {
int new_mv_count = 0;
int sum_in_vectors = 0;
uint32_t lastmv_as_int = 0;
- struct twopass_rc *const twopass = &cpi->twopass;
+ struct twopass_rc *twopass = &cpi->twopass;
const MV zero_mv = {0, 0};
+ const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
vp9_clear_system_state();
+ if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+ MV_REFERENCE_FRAME ref_frame = LAST_FRAME;
+ const YV12_BUFFER_CONFIG *scaled_ref_buf = NULL;
+ twopass = &cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass;
+
+ vp9_scale_references(cpi);
+
+ // Use either last frame or alt frame for motion search.
+ if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
+ scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME);
+ ref_frame = LAST_FRAME;
+ } else if (cpi->ref_frame_flags & VP9_ALT_FLAG) {
+ scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, ALTREF_FRAME);
+ ref_frame = ALTREF_FRAME;
+ }
+
+ if (scaled_ref_buf != NULL) {
+ // Update the stride since we are using scaled reference buffer
+ first_ref_buf = scaled_ref_buf;
+ recon_y_stride = first_ref_buf->y_stride;
+ recon_uv_stride = first_ref_buf->uv_stride;
+ uv_mb_height = 16 >> (first_ref_buf->y_height > first_ref_buf->uv_height);
+ }
+
+ // Disable golden frame for svc first pass for now.
+ gld_yv12 = NULL;
+ set_ref_ptrs(cm, xd, ref_frame, NONE);
+ }
+
vp9_setup_src_planes(x, cpi->Source, 0, 0);
- vp9_setup_pre_planes(xd, 0, lst_yv12, 0, 0, NULL);
+ vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL);
vp9_setup_dst_planes(xd, new_yv12, 0, 0);
xd->mi_8x8 = cm->mi_grid_visible;
@@ -583,7 +632,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
int tmp_err, motion_error;
int_mv mv, tmp_mv;
- xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset;
+ xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
motion_error = zz_motion_search(x);
// Assume 0,0 motion with no mv overhead.
mv.as_int = tmp_mv.as_int = 0;
@@ -615,7 +664,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
}
// Search in an older reference frame.
- if (cm->current_video_frame > 1) {
+ if (cm->current_video_frame > 1 && gld_yv12 != NULL) {
// Assume 0,0 motion with no mv overhead.
int gf_motion_error;
@@ -633,9 +682,9 @@ void vp9_first_pass(VP9_COMP *cpi) {
++second_ref_count;
// Reset to last frame as reference buffer.
- xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset;
- xd->plane[1].pre[0].buf = lst_yv12->u_buffer + recon_uvoffset;
- xd->plane[2].pre[0].buf = lst_yv12->v_buffer + recon_uvoffset;
+ xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
+ xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset;
+ xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset;
// In accumulating a score for the older reference frame take the
// best of the motion predicted score and the intra coded error
@@ -743,6 +792,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
FIRSTPASS_STATS fps;
fps.frame = cm->current_video_frame;
+ fps.spatial_layer_id = cpi->svc.spatial_layer_id;
fps.intra_error = (double)(intra_error >> 8);
fps.coded_error = (double)(coded_error >> 8);
fps.sr_coded_error = (double)(sr_coded_error >> 8);
@@ -792,20 +842,28 @@ void vp9_first_pass(VP9_COMP *cpi) {
(twopass->this_frame_stats.pcnt_inter > 0.20) &&
((twopass->this_frame_stats.intra_error /
DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) {
- vp8_yv12_copy_frame(lst_yv12, gld_yv12);
+ if (gld_yv12 != NULL) {
+ vp8_yv12_copy_frame(lst_yv12, gld_yv12);
+ }
twopass->sr_update_lag = 1;
} else {
++twopass->sr_update_lag;
}
- // Swap frame pointers so last frame refers to the frame we just compressed.
- swap_yv12(lst_yv12, new_yv12);
+
+ if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+ vp9_update_reference_frames(cpi);
+ } else {
+ // Swap frame pointers so last frame refers to the frame we just compressed.
+ swap_yv12(lst_yv12, new_yv12);
+ }
vp9_extend_frame_borders(lst_yv12);
// Special case for the first frame. Copy into the GF buffer as a second
// reference.
- if (cm->current_video_frame == 0)
+ if (cm->current_video_frame == 0 && gld_yv12 != NULL) {
vp8_yv12_copy_frame(lst_yv12, gld_yv12);
+ }
// Use this to see what the first pass reconstruction looks like.
if (0) {
@@ -884,8 +942,15 @@ extern void vp9_new_framerate(VP9_COMP *cpi, double framerate);
void vp9_init_second_pass(VP9_COMP *cpi) {
FIRSTPASS_STATS this_frame;
const FIRSTPASS_STATS *start_pos;
- struct twopass_rc *const twopass = &cpi->twopass;
+ struct twopass_rc *twopass = &cpi->twopass;
const VP9_CONFIG *const oxcf = &cpi->oxcf;
+ const int is_spatial_svc = (cpi->svc.number_spatial_layers > 1) &&
+ (cpi->svc.number_temporal_layers == 1);
+ double frame_rate;
+
+ if (is_spatial_svc) {
+ twopass = &cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass;
+ }
zero_stats(&twopass->total_stats);
zero_stats(&twopass->total_left_stats);
@@ -896,30 +961,44 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
twopass->total_stats = *twopass->stats_in_end;
twopass->total_left_stats = twopass->total_stats;
+ frame_rate = 10000000.0 * twopass->total_stats.count /
+ twopass->total_stats.duration;
// Each frame can have a different duration, as the frame rate in the source
// isn't guaranteed to be constant. The frame rate prior to the first frame
// encoded in the second pass is a guess. However, the sum duration is not.
// It is calculated based on the actual durations of all frames from the
// first pass.
- vp9_new_framerate(cpi, 10000000.0 * twopass->total_stats.count /
- twopass->total_stats.duration);
+
+ if (is_spatial_svc) {
+ vp9_update_spatial_layer_framerate(cpi, frame_rate);
+ twopass->bits_left =
+ (int64_t)(twopass->total_stats.duration *
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id].target_bandwidth /
+ 10000000.0);
+ } else {
+ vp9_new_framerate(cpi, frame_rate);
+ twopass->bits_left = (int64_t)(twopass->total_stats.duration *
+ oxcf->target_bandwidth / 10000000.0);
+ }
cpi->output_framerate = oxcf->framerate;
- twopass->bits_left = (int64_t)(twopass->total_stats.duration *
- oxcf->target_bandwidth / 10000000.0);
// Calculate a minimum intra value to be used in determining the IIratio
// scores used in the second pass. We have this minimum to make sure
// that clips that are static but "low complexity" in the intra domain
// are still boosted appropriately for KF/GF/ARF.
- twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs;
- twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs;
+ if (!is_spatial_svc) {
+ // We don't know the number of MBs for each layer at this point.
+ // So we will do it later.
+ twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs;
+ twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs;
+ }
// This variable monitors how far behind the second ref update is lagging.
twopass->sr_update_lag = 1;
- // Scan the first pass file and calculate an average Intra / Inter error score
- // ratio for the sequence.
+ // Scan the first pass file and calculate an average Intra / Inter error
+ // score ratio for the sequence.
{
double sum_iiratio = 0.0;
start_pos = twopass->stats_in;
@@ -1566,8 +1645,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Calculate the bits to be allocated to the group as a whole.
if (twopass->kf_group_bits > 0 && twopass->kf_group_error_left > 0) {
- twopass->gf_group_bits = (int64_t)(cpi->twopass.kf_group_bits *
- (gf_group_err / cpi->twopass.kf_group_error_left));
+ twopass->gf_group_bits = (int64_t)(twopass->kf_group_bits *
+ (gf_group_err / twopass->kf_group_error_left));
} else {
twopass->gf_group_bits = 0;
}
@@ -2138,14 +2217,24 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
struct twopass_rc *const twopass = &cpi->twopass;
- const int frames_left = (int)(twopass->total_stats.count -
- cm->current_video_frame);
+ int frames_left;
FIRSTPASS_STATS this_frame;
FIRSTPASS_STATS this_frame_copy;
double this_frame_intra_error;
double this_frame_coded_error;
int target;
+ LAYER_CONTEXT *lc = NULL;
+ int is_spatial_svc = (cpi->use_svc && cpi->svc.number_temporal_layers == 1);
+
+ if (is_spatial_svc) {
+ lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
+ frames_left = (int)(twopass->total_stats.count -
+ lc->current_video_frame_in_layer);
+ } else {
+ frames_left = (int)(twopass->total_stats.count -
+ cm->current_video_frame);
+ }
if (!twopass->stats_in)
return;
@@ -2158,9 +2247,15 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
vp9_clear_system_state();
+ if (is_spatial_svc && twopass->kf_intra_err_min == 0) {
+ twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs;
+ twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs;
+ }
+
if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
twopass->active_worst_quality = cpi->oxcf.cq_level;
- } else if (cm->current_video_frame == 0) {
+ } else if (cm->current_video_frame == 0 ||
+ (is_spatial_svc && lc->current_video_frame_in_layer == 0)) {
// Special case code for first frame.
const int section_target_bandwidth = (int)(twopass->bits_left /
frames_left);
@@ -2183,6 +2278,11 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
// Define next KF group and assign bits to it.
this_frame_copy = this_frame;
find_next_key_frame(cpi, &this_frame_copy);
+ // Don't place key frame in any enhancement layers in spatial svc
+ if (cpi->use_svc && cpi->svc.number_temporal_layers == 1 &&
+ cpi->svc.spatial_layer_id > 0) {
+ cm->frame_type = INTER_FRAME;
+ }
} else {
cm->frame_type = INTER_FRAME;
}
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index 278b22c5c..9268c38a8 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -35,6 +35,7 @@ typedef struct {
double new_mv_count;
double duration;
double count;
+ int64_t spatial_layer_id;
} FIRSTPASS_STATS;
struct twopass_rc {
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 2ae8a2aa7..e0299f07f 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -23,6 +23,11 @@
// #define NEW_DIAMOND_SEARCH
+static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
+ const MV *mv) {
+ return &buf->buf[mv->row * buf->stride + mv->col];
+}
+
void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) {
int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
@@ -370,9 +375,9 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x,
unsigned int sse;
unsigned int whichdir;
int thismse;
- unsigned int halfiters = iters_per_step;
- unsigned int quarteriters = iters_per_step;
- unsigned int eighthiters = iters_per_step;
+ const unsigned int halfiters = iters_per_step;
+ const unsigned int quarteriters = iters_per_step;
+ const unsigned int eighthiters = iters_per_step;
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
const int y_stride = xd->plane[0].pre[0].stride;
@@ -399,7 +404,7 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x,
// calculate central point error
// TODO(yunqingwang): central pointer error was already calculated in full-
// pixel search, and can be passed in this function.
- comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
+ vp9_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -736,7 +741,6 @@ int vp9_get_mvpred_av_var(const MACROBLOCK *x,
const vp9_variance_fn_ptr_t *vfp,
int use_mvcost) {
unsigned int bestsad;
- MV this_mv;
const MACROBLOCKD *const xd = &x->e_mbd;
const uint8_t *what = x->plane[0].src.buf;
const int what_stride = x->plane[0].src.stride;
@@ -744,8 +748,7 @@ int vp9_get_mvpred_av_var(const MACROBLOCK *x,
const uint8_t *base_offset = xd->plane[0].pre[0].buf;
const uint8_t *this_offset = base_offset + (best_mv->row * in_what_stride) +
best_mv->col;
- this_mv.row = best_mv->row * 8;
- this_mv.col = best_mv->col * 8;
+ const MV this_mv = {best_mv->row * 8, best_mv->col * 8};
return vfp->svaf(this_offset, in_what_stride, 0, 0, what, what_stride,
&bestsad, second_pred) +
(use_mvcost ? mv_err_cost(&this_mv, center_mv, x->nmvjointcost,
@@ -908,7 +911,6 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
const int what_stride = x->plane[0].src.stride;
const uint8_t *in_what;
const int in_what_stride = xd->plane[0].pre[0].stride;
- MV this_mv;
unsigned int bestsad = INT_MAX;
int ref_row, ref_col;
@@ -960,8 +962,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
for (i = 0; i < 4; ++i) {
if (sad_array[i] < bestsad) {
- this_mv.row = ref_row + tr;
- this_mv.col = ref_col + tc + i;
+ const MV this_mv = {ref_row + tr, ref_col + tc + i};
thissad = sad_array[i] +
mvsad_err_cost(&this_mv, &fcenter_mv,
mvjsadcost, mvsadcost, sad_per_bit);
@@ -979,8 +980,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
bestsad);
if (thissad < bestsad) {
- this_mv.row = ref_row + tr;
- this_mv.col = ref_col + tc + i;
+ const MV this_mv = {ref_row + tr, ref_col + tc + i};
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
mvjsadcost, mvsadcost, sad_per_bit);
@@ -1331,10 +1331,8 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
const MV *center_mv, MV *best_mv) {
int r, c;
const MACROBLOCKD *const xd = &x->e_mbd;
- const uint8_t *const what = x->plane[0].src.buf;
- const int what_stride = x->plane[0].src.stride;
- const uint8_t *const in_what = xd->plane[0].pre[0].buf;
- const int in_what_stride = xd->plane[0].pre[0].stride;
+ const struct buf_2d *const what = &x->plane[0].src;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[0];
const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
@@ -1342,25 +1340,22 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
const int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
- const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride +
- ref_mv->col];
- int best_sad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride,
- 0x7fffffff) +
+ int best_sad = fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit);
*best_mv = *ref_mv;
for (r = row_min; r < row_max; ++r) {
for (c = col_min; c < col_max; ++c) {
- const MV this_mv = {r, c};
- const uint8_t *check_here = &in_what[r * in_what_stride + c];
- const int sad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
- best_sad) +
- mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
+ const MV mv = {r, c};
+ const int sad = fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) +
+ mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
+ sad_per_bit);
if (sad < best_sad) {
best_sad = sad;
- *best_mv = this_mv;
+ *best_mv = mv;
}
}
}
@@ -1472,7 +1467,6 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
MV this_mv;
unsigned int bestsad = INT_MAX;
int r, c;
- unsigned int thissad;
int ref_row = ref_mv->row;
int ref_col = ref_mv->col;
@@ -1512,7 +1506,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
for (i = 0; i < 8; i++) {
- thissad = (unsigned int)sad_array8[i];
+ unsigned int thissad = (unsigned int)sad_array8[i];
if (thissad < bestsad) {
this_mv.col = c;
@@ -1537,12 +1531,12 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
for (i = 0; i < 3; i++) {
- thissad = sad_array[i];
+ unsigned int thissad = sad_array[i];
if (thissad < bestsad) {
this_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1557,8 +1551,8 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
}
while (c < col_max) {
- thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
- bestsad);
+ unsigned int thissad = fn_ptr->sdf(what, what_stride,
+ check_here, in_what_stride, bestsad);
if (thissad < bestsad) {
this_mv.col = c;
@@ -1585,41 +1579,34 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x,
const vp9_variance_fn_ptr_t *fn_ptr,
int *mvjcost, int *mvcost[2],
const MV *center_mv) {
- const MACROBLOCKD *const xd = &x->e_mbd;
const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
- int i, j;
-
- const int what_stride = x->plane[0].src.stride;
- const uint8_t *const what = x->plane[0].src.buf;
- const int in_what_stride = xd->plane[0].pre[0].stride;
- const uint8_t *const in_what = xd->plane[0].pre[0].buf;
- const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride +
- ref_mv->col];
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const struct buf_2d *const what = &x->plane[0].src;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[0];
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
const int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
- unsigned int bestsad = fn_ptr->sdf(what, what_stride, best_address,
- in_what_stride, 0x7fffffff) +
+ unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, ref_mv),
+ in_what->stride, 0x7fffffff) +
mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+ int i, j;
for (i = 0; i < search_range; i++) {
int best_site = -1;
for (j = 0; j < 4; j++) {
- const MV this_mv = {ref_mv->row + neighbors[j].row,
- ref_mv->col + neighbors[j].col};
- if (is_mv_in(x, &this_mv)) {
- const uint8_t *check_here = &in_what[this_mv.row * in_what_stride +
- this_mv.col];
- unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
- in_what_stride, bestsad);
- if (thissad < bestsad) {
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, error_per_bit);
-
- if (thissad < bestsad) {
- bestsad = thissad;
+ const MV mv = {ref_mv->row + neighbors[j].row,
+ ref_mv->col + neighbors[j].col};
+ if (is_mv_in(x, &mv)) {
+ unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &mv), in_what->stride, best_sad);
+ if (sad < best_sad) {
+ sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
+ error_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
best_site = j;
}
}
@@ -1633,7 +1620,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x,
ref_mv->col += neighbors[best_site].col;
}
}
- return bestsad;
+ return best_sad;
}
int vp9_refining_search_sadx4(const MACROBLOCK *x,
@@ -1702,8 +1689,9 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x,
if (is_mv_in(x, &this_mv)) {
const uint8_t *check_here = neighbors[j].row * in_what_stride +
neighbors[j].col + best_address;
- unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
- in_what_stride, bestsad);
+ unsigned int thissad = fn_ptr->sdf(what, what_stride,
+ check_here, in_what_stride,
+ bestsad);
if (thissad < bestsad) {
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
@@ -1740,48 +1728,36 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
int *mvjcost, int *mvcost[2],
const MV *center_mv,
const uint8_t *second_pred, int w, int h) {
- const MACROBLOCKD *const xd = &x->e_mbd;
const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
{-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
- int i, j;
-
- const uint8_t *what = x->plane[0].src.buf;
- const int what_stride = x->plane[0].src.stride;
- const uint8_t *in_what = xd->plane[0].pre[0].buf;
- const int in_what_stride = xd->plane[0].pre[0].stride;
- const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride +
- ref_mv->col];
- unsigned int thissad;
- MV this_mv;
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const struct buf_2d *const what = &x->plane[0].src;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[0];
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-
const int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
- /* Get compound pred by averaging two pred blocks. */
- unsigned int bestsad = fn_ptr->sdaf(what, what_stride,
- best_address, in_what_stride,
- second_pred, 0x7fffffff) +
+ unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride,
+ get_buf_from_mv(in_what, ref_mv), in_what->stride,
+ second_pred, 0x7fffffff) +
mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+ int i, j;
for (i = 0; i < search_range; ++i) {
int best_site = -1;
- for (j = 0; j < 8; j++) {
- this_mv.row = ref_mv->row + neighbors[j].row;
- this_mv.col = ref_mv->col + neighbors[j].col;
+ for (j = 0; j < 8; ++j) {
+ const MV mv = {ref_mv->row + neighbors[j].row,
+ ref_mv->col + neighbors[j].col};
- if (is_mv_in(x, &this_mv)) {
- const uint8_t *check_here = &in_what[this_mv.row * in_what_stride +
- this_mv.col];
-
- thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride,
- second_pred, bestsad);
- if (thissad < bestsad) {
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ if (is_mv_in(x, &mv)) {
+ unsigned int sad = fn_ptr->sdaf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &mv), in_what->stride,
+ second_pred, best_sad);
+ if (sad < best_sad) {
+ sad += mvsad_err_cost(&mv, &fcenter_mv,
mvjsadcost, mvsadcost, error_per_bit);
- if (thissad < bestsad) {
- bestsad = thissad;
+ if (sad < best_sad) {
+ best_sad = sad;
best_site = j;
}
}
@@ -1795,5 +1771,5 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
ref_mv->col += neighbors[best_site].col;
}
}
- return bestsad;
+ return best_sad;
}
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index ca91a67d5..f112fa24f 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -27,8 +27,10 @@
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/common/vp9_tile_common.h"
+#include "vp9/encoder/vp9_aq_complexity.h"
+#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
+#include "vp9/encoder/vp9_aq_variance.h"
#include "vp9/encoder/vp9_bitstream.h"
-#include "vp9/encoder/vp9_craq.h"
#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_firstpass.h"
@@ -38,17 +40,11 @@
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_segmentation.h"
+#include "vp9/encoder/vp9_speed_features.h"
#include "vp9/encoder/vp9_temporal_filter.h"
-#include "vp9/encoder/vp9_vaq.h"
#include "vp9/encoder/vp9_resize.h"
#include "vp9/encoder/vp9_svc_layercontext.h"
-#define ALL_INTRA_MODES 0x3FF
-#define INTRA_DC_ONLY 0x01
-#define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED))
-#define INTRA_DC_H_V ((1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED))
-#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED))
-
void vp9_coef_tree_initialize();
#define DEFAULT_INTERP_FILTER SWITCHABLE
@@ -62,12 +58,6 @@ void vp9_coef_tree_initialize();
// now so that HIGH_PRECISION is always
// chosen.
-// Masks for partially or completely disabling split mode
-#define DISABLE_ALL_SPLIT 0x3F
-#define DISABLE_ALL_INTER_SPLIT 0x1F
-#define DISABLE_COMPOUND_SPLIT 0x18
-#define LAST_AND_INTRA_SPLIT_ONLY 0x1E
-
// Max rate target for 1080P and below encodes under normal circumstances
// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB
#define MAX_MB_RATE 250
@@ -103,9 +93,6 @@ FILE *keyfile;
void vp9_init_quantizer(VP9_COMP *cpi);
-static const double in_frame_q_adj_ratio[MAX_SEGMENTS] =
- {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
-
static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
switch (mode) {
case NORMAL:
@@ -144,17 +131,33 @@ static void set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) {
}
}
+static void setup_key_frame(VP9_COMP *cpi) {
+ vp9_setup_past_independence(&cpi->common);
+
+ // All buffers are implicitly updated on key frames.
+ cpi->refresh_golden_frame = 1;
+ cpi->refresh_alt_ref_frame = 1;
+}
+
+static void setup_inter_frame(VP9_COMMON *cm) {
+ if (cm->error_resilient_mode || cm->intra_only)
+ vp9_setup_past_independence(cm);
+
+ assert(cm->frame_context_idx < FRAME_CONTEXTS);
+ cm->fc = cm->frame_contexts[cm->frame_context_idx];
+}
+
void vp9_initialize_enc() {
static int init_done = 0;
if (!init_done) {
- vp9_initialize_common();
+ vp9_init_neighbors();
+ vp9_init_quant_tables();
+
vp9_coef_tree_initialize();
vp9_tokenize_initialize();
- vp9_init_quant_tables();
vp9_init_me_luts();
vp9_rc_init_minq_luts();
- // init_base_skip_probs();
vp9_entropy_mv_init();
vp9_entropy_mode_init();
init_done = 1;
@@ -163,6 +166,7 @@ void vp9_initialize_enc() {
static void dealloc_compressor_data(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
+ int i;
  // Delete segmentation map
vpx_free(cpi->segmentation_map);
@@ -173,11 +177,13 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
cpi->coding_context.last_frame_seg_map_copy = NULL;
vpx_free(cpi->complexity_map);
- cpi->complexity_map = 0;
- vpx_free(cpi->cyclic_refresh.map);
- cpi->cyclic_refresh.map = 0;
+ cpi->complexity_map = NULL;
+
+ vp9_cyclic_refresh_free(cpi->cyclic_refresh);
+ cpi->cyclic_refresh = NULL;
+
vpx_free(cpi->active_map);
- cpi->active_map = 0;
+ cpi->active_map = NULL;
vp9_free_frame_buffers(cm);
@@ -195,11 +201,12 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
vpx_free(cpi->mb_norm_activity_map);
cpi->mb_norm_activity_map = 0;
- vpx_free(cpi->above_context[0]);
- cpi->above_context[0] = NULL;
-
- vpx_free(cpi->above_seg_context);
- cpi->above_seg_context = NULL;
+ for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
+ LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i];
+ vpx_free(lc->rc_twopass_stats_in.buf);
+ lc->rc_twopass_stats_in.buf = NULL;
+ lc->rc_twopass_stats_in.sz = 0;
+ }
}
// Computes a q delta (in "q index" terms) to get from a starting q value
@@ -252,51 +259,12 @@ int vp9_compute_qdelta_by_rate(VP9_COMP *cpi, int base_q_index,
return target_index - base_q_index;
}
-// This function sets up a set of segments with delta Q values around
-// the baseline frame quantizer.
-static void setup_in_frame_q_adj(VP9_COMP *cpi) {
- VP9_COMMON *const cm = &cpi->common;
- struct segmentation *const seg = &cm->seg;
-
- // Make SURE use of floating point in this function is safe.
- vp9_clear_system_state();
-
- if (cm->frame_type == KEY_FRAME ||
- cpi->refresh_alt_ref_frame ||
- (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
- int segment;
-
- // Clear down the segment map
- vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
-
- // Clear down the complexity map used for rd
- vpx_memset(cpi->complexity_map, 0, cm->mi_rows * cm->mi_cols);
-
- vp9_enable_segmentation(seg);
- vp9_clearall_segfeatures(seg);
-
- // Select delta coding method
- seg->abs_delta = SEGMENT_DELTADATA;
-
- // Segment 0 "Q" feature is disabled so it defaults to the baseline Q
- vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q);
-
- // Use some of the segments for in frame Q adjustment
- for (segment = 1; segment < 2; segment++) {
- const int qindex_delta =
- vp9_compute_qdelta_by_rate(cpi,
- cm->base_qindex,
- in_frame_q_adj_ratio[segment]);
- vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q);
- vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta);
- }
- }
-}
static void configure_static_seg_features(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
+ const RATE_CONTROL *const rc = &cpi->rc;
struct segmentation *const seg = &cm->seg;
- int high_q = (int)(cpi->rc.avg_q > 48.0);
+ int high_q = (int)(rc->avg_q > 48.0);
int qi_delta;
// Disable and clear down for KF
@@ -334,9 +302,8 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
seg->update_map = 1;
seg->update_data = 1;
- qi_delta = vp9_compute_qdelta(
- cpi, cpi->rc.avg_q, (cpi->rc.avg_q * 0.875));
- vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, (qi_delta - 2));
+ qi_delta = vp9_compute_qdelta(cpi, rc->avg_q, rc->avg_q * 0.875);
+ vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2);
vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
@@ -349,16 +316,15 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
// All other frames if segmentation has been enabled
// First normal frame in a valid gf or alt ref group
- if (cpi->rc.frames_since_golden == 0) {
+ if (rc->frames_since_golden == 0) {
// Set up segment features for normal frames in an arf group
- if (cpi->rc.source_alt_ref_active) {
+ if (rc->source_alt_ref_active) {
seg->update_map = 0;
seg->update_data = 1;
seg->abs_delta = SEGMENT_DELTADATA;
- qi_delta = vp9_compute_qdelta(cpi, cpi->rc.avg_q,
- (cpi->rc.avg_q * 1.125));
- vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, (qi_delta + 2));
+ qi_delta = vp9_compute_qdelta(cpi, rc->avg_q, rc->avg_q * 1.125);
+ vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2);
vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
@@ -383,7 +349,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
vp9_clearall_segfeatures(seg);
}
- } else if (cpi->rc.is_src_frame_alt_ref) {
+ } else if (rc->is_src_frame_alt_ref) {
// Special case where we are coding over the top of a previous
// alt ref frame.
// Segment coding disabled for compred testing
@@ -456,554 +422,137 @@ static int is_slowest_mode(int mode) {
}
static void set_rd_speed_thresholds(VP9_COMP *cpi) {
- SPEED_FEATURES *sf = &cpi->sf;
int i;
// Set baseline threshold values
for (i = 0; i < MAX_MODES; ++i)
- sf->thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0;
-
- sf->thresh_mult[THR_NEARESTMV] = 0;
- sf->thresh_mult[THR_NEARESTG] = 0;
- sf->thresh_mult[THR_NEARESTA] = 0;
-
- sf->thresh_mult[THR_DC] += 1000;
-
- sf->thresh_mult[THR_NEWMV] += 1000;
- sf->thresh_mult[THR_NEWA] += 1000;
- sf->thresh_mult[THR_NEWG] += 1000;
-
- sf->thresh_mult[THR_NEARMV] += 1000;
- sf->thresh_mult[THR_NEARA] += 1000;
- sf->thresh_mult[THR_COMP_NEARESTLA] += 1000;
- sf->thresh_mult[THR_COMP_NEARESTGA] += 1000;
-
- sf->thresh_mult[THR_TM] += 1000;
-
- sf->thresh_mult[THR_COMP_NEARLA] += 1500;
- sf->thresh_mult[THR_COMP_NEWLA] += 2000;
- sf->thresh_mult[THR_NEARG] += 1000;
- sf->thresh_mult[THR_COMP_NEARGA] += 1500;
- sf->thresh_mult[THR_COMP_NEWGA] += 2000;
-
- sf->thresh_mult[THR_ZEROMV] += 2000;
- sf->thresh_mult[THR_ZEROG] += 2000;
- sf->thresh_mult[THR_ZEROA] += 2000;
- sf->thresh_mult[THR_COMP_ZEROLA] += 2500;
- sf->thresh_mult[THR_COMP_ZEROGA] += 2500;
-
- sf->thresh_mult[THR_H_PRED] += 2000;
- sf->thresh_mult[THR_V_PRED] += 2000;
- sf->thresh_mult[THR_D45_PRED ] += 2500;
- sf->thresh_mult[THR_D135_PRED] += 2500;
- sf->thresh_mult[THR_D117_PRED] += 2500;
- sf->thresh_mult[THR_D153_PRED] += 2500;
- sf->thresh_mult[THR_D207_PRED] += 2500;
- sf->thresh_mult[THR_D63_PRED] += 2500;
+ cpi->rd_thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0;
+
+ cpi->rd_thresh_mult[THR_NEARESTMV] = 0;
+ cpi->rd_thresh_mult[THR_NEARESTG] = 0;
+ cpi->rd_thresh_mult[THR_NEARESTA] = 0;
+
+ cpi->rd_thresh_mult[THR_DC] += 1000;
+
+ cpi->rd_thresh_mult[THR_NEWMV] += 1000;
+ cpi->rd_thresh_mult[THR_NEWA] += 1000;
+ cpi->rd_thresh_mult[THR_NEWG] += 1000;
+
+ cpi->rd_thresh_mult[THR_NEARMV] += 1000;
+ cpi->rd_thresh_mult[THR_NEARA] += 1000;
+ cpi->rd_thresh_mult[THR_COMP_NEARESTLA] += 1000;
+ cpi->rd_thresh_mult[THR_COMP_NEARESTGA] += 1000;
+
+ cpi->rd_thresh_mult[THR_TM] += 1000;
+
+ cpi->rd_thresh_mult[THR_COMP_NEARLA] += 1500;
+ cpi->rd_thresh_mult[THR_COMP_NEWLA] += 2000;
+ cpi->rd_thresh_mult[THR_NEARG] += 1000;
+ cpi->rd_thresh_mult[THR_COMP_NEARGA] += 1500;
+ cpi->rd_thresh_mult[THR_COMP_NEWGA] += 2000;
+
+ cpi->rd_thresh_mult[THR_ZEROMV] += 2000;
+ cpi->rd_thresh_mult[THR_ZEROG] += 2000;
+ cpi->rd_thresh_mult[THR_ZEROA] += 2000;
+ cpi->rd_thresh_mult[THR_COMP_ZEROLA] += 2500;
+ cpi->rd_thresh_mult[THR_COMP_ZEROGA] += 2500;
+
+ cpi->rd_thresh_mult[THR_H_PRED] += 2000;
+ cpi->rd_thresh_mult[THR_V_PRED] += 2000;
+ cpi->rd_thresh_mult[THR_D45_PRED ] += 2500;
+ cpi->rd_thresh_mult[THR_D135_PRED] += 2500;
+ cpi->rd_thresh_mult[THR_D117_PRED] += 2500;
+ cpi->rd_thresh_mult[THR_D153_PRED] += 2500;
+ cpi->rd_thresh_mult[THR_D207_PRED] += 2500;
+ cpi->rd_thresh_mult[THR_D63_PRED] += 2500;
/* disable frame modes if flags not set */
if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
- sf->thresh_mult[THR_NEWMV ] = INT_MAX;
- sf->thresh_mult[THR_NEARESTMV] = INT_MAX;
- sf->thresh_mult[THR_ZEROMV ] = INT_MAX;
- sf->thresh_mult[THR_NEARMV ] = INT_MAX;
+ cpi->rd_thresh_mult[THR_NEWMV ] = INT_MAX;
+ cpi->rd_thresh_mult[THR_NEARESTMV] = INT_MAX;
+ cpi->rd_thresh_mult[THR_ZEROMV ] = INT_MAX;
+ cpi->rd_thresh_mult[THR_NEARMV ] = INT_MAX;
}
if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
- sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
- sf->thresh_mult[THR_ZEROG ] = INT_MAX;
- sf->thresh_mult[THR_NEARG ] = INT_MAX;
- sf->thresh_mult[THR_NEWG ] = INT_MAX;
+ cpi->rd_thresh_mult[THR_NEARESTG ] = INT_MAX;
+ cpi->rd_thresh_mult[THR_ZEROG ] = INT_MAX;
+ cpi->rd_thresh_mult[THR_NEARG ] = INT_MAX;
+ cpi->rd_thresh_mult[THR_NEWG ] = INT_MAX;
}
if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
- sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
- sf->thresh_mult[THR_ZEROA ] = INT_MAX;
- sf->thresh_mult[THR_NEARA ] = INT_MAX;
- sf->thresh_mult[THR_NEWA ] = INT_MAX;
+ cpi->rd_thresh_mult[THR_NEARESTA ] = INT_MAX;
+ cpi->rd_thresh_mult[THR_ZEROA ] = INT_MAX;
+ cpi->rd_thresh_mult[THR_NEARA ] = INT_MAX;
+ cpi->rd_thresh_mult[THR_NEWA ] = INT_MAX;
}
if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
(VP9_LAST_FLAG | VP9_ALT_FLAG)) {
- sf->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX;
- sf->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
- sf->thresh_mult[THR_COMP_NEARLA ] = INT_MAX;
- sf->thresh_mult[THR_COMP_NEWLA ] = INT_MAX;
+ cpi->rd_thresh_mult[THR_COMP_ZEROLA ] = INT_MAX;
+ cpi->rd_thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
+ cpi->rd_thresh_mult[THR_COMP_NEARLA ] = INT_MAX;
+ cpi->rd_thresh_mult[THR_COMP_NEWLA ] = INT_MAX;
}
if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
(VP9_GOLD_FLAG | VP9_ALT_FLAG)) {
- sf->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX;
- sf->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
- sf->thresh_mult[THR_COMP_NEARGA ] = INT_MAX;
- sf->thresh_mult[THR_COMP_NEWGA ] = INT_MAX;
+ cpi->rd_thresh_mult[THR_COMP_ZEROGA ] = INT_MAX;
+ cpi->rd_thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
+ cpi->rd_thresh_mult[THR_COMP_NEARGA ] = INT_MAX;
+ cpi->rd_thresh_mult[THR_COMP_NEWGA ] = INT_MAX;
}
}
static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
- SPEED_FEATURES *sf = &cpi->sf;
+ const SPEED_FEATURES *const sf = &cpi->sf;
int i;
for (i = 0; i < MAX_REFS; ++i)
- sf->thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0;
+ cpi->rd_thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0;
- sf->thresh_mult_sub8x8[THR_LAST] += 2500;
- sf->thresh_mult_sub8x8[THR_GOLD] += 2500;
- sf->thresh_mult_sub8x8[THR_ALTR] += 2500;
- sf->thresh_mult_sub8x8[THR_INTRA] += 2500;
- sf->thresh_mult_sub8x8[THR_COMP_LA] += 4500;
- sf->thresh_mult_sub8x8[THR_COMP_GA] += 4500;
+ cpi->rd_thresh_mult_sub8x8[THR_LAST] += 2500;
+ cpi->rd_thresh_mult_sub8x8[THR_GOLD] += 2500;
+ cpi->rd_thresh_mult_sub8x8[THR_ALTR] += 2500;
+ cpi->rd_thresh_mult_sub8x8[THR_INTRA] += 2500;
+ cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] += 4500;
+ cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] += 4500;
// Check for masked out split cases.
- for (i = 0; i < MAX_REFS; i++) {
+ for (i = 0; i < MAX_REFS; i++)
if (sf->disable_split_mask & (1 << i))
- sf->thresh_mult_sub8x8[i] = INT_MAX;
- }
+ cpi->rd_thresh_mult_sub8x8[i] = INT_MAX;
// disable mode test if frame flag is not set
if (!(cpi->ref_frame_flags & VP9_LAST_FLAG))
- sf->thresh_mult_sub8x8[THR_LAST] = INT_MAX;
+ cpi->rd_thresh_mult_sub8x8[THR_LAST] = INT_MAX;
if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG))
- sf->thresh_mult_sub8x8[THR_GOLD] = INT_MAX;
+ cpi->rd_thresh_mult_sub8x8[THR_GOLD] = INT_MAX;
if (!(cpi->ref_frame_flags & VP9_ALT_FLAG))
- sf->thresh_mult_sub8x8[THR_ALTR] = INT_MAX;
+ cpi->rd_thresh_mult_sub8x8[THR_ALTR] = INT_MAX;
if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
(VP9_LAST_FLAG | VP9_ALT_FLAG))
- sf->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX;
+ cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX;
if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
(VP9_GOLD_FLAG | VP9_ALT_FLAG))
- sf->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX;
-}
-
-static void set_good_speed_feature(VP9_COMMON *cm,
- SPEED_FEATURES *sf,
- int speed) {
- int i;
- sf->adaptive_rd_thresh = 1;
- sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW);
- if (speed == 1) {
- sf->use_square_partition_only = !frame_is_intra_only(cm);
- sf->less_rectangular_check = 1;
- sf->tx_size_search_method = frame_is_intra_only(cm)
- ? USE_FULL_RD : USE_LARGESTALL;
-
- if (MIN(cm->width, cm->height) >= 720)
- sf->disable_split_mask = cm->show_frame ?
- DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
- else
- sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
-
- sf->use_rd_breakout = 1;
- sf->adaptive_motion_search = 1;
- sf->adaptive_pred_interp_filter = 1;
- sf->auto_mv_step_size = 1;
- sf->adaptive_rd_thresh = 2;
- sf->recode_loop = ALLOW_RECODE_KFARFGF;
- sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
- sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
- sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
- }
- if (speed == 2) {
- sf->use_square_partition_only = !frame_is_intra_only(cm);
- sf->less_rectangular_check = 1;
- sf->tx_size_search_method = frame_is_intra_only(cm)
- ? USE_FULL_RD : USE_LARGESTALL;
-
- if (MIN(cm->width, cm->height) >= 720)
- sf->disable_split_mask = cm->show_frame ?
- DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
- else
- sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
-
- sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
- FLAG_SKIP_INTRA_BESTINTER |
- FLAG_SKIP_COMP_BESTINTRA |
- FLAG_SKIP_INTRA_LOWVAR;
- sf->use_rd_breakout = 1;
- sf->adaptive_motion_search = 1;
- sf->adaptive_pred_interp_filter = 2;
- sf->reference_masking = 1;
- sf->auto_mv_step_size = 1;
-
- sf->disable_filter_search_var_thresh = 50;
- sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
-
- sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
- sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
- sf->adjust_partitioning_from_last_frame = 1;
- sf->last_partitioning_redo_frequency = 3;
-
- sf->adaptive_rd_thresh = 2;
- sf->recode_loop = ALLOW_RECODE_KFARFGF;
- sf->use_lp32x32fdct = 1;
- sf->mode_skip_start = 11;
- sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
- sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
- sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
- sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
- }
- if (speed == 3) {
- sf->use_square_partition_only = 1;
- sf->tx_size_search_method = USE_LARGESTALL;
-
- if (MIN(cm->width, cm->height) >= 720)
- sf->disable_split_mask = DISABLE_ALL_SPLIT;
- else
- sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT;
-
- sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
- FLAG_SKIP_INTRA_BESTINTER |
- FLAG_SKIP_COMP_BESTINTRA |
- FLAG_SKIP_INTRA_LOWVAR;
-
- sf->use_rd_breakout = 1;
- sf->adaptive_motion_search = 1;
- sf->adaptive_pred_interp_filter = 2;
- sf->reference_masking = 1;
- sf->auto_mv_step_size = 1;
-
- sf->disable_split_var_thresh = 32;
- sf->disable_filter_search_var_thresh = 100;
- sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
-
- sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
- sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
- sf->adjust_partitioning_from_last_frame = 1;
- sf->last_partitioning_redo_frequency = 3;
-
- sf->use_uv_intra_rd_estimate = 1;
- sf->skip_encode_sb = 1;
- sf->use_lp32x32fdct = 1;
- sf->subpel_iters_per_step = 1;
- sf->use_fast_coef_updates = 2;
- sf->use_fast_coef_costing = 1;
-
- sf->adaptive_rd_thresh = 4;
- sf->mode_skip_start = 6;
- }
- if (speed == 4) {
- sf->use_square_partition_only = 1;
- sf->tx_size_search_method = USE_LARGESTALL;
- sf->disable_split_mask = DISABLE_ALL_SPLIT;
-
- sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
- FLAG_SKIP_INTRA_BESTINTER |
- FLAG_SKIP_COMP_BESTINTRA |
- FLAG_SKIP_COMP_REFMISMATCH |
- FLAG_SKIP_INTRA_LOWVAR |
- FLAG_EARLY_TERMINATE;
-
- sf->use_rd_breakout = 1;
- sf->adaptive_motion_search = 1;
- sf->adaptive_pred_interp_filter = 2;
- sf->reference_masking = 1;
- sf->auto_mv_step_size = 1;
-
- sf->disable_split_var_thresh = 64;
- sf->disable_filter_search_var_thresh = 200;
- sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
-
- sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
- sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
- sf->adjust_partitioning_from_last_frame = 1;
- sf->last_partitioning_redo_frequency = 3;
-
- sf->use_uv_intra_rd_estimate = 1;
- sf->skip_encode_sb = 1;
- sf->use_lp32x32fdct = 1;
- sf->subpel_iters_per_step = 1;
- sf->use_fast_coef_updates = 2;
- sf->use_fast_coef_costing = 1;
-
- sf->adaptive_rd_thresh = 4;
- sf->mode_skip_start = 6;
- }
- if (speed >= 5) {
- sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
- sf->partition_search_type = FIXED_PARTITION;
- sf->tx_size_search_method = frame_is_intra_only(cm) ?
- USE_FULL_RD : USE_LARGESTALL;
- sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
- FLAG_SKIP_INTRA_BESTINTER |
- FLAG_SKIP_COMP_BESTINTRA |
- FLAG_SKIP_COMP_REFMISMATCH |
- FLAG_SKIP_INTRA_LOWVAR |
- FLAG_EARLY_TERMINATE;
- sf->use_rd_breakout = 1;
- sf->use_lp32x32fdct = 1;
- sf->optimize_coefficients = 0;
- sf->auto_mv_step_size = 1;
- sf->reference_masking = 1;
-
- sf->disable_split_mask = DISABLE_ALL_SPLIT;
- sf->search_method = HEX;
- sf->subpel_iters_per_step = 1;
- sf->disable_split_var_thresh = 64;
- sf->disable_filter_search_var_thresh = 500;
- for (i = 0; i < TX_SIZES; i++) {
- sf->intra_y_mode_mask[i] = INTRA_DC_ONLY;
- sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY;
- }
- sf->use_fast_coef_updates = 2;
- sf->use_fast_coef_costing = 1;
- sf->adaptive_rd_thresh = 4;
- sf->mode_skip_start = 6;
- }
-}
-
-static void set_rt_speed_feature(VP9_COMMON *cm,
- SPEED_FEATURES *sf,
- int speed) {
- sf->static_segmentation = 0;
- sf->adaptive_rd_thresh = 1;
- sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW);
- sf->encode_breakout_thresh = 1;
- sf->use_fast_coef_costing = 1;
-
- if (speed == 1) {
- sf->use_square_partition_only = !frame_is_intra_only(cm);
- sf->less_rectangular_check = 1;
- sf->tx_size_search_method =
- frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
-
- if (MIN(cm->width, cm->height) >= 720)
- sf->disable_split_mask = cm->show_frame ?
- DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
- else
- sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
-
- sf->use_rd_breakout = 1;
- sf->adaptive_motion_search = 1;
- sf->adaptive_pred_interp_filter = 1;
- sf->auto_mv_step_size = 1;
- sf->adaptive_rd_thresh = 2;
- sf->recode_loop = ALLOW_RECODE_KFARFGF;
- sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
- sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
- sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
- sf->encode_breakout_thresh = 8;
- }
- if (speed >= 2) {
- sf->use_square_partition_only = !frame_is_intra_only(cm);
- sf->less_rectangular_check = 1;
- sf->tx_size_search_method =
- frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
-
- if (MIN(cm->width, cm->height) >= 720)
- sf->disable_split_mask = cm->show_frame ?
- DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
- else
- sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
-
- sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH
- | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA
- | FLAG_SKIP_INTRA_LOWVAR;
-
- sf->use_rd_breakout = 1;
- sf->adaptive_motion_search = 1;
- sf->adaptive_pred_interp_filter = 2;
- sf->auto_mv_step_size = 1;
- sf->reference_masking = 1;
-
- sf->disable_filter_search_var_thresh = 50;
- sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
-
- sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
- sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
- sf->adjust_partitioning_from_last_frame = 1;
- sf->last_partitioning_redo_frequency = 3;
-
- sf->adaptive_rd_thresh = 2;
- sf->recode_loop = ALLOW_RECODE_KFARFGF;
- sf->use_lp32x32fdct = 1;
- sf->mode_skip_start = 11;
- sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
- sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
- sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
- sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
- sf->encode_breakout_thresh = 200;
- }
- if (speed >= 3) {
- sf->use_square_partition_only = 1;
- sf->tx_size_search_method = USE_LARGESTALL;
-
- if (MIN(cm->width, cm->height) >= 720)
- sf->disable_split_mask = DISABLE_ALL_SPLIT;
- else
- sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT;
-
- sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH
- | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA
- | FLAG_SKIP_INTRA_LOWVAR;
-
- sf->disable_filter_search_var_thresh = 100;
- sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
- sf->use_uv_intra_rd_estimate = 1;
- sf->skip_encode_sb = 1;
- sf->subpel_iters_per_step = 1;
- sf->use_fast_coef_updates = 2;
- sf->adaptive_rd_thresh = 4;
- sf->mode_skip_start = 6;
- sf->encode_breakout_thresh = 400;
- }
- if (speed >= 4) {
- sf->optimize_coefficients = 0;
- sf->disable_split_mask = DISABLE_ALL_SPLIT;
- sf->use_fast_lpf_pick = 2;
- sf->encode_breakout_thresh = 700;
- }
- if (speed >= 5) {
- int i;
- sf->last_partitioning_redo_frequency = 4;
- sf->adaptive_rd_thresh = 5;
- sf->use_fast_coef_costing = 0;
- sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX;
- sf->adjust_partitioning_from_last_frame =
- cm->last_frame_type != cm->frame_type || (0 ==
- (cm->current_video_frame + 1) % sf->last_partitioning_redo_frequency);
- sf->subpel_force_stop = 1;
- for (i = 0; i < TX_SIZES; i++) {
- sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
- sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY;
- }
- sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY;
- sf->frame_parameter_update = 0;
- sf->encode_breakout_thresh = 1000;
- sf->search_method = FAST_HEX;
- sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV);
- sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV));
- sf->disable_inter_mode_mask[BLOCK_64X32] = ~(1 << INTER_OFFSET(NEARESTMV));
- sf->disable_inter_mode_mask[BLOCK_64X64] = ~(1 << INTER_OFFSET(NEARESTMV));
- sf->max_intra_bsize = BLOCK_32X32;
- }
- if (speed >= 6) {
- sf->partition_search_type = VAR_BASED_FIXED_PARTITION;
- sf->search_method = HEX;
- }
- if (speed >= 7) {
- sf->partition_search_type = VAR_BASED_FIXED_PARTITION;
- sf->use_nonrd_pick_mode = 1;
- sf->search_method = FAST_DIAMOND;
- }
- if (speed >= 8) {
- int i;
- for (i = 0; i < BLOCK_SIZES; ++i)
- sf->disable_inter_mode_mask[i] = 14; // only search NEARESTMV (0)
- }
+ cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX;
}
-void vp9_set_speed_features(VP9_COMP *cpi) {
- SPEED_FEATURES *sf = &cpi->sf;
- VP9_COMMON *cm = &cpi->common;
- int speed = cpi->speed;
- int i;
-
- // Convert negative speed to positive
- if (speed < 0)
- speed = -speed;
-
+static void set_speed_features(VP9_COMP *cpi) {
#if CONFIG_INTERNAL_STATS
+ int i;
for (i = 0; i < MAX_MODES; ++i)
cpi->mode_chosen_counts[i] = 0;
#endif
- // best quality defaults
- sf->frame_parameter_update = 1;
- sf->search_method = NSTEP;
- sf->recode_loop = ALLOW_RECODE;
- sf->subpel_search_method = SUBPEL_TREE;
- sf->subpel_iters_per_step = 2;
- sf->subpel_force_stop = 0;
- sf->optimize_coefficients = !cpi->oxcf.lossless;
- sf->reduce_first_step_size = 0;
- sf->auto_mv_step_size = 0;
- sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
- sf->comp_inter_joint_search_thresh = BLOCK_4X4;
- sf->adaptive_rd_thresh = 0;
- sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF;
- sf->tx_size_search_method = USE_FULL_RD;
- sf->use_lp32x32fdct = 0;
- sf->adaptive_motion_search = 0;
- sf->adaptive_pred_interp_filter = 0;
- sf->reference_masking = 0;
- sf->partition_search_type = SEARCH_PARTITION;
- sf->less_rectangular_check = 0;
- sf->use_square_partition_only = 0;
- sf->auto_min_max_partition_size = NOT_IN_USE;
- sf->max_partition_size = BLOCK_64X64;
- sf->min_partition_size = BLOCK_4X4;
- sf->adjust_partitioning_from_last_frame = 0;
- sf->last_partitioning_redo_frequency = 4;
- sf->disable_split_mask = 0;
- sf->mode_search_skip_flags = 0;
- sf->disable_split_var_thresh = 0;
- sf->disable_filter_search_var_thresh = 0;
- for (i = 0; i < TX_SIZES; i++) {
- sf->intra_y_mode_mask[i] = ALL_INTRA_MODES;
- sf->intra_uv_mode_mask[i] = ALL_INTRA_MODES;
- }
- sf->use_rd_breakout = 0;
- sf->skip_encode_sb = 0;
- sf->use_uv_intra_rd_estimate = 0;
- sf->use_fast_lpf_pick = 0;
- sf->use_fast_coef_updates = 0;
- sf->use_fast_coef_costing = 0;
- sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set
- sf->use_nonrd_pick_mode = 0;
- sf->encode_breakout_thresh = 0;
- for (i = 0; i < BLOCK_SIZES; ++i)
- sf->disable_inter_mode_mask[i] = 0;
- sf->max_intra_bsize = BLOCK_64X64;
- // This setting only takes effect when partition_search_type is set
- // to FIXED_PARTITION.
- sf->always_this_block_size = BLOCK_16X16;
-
- switch (cpi->oxcf.mode) {
- case MODE_BESTQUALITY:
- case MODE_SECONDPASS_BEST: // This is the best quality mode.
- cpi->diamond_search_sad = vp9_full_range_search;
- break;
- case MODE_FIRSTPASS:
- case MODE_GOODQUALITY:
- case MODE_SECONDPASS:
- set_good_speed_feature(cm, sf, speed);
- break;
- case MODE_REALTIME:
- set_rt_speed_feature(cm, sf, speed);
- break;
- }; /* switch */
+ vp9_set_speed_features(cpi);
// Set rd thresholds based on mode and speed setting
set_rd_speed_thresholds(cpi);
set_rd_speed_thresholds_sub8x8(cpi);
- // Slow quant, dct and trellis not worthwhile for first pass
- // so make sure they are always turned off.
- if (cpi->pass == 1) {
- sf->optimize_coefficients = 0;
- }
-
- // No recode for 1 pass.
- if (cpi->pass == 0) {
- sf->recode_loop = DISALLOW_RECODE;
- sf->optimize_coefficients = 0;
- }
-
cpi->mb.fwd_txm4x4 = vp9_fdct4x4;
if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) {
cpi->mb.fwd_txm4x4 = vp9_fwht4x4;
}
-
- if (cpi->sf.subpel_search_method == SUBPEL_TREE) {
- cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree;
- cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_tree;
- }
-
- cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1;
-
- if (cpi->encode_breakout && cpi->oxcf.mode == MODE_REALTIME &&
- sf->encode_breakout_thresh > cpi->encode_breakout)
- cpi->encode_breakout = sf->encode_breakout_thresh;
-
- if (sf->disable_split_mask == DISABLE_ALL_SPLIT)
- sf->adaptive_pred_interp_filter = 0;
}
static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
@@ -1063,24 +612,12 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) {
CHECK_MEM_ERROR(cm, cpi->mb_norm_activity_map,
vpx_calloc(sizeof(unsigned int),
cm->mb_rows * cm->mb_cols));
-
- // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm
- // block where mi unit size is 8x8.
- vpx_free(cpi->above_context[0]);
- CHECK_MEM_ERROR(cm, cpi->above_context[0],
- vpx_calloc(2 * mi_cols_aligned_to_sb(cm->mi_cols) *
- MAX_MB_PLANE,
- sizeof(*cpi->above_context[0])));
-
- vpx_free(cpi->above_seg_context);
- CHECK_MEM_ERROR(cm, cpi->above_seg_context,
- vpx_calloc(mi_cols_aligned_to_sb(cm->mi_cols),
- sizeof(*cpi->above_seg_context)));
}
static void update_frame_size(VP9_COMP *cpi) {
- VP9_COMMON *cm = &cpi->common;
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &cpi->mb.e_mbd;
vp9_update_frame_size(cm);
@@ -1111,12 +648,13 @@ static void update_frame_size(VP9_COMP *cpi) {
{
int i;
- for (i = 1; i < MAX_MB_PLANE; ++i) {
- cpi->above_context[i] = cpi->above_context[0] +
- i * sizeof(*cpi->above_context[0]) * 2 *
- mi_cols_aligned_to_sb(cm->mi_cols);
- }
+
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ xd->above_context[i] = cm->above_context +
+ i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols);
}
+
+ xd->above_seg_context = cpi->common.above_seg_context;
}
// Table that converts 0-63 Q range values passed in outside to the Qindex
@@ -1209,6 +747,37 @@ static void set_tile_limits(VP9_COMP *cpi) {
cm->log2_tile_rows = cpi->oxcf.tile_rows;
}
+static void init_rate_control(const VP9_CONFIG *oxcf, int pass,
+ RATE_CONTROL *rc) {
+ if (pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
+ rc->avg_frame_qindex[0] = oxcf->worst_allowed_q;
+ rc->avg_frame_qindex[1] = oxcf->worst_allowed_q;
+ rc->avg_frame_qindex[2] = oxcf->worst_allowed_q;
+ } else {
+ rc->avg_frame_qindex[0] = (oxcf->worst_allowed_q +
+ oxcf->best_allowed_q) / 2;
+ rc->avg_frame_qindex[1] = (oxcf->worst_allowed_q +
+ oxcf->best_allowed_q) / 2;
+ rc->avg_frame_qindex[2] = (oxcf->worst_allowed_q +
+ oxcf->best_allowed_q) / 2;
+ }
+
+ rc->last_q[0] = oxcf->best_allowed_q;
+ rc->last_q[1] = oxcf->best_allowed_q;
+ rc->last_q[2] = oxcf->best_allowed_q;
+
+ rc->buffer_level = oxcf->starting_buffer_level;
+ rc->bits_off_target = oxcf->starting_buffer_level;
+
+ rc->rolling_target_bits = rc->av_per_frame_bandwidth;
+ rc->rolling_actual_bits = rc->av_per_frame_bandwidth;
+ rc->long_rolling_target_bits = rc->av_per_frame_bandwidth;
+ rc->long_rolling_actual_bits = rc->av_per_frame_bandwidth;
+
+ rc->total_actual_bits = 0;
+ rc->total_target_vs_actual = 0;
+}
+
static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) {
VP9_COMMON *const cm = &cpi->common;
int i;
@@ -1228,43 +797,16 @@ static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) {
// Temporal scalability.
cpi->svc.number_temporal_layers = oxcf->ts_number_layers;
- if (cpi->svc.number_temporal_layers > 1 &&
- cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ if ((cpi->svc.number_temporal_layers > 1 &&
+ cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ||
+ (cpi->svc.number_spatial_layers > 1 &&
+ cpi->oxcf.mode == MODE_SECONDPASS_BEST)) {
vp9_init_layer_context(cpi);
}
// change includes all joint functionality
vp9_change_config(cpi, oxcf);
- // Initialize active best and worst q and average q values.
- if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
- cpi->rc.avg_frame_qindex[0] = cpi->oxcf.worst_allowed_q;
- cpi->rc.avg_frame_qindex[1] = cpi->oxcf.worst_allowed_q;
- cpi->rc.avg_frame_qindex[2] = cpi->oxcf.worst_allowed_q;
- } else {
- cpi->rc.avg_frame_qindex[0] = (cpi->oxcf.worst_allowed_q +
- cpi->oxcf.best_allowed_q) / 2;
- cpi->rc.avg_frame_qindex[1] = (cpi->oxcf.worst_allowed_q +
- cpi->oxcf.best_allowed_q) / 2;
- cpi->rc.avg_frame_qindex[2] = (cpi->oxcf.worst_allowed_q +
- cpi->oxcf.best_allowed_q) / 2;
- }
- cpi->rc.last_q[0] = cpi->oxcf.best_allowed_q;
- cpi->rc.last_q[1] = cpi->oxcf.best_allowed_q;
- cpi->rc.last_q[2] = cpi->oxcf.best_allowed_q;
-
- // Initialise the starting buffer levels
- cpi->rc.buffer_level = cpi->oxcf.starting_buffer_level;
- cpi->rc.bits_off_target = cpi->oxcf.starting_buffer_level;
-
- cpi->rc.rolling_target_bits = cpi->rc.av_per_frame_bandwidth;
- cpi->rc.rolling_actual_bits = cpi->rc.av_per_frame_bandwidth;
- cpi->rc.long_rolling_target_bits = cpi->rc.av_per_frame_bandwidth;
- cpi->rc.long_rolling_actual_bits = cpi->rc.av_per_frame_bandwidth;
-
- cpi->rc.total_actual_bits = 0;
- cpi->rc.total_target_vs_actual = 0;
-
cpi->static_mb_pct = 0;
cpi->lst_fb_idx = 0;
@@ -1278,15 +820,11 @@ static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) {
cpi->fixed_divide[i] = 0x80000 / i;
}
-void vp9_change_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) {
+void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) {
VP9_COMMON *const cm = &cpi->common;
- if (!cpi || !oxcf)
- return;
-
- if (cm->version != oxcf->version) {
+ if (cm->version != oxcf->version)
cm->version = oxcf->version;
- }
cpi->oxcf = *oxcf;
@@ -1327,10 +865,16 @@ void vp9_change_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) {
cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];
cpi->oxcf.lossless = oxcf->lossless;
- cpi->mb.e_mbd.itxm_add = cpi->oxcf.lossless ? vp9_iwht4x4_add
- : vp9_idct4x4_add;
+ if (cpi->oxcf.lossless) {
+ // In lossless mode, make sure right quantizer range and correct transform
+ // is set.
+ cpi->oxcf.worst_allowed_q = 0;
+ cpi->oxcf.best_allowed_q = 0;
+ cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add;
+ } else {
+ cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add;
+ }
cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL;
-
cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
cpi->refresh_golden_frame = 0;
@@ -1414,8 +958,9 @@ void vp9_change_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) {
}
update_frame_size(cpi);
- if (cpi->svc.number_temporal_layers > 1 &&
- cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ if ((cpi->svc.number_temporal_layers > 1 &&
+ cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ||
+ (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) {
vp9_update_layer_context_change_config(cpi,
(int)cpi->oxcf.target_bandwidth);
}
@@ -1606,8 +1151,9 @@ static void free_pick_mode_context(MACROBLOCK *x) {
VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) {
int i, j;
- VP9_COMP *cpi = vpx_memalign(32, sizeof(VP9_COMP));
- VP9_COMMON *cm = cpi != NULL ? &cpi->common : NULL;
+ VP9_COMP *const cpi = vpx_memalign(32, sizeof(VP9_COMP));
+ VP9_COMMON *const cm = cpi != NULL ? &cpi->common : NULL;
+ RATE_CONTROL *const rc = cpi != NULL ? &cpi->rc : NULL;
if (!cm)
return NULL;
@@ -1630,6 +1176,7 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) {
cpi->use_svc = 0;
init_config(cpi, oxcf);
+ init_rate_control(&cpi->oxcf, cpi->pass, &cpi->rc);
init_pick_mode_context(cpi);
cm->current_video_frame = 0;
@@ -1637,7 +1184,7 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) {
// Set reference frame sign bias for ALTREF frame to 1 (for now)
cm->ref_frame_sign_bias[ALTREF_FRAME] = 1;
- cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL;
+ rc->baseline_gf_interval = DEFAULT_GF_INTERVAL;
cpi->gold_is_last = 0;
cpi->alt_is_last = 0;
@@ -1652,8 +1199,8 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) {
vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
// Create a map used for cyclic background refresh.
- CHECK_MEM_ERROR(cm, cpi->cyclic_refresh.map,
- vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
+ CHECK_MEM_ERROR(cm, cpi->cyclic_refresh,
+ vp9_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols));
// And a place holder structure is the coding context
// for use if we want to save and restore it
@@ -1675,12 +1222,12 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) {
cpi->activity_avg = 90 << 12;
cpi->key_frame_frequency = cpi->oxcf.key_freq;
- cpi->rc.frames_since_key = 8; // Sensible default for first frame.
- cpi->rc.this_key_frame_forced = 0;
- cpi->rc.next_key_frame_forced = 0;
+ rc->frames_since_key = 8; // Sensible default for first frame.
+ rc->this_key_frame_forced = 0;
+ rc->next_key_frame_forced = 0;
- cpi->rc.source_alt_ref_pending = 0;
- cpi->rc.source_alt_ref_active = 0;
+ rc->source_alt_ref_pending = 0;
+ rc->source_alt_ref_active = 0;
cpi->refresh_alt_ref_frame = 0;
#if CONFIG_MULTIPLE_ARF
@@ -1736,17 +1283,17 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) {
cpi->first_time_stamp_ever = INT64_MAX;
- cpi->rc.frames_till_gf_update_due = 0;
+ rc->frames_till_gf_update_due = 0;
- cpi->rc.ni_av_qi = cpi->oxcf.worst_allowed_q;
- cpi->rc.ni_tot_qi = 0;
- cpi->rc.ni_frames = 0;
- cpi->rc.tot_q = 0.0;
- cpi->rc.avg_q = vp9_convert_qindex_to_q(cpi->oxcf.worst_allowed_q);
+ rc->ni_av_qi = cpi->oxcf.worst_allowed_q;
+ rc->ni_tot_qi = 0;
+ rc->ni_frames = 0;
+ rc->tot_q = 0.0;
+ rc->avg_q = vp9_convert_qindex_to_q(cpi->oxcf.worst_allowed_q);
- cpi->rc.rate_correction_factor = 1.0;
- cpi->rc.key_frame_rate_correction_factor = 1.0;
- cpi->rc.gf_rate_correction_factor = 1.0;
+ rc->rate_correction_factor = 1.0;
+ rc->key_frame_rate_correction_factor = 1.0;
+ rc->gf_rate_correction_factor = 1.0;
cal_nmvjointsadcost(cpi->mb.nmvjointsadcost);
cpi->mb.nmvcost[0] = &cpi->mb.nmvcosts[0][MV_MAX];
@@ -1783,13 +1330,53 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) {
const size_t packet_sz = sizeof(FIRSTPASS_STATS);
const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);
- cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
- cpi->twopass.stats_in = cpi->twopass.stats_in_start;
- cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];
- vp9_init_second_pass(cpi);
+ if (cpi->svc.number_spatial_layers > 1
+ && cpi->svc.number_temporal_layers == 1) {
+ FIRSTPASS_STATS *const stats = oxcf->two_pass_stats_in.buf;
+ FIRSTPASS_STATS *stats_copy[VPX_SS_MAX_LAYERS] = {0};
+ int i;
+
+ for (i = 0; i < oxcf->ss_number_layers; ++i) {
+ FIRSTPASS_STATS *const last_packet_for_layer =
+ &stats[packets - oxcf->ss_number_layers + i];
+ const int layer_id = (int)last_packet_for_layer->spatial_layer_id;
+ const int packets_in_layer = (int)last_packet_for_layer->count + 1;
+ if (layer_id >= 0 && layer_id < oxcf->ss_number_layers) {
+ LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id];
+
+ vpx_free(lc->rc_twopass_stats_in.buf);
+
+ lc->rc_twopass_stats_in.sz = packets_in_layer * packet_sz;
+ CHECK_MEM_ERROR(cm, lc->rc_twopass_stats_in.buf,
+ vpx_malloc(lc->rc_twopass_stats_in.sz));
+ lc->twopass.stats_in_start = lc->rc_twopass_stats_in.buf;
+ lc->twopass.stats_in = lc->twopass.stats_in_start;
+ lc->twopass.stats_in_end = lc->twopass.stats_in_start
+ + packets_in_layer - 1;
+ stats_copy[layer_id] = lc->rc_twopass_stats_in.buf;
+ }
+ }
+
+ for (i = 0; i < packets; ++i) {
+ const int layer_id = (int)stats[i].spatial_layer_id;
+ if (layer_id >= 0 && layer_id < oxcf->ss_number_layers
+ && stats_copy[layer_id] != NULL) {
+ *stats_copy[layer_id] = stats[i];
+ ++stats_copy[layer_id];
+ }
+ }
+
+ vp9_init_second_pass_spatial_svc(cpi);
+ } else {
+ cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
+ cpi->twopass.stats_in = cpi->twopass.stats_in_start;
+ cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];
+
+ vp9_init_second_pass(cpi);
+ }
}
- vp9_set_speed_features(cpi);
+ set_speed_features(cpi);
// Default rd threshold factors for mode selection
for (i = 0; i < BLOCK_SIZES; ++i) {
@@ -2155,25 +1742,11 @@ int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) {
return 0;
}
-int vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags) {
- if (ref_frame_flags > 7)
- return -1;
-
- cpi->ext_refresh_golden_frame = 0;
- cpi->ext_refresh_alt_ref_frame = 0;
- cpi->ext_refresh_last_frame = 0;
-
- if (ref_frame_flags & VP9_LAST_FLAG)
- cpi->ext_refresh_last_frame = 1;
-
- if (ref_frame_flags & VP9_GOLD_FLAG)
- cpi->ext_refresh_golden_frame = 1;
-
- if (ref_frame_flags & VP9_ALT_FLAG)
- cpi->ext_refresh_alt_ref_frame = 1;
-
+void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags) {
+ cpi->ext_refresh_golden_frame = (ref_frame_flags & VP9_GOLD_FLAG) != 0;
+ cpi->ext_refresh_alt_ref_frame = (ref_frame_flags & VP9_ALT_FLAG) != 0;
+ cpi->ext_refresh_last_frame = (ref_frame_flags & VP9_LAST_FLAG) != 0;
cpi->ext_refresh_frame_flags_pending = 1;
- return 0;
}
static YV12_BUFFER_CONFIG *get_vp9_ref_frame_buffer(VP9_COMP *cpi,
@@ -2460,7 +2033,7 @@ static int recode_loop_test(const VP9_COMP *cpi,
return force_recode;
}
-static void update_reference_frames(VP9_COMP * const cpi) {
+void vp9_update_reference_frames(VP9_COMP *cpi) {
VP9_COMMON * const cm = &cpi->common;
// At this point the new frame has been encoded.
@@ -2531,7 +2104,7 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
vpx_usec_timer_start(&timer);
- vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.use_fast_lpf_pick);
+ vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick);
vpx_usec_timer_mark(&timer);
cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
@@ -2544,7 +2117,7 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
vp9_extend_frame_inner_borders(cm->frame_to_show);
}
-static void scale_references(VP9_COMP *cpi) {
+void vp9_scale_references(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
MV_REFERENCE_FRAME ref_frame;
@@ -2670,21 +2243,21 @@ static void encode_without_recode_loop(VP9_COMP *cpi,
// other inter-frames the encoder currently uses only two contexts;
// context 1 for ALTREF frames and context 0 for the others.
if (cm->frame_type == KEY_FRAME) {
- vp9_setup_key_frame(cpi);
+ setup_key_frame(cpi);
} else {
- if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) {
+ if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc)
cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame;
- }
- vp9_setup_inter_frame(cpi);
+
+ setup_inter_frame(cm);
}
// Variance adaptive and in frame q adjustment experiments are mutually
// exclusive.
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_vaq_frame_setup(cpi);
} else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
- setup_in_frame_q_adj(cpi);
+ vp9_setup_in_frame_q_adj(cpi);
} else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
- vp9_setup_cyclic_refresh_aq(cpi);
+ vp9_cyclic_refresh_setup(cpi);
}
// transform / motion compensation build reconstruction frame
vp9_encode_frame(cpi);
@@ -2728,12 +2301,12 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
// other inter-frames the encoder currently uses only two contexts;
// context 1 for ALTREF frames and context 0 for the others.
if (cm->frame_type == KEY_FRAME) {
- vp9_setup_key_frame(cpi);
+ setup_key_frame(cpi);
} else {
- if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) {
+ if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc)
cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame;
- }
- vp9_setup_inter_frame(cpi);
+
+ setup_inter_frame(cm);
}
}
@@ -2742,7 +2315,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_vaq_frame_setup(cpi);
} else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
- setup_in_frame_q_adj(cpi);
+ vp9_setup_in_frame_q_adj(cpi);
}
// transform / motion compensation build reconstruction frame
@@ -2981,7 +2554,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
} else {
cpi->Source = cpi->un_scaled_source;
}
- scale_references(cpi);
+ vp9_scale_references(cpi);
vp9_clear_system_state();
@@ -3018,7 +2591,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// Set various flags etc to special state if it is a key frame.
if (frame_is_intra_only(cm)) {
- vp9_setup_key_frame(cpi);
+ setup_key_frame(cpi);
// Reset the loop filter deltas and segmentation map.
vp9_reset_segment_features(&cm->seg);
@@ -3100,6 +2673,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
vp9_write_yuv_frame(cpi->Source);
#endif
+ set_speed_features(cpi);
+
// Decide q and q bounds.
q = vp9_rc_pick_q_and_bounds(cpi, &bottom_index, &top_index);
@@ -3109,8 +2684,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
set_high_precision_mv(cpi, q < HIGH_PRECISION_MV_QTHRESH);
}
- vp9_set_speed_features(cpi);
-
if (cpi->sf.recode_loop == DISALLOW_RECODE) {
encode_without_recode_loop(cpi, size, dest, q);
} else {
@@ -3159,7 +2732,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
update_reference_segmentation_map(cpi);
release_scaled_references(cpi);
- update_reference_frames(cpi);
+ vp9_update_reference_frames(cpi);
for (t = TX_4X4; t <= TX_32X32; t++)
full_to_model_counts(cm->counts.coef[t], cpi->coef_counts[t]);
@@ -3241,6 +2814,15 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// Don't increment frame counters if this was an altref buffer
// update not a real frame
++cm->current_video_frame;
+ if (cpi->use_svc) {
+ LAYER_CONTEXT *lc;
+ if (cpi->svc.number_temporal_layers > 1) {
+ lc = &cpi->svc.layer_context[cpi->svc.temporal_layer_id];
+ } else {
+ lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
+ }
+ ++lc->current_video_frame_in_layer;
+ }
}
// restore prev_mi
@@ -3396,6 +2978,10 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
if (!cpi)
return -1;
+ if (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2) {
+ vp9_restore_layer_context(cpi);
+ }
+
vpx_usec_timer_start(&cmptimer);
cpi->source = NULL;
@@ -3528,7 +3114,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
if (cpi->svc.number_temporal_layers > 1 &&
cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
- vp9_update_layer_framerate(cpi);
+ vp9_update_temporal_layer_framerate(cpi);
vp9_restore_layer_context(cpi);
}
@@ -3585,12 +3171,14 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
vp9_vaq_init();
}
- if (cpi->use_svc) {
- SvcEncode(cpi, size, dest, frame_flags);
- } else if (cpi->pass == 1) {
+ if (cpi->pass == 1 &&
+ (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) {
Pass1Encode(cpi, size, dest, frame_flags);
- } else if (cpi->pass == 2) {
+ } else if (cpi->pass == 2 &&
+ (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) {
Pass2Encode(cpi, size, dest, frame_flags);
+ } else if (cpi->use_svc) {
+ SvcEncode(cpi, size, dest, frame_flags);
} else {
// One pass encode
Pass0Encode(cpi, size, dest, frame_flags);
@@ -3609,8 +3197,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
}
// Save layer specific state.
- if (cpi->svc.number_temporal_layers > 1 &&
- cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ if ((cpi->svc.number_temporal_layers > 1 &&
+ cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ||
+ (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) {
vp9_save_layer_context(cpi);
}
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 9925e5f19..6dbe4d474 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -23,6 +23,7 @@
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_lookahead.h"
@@ -30,6 +31,7 @@
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
+#include "vp9/encoder/vp9_speed_features.h"
#include "vp9/encoder/vp9_svc_layercontext.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_variance.h"
@@ -114,75 +116,6 @@ typedef enum {
} THR_MODES_SUB8X8;
typedef enum {
- DIAMOND = 0,
- NSTEP = 1,
- HEX = 2,
- BIGDIA = 3,
- SQUARE = 4,
- FAST_HEX = 5,
- FAST_DIAMOND = 6
-} SEARCH_METHODS;
-
-typedef enum {
- USE_FULL_RD = 0,
- USE_LARGESTINTRA,
- USE_LARGESTINTRA_MODELINTER,
- USE_LARGESTALL
-} TX_SIZE_SEARCH_METHOD;
-
-typedef enum {
- NOT_IN_USE = 0,
- RELAXED_NEIGHBORING_MIN_MAX = 1,
- STRICT_NEIGHBORING_MIN_MAX = 2
-} AUTO_MIN_MAX_MODE;
-
-typedef enum {
- // Terminate search early based on distortion so far compared to
- // qp step, distortion in the neighborhood of the frame, etc.
- FLAG_EARLY_TERMINATE = 1 << 0,
-
- // Skips comp inter modes if the best so far is an intra mode.
- FLAG_SKIP_COMP_BESTINTRA = 1 << 1,
-
- // Skips comp inter modes if the best single intermode so far does
- // not have the same reference as one of the two references being
- // tested.
- FLAG_SKIP_COMP_REFMISMATCH = 1 << 2,
-
- // Skips oblique intra modes if the best so far is an inter mode.
- FLAG_SKIP_INTRA_BESTINTER = 1 << 3,
-
- // Skips oblique intra modes at angles 27, 63, 117, 153 if the best
- // intra so far is not one of the neighboring directions.
- FLAG_SKIP_INTRA_DIRMISMATCH = 1 << 4,
-
- // Skips intra modes other than DC_PRED if the source variance is small
- FLAG_SKIP_INTRA_LOWVAR = 1 << 5,
-} MODE_SEARCH_SKIP_LOGIC;
-
-typedef enum {
- SUBPEL_TREE = 0,
- // Other methods to come
-} SUBPEL_SEARCH_METHODS;
-
-typedef enum {
- LAST_FRAME_PARTITION_OFF = 0,
- LAST_FRAME_PARTITION_LOW_MOTION = 1,
- LAST_FRAME_PARTITION_ALL = 2
-} LAST_FRAME_PARTITION_METHOD;
-
-typedef enum {
- // No recode.
- DISALLOW_RECODE = 0,
- // Allow recode for KF and exceeding maximum frame bandwidth.
- ALLOW_RECODE_KFMAXBW = 1,
- // Allow recode only for KF/ARF/GF frames.
- ALLOW_RECODE_KFARFGF = 2,
- // Allow recode for all frames based on bitrate constraints.
- ALLOW_RECODE = 3,
-} RECODE_LOOP_TYPE;
-
-typedef enum {
// encode_breakout is disabled.
ENCODE_BREAKOUT_DISABLED = 0,
// encode_breakout is enabled.
@@ -192,219 +125,6 @@ typedef enum {
} ENCODE_BREAKOUT_TYPE;
typedef enum {
- // Search partitions using RD/NONRD criterion
- SEARCH_PARTITION = 0,
-
- // Always use a fixed size partition
- FIXED_PARTITION = 1,
-
- // Use a fixed size partition in every 64X64 SB, where the size is
- // determined based on source variance
- VAR_BASED_FIXED_PARTITION = 2,
-
- // Use an arbitrary partitioning scheme based on source variance within
- // a 64X64 SB
- VAR_BASED_PARTITION
-} PARTITION_SEARCH_TYPE;
-
-typedef struct {
- // Frame level coding parameter update
- int frame_parameter_update;
-
- // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
- SEARCH_METHODS search_method;
-
- RECODE_LOOP_TYPE recode_loop;
-
- // Subpel_search_method can only be subpel_tree which does a subpixel
- // logarithmic search that keeps stepping at 1/2 pixel units until
- // you stop getting a gain, and then goes on to 1/4 and repeats
- // the same process. Along the way it skips many diagonals.
- SUBPEL_SEARCH_METHODS subpel_search_method;
-
- // Maximum number of steps in logarithmic subpel search before giving up.
- int subpel_iters_per_step;
-
- // Control when to stop subpel search
- int subpel_force_stop;
-
- // Thresh_mult is used to set a threshold for the rd score. A higher value
- // means that we will accept the best mode so far more often. This number
- // is used in combination with the current block size, and thresh_freq_fact
- // to pick a threshold.
- int thresh_mult[MAX_MODES];
- int thresh_mult_sub8x8[MAX_REFS];
-
- // This parameter controls the number of steps we'll do in a diamond
- // search.
- int max_step_search_steps;
-
- // This parameter controls which step in the n-step process we start at.
- // It's changed adaptively based on circumstances.
- int reduce_first_step_size;
-
- // If this is set to 1, we limit the motion search range to 2 times the
- // largest motion vector found in the last frame.
- int auto_mv_step_size;
-
- // Trellis (dynamic programming) optimization of quantized values (+1, 0).
- int optimize_coefficients;
-
- // Always set to 0. If on it enables 0 cost background transmission
- // (except for the initial transmission of the segmentation). The feature is
- // disabled because the addition of very large block sizes make the
- // backgrounds very to cheap to encode, and the segmentation we have
- // adds overhead.
- int static_segmentation;
-
- // If 1 we iterate finding a best reference for 2 ref frames together - via
- // a log search that iterates 4 times (check around mv for last for best
- // error of combined predictor then check around mv for alt). If 0 we
- // we just use the best motion vector found for each frame by itself.
- int comp_inter_joint_search_thresh;
-
- // This variable is used to cap the maximum number of times we skip testing a
- // mode to be evaluated. A high value means we will be faster.
- int adaptive_rd_thresh;
-
- // Enables skipping the reconstruction step (idct, recon) in the
- // intermediate steps assuming the last frame didn't have too many intra
- // blocks and the q is less than a threshold.
- int skip_encode_sb;
- int skip_encode_frame;
-
- // This variable allows us to reuse the last frames partition choices
- // (64x64 v 32x32 etc) for this frame. It can be set to only use the last
- // frame as a starting point in low motion scenes or always use it. If set
- // we use last partitioning_redo frequency to determine how often to redo
- // the partitioning from scratch. Adjust_partitioning_from_last_frame
- // enables us to adjust up or down one partitioning from the last frames
- // partitioning.
- LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning;
-
- // Determine which method we use to determine transform size. We can choose
- // between options like full rd, largest for prediction size, largest
- // for intra and model coefs for the rest.
- TX_SIZE_SEARCH_METHOD tx_size_search_method;
-
- // Low precision 32x32 fdct keeps everything in 16 bits and thus is less
- // precise but significantly faster than the non lp version.
- int use_lp32x32fdct;
-
- // TODO(JBB): remove this as its no longer used.
-
- // After looking at the first set of modes (set by index here), skip
- // checking modes for reference frames that don't match the reference frame
- // of the best so far.
- int mode_skip_start;
-
- // TODO(JBB): Remove this.
- int reference_masking;
-
- PARTITION_SEARCH_TYPE partition_search_type;
-
- // Used if partition_search_type = FIXED_SIZE_PARTITION
- BLOCK_SIZE always_this_block_size;
-
- // Skip rectangular partition test when partition type none gives better
- // rd than partition type split.
- int less_rectangular_check;
-
- // Disable testing non square partitions. (eg 16x32)
- int use_square_partition_only;
-
- // Sets min and max partition sizes for this 64x64 region based on the
- // same 64x64 in last encoded frame, and the left and above neighbor.
- AUTO_MIN_MAX_MODE auto_min_max_partition_size;
-
- // Min and max partition size we enable (block_size) as per auto
- // min max, but also used by adjust partitioning, and pick_partitioning.
- BLOCK_SIZE min_partition_size;
- BLOCK_SIZE max_partition_size;
-
- // Whether or not we allow partitions one smaller or one greater than the last
- // frame's partitioning. Only used if use_lastframe_partitioning is set.
- int adjust_partitioning_from_last_frame;
-
- // How frequently we re do the partitioning from scratch. Only used if
- // use_lastframe_partitioning is set.
- int last_partitioning_redo_frequency;
-
- // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable
- // it always, to allow it for only Last frame and Intra, disable it for all
- // inter modes or to enable it always.
- int disable_split_mask;
-
- // TODO(jingning): combine the related motion search speed features
- // This allows us to use motion search at other sizes as a starting
- // point for this motion search and limits the search range around it.
- int adaptive_motion_search;
-
- // Allows sub 8x8 modes to use the prediction filter that was determined
- // best for 8x8 mode. If set to 0 we always re check all the filters for
- // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter
- // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected.
- int adaptive_pred_interp_filter;
-
- // Implements various heuristics to skip searching modes
- // The heuristics selected are based on flags
- // defined in the MODE_SEARCH_SKIP_HEURISTICS enum
- unsigned int mode_search_skip_flags;
-
- // A source variance threshold below which the split mode is disabled
- unsigned int disable_split_var_thresh;
-
- // A source variance threshold below which filter search is disabled
- // Choose a very large value (UINT_MAX) to use 8-tap always
- unsigned int disable_filter_search_var_thresh;
-
- // These bit masks allow you to enable or disable intra modes for each
- // transform size separately.
- int intra_y_mode_mask[TX_SIZES];
- int intra_uv_mode_mask[TX_SIZES];
-
- // This variable enables an early break out of mode testing if the model for
- // rd built from the prediction signal indicates a value that's much
- // higher than the best rd we've seen so far.
- int use_rd_breakout;
-
- // This enables us to use an estimate for intra rd based on dc mode rather
- // than choosing an actual uv mode in the stage of encoding before the actual
- // final encode.
- int use_uv_intra_rd_estimate;
-
- // This feature controls how the loop filter level is determined:
- // 0: Try the full image with different values.
- // 1: Try a small portion of the image with different values.
- // 2: Estimate the level based on quantizer and frame type
- int use_fast_lpf_pick;
-
- // This feature limits the number of coefficients updates we actually do
- // by only looking at counts from 1/2 the bands.
- int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced
-
- // This flag controls the use of non-RD mode decision.
- int use_nonrd_pick_mode;
-
- // This variable sets the encode_breakout threshold. Currently, it is only
- // enabled in real time mode.
- int encode_breakout_thresh;
-
- // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV
- // modes are disabled in order from LSB to MSB for each BLOCK_SIZE.
- int disable_inter_mode_mask[BLOCK_SIZES];
-
- // This feature controls whether we do the expensive context update and
- // calculation in the rd coefficient costing loop.
- int use_fast_coef_costing;
-
- // This variable controls the maximum block size where intra blocks can be
- // used in inter frames.
- // TODO(aconverse): Fold this into one of the other many mode skips
- BLOCK_SIZE max_intra_bsize;
-} SPEED_FEATURES;
-
-typedef enum {
NORMAL = 0,
FOURFIVE = 1,
THREEFIVE = 2,
@@ -412,44 +132,12 @@ typedef enum {
} VPX_SCALING;
typedef enum {
- VP9_LAST_FLAG = 1 << 0,
- VP9_GOLD_FLAG = 1 << 1,
- VP9_ALT_FLAG = 1 << 2,
-} VP9_REFFRAME;
-
-typedef enum {
USAGE_LOCAL_FILE_PLAYBACK = 0,
USAGE_STREAM_FROM_SERVER = 1,
USAGE_CONSTRAINED_QUALITY = 2,
USAGE_CONSTANT_QUALITY = 3,
} END_USAGE;
-typedef struct {
- // Target percentage of blocks per frame that are cyclicly refreshed.
- int max_mbs_perframe;
- // Maximum q-delta as percentage of base q.
- int max_qdelta_perc;
- // Block size below which we don't apply cyclic refresh.
- BLOCK_SIZE min_block_size;
- // Macroblock starting index (unit of 8x8) for cycling through the frame.
- int mb_index;
- // Controls how long a block will need to wait to be refreshed again.
- int time_for_refresh;
- // Actual number of blocks that were applied delta-q (segment 1).
- int num_seg_blocks;
- // Actual encoding bits for segment 1.
- int actual_seg_bits;
- // RD mult. parameters for segment 1.
- int rdmult;
- // Cyclic refresh map.
- signed char *map;
- // Projected rate and distortion for the current superblock.
- int64_t projected_rate_sb;
- int64_t projected_dist_sb;
- // Thresholds applied to projected rate/distortion of the superblock.
- int64_t thresh_rate_sb;
- int64_t thresh_dist_sb;
-} CYCLIC_REFRESH;
typedef enum {
// Good Quality Fast Encoding. The encoder balances quality with the
// amount of time it takes to encode the output. (speed setting
@@ -665,6 +353,13 @@ typedef struct VP9_COMP {
// Ambient reconstruction err target for force key frames
int ambient_err;
+ // Thresh_mult is used to set a threshold for the rd score. A higher value
+ // means that we will accept the best mode so far more often. This number
+ // is used in combination with the current block size, and thresh_freq_fact
+ // to pick a threshold.
+ int rd_thresh_mult[MAX_MODES];
+ int rd_thresh_mult_sub8x8[MAX_REFS];
+
int rd_threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES];
int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
int rd_thresh_sub8x8[MAX_SEGMENTS][BLOCK_SIZES][MAX_REFS];
@@ -741,7 +436,7 @@ typedef struct VP9_COMP {
unsigned char *active_map;
unsigned int active_map_enabled;
- CYCLIC_REFRESH cyclic_refresh;
+ CYCLIC_REFRESH *cyclic_refresh;
fractional_mv_step_fp *find_fractional_mv_step;
fractional_mv_step_comp_fp *find_fractional_mv_step_comp;
@@ -799,10 +494,6 @@ typedef struct VP9_COMP {
unsigned int activity_avg;
unsigned int *mb_activity_map;
int *mb_norm_activity_map;
- int output_partition;
-
- // Force next frame to intra when kf_auto says so.
- int force_next_frame_intra;
int droppable;
@@ -834,13 +525,6 @@ typedef struct VP9_COMP {
// Debug / test stats
int64_t mode_test_hits[BLOCK_SIZES];
#endif
-
- // Y,U,V,(A)
- ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
- ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16];
-
- PARTITION_CONTEXT *above_seg_context;
- PARTITION_CONTEXT left_seg_context[8];
} VP9_COMP;
void vp9_initialize_enc();
@@ -848,7 +532,7 @@ void vp9_initialize_enc();
struct VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf);
void vp9_remove_compressor(VP9_COMP *cpi);
-void vp9_change_config(VP9_COMP *cpi, VP9_CONFIG *oxcf);
+void vp9_change_config(VP9_COMP *cpi, const VP9_CONFIG *oxcf);
// receive a frames worth of data. caller can assume that a copy of this
// frame is made and not just a copy of the pointer..
@@ -865,7 +549,7 @@ int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags);
-int vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags);
+void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags);
int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
YV12_BUFFER_CONFIG *sd);
@@ -897,8 +581,8 @@ void vp9_set_svc(VP9_COMP *cpi, int use_svc);
int vp9_get_quantizer(struct VP9_COMP *cpi);
-static int get_ref_frame_idx(const VP9_COMP *cpi,
- MV_REFERENCE_FRAME ref_frame) {
+static INLINE int get_ref_frame_idx(const VP9_COMP *cpi,
+ MV_REFERENCE_FRAME ref_frame) {
if (ref_frame == LAST_FRAME) {
return cpi->lst_fb_idx;
} else if (ref_frame == GOLDEN_FRAME) {
@@ -908,14 +592,22 @@ static int get_ref_frame_idx(const VP9_COMP *cpi,
}
}
-static YV12_BUFFER_CONFIG *get_ref_frame_buffer(VP9_COMP *cpi,
- MV_REFERENCE_FRAME ref_frame) {
- VP9_COMMON *const cm = &cpi->common;
- return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi,
- ref_frame)]].buf;
+static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
+ VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
+ VP9_COMMON * const cm = &cpi->common;
+ return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]]
+ .buf;
}
-void vp9_set_speed_features(VP9_COMP *cpi);
+static INLINE int get_token_alloc(int mb_rows, int mb_cols) {
+ // TODO(JBB): make this work for alpha channel and double check we can't
+ // exceed this token count if we have a 32x32 transform crossing a boundary
+ // at a multiple of 16.
+ // mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full
+ // resolution. We assume up to 1 token per pixel, and then allow
+ // a head room of 4.
+ return mb_rows * mb_cols * (16 * 16 * 3 + 4);
+}
int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source,
const YV12_BUFFER_CONFIG *reference);
@@ -927,16 +619,17 @@ int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget);
int vp9_compute_qdelta_by_rate(VP9_COMP *cpi, int base_q_index,
double rate_target_ratio);
-static int get_token_alloc(int mb_rows, int mb_cols) {
- return mb_rows * mb_cols * (48 * 16 + 4);
-}
+void vp9_scale_references(VP9_COMP *cpi);
+
+void vp9_update_reference_frames(VP9_COMP *cpi);
extern const int q_trans[];
int64_t vp9_rescale(int64_t val, int64_t num, int denom);
-static void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
- MV_REFERENCE_FRAME ref0, MV_REFERENCE_FRAME ref1) {
+static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
+ MV_REFERENCE_FRAME ref0,
+ MV_REFERENCE_FRAME ref1) {
xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME
: 0];
xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
index b5f490199..92ad1e745 100644
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -10,16 +10,18 @@
#include <assert.h>
#include <limits.h>
+
+#include "./vpx_scale_rtcd.h"
+
+#include "vpx_mem/vpx_mem.h"
+
+#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/common/vp9_quant_common.h"
+
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_picklpf.h"
#include "vp9/encoder/vp9_quantize.h"
-#include "vp9/common/vp9_quant_common.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vpx_scale/vpx_scale.h"
-#include "vp9/common/vp9_alloccommon.h"
-#include "vp9/common/vp9_loopfilter.h"
-#include "./vpx_scale_rtcd.h"
static int get_max_filter_level(VP9_COMP *cpi) {
return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
@@ -28,11 +30,11 @@ static int get_max_filter_level(VP9_COMP *cpi) {
static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi,
- MACROBLOCKD *const xd, VP9_COMMON *const cm,
int filt_level, int partial_frame) {
+ VP9_COMMON *const cm = &cpi->common;
int filt_err;
- vp9_loop_filter_frame(cm, xd, filt_level, 1, partial_frame);
+ vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_level, 1, partial_frame);
filt_err = vp9_calc_ss_err(sd, cm->frame_to_show);
// Re-instate the unfiltered frame
@@ -43,7 +45,6 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi,
static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
int partial_frame) {
- MACROBLOCKD *const xd = &cpi->mb.e_mbd;
VP9_COMMON *const cm = &cpi->common;
struct loopfilter *const lf = &cm->lf;
const int min_filter_level = 0;
@@ -64,7 +65,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
// Make a copy of the unfiltered / processed recon buffer
vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
- best_err = try_filter_frame(sd, cpi, xd, cm, filt_mid, partial_frame);
+ best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame);
filt_best = filt_mid;
ss_err[filt_mid] = best_err;
@@ -86,7 +87,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
if (filt_direction <= 0 && filt_low != filt_mid) {
// Get Low filter error score
if (ss_err[filt_low] < 0) {
- filt_err = try_filter_frame(sd, cpi, xd, cm, filt_low, partial_frame);
+ filt_err = try_filter_frame(sd, cpi, filt_low, partial_frame);
ss_err[filt_low] = filt_err;
} else {
filt_err = ss_err[filt_low];
@@ -105,7 +106,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
// Now look at filt_high
if (filt_direction >= 0 && filt_high != filt_mid) {
if (ss_err[filt_high] < 0) {
- filt_err = try_filter_frame(sd, cpi, xd, cm, filt_high, partial_frame);
+ filt_err = try_filter_frame(sd, cpi, filt_high, partial_frame);
ss_err[filt_high] = filt_err;
} else {
filt_err = ss_err[filt_high];
@@ -119,7 +120,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
// Half the step distance if the best filter value was the same as last time
if (filt_best == filt_mid) {
- filter_step = filter_step / 2;
+ filter_step /= 2;
filt_direction = 0;
} else {
filt_direction = (filt_best < filt_mid) ? -1 : 1;
@@ -131,25 +132,24 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
}
void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
- int method) {
+ LPF_PICK_METHOD method) {
VP9_COMMON *const cm = &cpi->common;
struct loopfilter *const lf = &cm->lf;
lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0
: cpi->oxcf.sharpness;
- if (method == 2) {
+ if (method == LPF_PICK_FROM_Q) {
const int min_filter_level = 0;
const int max_filter_level = get_max_filter_level(cpi);
const int q = vp9_ac_quant(cm->base_qindex, 0);
// These values were determined by linear fitting the result of the
- // searched level
- // filt_guess = q * 0.316206 + 3.87252
- int filt_guess = (q * 20723 + 1015158 + (1 << 17)) >> 18;
+ // searched level, filt_guess = q * 0.316206 + 3.87252
+ int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
if (cm->frame_type == KEY_FRAME)
filt_guess -= 4;
lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
} else {
- search_filter_level(sd, cpi, method == 1);
+ search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE);
}
}
diff --git a/vp9/encoder/vp9_picklpf.h b/vp9/encoder/vp9_picklpf.h
index 203ef871a..7d08ddb5f 100644
--- a/vp9/encoder/vp9_picklpf.h
+++ b/vp9/encoder/vp9_picklpf.h
@@ -16,11 +16,13 @@
extern "C" {
#endif
+#include "vp9/encoder/vp9_onyx_int.h"
+
struct yv12_buffer_config;
struct VP9_COMP;
void vp9_pick_filter_level(const struct yv12_buffer_config *sd,
- struct VP9_COMP *cpi, int method);
+ struct VP9_COMP *cpi, LPF_PICK_METHOD method);
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 0a396fff0..c500a0163 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -26,14 +26,13 @@
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rdopt.h"
-static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
+static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
const TileInfo *const tile,
BLOCK_SIZE bsize, int mi_row, int mi_col,
int_mv *tmp_mv) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
- int bestsme = INT_MAX;
int step_param;
int sadpb = x->sadperbit16;
MV mvp_full;
@@ -46,9 +45,6 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
int tmp_row_min = x->mv_row_min;
int tmp_row_max = x->mv_row_max;
- int buf_offset;
- int stride = xd->plane[0].pre[0].stride;
-
const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
ref);
if (scaled_ref_frame) {
@@ -77,11 +73,14 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[0] = backup_yv12[i];
}
- return INT_MAX;
+ return;
}
}
-
- mvp_full = mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv;
+ assert(x->mv_best_ref_index[ref] <= 2);
+ if (x->mv_best_ref_index[ref] < 2)
+ mvp_full = mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv;
+ else
+ mvp_full = x->pred_mv[ref].as_mv;
mvp_full.col >>= 3;
mvp_full.row >>= 3;
@@ -129,17 +128,6 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[0] = backup_yv12[i];
}
-
- // TODO(jingning) This step can be merged into full pixel search step in the
- // re-designed log-diamond search
- buf_offset = tmp_mv->as_mv.row * stride + tmp_mv->as_mv.col;
-
- // Find sad for current vector.
- bestsme = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, x->plane[0].src.stride,
- xd->plane[0].pre[0].buf + buf_offset,
- stride, 0x7fffffff);
-
- return bestsme;
}
static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
@@ -184,6 +172,8 @@ static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[0] = backup_yv12[i];
}
+
+ x->pred_mv[ref].as_mv = *tmp_mv;
}
static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
@@ -196,7 +186,6 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
int rate;
int64_t dist;
-
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &xd->plane[0];
const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
@@ -233,12 +222,22 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int64_t best_rd = INT64_MAX;
int64_t this_rd = INT64_MAX;
- const int64_t inter_mode_thresh = 300;
- const int64_t intra_mode_cost = 50;
-
int rate = INT_MAX;
int64_t dist = INT64_MAX;
+ VP9_COMMON *cm = &cpi->common;
+ int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
+
+ const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
+ intra_cost_penalty, 0);
+ const int64_t intra_mode_cost = 50;
+
+ unsigned char segment_id = mbmi->segment_id;
+ const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize];
+ const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize];
+ // Mode index conversion form THR_MODES to MB_PREDICTION_MODE for a ref frame.
+ int mode_idx[MB_MODE_COUNT] = {0};
+
x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
x->skip = 0;
@@ -256,7 +255,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->interp_filter = cpi->common.interp_filter == SWITCHABLE ?
EIGHTTAP : cpi->common.interp_filter;
mbmi->skip = 0;
- mbmi->segment_id = 0;
+ mbmi->segment_id = segment_id;
xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) {
@@ -282,6 +281,14 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->ref_frame[0] = ref_frame;
+ // Set conversion index for LAST_FRAME.
+ if (ref_frame == LAST_FRAME) {
+ mode_idx[NEARESTMV] = THR_NEARESTMV; // LAST_FRAME, NEARESTMV
+ mode_idx[NEARMV] = THR_NEARMV; // LAST_FRAME, NEARMV
+ mode_idx[ZEROMV] = THR_ZEROMV; // LAST_FRAME, ZEROMV
+ mode_idx[NEWMV] = THR_NEWMV; // LAST_FRAME, NEWMV
+ }
+
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
int rate_mv = 0;
@@ -289,13 +296,17 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
(1 << INTER_OFFSET(this_mode)))
continue;
+ if (best_rd < ((int64_t)rd_threshes[mode_idx[this_mode]] *
+ rd_thresh_freq_fact[this_mode] >> 5) ||
+ rd_threshes[mode_idx[this_mode]] == INT_MAX)
+ continue;
+
if (this_mode == NEWMV) {
if (this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize]))
continue;
- x->mode_sad[ref_frame][INTER_OFFSET(NEWMV)] =
- full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
- &frame_mv[NEWMV][ref_frame]);
+ full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame]);
if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV)
continue;
@@ -345,6 +356,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist);
rate += x->mbmode_cost[this_mode];
+ rate += intra_cost_penalty;
this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
if (this_rd + intra_mode_cost < best_rd) {
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index edc48bb16..3c8baad79 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -55,10 +55,9 @@ static int kf_low = 400;
// formulaic approach to facilitate easier adjustment of the Q tables.
// The formulae were derived from computing a 3rd order polynomial best
// fit to the original data (after plotting real maxq vs minq (not q index))
-static int calculate_minq_index(double maxq,
- double x3, double x2, double x1, double c) {
+static int get_minq_index(double maxq, double x3, double x2, double x1) {
int i;
- const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq + c,
+ const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq,
maxq);
// Special case handling to deal with the step from q2.0
@@ -66,57 +65,26 @@ static int calculate_minq_index(double maxq,
if (minqtarget <= 2.0)
return 0;
- for (i = 0; i < QINDEX_RANGE; i++) {
+ for (i = 0; i < QINDEX_RANGE; i++)
if (minqtarget <= vp9_convert_qindex_to_q(i))
return i;
- }
return QINDEX_RANGE - 1;
}
-void vp9_rc_init_minq_luts(void) {
+void vp9_rc_init_minq_luts() {
int i;
for (i = 0; i < QINDEX_RANGE; i++) {
const double maxq = vp9_convert_qindex_to_q(i);
-
- kf_low_motion_minq[i] = calculate_minq_index(maxq,
- 0.000001,
- -0.0004,
- 0.15,
- 0.0);
- kf_high_motion_minq[i] = calculate_minq_index(maxq,
- 0.000002,
- -0.0012,
- 0.50,
- 0.0);
-
- gf_low_motion_minq[i] = calculate_minq_index(maxq,
- 0.0000015,
- -0.0009,
- 0.32,
- 0.0);
- gf_high_motion_minq[i] = calculate_minq_index(maxq,
- 0.0000021,
- -0.00125,
- 0.50,
- 0.0);
- afq_low_motion_minq[i] = calculate_minq_index(maxq,
- 0.0000015,
- -0.0009,
- 0.33,
- 0.0);
- afq_high_motion_minq[i] = calculate_minq_index(maxq,
- 0.0000021,
- -0.00125,
- 0.55,
- 0.0);
- inter_minq[i] = calculate_minq_index(maxq,
- 0.00000271,
- -0.00113,
- 0.75,
- 0.0);
+ kf_low_motion_minq[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.15);
+ kf_high_motion_minq[i] = get_minq_index(maxq, 0.000002, -0.0012, 0.50);
+ gf_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.32);
+ gf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50);
+ afq_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.33);
+ afq_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55);
+ inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.75);
}
}
@@ -183,25 +151,6 @@ void vp9_restore_coding_context(VP9_COMP *cpi) {
cm->fc = cc->fc;
}
-void vp9_setup_key_frame(VP9_COMP *cpi) {
- VP9_COMMON *cm = &cpi->common;
-
- vp9_setup_past_independence(cm);
-
- /* All buffers are implicitly updated on key frames. */
- cpi->refresh_golden_frame = 1;
- cpi->refresh_alt_ref_frame = 1;
-}
-
-void vp9_setup_inter_frame(VP9_COMP *cpi) {
- VP9_COMMON *cm = &cpi->common;
- if (cm->error_resilient_mode || cm->intra_only)
- vp9_setup_past_independence(cm);
-
- assert(cm->frame_context_idx < FRAME_CONTEXTS);
- cm->fc = cm->frame_contexts[cm->frame_context_idx];
-}
-
static int estimate_bits_at_q(int frame_kind, int q, int mbs,
double correction_factor) {
const int bpm = (int)(vp9_rc_bits_per_mb(frame_kind, q, correction_factor));
@@ -984,7 +933,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
active_best_quality, active_worst_quality);
if (q > *top_index) {
// Special case when we are targeting the max allowed rate.
- if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth)
+ if (rc->this_frame_target >= rc->max_frame_bandwidth)
*top_index = q;
else
q = *top_index;
@@ -1036,6 +985,9 @@ int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi,
q /= 3;
if (q == 0)
q++;
+ if (cpi->sf.partition_check == 1)
+ q -= 10;
+
if (q < *bottom_index)
*bottom_index = q;
else if (q > *top_index)
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index ed6266fe2..87421af5f 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -87,13 +87,9 @@ struct VP9_COMP;
void vp9_save_coding_context(struct VP9_COMP *cpi);
void vp9_restore_coding_context(struct VP9_COMP *cpi);
-void vp9_setup_key_frame(struct VP9_COMP *cpi);
-void vp9_setup_inter_frame(struct VP9_COMP *cpi);
-
double vp9_convert_qindex_to_q(int qindex);
-// initialize luts for minq
-void vp9_rc_init_minq_luts(void);
+void vp9_rc_init_minq_luts();
// Generally at the high level, the following flow is expected
// to be enforced for rate control:
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index c7e730aad..ba7d4db12 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -244,7 +244,6 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
static void set_block_thresholds(VP9_COMP *cpi) {
const VP9_COMMON *const cm = &cpi->common;
- const SPEED_FEATURES *const sf = &cpi->sf;
int i, bsize, segment_id;
for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
@@ -261,13 +260,13 @@ static void set_block_thresholds(VP9_COMP *cpi) {
for (i = 0; i < MAX_MODES; ++i)
cpi->rd_threshes[segment_id][bsize][i] =
- sf->thresh_mult[i] < thresh_max ? sf->thresh_mult[i] * t / 4
+ cpi->rd_thresh_mult[i] < thresh_max ? cpi->rd_thresh_mult[i] * t / 4
: INT_MAX;
for (i = 0; i < MAX_REFS; ++i) {
cpi->rd_thresh_sub8x8[segment_id][bsize][i] =
- sf->thresh_mult_sub8x8[i] < thresh_max
- ? sf->thresh_mult_sub8x8[i] * t / 4
+ cpi->rd_thresh_mult_sub8x8[i] < thresh_max
+ ? cpi->rd_thresh_mult_sub8x8[i] * t / 4
: INT_MAX;
}
}
@@ -566,7 +565,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
x->token_costs[tx_size][type][is_inter_block(mbmi)];
- uint8_t *p_tok = x->token_cache;
+ uint8_t token_cache[32 * 32];
int pt = combine_entropy_contexts(*A, *L);
int c, cost;
// Check for consistency of tx_size with mode info
@@ -584,7 +583,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
int v = qcoeff[0];
int prev_t = vp9_dct_value_tokens_ptr[v].token;
cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
- p_tok[0] = vp9_pt_energy_class[prev_t];
+ token_cache[0] = vp9_pt_energy_class[prev_t];
++token_costs;
// ac tokens
@@ -597,9 +596,9 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
if (use_fast_coef_costing) {
cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
} else {
- pt = get_coef_context(nb, p_tok, c);
+ pt = get_coef_context(nb, token_cache, c);
cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
- p_tok[rc] = vp9_pt_energy_class[t];
+ token_cache[rc] = vp9_pt_energy_class[t];
}
prev_t = t;
if (!--band_left) {
@@ -613,7 +612,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
if (use_fast_coef_costing) {
cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
} else {
- pt = get_coef_context(nb, p_tok, c);
+ pt = get_coef_context(nb, token_cache, c);
cost += (*token_costs)[0][pt][EOB_TOKEN];
}
}
@@ -1450,9 +1449,9 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
*mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode;
}
-static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
+static int cost_mv_ref(const VP9_COMP *cpi, MB_PREDICTION_MODE mode,
int mode_context) {
- MACROBLOCK *const x = &cpi->mb;
+ const MACROBLOCK *const x = &cpi->mb;
const int segment_id = x->e_mbd.mi_8x8[0]->mbmi.segment_id;
// Don't account for mode here if segment skip is enabled.
@@ -1669,6 +1668,45 @@ static INLINE int mv_has_subpel(const MV *mv) {
return (mv->row & 0x0F) || (mv->col & 0x0F);
}
+// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion.
+// TODO(aconverse): Find out if this is still productive then clean up or remove
+static int check_best_zero_mv(
+ const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
+ int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
+ int disable_inter_mode_mask, int this_mode, int ref_frame,
+ int second_ref_frame) {
+ if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
+ (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
+ frame_mv[this_mode][ref_frame].as_int == 0 &&
+ (second_ref_frame == NONE ||
+ frame_mv[this_mode][second_ref_frame].as_int == 0)) {
+ int rfc = mode_context[ref_frame];
+ int c1 = cost_mv_ref(cpi, NEARMV, rfc);
+ int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
+ int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
+
+ if (this_mode == NEARMV) {
+ if (c1 > c3) return 0;
+ } else if (this_mode == NEARESTMV) {
+ if (c2 > c3) return 0;
+ } else {
+ assert(this_mode == ZEROMV);
+ if (second_ref_frame == NONE) {
+ if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0) ||
+ (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0))
+ return 0;
+ } else {
+ if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0 &&
+ frame_mv[NEARESTMV][second_ref_frame].as_int == 0) ||
+ (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0 &&
+ frame_mv[NEARMV][second_ref_frame].as_int == 0))
+ return 0;
+ }
+ }
+ }
+ return 1;
+}
+
static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
const TileInfo *const tile,
BEST_SEG_INFO *bsi_buf, int filter_idx,
@@ -1737,43 +1775,11 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
if (disable_inter_mode_mask & (1 << mode_idx))
continue;
- // if we're near/nearest and mv == 0,0, compare to zeromv
- if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
- (this_mode == NEARMV || this_mode == NEARESTMV ||
- this_mode == ZEROMV) &&
- frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 &&
- (!has_second_rf ||
- frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) {
- int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
- int c1 = cost_mv_ref(cpi, NEARMV, rfc);
- int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
- int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
-
- if (this_mode == NEARMV) {
- if (c1 > c3)
- continue;
- } else if (this_mode == NEARESTMV) {
- if (c2 > c3)
- continue;
- } else {
- assert(this_mode == ZEROMV);
- if (!has_second_rf) {
- if ((c3 >= c2 &&
- frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
- (c3 >= c1 &&
- frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
- continue;
- } else {
- if ((c3 >= c2 &&
- frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
- frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
- (c3 >= c1 &&
- frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
- frame_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
- continue;
- }
- }
- }
+ if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
+ disable_inter_mode_mask,
+ this_mode, mbmi->ref_frame[0],
+ mbmi->ref_frame[1]))
+ continue;
vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
@@ -2166,10 +2172,9 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
max_mv = MAX(max_mv,
MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
// only need to check zero mv once
- if (!this_mv.as_int && zero_seen) {
- x->mode_sad[ref_frame][i] = x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)];
+ if (!this_mv.as_int && zero_seen)
continue;
- }
+
zero_seen = zero_seen || !this_mv.as_int;
row_offset = this_mv.as_mv.row >> 3;
@@ -2180,9 +2185,6 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
ref_y_ptr, ref_y_stride,
0x7fffffff);
- x->mode_sad[ref_frame][i] = this_sad;
- if (this_mv.as_int == 0)
- x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)] = this_sad;
// Note if it is the best so far.
if (this_sad < best_sad) {
@@ -2191,12 +2193,6 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
}
}
- if (!zero_seen)
- x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)] =
- cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
- ref_y_buffer, ref_y_stride,
- 0x7fffffff);
-
// Note the index of the mv that worked best in the reference list.
x->mv_best_ref_index[ref_frame] = best_index;
x->max_mv_context[ref_frame] = max_mv;
@@ -3133,7 +3129,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
const struct segmentation *const seg = &cm->seg;
- const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
MB_PREDICTION_MODE this_mode;
MV_REFERENCE_FRAME ref_frame, second_ref_frame;
unsigned char segment_id = mbmi->segment_id;
@@ -3199,7 +3194,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
x->pred_mv_sad[ref_frame] = INT_MAX;
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
vp9_setup_buffer_inter(cpi, x, tile,
- ref_frame, block_size, mi_row, mi_col,
+ ref_frame, bsize, mi_row, mi_col,
frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
}
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
@@ -3381,46 +3376,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
}
} else {
- // TODO(aconverse): Find out if this is still productive then clean up or
- // remove
- // if we're near/nearest and mv == 0,0, compare to zeromv
if (x->in_active_map &&
- !(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
- (this_mode == NEARMV || this_mode == NEARESTMV ||
- this_mode == ZEROMV) &&
- frame_mv[this_mode][ref_frame].as_int == 0 &&
- !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
- (!comp_pred || frame_mv[this_mode][second_ref_frame].as_int == 0)) {
- int rfc = mbmi->mode_context[ref_frame];
- int c1 = cost_mv_ref(cpi, NEARMV, rfc);
- int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
- int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
-
- if (this_mode == NEARMV) {
- if (c1 > c3)
- continue;
- } else if (this_mode == NEARESTMV) {
- if (c2 > c3)
- continue;
- } else {
- assert(this_mode == ZEROMV);
- if (!comp_pred) {
- if ((c3 >= c2 &&
- frame_mv[NEARESTMV][ref_frame].as_int == 0) ||
- (c3 >= c1 &&
- frame_mv[NEARMV][ref_frame].as_int == 0))
- continue;
- } else {
- if ((c3 >= c2 &&
- frame_mv[NEARESTMV][ref_frame].as_int == 0 &&
- frame_mv[NEARESTMV][second_ref_frame].as_int == 0) ||
- (c3 >= c1 &&
- frame_mv[NEARMV][ref_frame].as_int == 0 &&
- frame_mv[NEARMV][second_ref_frame].as_int == 0))
- continue;
- }
- }
- }
+ !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+ if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
+ disable_inter_mode_mask, this_mode, ref_frame,
+ second_ref_frame))
+ continue;
}
mbmi->mode = this_mode;
@@ -3800,7 +3761,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
const struct segmentation *seg = &cm->seg;
- const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
MV_REFERENCE_FRAME ref_frame, second_ref_frame;
unsigned char segment_id = mbmi->segment_id;
int comp_pred, i;
@@ -3860,7 +3820,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
vp9_setup_buffer_inter(cpi, x, tile,
- ref_frame, block_size, mi_row, mi_col,
+ ref_frame, bsize, mi_row, mi_col,
frame_mv[NEARESTMV], frame_mv[NEARMV],
yv12_mb);
}
diff --git a/vp9/encoder/vp9_sad.c b/vp9/encoder/vp9_sad.c
index 58c5df47e..9d8da0da4 100644
--- a/vp9/encoder/vp9_sad.c
+++ b/vp9/encoder/vp9_sad.c
@@ -44,7 +44,7 @@ unsigned int vp9_sad##m##x##n##_avg_c(const uint8_t *src_ptr, int src_stride, \
const uint8_t *second_pred, \
unsigned int max_sad) { \
uint8_t comp_pred[m * n]; \
- comp_avg_pred(comp_pred, second_pred, m, n, ref_ptr, ref_stride); \
+ vp9_comp_avg_pred(comp_pred, second_pred, m, n, ref_ptr, ref_stride); \
return sad(src_ptr, src_stride, comp_pred, m, m, n); \
}
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
new file mode 100644
index 000000000..f09035077
--- /dev/null
+++ b/vp9/encoder/vp9_speed_features.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits.h>
+
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_speed_features.h"
+
+#define ALL_INTRA_MODES 0x3FF
+#define INTRA_DC_ONLY 0x01
+#define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED))
+#define INTRA_DC_H_V ((1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED))
+#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED))
+
+// Masks for partially or completely disabling split mode
+#define DISABLE_ALL_SPLIT 0x3F
+#define DISABLE_ALL_INTER_SPLIT 0x1F
+#define DISABLE_COMPOUND_SPLIT 0x18
+#define LAST_AND_INTRA_SPLIT_ONLY 0x1E
+
+// Intra only frames, golden frames (except alt ref overlays) and
+// alt ref frames tend to be coded at a higher than ambient quality
+static INLINE int frame_is_boosted(const VP9_COMP *cpi) {
+ return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
+ (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref);
+}
+
+static void set_good_speed_feature(VP9_COMP *cpi,
+ VP9_COMMON *cm,
+ SPEED_FEATURES *sf,
+ int speed) {
+ int i;
+ sf->adaptive_rd_thresh = 1;
+ sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW);
+ sf->allow_skip_recode = 1;
+
+ if (speed >= 1) {
+ sf->use_square_partition_only = !frame_is_intra_only(cm);
+ sf->less_rectangular_check = 1;
+ sf->tx_size_search_method = frame_is_boosted(cpi)
+ ? USE_FULL_RD : USE_LARGESTALL;
+
+ if (MIN(cm->width, cm->height) >= 720)
+ sf->disable_split_mask = cm->show_frame ?
+ DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
+ else
+ sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
+ sf->use_rd_breakout = 1;
+ sf->adaptive_motion_search = 1;
+ sf->auto_mv_step_size = 1;
+ sf->adaptive_rd_thresh = 2;
+ sf->subpel_iters_per_step = 1;
+ sf->mode_skip_start = 10;
+ sf->adaptive_pred_interp_filter = 1;
+
+ sf->recode_loop = ALLOW_RECODE_KFARFGF;
+ sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
+ sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
+ sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
+ sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
+ }
+ // Additions or changes from speed 1 for speed >= 2.
+ if (speed >= 2) {
+ sf->tx_size_search_method = frame_is_boosted(cpi)
+ ? USE_FULL_RD : USE_LARGESTALL;
+
+ if (MIN(cm->width, cm->height) >= 720)
+ sf->disable_split_mask = cm->show_frame ?
+ DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
+ else
+ sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
+
+ sf->adaptive_pred_interp_filter = 2;
+
+ sf->reference_masking = 1;
+ sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
+ FLAG_SKIP_INTRA_BESTINTER |
+ FLAG_SKIP_COMP_BESTINTRA |
+ FLAG_SKIP_INTRA_LOWVAR;
+ sf->disable_filter_search_var_thresh = 100;
+ sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
+
+ sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
+ sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
+ sf->adjust_partitioning_from_last_frame = 1;
+ sf->last_partitioning_redo_frequency = 3;
+ }
+ // Additions or changes for speed 3 and above
+ if (speed >= 3) {
+ if (MIN(cm->width, cm->height) >= 720)
+ sf->disable_split_mask = DISABLE_ALL_SPLIT;
+ else
+ sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT;
+
+ sf->recode_loop = ALLOW_RECODE_KFMAXBW;
+
+ sf->adaptive_rd_thresh = 3;
+ sf->mode_skip_start = 6;
+ sf->use_fast_coef_updates = 2;
+ sf->use_fast_coef_costing = 1;
+ }
+  // Additions or changes for speed 4 and above
+ if (speed >= 4) {
+ sf->use_square_partition_only = 1;
+ sf->tx_size_search_method = USE_LARGESTALL;
+ sf->disable_split_mask = DISABLE_ALL_SPLIT;
+
+ sf->adaptive_rd_thresh = 4;
+
+ // Add a couple more skip flags
+ sf->mode_search_skip_flags |= FLAG_SKIP_COMP_REFMISMATCH |
+ FLAG_EARLY_TERMINATE;
+
+ sf->disable_filter_search_var_thresh = 200;
+
+ sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
+ sf->use_lp32x32fdct = 1;
+ }
+ if (speed >= 5) {
+ sf->partition_search_type = FIXED_PARTITION;
+ sf->optimize_coefficients = 0;
+ sf->search_method = HEX;
+ sf->disable_filter_search_var_thresh = 500;
+ for (i = 0; i < TX_SIZES; i++) {
+ sf->intra_y_mode_mask[i] = INTRA_DC_ONLY;
+ sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY;
+ }
+ cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
+ }
+}
+
+static void set_rt_speed_feature(VP9_COMMON *cm,
+ SPEED_FEATURES *sf,
+ int speed) {
+ sf->static_segmentation = 0;
+ sf->adaptive_rd_thresh = 1;
+ sf->encode_breakout_thresh = 1;
+ sf->use_fast_coef_costing = 1;
+
+ if (speed == 1) {
+ sf->use_square_partition_only = !frame_is_intra_only(cm);
+ sf->less_rectangular_check = 1;
+ sf->tx_size_search_method =
+ frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
+
+ if (MIN(cm->width, cm->height) >= 720)
+ sf->disable_split_mask = cm->show_frame ?
+ DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
+ else
+ sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
+
+ sf->use_rd_breakout = 1;
+ sf->adaptive_motion_search = 1;
+ sf->adaptive_pred_interp_filter = 1;
+ sf->auto_mv_step_size = 1;
+ sf->adaptive_rd_thresh = 2;
+ sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
+ sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
+ sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
+ sf->encode_breakout_thresh = 8;
+ }
+ if (speed >= 2) {
+ sf->use_square_partition_only = !frame_is_intra_only(cm);
+ sf->less_rectangular_check = 1;
+ sf->tx_size_search_method =
+ frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
+
+ if (MIN(cm->width, cm->height) >= 720)
+ sf->disable_split_mask = cm->show_frame ?
+ DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
+ else
+ sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
+
+ sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH
+ | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA
+ | FLAG_SKIP_INTRA_LOWVAR;
+
+ sf->use_rd_breakout = 1;
+ sf->adaptive_motion_search = 1;
+ sf->adaptive_pred_interp_filter = 2;
+ sf->auto_mv_step_size = 1;
+ sf->reference_masking = 1;
+
+ sf->disable_filter_search_var_thresh = 50;
+ sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
+
+ sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
+ sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
+ sf->adjust_partitioning_from_last_frame = 1;
+ sf->last_partitioning_redo_frequency = 3;
+
+ sf->adaptive_rd_thresh = 2;
+ sf->use_lp32x32fdct = 1;
+ sf->mode_skip_start = 11;
+ sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
+ sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
+ sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
+ sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
+ sf->encode_breakout_thresh = 200;
+ }
+ if (speed >= 3) {
+ sf->use_square_partition_only = 1;
+ sf->disable_filter_search_var_thresh = 100;
+ sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
+ sf->use_uv_intra_rd_estimate = 1;
+ sf->skip_encode_sb = 1;
+ sf->subpel_iters_per_step = 1;
+ sf->use_fast_coef_updates = 2;
+ sf->adaptive_rd_thresh = 4;
+ sf->mode_skip_start = 6;
+ sf->allow_skip_recode = 0;
+ sf->optimize_coefficients = 0;
+ sf->disable_split_mask = DISABLE_ALL_SPLIT;
+ sf->lpf_pick = LPF_PICK_FROM_Q;
+ sf->encode_breakout_thresh = 700;
+ }
+ if (speed >= 4) {
+ int i;
+ sf->last_partitioning_redo_frequency = 4;
+ sf->adaptive_rd_thresh = 5;
+ sf->use_fast_coef_costing = 0;
+ sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX;
+ sf->adjust_partitioning_from_last_frame =
+ cm->last_frame_type != cm->frame_type || (0 ==
+ (cm->current_video_frame + 1) % sf->last_partitioning_redo_frequency);
+ sf->subpel_force_stop = 1;
+ for (i = 0; i < TX_SIZES; i++) {
+ sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
+ sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY;
+ }
+ sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY;
+ sf->frame_parameter_update = 0;
+ sf->encode_breakout_thresh = 1000;
+ sf->search_method = FAST_HEX;
+ sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV);
+ sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV));
+ sf->disable_inter_mode_mask[BLOCK_64X32] = ~(1 << INTER_OFFSET(NEARESTMV));
+ sf->disable_inter_mode_mask[BLOCK_64X64] = ~(1 << INTER_OFFSET(NEARESTMV));
+ sf->max_intra_bsize = BLOCK_32X32;
+ sf->allow_skip_recode = 1;
+ }
+ if (speed >= 5) {
+ sf->max_partition_size = BLOCK_32X32;
+ sf->min_partition_size = BLOCK_8X8;
+ sf->partition_check =
+ (cm->current_video_frame % sf->last_partitioning_redo_frequency == 1);
+ sf->partition_search_type = REFERENCE_PARTITION;
+ sf->use_nonrd_pick_mode = 1;
+ sf->search_method = FAST_DIAMOND;
+ sf->allow_skip_recode = 0;
+ }
+ if (speed >= 6) {
+ sf->partition_search_type = VAR_BASED_FIXED_PARTITION;
+ sf->use_nonrd_pick_mode = 1;
+ sf->search_method = FAST_DIAMOND;
+ }
+ if (speed >= 7) {
+ int i;
+ for (i = 0; i < BLOCK_SIZES; ++i)
+ sf->disable_inter_mode_mask[i] = 14; // only search NEARESTMV (0)
+ }
+}
+
+void vp9_set_speed_features(VP9_COMP *cpi) {
+ SPEED_FEATURES *const sf = &cpi->sf;
+ VP9_COMMON *const cm = &cpi->common;
+ const int speed = cpi->speed < 0 ? -cpi->speed : cpi->speed;
+ int i;
+
+ // best quality defaults
+ sf->frame_parameter_update = 1;
+ sf->search_method = NSTEP;
+ sf->recode_loop = ALLOW_RECODE;
+ sf->subpel_search_method = SUBPEL_TREE;
+ sf->subpel_iters_per_step = 2;
+ sf->subpel_force_stop = 0;
+ sf->optimize_coefficients = !cpi->oxcf.lossless;
+ sf->reduce_first_step_size = 0;
+ sf->auto_mv_step_size = 0;
+ sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
+ sf->comp_inter_joint_search_thresh = BLOCK_4X4;
+ sf->adaptive_rd_thresh = 0;
+ sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF;
+ sf->tx_size_search_method = USE_FULL_RD;
+ sf->use_lp32x32fdct = 0;
+ sf->adaptive_motion_search = 0;
+ sf->adaptive_pred_interp_filter = 0;
+ sf->reference_masking = 0;
+ sf->partition_search_type = SEARCH_PARTITION;
+ sf->less_rectangular_check = 0;
+ sf->use_square_partition_only = 0;
+ sf->auto_min_max_partition_size = NOT_IN_USE;
+ sf->max_partition_size = BLOCK_64X64;
+ sf->min_partition_size = BLOCK_4X4;
+ sf->adjust_partitioning_from_last_frame = 0;
+ sf->last_partitioning_redo_frequency = 4;
+ sf->disable_split_mask = 0;
+ sf->mode_search_skip_flags = 0;
+ sf->disable_split_var_thresh = 0;
+ sf->disable_filter_search_var_thresh = 0;
+ for (i = 0; i < TX_SIZES; i++) {
+ sf->intra_y_mode_mask[i] = ALL_INTRA_MODES;
+ sf->intra_uv_mode_mask[i] = ALL_INTRA_MODES;
+ }
+ sf->use_rd_breakout = 0;
+ sf->skip_encode_sb = 0;
+ sf->use_uv_intra_rd_estimate = 0;
+ sf->allow_skip_recode = 0;
+ sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE;
+ sf->use_fast_coef_updates = 0;
+ sf->use_fast_coef_costing = 0;
+ sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set
+ sf->use_nonrd_pick_mode = 0;
+ sf->encode_breakout_thresh = 0;
+ for (i = 0; i < BLOCK_SIZES; ++i)
+ sf->disable_inter_mode_mask[i] = 0;
+ sf->max_intra_bsize = BLOCK_64X64;
+ // This setting only takes effect when partition_search_type is set
+ // to FIXED_PARTITION.
+ sf->always_this_block_size = BLOCK_16X16;
+
+ switch (cpi->oxcf.mode) {
+ case MODE_BESTQUALITY:
+ case MODE_SECONDPASS_BEST: // This is the best quality mode.
+ cpi->diamond_search_sad = vp9_full_range_search;
+ break;
+ case MODE_FIRSTPASS:
+ case MODE_GOODQUALITY:
+ case MODE_SECONDPASS:
+ set_good_speed_feature(cpi, cm, sf, speed);
+ break;
+ case MODE_REALTIME:
+ set_rt_speed_feature(cm, sf, speed);
+ break;
+ }
+
+ // Slow quant, dct and trellis not worthwhile for first pass
+ // so make sure they are always turned off.
+ if (cpi->pass == 1) {
+ sf->optimize_coefficients = 0;
+ }
+
+ // No recode for 1 pass.
+ if (cpi->pass == 0) {
+ sf->recode_loop = DISALLOW_RECODE;
+ sf->optimize_coefficients = 0;
+ }
+
+ if (cpi->sf.subpel_search_method == SUBPEL_TREE) {
+ cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree;
+ cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_tree;
+ }
+
+ cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1;
+
+ if (cpi->encode_breakout && cpi->oxcf.mode == MODE_REALTIME &&
+ sf->encode_breakout_thresh > cpi->encode_breakout)
+ cpi->encode_breakout = sf->encode_breakout_thresh;
+
+ if (sf->disable_split_mask == DISABLE_ALL_SPLIT)
+ sf->adaptive_pred_interp_filter = 0;
+}
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
new file mode 100644
index 000000000..5091e2b06
--- /dev/null
+++ b/vp9/encoder/vp9_speed_features.h
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_ENCODER_VP9_SPEED_FEATURES_H_
+#define VP9_ENCODER_VP9_SPEED_FEATURES_H_
+
+#include "vp9/common/vp9_enums.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+ DIAMOND = 0,
+ NSTEP = 1,
+ HEX = 2,
+ BIGDIA = 3,
+ SQUARE = 4,
+ FAST_HEX = 5,
+ FAST_DIAMOND = 6
+} SEARCH_METHODS;
+
+typedef enum {
+ // No recode.
+ DISALLOW_RECODE = 0,
+ // Allow recode for KF and exceeding maximum frame bandwidth.
+ ALLOW_RECODE_KFMAXBW = 1,
+ // Allow recode only for KF/ARF/GF frames.
+ ALLOW_RECODE_KFARFGF = 2,
+ // Allow recode for all frames based on bitrate constraints.
+ ALLOW_RECODE = 3,
+} RECODE_LOOP_TYPE;
+
+typedef enum {
+ SUBPEL_TREE = 0,
+ // Other methods to come
+} SUBPEL_SEARCH_METHODS;
+
+typedef enum {
+ LAST_FRAME_PARTITION_OFF = 0,
+ LAST_FRAME_PARTITION_LOW_MOTION = 1,
+ LAST_FRAME_PARTITION_ALL = 2
+} LAST_FRAME_PARTITION_METHOD;
+
+typedef enum {
+ USE_FULL_RD = 0,
+ USE_LARGESTINTRA,
+ USE_LARGESTINTRA_MODELINTER,
+ USE_LARGESTALL
+} TX_SIZE_SEARCH_METHOD;
+
+typedef enum {
+ NOT_IN_USE = 0,
+ RELAXED_NEIGHBORING_MIN_MAX = 1,
+ STRICT_NEIGHBORING_MIN_MAX = 2
+} AUTO_MIN_MAX_MODE;
+
+typedef enum {
+ // Try the full image with different values.
+ LPF_PICK_FROM_FULL_IMAGE,
+ // Try a small portion of the image with different values.
+ LPF_PICK_FROM_SUBIMAGE,
+ // Estimate the level based on quantizer and frame type
+ LPF_PICK_FROM_Q,
+} LPF_PICK_METHOD;
+
+typedef enum {
+ // Terminate search early based on distortion so far compared to
+ // qp step, distortion in the neighborhood of the frame, etc.
+ FLAG_EARLY_TERMINATE = 1 << 0,
+
+ // Skips comp inter modes if the best so far is an intra mode.
+ FLAG_SKIP_COMP_BESTINTRA = 1 << 1,
+
+ // Skips comp inter modes if the best single intermode so far does
+ // not have the same reference as one of the two references being
+ // tested.
+ FLAG_SKIP_COMP_REFMISMATCH = 1 << 2,
+
+ // Skips oblique intra modes if the best so far is an inter mode.
+ FLAG_SKIP_INTRA_BESTINTER = 1 << 3,
+
+ // Skips oblique intra modes at angles 27, 63, 117, 153 if the best
+ // intra so far is not one of the neighboring directions.
+ FLAG_SKIP_INTRA_DIRMISMATCH = 1 << 4,
+
+ // Skips intra modes other than DC_PRED if the source variance is small
+ FLAG_SKIP_INTRA_LOWVAR = 1 << 5,
+} MODE_SEARCH_SKIP_LOGIC;
+
+typedef enum {
+ // Search partitions using RD/NONRD criterion
+ SEARCH_PARTITION = 0,
+
+ // Always use a fixed size partition
+ FIXED_PARTITION = 1,
+
+ // Use a fixed size partition in every 64X64 SB, where the size is
+ // determined based on source variance
+ VAR_BASED_FIXED_PARTITION = 2,
+
+ REFERENCE_PARTITION = 3,
+
+ // Use an arbitrary partitioning scheme based on source variance within
+ // a 64X64 SB
+ VAR_BASED_PARTITION
+} PARTITION_SEARCH_TYPE;
+
+typedef struct {
+ // Frame level coding parameter update
+ int frame_parameter_update;
+
+ // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
+ SEARCH_METHODS search_method;
+
+ RECODE_LOOP_TYPE recode_loop;
+
+ // Subpel_search_method can only be subpel_tree which does a subpixel
+ // logarithmic search that keeps stepping at 1/2 pixel units until
+ // you stop getting a gain, and then goes on to 1/4 and repeats
+ // the same process. Along the way it skips many diagonals.
+ SUBPEL_SEARCH_METHODS subpel_search_method;
+
+ // Maximum number of steps in logarithmic subpel search before giving up.
+ int subpel_iters_per_step;
+
+ // Control when to stop subpel search
+ int subpel_force_stop;
+
+ // This parameter controls the number of steps we'll do in a diamond
+ // search.
+ int max_step_search_steps;
+
+ // This parameter controls which step in the n-step process we start at.
+ // It's changed adaptively based on circumstances.
+ int reduce_first_step_size;
+
+ // If this is set to 1, we limit the motion search range to 2 times the
+ // largest motion vector found in the last frame.
+ int auto_mv_step_size;
+
+ // Trellis (dynamic programming) optimization of quantized values (+1, 0).
+ int optimize_coefficients;
+
+ // Always set to 0. If on it enables 0 cost background transmission
+ // (except for the initial transmission of the segmentation). The feature is
+ // disabled because the addition of very large block sizes make the
+  // backgrounds very cheap to encode, and the segmentation we have
+ // adds overhead.
+ int static_segmentation;
+
+ // If 1 we iterate finding a best reference for 2 ref frames together - via
+ // a log search that iterates 4 times (check around mv for last for best
+ // error of combined predictor then check around mv for alt). If 0 we
+ // we just use the best motion vector found for each frame by itself.
+ int comp_inter_joint_search_thresh;
+
+ // This variable is used to cap the maximum number of times we skip testing a
+ // mode to be evaluated. A high value means we will be faster.
+ int adaptive_rd_thresh;
+
+ // Enables skipping the reconstruction step (idct, recon) in the
+ // intermediate steps assuming the last frame didn't have too many intra
+ // blocks and the q is less than a threshold.
+ int skip_encode_sb;
+ int skip_encode_frame;
+ // Speed feature to allow or disallow skipping of recode at block
+ // level within a frame.
+ int allow_skip_recode;
+
+ // This variable allows us to reuse the last frames partition choices
+ // (64x64 v 32x32 etc) for this frame. It can be set to only use the last
+ // frame as a starting point in low motion scenes or always use it. If set
+ // we use last partitioning_redo frequency to determine how often to redo
+ // the partitioning from scratch. Adjust_partitioning_from_last_frame
+ // enables us to adjust up or down one partitioning from the last frames
+ // partitioning.
+ LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning;
+
+ // Determine which method we use to determine transform size. We can choose
+ // between options like full rd, largest for prediction size, largest
+ // for intra and model coefs for the rest.
+ TX_SIZE_SEARCH_METHOD tx_size_search_method;
+
+ // Low precision 32x32 fdct keeps everything in 16 bits and thus is less
+ // precise but significantly faster than the non lp version.
+ int use_lp32x32fdct;
+
+  // TODO(JBB): remove this as it's no longer used.
+
+ // After looking at the first set of modes (set by index here), skip
+ // checking modes for reference frames that don't match the reference frame
+ // of the best so far.
+ int mode_skip_start;
+
+ // TODO(JBB): Remove this.
+ int reference_masking;
+
+ PARTITION_SEARCH_TYPE partition_search_type;
+
+  // Used if partition_search_type = FIXED_PARTITION
+ BLOCK_SIZE always_this_block_size;
+
+ // Skip rectangular partition test when partition type none gives better
+ // rd than partition type split.
+ int less_rectangular_check;
+
+ // Disable testing non square partitions. (eg 16x32)
+ int use_square_partition_only;
+
+ // Sets min and max partition sizes for this 64x64 region based on the
+ // same 64x64 in last encoded frame, and the left and above neighbor.
+ AUTO_MIN_MAX_MODE auto_min_max_partition_size;
+
+ // Min and max partition size we enable (block_size) as per auto
+ // min max, but also used by adjust partitioning, and pick_partitioning.
+ BLOCK_SIZE min_partition_size;
+ BLOCK_SIZE max_partition_size;
+
+ // Whether or not we allow partitions one smaller or one greater than the last
+ // frame's partitioning. Only used if use_lastframe_partitioning is set.
+ int adjust_partitioning_from_last_frame;
+
+ // How frequently we re do the partitioning from scratch. Only used if
+ // use_lastframe_partitioning is set.
+ int last_partitioning_redo_frequency;
+
+ // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable
+ // it always, to allow it for only Last frame and Intra, disable it for all
+ // inter modes or to enable it always.
+ int disable_split_mask;
+
+ // TODO(jingning): combine the related motion search speed features
+ // This allows us to use motion search at other sizes as a starting
+ // point for this motion search and limits the search range around it.
+ int adaptive_motion_search;
+
+ // Allows sub 8x8 modes to use the prediction filter that was determined
+ // best for 8x8 mode. If set to 0 we always re check all the filters for
+ // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter
+ // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected.
+ int adaptive_pred_interp_filter;
+
+ // Search through variable block partition types in non-RD mode decision
+ // encoding process for RTC.
+ int partition_check;
+
+ // Implements various heuristics to skip searching modes
+ // The heuristics selected are based on flags
+  // defined in the MODE_SEARCH_SKIP_LOGIC enum
+ unsigned int mode_search_skip_flags;
+
+ // A source variance threshold below which the split mode is disabled
+ unsigned int disable_split_var_thresh;
+
+ // A source variance threshold below which filter search is disabled
+ // Choose a very large value (UINT_MAX) to use 8-tap always
+ unsigned int disable_filter_search_var_thresh;
+
+ // These bit masks allow you to enable or disable intra modes for each
+ // transform size separately.
+ int intra_y_mode_mask[TX_SIZES];
+ int intra_uv_mode_mask[TX_SIZES];
+
+ // This variable enables an early break out of mode testing if the model for
+ // rd built from the prediction signal indicates a value that's much
+ // higher than the best rd we've seen so far.
+ int use_rd_breakout;
+
+ // This enables us to use an estimate for intra rd based on dc mode rather
+ // than choosing an actual uv mode in the stage of encoding before the actual
+ // final encode.
+ int use_uv_intra_rd_estimate;
+
+ // This feature controls how the loop filter level is determined.
+ LPF_PICK_METHOD lpf_pick;
+
+ // This feature limits the number of coefficients updates we actually do
+ // by only looking at counts from 1/2 the bands.
+ int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced
+
+ // This flag controls the use of non-RD mode decision.
+ int use_nonrd_pick_mode;
+
+ // This variable sets the encode_breakout threshold. Currently, it is only
+ // enabled in real time mode.
+ int encode_breakout_thresh;
+
+ // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV
+ // modes are disabled in order from LSB to MSB for each BLOCK_SIZE.
+ int disable_inter_mode_mask[BLOCK_SIZES];
+
+ // This feature controls whether we do the expensive context update and
+ // calculation in the rd coefficient costing loop.
+ int use_fast_coef_costing;
+
+ // This variable controls the maximum block size where intra blocks can be
+ // used in inter frames.
+ // TODO(aconverse): Fold this into one of the other many mode skips
+ BLOCK_SIZE max_intra_bsize;
+} SPEED_FEATURES;
+
+struct VP9_COMP;
+
+void vp9_set_speed_features(struct VP9_COMP *cpi);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP9_ENCODER_VP9_SPEED_FEATURES_H_
+
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index eba7bc682..2b5639dcf 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -15,15 +15,23 @@
void vp9_init_layer_context(VP9_COMP *const cpi) {
const VP9_CONFIG *const oxcf = &cpi->oxcf;
- int temporal_layer = 0;
+ int layer;
+ int layer_end;
+
cpi->svc.spatial_layer_id = 0;
cpi->svc.temporal_layer_id = 0;
- for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers;
- ++temporal_layer) {
- LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
+
+ if (cpi->svc.number_temporal_layers > 1) {
+ layer_end = cpi->svc.number_temporal_layers;
+ } else {
+ layer_end = cpi->svc.number_spatial_layers;
+ }
+
+ for (layer = 0; layer < layer_end; ++layer) {
+ LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer];
RATE_CONTROL *const lrc = &lc->rc;
+ lc->current_video_frame_in_layer = 0;
lrc->avg_frame_qindex[INTER_FRAME] = q_trans[oxcf->worst_allowed_q];
- lrc->last_q[INTER_FRAME] = q_trans[oxcf->worst_allowed_q];
lrc->ni_av_qi = q_trans[oxcf->worst_allowed_q];
lrc->total_actual_bits = 0;
lrc->total_target_vs_actual = 0;
@@ -35,11 +43,19 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
lrc->decimation_factor = 0;
lrc->rate_correction_factor = 1.0;
lrc->key_frame_rate_correction_factor = 1.0;
- lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] *
- 1000;
- lrc->buffer_level =
- vp9_rescale((int)(oxcf->starting_buffer_level),
- lc->target_bandwidth, 1000);
+
+ if (cpi->svc.number_temporal_layers > 1) {
+ lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000;
+ lrc->last_q[INTER_FRAME] = q_trans[oxcf->worst_allowed_q];
+ } else {
+ lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000;
+ lrc->last_q[0] = q_trans[oxcf->best_allowed_q];
+ lrc->last_q[1] = q_trans[oxcf->best_allowed_q];
+ lrc->last_q[2] = q_trans[oxcf->best_allowed_q];
+ }
+
+ lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level),
+ lc->target_bandwidth, 1000);
lrc->bits_off_target = lrc->buffer_level;
}
}
@@ -49,14 +65,26 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi,
const int target_bandwidth) {
const VP9_CONFIG *const oxcf = &cpi->oxcf;
const RATE_CONTROL *const rc = &cpi->rc;
- int temporal_layer = 0;
+ int layer;
+ int layer_end;
float bitrate_alloc = 1.0;
- for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers;
- ++temporal_layer) {
- LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
+
+ if (cpi->svc.number_temporal_layers > 1) {
+ layer_end = cpi->svc.number_temporal_layers;
+ } else {
+ layer_end = cpi->svc.number_spatial_layers;
+ }
+
+ for (layer = 0; layer < layer_end; ++layer) {
+ LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer];
RATE_CONTROL *const lrc = &lc->rc;
- lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * 1000;
- bitrate_alloc = (float)lc->target_bandwidth / (float)target_bandwidth;
+
+ if (cpi->svc.number_temporal_layers > 1) {
+ lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000;
+ } else {
+ lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000;
+ }
+ bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth;
// Update buffer-related quantities.
lc->starting_buffer_level =
(int64_t)(oxcf->starting_buffer_level * bitrate_alloc);
@@ -67,7 +95,11 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi,
lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size);
lrc->buffer_level = MIN(lrc->buffer_level, lc->maximum_buffer_size);
// Update framerate-related quantities.
- lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer];
+ if (cpi->svc.number_temporal_layers > 1) {
+ lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer];
+ } else {
+ lc->framerate = oxcf->framerate;
+ }
lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
lrc->max_frame_bandwidth = rc->max_frame_bandwidth;
// Update qp-related quantities.
@@ -76,34 +108,69 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi,
}
}
-void vp9_update_layer_framerate(VP9_COMP *const cpi) {
- int temporal_layer = cpi->svc.temporal_layer_id;
+static LAYER_CONTEXT *get_layer_context(SVC *svc) {
+ return svc->number_temporal_layers > 1 ?
+ &svc->layer_context[svc->temporal_layer_id] :
+ &svc->layer_context[svc->spatial_layer_id];
+}
+
+void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) {
+ const int layer = cpi->svc.temporal_layer_id;
const VP9_CONFIG *const oxcf = &cpi->oxcf;
- LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
+ LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc);
RATE_CONTROL *const lrc = &lc->rc;
- lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer];
+
+ lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer];
lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth;
// Update the average layer frame size (non-cumulative per-frame-bw).
- if (temporal_layer == 0) {
+ if (layer == 0) {
lc->avg_frame_size = lrc->av_per_frame_bandwidth;
} else {
- double prev_layer_framerate = oxcf->framerate /
- oxcf->ts_rate_decimator[temporal_layer - 1];
- int prev_layer_target_bandwidth =
- oxcf->ts_target_bitrate[temporal_layer - 1] * 1000;
+ const double prev_layer_framerate =
+ oxcf->framerate / oxcf->ts_rate_decimator[layer - 1];
+ const int prev_layer_target_bandwidth =
+ oxcf->ts_target_bitrate[layer - 1] * 1000;
lc->avg_frame_size =
(int)((lc->target_bandwidth - prev_layer_target_bandwidth) /
(lc->framerate - prev_layer_framerate));
}
}
+void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) {
+ const VP9_CONFIG *const oxcf = &cpi->oxcf;
+ LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc);
+ RATE_CONTROL *const lrc = &lc->rc;
+
+ lc->framerate = framerate;
+ lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
+ lrc->min_frame_bandwidth = (int)(lrc->av_per_frame_bandwidth *
+ oxcf->two_pass_vbrmin_section / 100);
+ lrc->max_frame_bandwidth = (int)(((int64_t)lrc->av_per_frame_bandwidth *
+ oxcf->two_pass_vbrmax_section) / 100);
+ lrc->max_gf_interval = 16;
+
+ lrc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1;
+
+ if (oxcf->play_alternate && oxcf->lag_in_frames) {
+ if (lrc->max_gf_interval > oxcf->lag_in_frames - 1)
+ lrc->max_gf_interval = oxcf->lag_in_frames - 1;
+
+ if (lrc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
+ lrc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
+ }
+
+ if (lrc->max_gf_interval > lrc->static_scene_max_gf_interval)
+ lrc->max_gf_interval = lrc->static_scene_max_gf_interval;
+}
+
void vp9_restore_layer_context(VP9_COMP *const cpi) {
- int temporal_layer = cpi->svc.temporal_layer_id;
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
- int frame_since_key = cpi->rc.frames_since_key;
- int frame_to_key = cpi->rc.frames_to_key;
+ LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc);
+ const int old_frame_since_key = cpi->rc.frames_since_key;
+ const int old_frame_to_key = cpi->rc.frames_to_key;
+
cpi->rc = lc->rc;
+ cpi->twopass = lc->twopass;
cpi->oxcf.target_bandwidth = lc->target_bandwidth;
cpi->oxcf.starting_buffer_level = lc->starting_buffer_level;
cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level;
@@ -111,17 +178,35 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) {
cpi->output_framerate = lc->framerate;
// Reset the frames_since_key and frames_to_key counters to their values
// before the layer restore. Keep these defined for the stream (not layer).
- cpi->rc.frames_since_key = frame_since_key;
- cpi->rc.frames_to_key = frame_to_key;
+ if (cpi->svc.number_temporal_layers > 1) {
+ cpi->rc.frames_since_key = old_frame_since_key;
+ cpi->rc.frames_to_key = old_frame_to_key;
+ }
}
void vp9_save_layer_context(VP9_COMP *const cpi) {
- int temporal_layer = cpi->svc.temporal_layer_id;
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
+ const VP9_CONFIG *const oxcf = &cpi->oxcf;
+ LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc);
+
lc->rc = cpi->rc;
- lc->target_bandwidth = (int)cpi->oxcf.target_bandwidth;
- lc->starting_buffer_level = cpi->oxcf.starting_buffer_level;
- lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level;
- lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size;
+ lc->twopass = cpi->twopass;
+ lc->target_bandwidth = (int)oxcf->target_bandwidth;
+ lc->starting_buffer_level = oxcf->starting_buffer_level;
+ lc->optimal_buffer_level = oxcf->optimal_buffer_level;
+ lc->maximum_buffer_size = oxcf->maximum_buffer_size;
lc->framerate = cpi->output_framerate;
}
+
+void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) {
+ int i;
+ for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
+ struct twopass_rc *const twopass = &cpi->svc.layer_context[i].twopass;
+
+ cpi->svc.spatial_layer_id = i;
+ vp9_init_second_pass(cpi);
+
+ twopass->total_stats.spatial_layer_id = i;
+ twopass->total_left_stats.spatial_layer_id = i;
+ }
+ cpi->svc.spatial_layer_id = 0;
+}
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index c7a4c065c..e859a2fd5 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -27,6 +27,9 @@ typedef struct {
int64_t maximum_buffer_size;
double framerate;
int avg_frame_size;
+ struct twopass_rc twopass;
+ struct vpx_fixed_buf rc_twopass_stats_in;
+ unsigned int current_video_frame_in_layer;
} LAYER_CONTEXT;
typedef struct {
@@ -34,9 +37,10 @@ typedef struct {
int temporal_layer_id;
int number_spatial_layers;
int number_temporal_layers;
- // Layer context used for rate control in CBR mode, only defined for
- // temporal layers for now.
- LAYER_CONTEXT layer_context[VPX_TS_MAX_LAYERS];
+ // Layer context used for rate control in one pass temporal CBR mode or
+ // two pass spatial mode. Defined for temporal or spatial layers for now.
+ // Does not support temporal combined with spatial RC.
+ LAYER_CONTEXT layer_context[MAX(VPX_TS_MAX_LAYERS, VPX_SS_MAX_LAYERS)];
} SVC;
struct VP9_COMP;
@@ -49,8 +53,12 @@ void vp9_update_layer_context_change_config(struct VP9_COMP *const cpi,
const int target_bandwidth);
// Prior to encoding the frame, update framerate-related quantities
-// for the current layer.
-void vp9_update_layer_framerate(struct VP9_COMP *const cpi);
+// for the current temporal layer.
+void vp9_update_temporal_layer_framerate(struct VP9_COMP *const cpi);
+
+// Update framerate-related quantities for the current spatial layer.
+void vp9_update_spatial_layer_framerate(struct VP9_COMP *const cpi,
+ double framerate);
// Prior to encoding the frame, set the layer context, for the current layer
// to be encoded, to the cpi struct.
@@ -59,6 +67,9 @@ void vp9_restore_layer_context(struct VP9_COMP *const cpi);
// Save the layer context after encoding the frame.
void vp9_save_layer_context(struct VP9_COMP *const cpi);
+// Initialize second pass rc for spatial svc.
+void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index bb5f1c23b..b6211e5fa 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -108,7 +108,7 @@ void vp9_coef_tree_initialize() {
vp9_tokens_from_tree(vp9_coef_encodings, vp9_coef_tree);
}
-static void fill_value_tokens() {
+void vp9_tokenize_initialize() {
TOKENVALUE *const t = dct_value_tokens + DCT_MAX_VALUE;
const vp9_extra_bit *const e = vp9_extra_bits;
@@ -162,7 +162,6 @@ struct tokenize_b_args {
VP9_COMP *cpi;
MACROBLOCKD *xd;
TOKENEXTRA **tp;
- uint8_t *token_cache;
};
static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -213,7 +212,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
VP9_COMP *cpi = args->cpi;
MACROBLOCKD *xd = args->xd;
TOKENEXTRA **tp = args->tp;
- uint8_t *token_cache = args->token_cache;
+ uint8_t token_cache[32 * 32];
struct macroblock_plane *p = &cpi->mb.plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
@@ -315,7 +314,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run,
const int ctx = vp9_get_skip_context(xd);
const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id,
SEG_LVL_SKIP);
- struct tokenize_b_args arg = {cpi, xd, t, cpi->mb.token_cache};
+ struct tokenize_b_args arg = {cpi, xd, t};
if (mbmi->skip) {
if (!dry_run)
cm->counts.skip[ctx][1] += skip_inc;
@@ -333,7 +332,3 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run,
*t = t_backup;
}
}
-
-void vp9_tokenize_initialize() {
- fill_value_tokens();
-}
diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c
index 8bc385089..996f730ef 100644
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -216,7 +216,7 @@ unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 33, 64, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
- comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
+ vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
}
@@ -273,7 +273,7 @@ unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 65, 32, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
- comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32);
+ vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32);
return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
}
@@ -330,7 +330,7 @@ unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 17, 32, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
- comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32);
+ vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32);
return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
}
@@ -387,7 +387,7 @@ unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 33, 16, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
- comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16);
+ vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16);
return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
}
@@ -614,7 +614,7 @@ unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
// Now filter Verticaly
var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter);
- comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
+ vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
}
@@ -658,7 +658,7 @@ unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 9, 8, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
- comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
+ vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
}
@@ -703,7 +703,7 @@ unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
1, 17, 16, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);
- comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16);
+ vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16);
return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
}
@@ -747,7 +747,7 @@ unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 65, 64, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
- comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64);
+ vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64);
return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
}
@@ -791,7 +791,7 @@ unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 33, 32, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
- comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32);
+ vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32);
return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
}
@@ -955,7 +955,7 @@ unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 9, 16, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
- comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16);
+ vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16);
return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
}
@@ -999,7 +999,7 @@ unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 17, 8, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
- comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
+ vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
}
@@ -1043,7 +1043,7 @@ unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 5, 8, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
- comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
+ vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
}
@@ -1089,6 +1089,23 @@ unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 9, 4, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
- comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
+ vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
}
+
+
+void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
+ int height, const uint8_t *ref, int ref_stride) {
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++) {
+ int tmp;
+ tmp = pred[j] + ref[j];
+ comp_pred[j] = (tmp + 1) >> 1;
+ }
+ comp_pred += width;
+ pred += width;
+ ref += ref_stride;
+ }
+}
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index 3bc2091f8..62e20dc00 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -100,21 +100,9 @@ typedef struct vp9_variance_vtable {
vp9_sad_multi_d_fn_t sdx4df;
} vp9_variance_fn_ptr_t;
-static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
- int height, const uint8_t *ref, int ref_stride) {
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- int tmp;
- tmp = pred[j] + ref[j];
- comp_pred[j] = (tmp + 1) >> 1;
- }
- comp_pred += width;
- pred += width;
- ref += ref_stride;
- }
-}
+void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
+ int height, const uint8_t *ref, int ref_stride);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_write_bit_buffer.c b/vp9/encoder/vp9_write_bit_buffer.c
new file mode 100644
index 000000000..962d0ca56
--- /dev/null
+++ b/vp9/encoder/vp9_write_bit_buffer.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/encoder/vp9_write_bit_buffer.h"
+
+size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb) {
+ return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0);
+}
+
+void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) {
+ const int off = (int)wb->bit_offset;
+ const int p = off / CHAR_BIT;
+ const int q = CHAR_BIT - 1 - off % CHAR_BIT;
+ if (q == CHAR_BIT -1) {
+ wb->bit_buffer[p] = bit << q;
+ } else {
+ wb->bit_buffer[p] &= ~(1 << q);
+ wb->bit_buffer[p] |= bit << q;
+ }
+ wb->bit_offset = off + 1;
+}
+
+void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, int data, int bits) {
+ int bit;
+ for (bit = bits - 1; bit >= 0; bit--)
+ vp9_wb_write_bit(wb, (data >> bit) & 1);
+}
diff --git a/vp9/encoder/vp9_write_bit_buffer.h b/vp9/encoder/vp9_write_bit_buffer.h
index 1795e05e4..073608d7f 100644
--- a/vp9/encoder/vp9_write_bit_buffer.h
+++ b/vp9/encoder/vp9_write_bit_buffer.h
@@ -24,29 +24,11 @@ struct vp9_write_bit_buffer {
size_t bit_offset;
};
-static size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb) {
- return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0);
-}
-
-static void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) {
- const int off = (int)wb->bit_offset;
- const int p = off / CHAR_BIT;
- const int q = CHAR_BIT - 1 - off % CHAR_BIT;
- if (q == CHAR_BIT -1) {
- wb->bit_buffer[p] = bit << q;
- } else {
- wb->bit_buffer[p] &= ~(1 << q);
- wb->bit_buffer[p] |= bit << q;
- }
- wb->bit_offset = off + 1;
-}
-
-static void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb,
- int data, int bits) {
- int bit;
- for (bit = bits - 1; bit >= 0; bit--)
- vp9_wb_write_bit(wb, (data >> bit) & 1);
-}
+size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb);
+
+void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit);
+
+void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, int data, int bits);
#ifdef __cplusplus
diff --git a/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c b/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
new file mode 100644
index 000000000..f31b176e5
--- /dev/null
+++ b/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <immintrin.h> // AVX2
+#include "vpx/vpx_integer.h"
+
+void vp9_sad32x32x4d_avx2(uint8_t *src,
+ int src_stride,
+ uint8_t *ref[4],
+ int ref_stride,
+ unsigned int res[4]) {
+ __m256i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg;
+ __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3;
+ __m256i sum_mlow, sum_mhigh;
+ int i;
+ uint8_t *ref0, *ref1, *ref2, *ref3;
+
+ ref0 = ref[0];
+ ref1 = ref[1];
+ ref2 = ref[2];
+ ref3 = ref[3];
+ sum_ref0 = _mm256_set1_epi16(0);
+ sum_ref1 = _mm256_set1_epi16(0);
+ sum_ref2 = _mm256_set1_epi16(0);
+ sum_ref3 = _mm256_set1_epi16(0);
+ for (i = 0; i < 32 ; i++) {
+ // load src and all refs
+ src_reg = _mm256_load_si256((__m256i *)(src));
+ ref0_reg = _mm256_loadu_si256((__m256i *) (ref0));
+ ref1_reg = _mm256_loadu_si256((__m256i *) (ref1));
+ ref2_reg = _mm256_loadu_si256((__m256i *) (ref2));
+ ref3_reg = _mm256_loadu_si256((__m256i *) (ref3));
+ // sum of the absolute differences between every ref-i to src
+ ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg);
+ ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg);
+ ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg);
+ ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg);
+ // sum every ref-i
+ sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg);
+ sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg);
+ sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg);
+ sum_ref3 = _mm256_add_epi32(sum_ref3, ref3_reg);
+
+ src+= src_stride;
+ ref0+= ref_stride;
+ ref1+= ref_stride;
+ ref2+= ref_stride;
+ ref3+= ref_stride;
+ }
+ {
+ __m128i sum;
+ // in sum_ref-i the result is saved in the first 4 bytes
+ // the other 4 bytes are zeroed.
+ // sum_ref1 and sum_ref3 are shifted left by 4 bytes
+ sum_ref1 = _mm256_slli_si256(sum_ref1, 4);
+ sum_ref3 = _mm256_slli_si256(sum_ref3, 4);
+
+ // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3
+ sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1);
+ sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3);
+
+ // merge every 64 bit from each sum_ref-i
+ sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2);
+ sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2);
+
+ // add the low 64 bit to the high 64 bit
+ sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh);
+
+ // add the low 128 bit to the high 128 bit
+ sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow),
+ _mm256_extractf128_si256(sum_mlow, 1));
+
+ _mm_storeu_si128((__m128i *)(res), sum);
+ }
+}
+
+void vp9_sad64x64x4d_avx2(uint8_t *src,
+ int src_stride,
+ uint8_t *ref[4],
+ int ref_stride,
+ unsigned int res[4]) {
+ __m256i src_reg, srcnext_reg, ref0_reg, ref0next_reg;
+ __m256i ref1_reg, ref1next_reg, ref2_reg, ref2next_reg;
+ __m256i ref3_reg, ref3next_reg;
+ __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3;
+ __m256i sum_mlow, sum_mhigh;
+ int i;
+ uint8_t *ref0, *ref1, *ref2, *ref3;
+
+ ref0 = ref[0];
+ ref1 = ref[1];
+ ref2 = ref[2];
+ ref3 = ref[3];
+ sum_ref0 = _mm256_set1_epi16(0);
+ sum_ref1 = _mm256_set1_epi16(0);
+ sum_ref2 = _mm256_set1_epi16(0);
+ sum_ref3 = _mm256_set1_epi16(0);
+ for (i = 0; i < 64 ; i++) {
+ // load 64 bytes from src and all refs
+ src_reg = _mm256_load_si256((__m256i *)(src));
+ srcnext_reg = _mm256_load_si256((__m256i *)(src + 32));
+ ref0_reg = _mm256_loadu_si256((__m256i *) (ref0));
+ ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32));
+ ref1_reg = _mm256_loadu_si256((__m256i *) (ref1));
+ ref1next_reg = _mm256_loadu_si256((__m256i *) (ref1 + 32));
+ ref2_reg = _mm256_loadu_si256((__m256i *) (ref2));
+ ref2next_reg = _mm256_loadu_si256((__m256i *) (ref2 + 32));
+ ref3_reg = _mm256_loadu_si256((__m256i *) (ref3));
+ ref3next_reg = _mm256_loadu_si256((__m256i *) (ref3 + 32));
+ // sum of the absolute differences between every ref-i to src
+ ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg);
+ ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg);
+ ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg);
+ ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg);
+ ref0next_reg = _mm256_sad_epu8(ref0next_reg, srcnext_reg);
+ ref1next_reg = _mm256_sad_epu8(ref1next_reg, srcnext_reg);
+ ref2next_reg = _mm256_sad_epu8(ref2next_reg, srcnext_reg);
+ ref3next_reg = _mm256_sad_epu8(ref3next_reg, srcnext_reg);
+
+ // sum every ref-i
+ sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg);
+ sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg);
+ sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg);
+ sum_ref3 = _mm256_add_epi32(sum_ref3, ref3_reg);
+ sum_ref0 = _mm256_add_epi32(sum_ref0, ref0next_reg);
+ sum_ref1 = _mm256_add_epi32(sum_ref1, ref1next_reg);
+ sum_ref2 = _mm256_add_epi32(sum_ref2, ref2next_reg);
+ sum_ref3 = _mm256_add_epi32(sum_ref3, ref3next_reg);
+ src+= src_stride;
+ ref0+= ref_stride;
+ ref1+= ref_stride;
+ ref2+= ref_stride;
+ ref3+= ref_stride;
+ }
+ {
+ __m128i sum;
+
+ // in sum_ref-i the result is saved in the first 4 bytes
+ // the other 4 bytes are zeroed.
+ // sum_ref1 and sum_ref3 are shifted left by 4 bytes
+ sum_ref1 = _mm256_slli_si256(sum_ref1, 4);
+ sum_ref3 = _mm256_slli_si256(sum_ref3, 4);
+
+ // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3
+ sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1);
+ sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3);
+
+ // merge every 64 bit from each sum_ref-i
+ sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2);
+ sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2);
+
+ // add the low 64 bit to the high 64 bit
+ sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh);
+
+ // add the low 128 bit to the high 128 bit
+ sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow),
+ _mm256_extractf128_si256(sum_mlow, 1));
+
+ _mm_storeu_si128((__m128i *)(res), sum);
+ }
+}
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index d3097e526..09e6b6781 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -152,10 +152,6 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK_HI(cfg, rc_max_quantizer, 63);
RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer);
RANGE_CHECK_BOOL(extra_cfg, lossless);
- if (extra_cfg->lossless) {
- RANGE_CHECK_HI(cfg, rc_max_quantizer, 0);
- RANGE_CHECK_HI(cfg, rc_min_quantizer, 0);
- }
RANGE_CHECK(extra_cfg, aq_mode, 0, AQ_MODE_COUNT - 1);
RANGE_CHECK_HI(cfg, g_threads, 64);
@@ -225,14 +221,43 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
if (cfg->rc_twopass_stats_in.sz % packet_sz)
ERROR("rc_twopass_stats_in.sz indicates truncated packet.");
- if (cfg->rc_twopass_stats_in.sz < 2 * packet_sz)
- ERROR("rc_twopass_stats_in requires at least two packets.");
+ if (cfg->ss_number_layers > 1) {
+ int i;
+ unsigned int n_packets_per_layer[VPX_SS_MAX_LAYERS] = {0};
+
+ stats = cfg->rc_twopass_stats_in.buf;
+ for (i = 0; i < n_packets; ++i) {
+ const int layer_id = (int)stats[i].spatial_layer_id;
+ if (layer_id >= 0 && layer_id < (int)cfg->ss_number_layers) {
+ ++n_packets_per_layer[layer_id];
+ }
+ }
+
+ for (i = 0; i < (int)cfg->ss_number_layers; ++i) {
+ unsigned int layer_id;
+ if (n_packets_per_layer[i] < 2) {
+ ERROR("rc_twopass_stats_in requires at least two packets for each "
+ "layer.");
+ }
+
+ stats = (const FIRSTPASS_STATS *)cfg->rc_twopass_stats_in.buf +
+ n_packets - cfg->ss_number_layers + i;
+ layer_id = (int)stats->spatial_layer_id;
+
+ if (layer_id >= cfg->ss_number_layers
+ ||(int)(stats->count + 0.5) != n_packets_per_layer[layer_id] - 1)
+ ERROR("rc_twopass_stats_in missing EOS stats packet");
+ }
+ } else {
+ if (cfg->rc_twopass_stats_in.sz < 2 * packet_sz)
+ ERROR("rc_twopass_stats_in requires at least two packets.");
- stats =
- (const FIRSTPASS_STATS *)cfg->rc_twopass_stats_in.buf + n_packets - 1;
+ stats =
+ (const FIRSTPASS_STATS *)cfg->rc_twopass_stats_in.buf + n_packets - 1;
- if ((int)(stats->count + 0.5) != n_packets - 1)
- ERROR("rc_twopass_stats_in missing EOS stats packet");
+ if ((int)(stats->count + 0.5) != n_packets - 1)
+ ERROR("rc_twopass_stats_in missing EOS stats packet");
+ }
}
return VPX_CODEC_OK;
@@ -807,48 +832,25 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx,
if (cpi->droppable)
pkt.data.frame.flags |= VPX_FRAME_IS_DROPPABLE;
- /*if (cpi->output_partition)
- {
- int i;
- const int num_partitions = 1;
-
- pkt.data.frame.flags |= VPX_FRAME_IS_FRAGMENT;
-
- for (i = 0; i < num_partitions; ++i)
- {
- pkt.data.frame.buf = cx_data;
- pkt.data.frame.sz = cpi->partition_sz[i];
- pkt.data.frame.partition_id = i;
- // don't set the fragment bit for the last partition
- if (i == (num_partitions - 1))
- pkt.data.frame.flags &= ~VPX_FRAME_IS_FRAGMENT;
- vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
- cx_data += cpi->partition_sz[i];
- cx_data_sz -= cpi->partition_sz[i];
- }
- }
- else*/
- {
- if (ctx->pending_cx_data) {
- ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
- ctx->pending_frame_magnitude |= size;
- ctx->pending_cx_data_sz += size;
- size += write_superframe_index(ctx);
- pkt.data.frame.buf = ctx->pending_cx_data;
- pkt.data.frame.sz = ctx->pending_cx_data_sz;
- ctx->pending_cx_data = NULL;
- ctx->pending_cx_data_sz = 0;
- ctx->pending_frame_count = 0;
- ctx->pending_frame_magnitude = 0;
- } else {
- pkt.data.frame.buf = cx_data;
- pkt.data.frame.sz = size;
- }
- pkt.data.frame.partition_id = -1;
- vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
- cx_data += size;
- cx_data_sz -= size;
+ if (ctx->pending_cx_data) {
+ ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
+ ctx->pending_frame_magnitude |= size;
+ ctx->pending_cx_data_sz += size;
+ size += write_superframe_index(ctx);
+ pkt.data.frame.buf = ctx->pending_cx_data;
+ pkt.data.frame.sz = ctx->pending_cx_data_sz;
+ ctx->pending_cx_data = NULL;
+ ctx->pending_cx_data_sz = 0;
+ ctx->pending_frame_count = 0;
+ ctx->pending_frame_magnitude = 0;
+ } else {
+ pkt.data.frame.buf = cx_data;
+ pkt.data.frame.sz = size;
}
+ pkt.data.frame.partition_id = -1;
+ vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
+ cx_data += size;
+ cx_data_sz -= size;
}
}
}
@@ -963,8 +965,8 @@ static vpx_codec_err_t vp9e_update_entropy(vpx_codec_alg_priv_t *ctx,
static vpx_codec_err_t vp9e_update_reference(vpx_codec_alg_priv_t *ctx,
int ctr_id,
va_list args) {
- const int update = va_arg(args, int);
- vp9_update_reference(ctx->cpi, update);
+ const int ref_frame_flags = va_arg(args, int);
+ vp9_update_reference(ctx->cpi, ref_frame_flags);
return VPX_CODEC_OK;
}
@@ -1017,9 +1019,12 @@ static vpx_codec_err_t vp9e_set_svc(vpx_codec_alg_priv_t *ctx, int ctr_id,
va_list args) {
int data = va_arg(args, int);
vp9_set_svc(ctx->cpi, data);
- // CBR mode for SVC with both temporal and spatial layers not yet supported.
+ // CBR or two pass mode for SVC with both temporal and spatial layers
+ // not yet supported.
if (data == 1 &&
- ctx->cfg.rc_end_usage == VPX_CBR &&
+ (ctx->cfg.rc_end_usage == VPX_CBR ||
+ ctx->cfg.g_pass == VPX_RC_FIRST_PASS ||
+ ctx->cfg.g_pass == VPX_RC_LAST_PASS) &&
ctx->cfg.ss_number_layers > 1 &&
ctx->cfg.ts_number_layers > 1) {
return VPX_CODEC_INVALID_PARAM;
@@ -1176,8 +1181,7 @@ static vpx_codec_enc_cfg_map_t vp9e_usage_cfg_map[] = {
CODEC_INTERFACE(vpx_codec_vp9_cx) = {
"WebM Project VP9 Encoder" VERSION_STRING,
VPX_CODEC_INTERNAL_ABI_VERSION,
- VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR |
- VPX_CODEC_CAP_OUTPUT_PARTITION,
+ VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR,
/* vpx_codec_caps_t caps; */
vp9e_init, /* vpx_codec_init_fn_t init; */
vp9e_destroy, /* vpx_codec_destroy_fn_t destroy; */
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 72701d9d1..271589c6b 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -132,7 +132,8 @@ static vpx_codec_err_t vp9_init(vpx_codec_ctx_t *ctx,
static vpx_codec_err_t vp9_destroy(vpx_codec_alg_priv_t *ctx) {
int i;
- vp9_remove_decompressor(ctx->pbi);
+ if (ctx->pbi)
+ vp9_remove_decompressor(ctx->pbi);
for (i = NELEMENTS(ctx->mmaps) - 1; i >= 0; i--) {
if (ctx->mmaps[i].dtor)
@@ -280,7 +281,6 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
oxcf.width = ctx->si.w;
oxcf.height = ctx->si.h;
oxcf.version = 9;
- oxcf.postprocess = 0;
oxcf.max_threads = ctx->cfg.threads;
oxcf.inv_tile_order = ctx->invert_tile_order;
optr = vp9_create_decompressor(&oxcf);
@@ -325,6 +325,8 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
}
if (!res && ctx->pbi) {
+ VP9D_COMP *const pbi = ctx->pbi;
+ VP9_COMMON *const cm = &pbi->common;
YV12_BUFFER_CONFIG sd;
int64_t time_stamp = 0, time_end_stamp = 0;
vp9_ppflags_t flags = {0};
@@ -349,15 +351,11 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
#endif
}
- if (vp9_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) {
- VP9D_COMP *pbi = (VP9D_COMP*)ctx->pbi;
- res = update_error_state(ctx, &pbi->common.error);
- }
+ if (vp9_receive_compressed_data(pbi, data_sz, data, deadline))
+ res = update_error_state(ctx, &cm->error);
- if (!res && 0 == vp9_get_raw_frame(ctx->pbi, &sd, &time_stamp,
+ if (!res && 0 == vp9_get_raw_frame(pbi, &sd, &time_stamp,
&time_end_stamp, &flags)) {
- VP9D_COMP *const pbi = (VP9D_COMP*)ctx->pbi;
- VP9_COMMON *const cm = &pbi->common;
yuvconfig2image(&ctx->img, &sd, user_priv);
ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv;
@@ -569,14 +567,14 @@ static vpx_codec_err_t vp9_xma_set_mmap(vpx_codec_ctx_t *ctx,
static vpx_codec_err_t set_reference(vpx_codec_alg_priv_t *ctx, int ctr_id,
va_list args) {
- vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *);
+ vpx_ref_frame_t *const data = va_arg(args, vpx_ref_frame_t *);
if (data) {
- vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data;
+ vpx_ref_frame_t *const frame = (vpx_ref_frame_t *)data;
YV12_BUFFER_CONFIG sd;
image2yuvconfig(&frame->img, &sd);
- return vp9_set_reference_dec(ctx->pbi,
+ return vp9_set_reference_dec(&ctx->pbi->common,
(VP9_REFFRAME)frame->frame_type, &sd);
} else {
return VPX_CODEC_INVALID_PARAM;
@@ -654,12 +652,13 @@ static vpx_codec_err_t set_dbg_options(vpx_codec_alg_priv_t *ctx, int ctrl_id,
static vpx_codec_err_t get_last_ref_updates(vpx_codec_alg_priv_t *ctx,
int ctrl_id, va_list args) {
- int *update_info = va_arg(args, int *);
- VP9D_COMP *pbi = (VP9D_COMP*)ctx->pbi;
+ int *const update_info = va_arg(args, int *);
if (update_info) {
- *update_info = pbi->refresh_frame_flags;
-
+ if (ctx->pbi)
+ *update_info = ctx->pbi->refresh_frame_flags;
+ else
+ return VPX_CODEC_ERROR;
return VPX_CODEC_OK;
} else {
return VPX_CODEC_INVALID_PARAM;
@@ -672,9 +671,8 @@ static vpx_codec_err_t get_frame_corrupted(vpx_codec_alg_priv_t *ctx,
int *corrupted = va_arg(args, int *);
if (corrupted) {
- VP9D_COMP *pbi = (VP9D_COMP*)ctx->pbi;
- if (pbi)
- *corrupted = pbi->common.frame_to_show->corrupted;
+ if (ctx->pbi)
+ *corrupted = ctx->pbi->common.frame_to_show->corrupted;
else
return VPX_CODEC_ERROR;
return VPX_CODEC_OK;
@@ -688,10 +686,10 @@ static vpx_codec_err_t get_display_size(vpx_codec_alg_priv_t *ctx,
int *const display_size = va_arg(args, int *);
if (display_size) {
- const VP9D_COMP *const pbi = (VP9D_COMP*)ctx->pbi;
- if (pbi) {
- display_size[0] = pbi->common.display_width;
- display_size[1] = pbi->common.display_height;
+ if (ctx->pbi) {
+ const VP9_COMMON *const cm = &ctx->pbi->common;
+ display_size[0] = cm->display_width;
+ display_size[1] = cm->display_height;
} else {
return VPX_CODEC_ERROR;
}
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index b14e7e5ce..21a388e51 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -30,6 +30,7 @@ VP9_CX_SRCS-yes += encoder/vp9_firstpass.c
VP9_CX_SRCS-yes += encoder/vp9_block.h
VP9_CX_SRCS-yes += encoder/vp9_writer.h
VP9_CX_SRCS-yes += encoder/vp9_writer.c
+VP9_CX_SRCS-yes += encoder/vp9_write_bit_buffer.c
VP9_CX_SRCS-yes += encoder/vp9_write_bit_buffer.h
VP9_CX_SRCS-yes += encoder/vp9_bitstream.h
VP9_CX_SRCS-yes += encoder/vp9_encodemb.h
@@ -59,6 +60,8 @@ VP9_CX_SRCS-yes += encoder/vp9_pickmode.c
VP9_CX_SRCS-yes += encoder/vp9_sad.c
VP9_CX_SRCS-yes += encoder/vp9_segmentation.c
VP9_CX_SRCS-yes += encoder/vp9_segmentation.h
+VP9_CX_SRCS-yes += encoder/vp9_speed_features.c
+VP9_CX_SRCS-yes += encoder/vp9_speed_features.h
VP9_CX_SRCS-yes += encoder/vp9_subexp.c
VP9_CX_SRCS-yes += encoder/vp9_subexp.h
VP9_CX_SRCS-yes += encoder/vp9_svc_layercontext.c
@@ -68,10 +71,12 @@ VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c
VP9_CX_SRCS-yes += encoder/vp9_tokenize.c
VP9_CX_SRCS-yes += encoder/vp9_treewriter.c
VP9_CX_SRCS-yes += encoder/vp9_variance.c
-VP9_CX_SRCS-yes += encoder/vp9_vaq.c
-VP9_CX_SRCS-yes += encoder/vp9_vaq.h
-VP9_CX_SRCS-yes += encoder/vp9_craq.c
-VP9_CX_SRCS-yes += encoder/vp9_craq.h
+VP9_CX_SRCS-yes += encoder/vp9_aq_variance.c
+VP9_CX_SRCS-yes += encoder/vp9_aq_variance.h
+VP9_CX_SRCS-yes += encoder/vp9_aq_cyclicrefresh.c
+VP9_CX_SRCS-yes += encoder/vp9_aq_cyclicrefresh.h
+VP9_CX_SRCS-yes += encoder/vp9_aq_complexity.c
+VP9_CX_SRCS-yes += encoder/vp9_aq_complexity.h
ifeq ($(CONFIG_VP9_POSTPROC),yes)
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.h
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.c
@@ -89,6 +94,7 @@ VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
+VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_sad4d_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance_impl_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm