summary | refs | log | tree | commit | diff
path: root/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'vp9')
-rw-r--r-- vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm 68
-rw-r--r-- vp9/common/vp9_blockd.h 4
-rw-r--r-- vp9/common/vp9_convolve.c 8
-rw-r--r-- vp9/common/vp9_convolve.h 2
-rw-r--r-- vp9/common/vp9_entropymode.c 44
-rw-r--r-- vp9/common/vp9_entropymode.h 18
-rw-r--r-- vp9/common/vp9_entropymv.h 2
-rw-r--r-- vp9/common/vp9_filter.h 2
-rw-r--r-- vp9/common/vp9_onyxc_int.h 20
-rw-r--r-- vp9/common/vp9_postproc.c 2
-rw-r--r-- vp9/common/vp9_pred_common.c 14
-rw-r--r-- vp9/common/vp9_reconintra.c 2
-rw-r--r-- vp9/common/vp9_rtcd_defs.sh 4
-rw-r--r-- vp9/common/vp9_scale.c 10
-rw-r--r-- vp9/common/vp9_scale.h 14
-rw-r--r-- vp9/common/vp9_subpelvar.h 4
-rw-r--r-- vp9/decoder/vp9_dboolhuff.c 14
-rw-r--r-- vp9/decoder/vp9_dboolhuff.h 4
-rw-r--r-- vp9/decoder/vp9_decodemv.c 42
-rw-r--r-- vp9/decoder/vp9_decodframe.c 20
-rw-r--r-- vp9/encoder/vp9_bitstream.c 78
-rw-r--r-- vp9/encoder/vp9_block.h 8
-rw-r--r-- vp9/encoder/vp9_encodeframe.c 20
-rw-r--r-- vp9/encoder/vp9_encodemv.c 18
-rw-r--r-- vp9/encoder/vp9_modecosts.c 10
-rw-r--r-- vp9/encoder/vp9_onyx_if.c 54
-rw-r--r-- vp9/encoder/vp9_onyx_int.h 38
-rw-r--r-- vp9/encoder/vp9_rdopt.c 135
-rw-r--r-- vp9/encoder/vp9_variance_c.c 104
-rw-r--r-- vp9/encoder/x86/vp9_subpel_variance.asm 12
-rw-r--r-- vp9/encoder/x86/vp9_variance_impl_mmx.asm 16
-rw-r--r-- vp9/vp9_common.mk 1
32 files changed, 420 insertions, 372 deletions
diff --git a/vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm
new file mode 100644
index 000000000..869ee5f3f
--- /dev/null
+++ b/vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm
@@ -0,0 +1,68 @@
+;
+; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |vp9_short_idct4x4_1_add_neon|
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+;void vp9_short_idct4x4_1_add_neon(int16_t *input, uint8_t *dest,
+; int dest_stride)
+;
+; r0 int16_t *input
+; r1 uint8_t *dest
+; r2 int dest_stride
+
+|vp9_short_idct4x4_1_add_neon| PROC
+ ldrsh r0, [r0]
+
+ ; generate cospi_16_64 = 11585
+ mov r12, #0x2d00
+ add r12, #0x41
+
+ ; out = dct_const_round_shift(input[0] * cospi_16_64)
+ mul r0, r0, r12 ; input[0] * cospi_16_64
+ add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
+ asr r0, r0, #14 ; >> DCT_CONST_BITS
+
+ ; out = dct_const_round_shift(out * cospi_16_64)
+ mul r0, r0, r12 ; out * cospi_16_64
+ mov r12, r1 ; save dest
+ add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
+ asr r0, r0, #14 ; >> DCT_CONST_BITS
+
+ ; a1 = ROUND_POWER_OF_TWO(out, 4)
+ add r0, r0, #8 ; + (1 <<((4) - 1))
+ asr r0, r0, #4 ; >> 4
+
+ vdup.s16 q0, r0 ; duplicate a1
+
+ vld1.32 {d2[0]}, [r1], r2
+ vld1.32 {d2[1]}, [r1], r2
+ vld1.32 {d4[0]}, [r1], r2
+ vld1.32 {d4[1]}, [r1]
+
+ vaddw.u8 q8, q0, d2 ; dest[x] + a1
+ vaddw.u8 q9, q0, d4
+
+ vqmovun.s16 d6, q8 ; clip_pixel
+ vqmovun.s16 d7, q9
+
+ vst1.32 {d6[0]}, [r12], r2
+ vst1.32 {d6[1]}, [r12], r2
+ vst1.32 {d7[0]}, [r12], r2
+ vst1.32 {d7[1]}, [r12]
+
+ bx lr
+ ENDP ; |vp9_short_idct4x4_1_add_neon|
+
+ END
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index dfd152646..9a42ad9f1 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -89,9 +89,9 @@ static INLINE int is_inter_mode(MB_PREDICTION_MODE mode) {
return mode >= NEARESTMV && mode <= NEWMV;
}
-#define VP9_INTRA_MODES (TM_PRED + 1)
+#define INTRA_MODES (TM_PRED + 1)
-#define VP9_INTER_MODES (1 + NEWMV - NEARESTMV)
+#define INTER_MODES (1 + NEWMV - NEARESTMV)
static INLINE int inter_mode_offset(MB_PREDICTION_MODE mode) {
return (mode - NEARESTMV);
diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c
index 1d9684992..1e6cd4404 100644
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -49,7 +49,7 @@ static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
for (k = 0; k < taps; ++k)
sum += src[src_x + k] * filter_x[k];
- dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, VP9_FILTER_BITS));
+ dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
/* Move to the next source pixel */
x_q4 += x_step_q4;
@@ -91,7 +91,7 @@ static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
sum += src[src_x + k] * filter_x[k];
dst[x] = ROUND_POWER_OF_TWO(dst[x] +
- clip_pixel(ROUND_POWER_OF_TWO(sum, VP9_FILTER_BITS)), 1);
+ clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
/* Move to the next source pixel */
x_q4 += x_step_q4;
@@ -133,7 +133,7 @@ static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,
sum += src[(src_y + k) * src_stride] * filter_y[k];
dst[y * dst_stride] =
- clip_pixel(ROUND_POWER_OF_TWO(sum, VP9_FILTER_BITS));
+ clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
/* Move to the next source pixel */
y_q4 += y_step_q4;
@@ -175,7 +175,7 @@ static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
sum += src[(src_y + k) * src_stride] * filter_y[k];
dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
- clip_pixel(ROUND_POWER_OF_TWO(sum, VP9_FILTER_BITS)), 1);
+ clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
/* Move to the next source pixel */
y_q4 += y_step_q4;
diff --git a/vp9/common/vp9_convolve.h b/vp9/common/vp9_convolve.h
index 9522b78bc..13220e97e 100644
--- a/vp9/common/vp9_convolve.h
+++ b/vp9/common/vp9_convolve.h
@@ -13,7 +13,7 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
-#define VP9_FILTER_BITS 7
+#define FILTER_BITS 7
typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index 84d090c31..a75d1a9a4 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -14,8 +14,8 @@
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_seg_common.h"
-const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES]
- [VP9_INTRA_MODES - 1] = {
+const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES]
+ [INTRA_MODES - 1] = {
{ 144, 11, 54, 157, 195, 130, 46, 58, 108 } /* y = dc */,
{ 118, 15, 123, 148, 131, 101, 44, 93, 131 } /* y = v */,
{ 113, 12, 23, 188, 226, 142, 26, 32, 125 } /* y = h */,
@@ -29,15 +29,15 @@ const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES]
};
static const vp9_prob default_if_y_probs[BLOCK_SIZE_GROUPS]
- [VP9_INTRA_MODES - 1] = {
+ [INTRA_MODES - 1] = {
{ 65, 32, 18, 144, 162, 194, 41, 51, 98 } /* block_size < 8x8 */,
{ 132, 68, 18, 165, 217, 196, 45, 40, 78 } /* block_size < 16x16 */,
{ 173, 80, 19, 176, 240, 193, 64, 35, 46 } /* block_size < 32x32 */,
{ 221, 135, 38, 194, 248, 121, 96, 85, 29 } /* block_size >= 32x32 */
};
-static const vp9_prob default_if_uv_probs[VP9_INTRA_MODES]
- [VP9_INTRA_MODES - 1] = {
+static const vp9_prob default_if_uv_probs[INTRA_MODES]
+ [INTRA_MODES - 1] = {
{ 120, 7, 76, 176, 208, 126, 28, 54, 103 } /* y = dc */,
{ 48, 12, 154, 155, 139, 90, 34, 117, 119 } /* y = v */,
{ 67, 6, 25, 204, 243, 158, 13, 21, 96 } /* y = h */,
@@ -98,9 +98,9 @@ static const vp9_prob default_partition_probs[NUM_FRAME_TYPES]
}
};
-const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
- [VP9_INTRA_MODES]
- [VP9_INTRA_MODES - 1] = {
+const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES]
+ [INTRA_MODES]
+ [INTRA_MODES - 1] = {
{ /* above = dc */
{ 137, 30, 42, 148, 151, 207, 70, 52, 91 } /* left = dc */,
{ 92, 45, 102, 136, 116, 180, 74, 90, 100 } /* left = v */,
@@ -215,7 +215,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
};
static const vp9_prob default_inter_mode_probs[INTER_MODE_CONTEXTS]
- [VP9_INTER_MODES - 1] = {
+ [INTER_MODES - 1] = {
{2, 173, 34}, // 0 = both zero mv
{7, 145, 85}, // 1 = one zero mv + one a predicted mv
{7, 166, 63}, // 2 = two predicted mvs
@@ -226,7 +226,7 @@ static const vp9_prob default_inter_mode_probs[INTER_MODE_CONTEXTS]
};
/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
-const vp9_tree_index vp9_intra_mode_tree[VP9_INTRA_MODES * 2 - 2] = {
+const vp9_tree_index vp9_intra_mode_tree[INTRA_MODES * 2 - 2] = {
-DC_PRED, 2, /* 0 = DC_NODE */
-TM_PRED, 4, /* 1 = TM_NODE */
-V_PRED, 6, /* 2 = V_NODE */
@@ -250,8 +250,8 @@ const vp9_tree_index vp9_partition_tree[6] = {
-PARTITION_VERT, -PARTITION_SPLIT
};
-struct vp9_token vp9_intra_mode_encodings[VP9_INTRA_MODES];
-struct vp9_token vp9_inter_mode_encodings[VP9_INTER_MODES];
+struct vp9_token vp9_intra_mode_encodings[INTRA_MODES];
+struct vp9_token vp9_inter_mode_encodings[INTER_MODES];
struct vp9_token vp9_partition_encodings[PARTITION_TYPES];
@@ -317,8 +317,8 @@ static const vp9_prob default_mbskip_probs[MBSKIP_CONTEXTS] = {
192, 128, 64
};
-static const vp9_prob default_switchable_interp_prob[VP9_SWITCHABLE_FILTERS+1]
- [VP9_SWITCHABLE_FILTERS-1] = {
+static const vp9_prob default_switchable_interp_prob[SWITCHABLE_FILTERS+1]
+ [SWITCHABLE_FILTERS-1] = {
{ 235, 162, },
{ 36, 255, },
{ 34, 3, },
@@ -338,11 +338,11 @@ void vp9_init_mbmode_probs(VP9_COMMON *cm) {
vp9_copy(cm->fc.mbskip_probs, default_mbskip_probs);
}
-const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = {
+const vp9_tree_index vp9_switchable_interp_tree[SWITCHABLE_FILTERS*2-2] = {
-EIGHTTAP, 2,
-EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP
};
-struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
+struct vp9_token vp9_switchable_interp_encodings[SWITCHABLE_FILTERS];
void vp9_entropy_mode_init() {
vp9_tokens_from_tree(vp9_intra_mode_encodings, vp9_intra_mode_tree);
@@ -400,17 +400,17 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
counts->single_ref[i][j]);
for (i = 0; i < INTER_MODE_CONTEXTS; i++)
- update_mode_probs(VP9_INTER_MODES, vp9_inter_mode_tree,
+ update_mode_probs(INTER_MODES, vp9_inter_mode_tree,
counts->inter_mode[i], pre_fc->inter_mode_probs[i],
fc->inter_mode_probs[i], NEARESTMV);
for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
- update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree,
+ update_mode_probs(INTRA_MODES, vp9_intra_mode_tree,
counts->y_mode[i], pre_fc->y_mode_prob[i],
fc->y_mode_prob[i], 0);
- for (i = 0; i < VP9_INTRA_MODES; ++i)
- update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree,
+ for (i = 0; i < INTRA_MODES; ++i)
+ update_mode_probs(INTRA_MODES, vp9_intra_mode_tree,
counts->uv_mode[i], pre_fc->uv_mode_prob[i],
fc->uv_mode_prob[i], 0);
@@ -421,8 +421,8 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
fc->partition_prob[INTER_FRAME][i], 0);
if (cm->mcomp_filter_type == SWITCHABLE) {
- for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
- update_mode_probs(VP9_SWITCHABLE_FILTERS, vp9_switchable_interp_tree,
+ for (i = 0; i <= SWITCHABLE_FILTERS; i++)
+ update_mode_probs(SWITCHABLE_FILTERS, vp9_switchable_interp_tree,
counts->switchable_interp[i],
pre_fc->switchable_interp_prob[i],
fc->switchable_interp_prob[i], 0);
diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h
index fced2cdfe..2f8085df6 100644
--- a/vp9/common/vp9_entropymode.h
+++ b/vp9/common/vp9_entropymode.h
@@ -16,8 +16,8 @@
#define SUBMVREF_COUNT 5
#define TX_SIZE_CONTEXTS 2
-#define VP9_MODE_UPDATE_PROB 252
-#define VP9_SWITCHABLE_FILTERS 3 // number of switchable filters
+#define MODE_UPDATE_PROB 252
+#define SWITCHABLE_FILTERS 3 // number of switchable filters
// #define MODE_STATS
@@ -35,24 +35,24 @@ struct tx_counts {
unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2];
};
-extern const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1];
-extern const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES]
- [VP9_INTRA_MODES - 1];
+extern const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
+extern const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES]
+ [INTRA_MODES - 1];
extern const vp9_tree_index vp9_intra_mode_tree[];
extern const vp9_tree_index vp9_inter_mode_tree[];
-extern struct vp9_token vp9_intra_mode_encodings[VP9_INTRA_MODES];
-extern struct vp9_token vp9_inter_mode_encodings[VP9_INTER_MODES];
+extern struct vp9_token vp9_intra_mode_encodings[INTRA_MODES];
+extern struct vp9_token vp9_inter_mode_encodings[INTER_MODES];
// probability models for partition information
extern const vp9_tree_index vp9_partition_tree[];
extern struct vp9_token vp9_partition_encodings[PARTITION_TYPES];
extern const vp9_tree_index vp9_switchable_interp_tree
- [2 * (VP9_SWITCHABLE_FILTERS - 1)];
+ [2 * (SWITCHABLE_FILTERS - 1)];
-extern struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
+extern struct vp9_token vp9_switchable_interp_encodings[SWITCHABLE_FILTERS];
void vp9_entropy_mode_init();
diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h
index 85a1f3aa0..92581da2c 100644
--- a/vp9/common/vp9_entropymv.h
+++ b/vp9/common/vp9_entropymv.h
@@ -24,7 +24,7 @@ void vp9_init_mv_probs(struct VP9Common *cm);
void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp);
int vp9_use_mv_hp(const MV *ref);
-#define VP9_NMV_UPDATE_PROB 252
+#define NMV_UPDATE_PROB 252
/* Symbols for coding which components are zero jointly */
#define MV_JOINTS 4
diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h
index 33a97ccb3..7b1ffaeda 100644
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -27,7 +27,7 @@ extern const int16_t vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][SUBPEL_TAPS];
// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
// filter kernel as a 2 tap filter.
-#define VP9_BILINEAR_FILTERS_2TAP(x) \
+#define BILINEAR_FILTERS_2TAP(x) \
(vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1)
#endif // VP9_COMMON_VP9_FILTER_H_
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index d40f04f95..a669cc5e7 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -38,14 +38,14 @@
#define NUM_FRAME_CONTEXTS (1 << NUM_FRAME_CONTEXTS_LOG2)
typedef struct frame_contexts {
- vp9_prob y_mode_prob[BLOCK_SIZE_GROUPS][VP9_INTRA_MODES - 1];
- vp9_prob uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1];
+ vp9_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1];
+ vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
vp9_prob partition_prob[NUM_FRAME_TYPES][NUM_PARTITION_CONTEXTS]
[PARTITION_TYPES - 1];
vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES];
- vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
- [VP9_SWITCHABLE_FILTERS - 1];
- vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1];
+ vp9_prob switchable_interp_prob[SWITCHABLE_FILTERS + 1]
+ [SWITCHABLE_FILTERS - 1];
+ vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1];
vp9_prob intra_inter_prob[INTRA_INTER_CONTEXTS];
vp9_prob comp_inter_prob[COMP_INTER_CONTEXTS];
vp9_prob single_ref_prob[REF_CONTEXTS][2];
@@ -56,15 +56,15 @@ typedef struct frame_contexts {
} FRAME_CONTEXT;
typedef struct {
- unsigned int y_mode[BLOCK_SIZE_GROUPS][VP9_INTRA_MODES];
- unsigned int uv_mode[VP9_INTRA_MODES][VP9_INTRA_MODES];
+ unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES];
+ unsigned int uv_mode[INTRA_MODES][INTRA_MODES];
unsigned int partition[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
vp9_coeff_count_model coef[TX_SIZES][BLOCK_TYPES];
unsigned int eob_branch[TX_SIZES][BLOCK_TYPES][REF_TYPES]
[COEF_BANDS][PREV_COEF_CONTEXTS];
- unsigned int switchable_interp[VP9_SWITCHABLE_FILTERS + 1]
- [VP9_SWITCHABLE_FILTERS];
- unsigned int inter_mode[INTER_MODE_CONTEXTS][VP9_INTER_MODES];
+ unsigned int switchable_interp[SWITCHABLE_FILTERS + 1]
+ [SWITCHABLE_FILTERS];
+ unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES];
unsigned int intra_inter[INTRA_INTER_CONTEXTS][2];
unsigned int comp_inter[COMP_INTER_CONTEXTS][2];
unsigned int single_ref[REF_CONTEXTS][2][2];
diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c
index 58e0e1d67..859c99ed5 100644
--- a/vp9/common/vp9_postproc.c
+++ b/vp9/common/vp9_postproc.c
@@ -53,7 +53,7 @@ static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] = {
{ RGB_TO_YUV(0xCC33FF) }, /* Magenta */
};
-static const unsigned char B_PREDICTION_MODE_colors[VP9_INTRA_MODES][3] = {
+static const unsigned char B_PREDICTION_MODE_colors[INTRA_MODES][3] = {
{ RGB_TO_YUV(0x6633ff) }, /* Purple */
{ RGB_TO_YUV(0xcc33ff) }, /* Magenta */
{ RGB_TO_YUV(0xff33cc) }, /* Pink */
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index b00f58392..97ccb1376 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -31,25 +31,25 @@ unsigned char vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd) {
const int left_mv_pred = is_inter_mode(left_mbmi->mode);
const int left_interp = left_in_image && left_mv_pred
? left_mbmi->interp_filter
- : VP9_SWITCHABLE_FILTERS;
+ : SWITCHABLE_FILTERS;
// above
const int above_mv_pred = is_inter_mode(above_mbmi->mode);
const int above_interp = above_in_image && above_mv_pred
? above_mbmi->interp_filter
- : VP9_SWITCHABLE_FILTERS;
+ : SWITCHABLE_FILTERS;
if (left_interp == above_interp)
return left_interp;
- else if (left_interp == VP9_SWITCHABLE_FILTERS &&
- above_interp != VP9_SWITCHABLE_FILTERS)
+ else if (left_interp == SWITCHABLE_FILTERS &&
+ above_interp != SWITCHABLE_FILTERS)
return above_interp;
- else if (left_interp != VP9_SWITCHABLE_FILTERS &&
- above_interp == VP9_SWITCHABLE_FILTERS)
+ else if (left_interp != SWITCHABLE_FILTERS &&
+ above_interp == SWITCHABLE_FILTERS)
return left_interp;
else
- return VP9_SWITCHABLE_FILTERS;
+ return SWITCHABLE_FILTERS;
}
// Returns a context number for the given MB prediction signal
unsigned char vp9_get_pred_context_intra_inter(const MACROBLOCKD *xd) {
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index f1d855695..4a451b909 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -285,7 +285,7 @@ intra_pred_allsizes(dc)
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left);
-static intra_pred_fn pred[VP9_INTRA_MODES][4];
+static intra_pred_fn pred[INTRA_MODES][4];
static intra_pred_fn dc_pred[2][2][4];
static void init_intra_pred_fn_ptrs(void) {
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index d075443ed..30c1b26d0 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -295,7 +295,7 @@ specialize vp9_convolve8_avg_vert ssse3 neon
# dct
#
prototype void vp9_short_idct4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_idct4x4_1_add sse2
+specialize vp9_short_idct4x4_1_add sse2 neon
prototype void vp9_short_idct4x4_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct4x4_add sse2 neon
@@ -701,7 +701,7 @@ prototype void vp9_quantize_b "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_b
specialize vp9_quantize_b $ssse3_x86_64
prototype void vp9_quantize_b_32x32 "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr, int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"
-specialize vp9_quantize_b_32x32 $ssse3_x86_64
+specialize vp9_quantize_b_32x32
#
# Structured Similarity (SSIM)
diff --git a/vp9/common/vp9_scale.c b/vp9/common/vp9_scale.c
index 0b8dc23ea..989206c60 100644
--- a/vp9/common/vp9_scale.c
+++ b/vp9/common/vp9_scale.c
@@ -13,11 +13,11 @@
#include "vp9/common/vp9_scale.h"
static INLINE int scaled_x(int val, const struct scale_factors *scale) {
- return val * scale->x_scale_fp >> VP9_REF_SCALE_SHIFT;
+ return val * scale->x_scale_fp >> REF_SCALE_SHIFT;
}
static INLINE int scaled_y(int val, const struct scale_factors *scale) {
- return val * scale->y_scale_fp >> VP9_REF_SCALE_SHIFT;
+ return val * scale->y_scale_fp >> REF_SCALE_SHIFT;
}
static int unscaled_value(int val, const struct scale_factors *scale) {
@@ -58,7 +58,7 @@ static int get_fixed_point_scale_factor(int other_size, int this_size) {
// and use fixed point scaling factors in decoding and encoding routines.
// Hardware implementations can calculate scale factor in device driver
// and use multiplication and shifting on hardware instead of division.
- return (other_size << VP9_REF_SCALE_SHIFT) / this_size;
+ return (other_size << REF_SCALE_SHIFT) / this_size;
}
static int check_scale_factors(int other_w, int other_h,
@@ -73,8 +73,8 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
int other_w, int other_h,
int this_w, int this_h) {
if (!check_scale_factors(other_w, other_h, this_w, this_h)) {
- scale->x_scale_fp = VP9_REF_INVALID_SCALE;
- scale->y_scale_fp = VP9_REF_INVALID_SCALE;
+ scale->x_scale_fp = REF_INVALID_SCALE;
+ scale->y_scale_fp = REF_INVALID_SCALE;
return;
}
diff --git a/vp9/common/vp9_scale.h b/vp9/common/vp9_scale.h
index 827ae9bce..7a720d035 100644
--- a/vp9/common/vp9_scale.h
+++ b/vp9/common/vp9_scale.h
@@ -14,9 +14,9 @@
#include "vp9/common/vp9_mv.h"
#include "vp9/common/vp9_convolve.h"
-#define VP9_REF_SCALE_SHIFT 14
-#define VP9_REF_NO_SCALE (1 << VP9_REF_SCALE_SHIFT)
-#define VP9_REF_INVALID_SCALE -1
+#define REF_SCALE_SHIFT 14
+#define REF_NO_SCALE (1 << REF_SCALE_SHIFT)
+#define REF_INVALID_SCALE -1
struct scale_factors {
int x_scale_fp; // horizontal fixed point scale factor
@@ -39,13 +39,13 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
int this_w, int this_h);
static int vp9_is_valid_scale(const struct scale_factors *sf) {
- return sf->x_scale_fp != VP9_REF_INVALID_SCALE &&
- sf->y_scale_fp != VP9_REF_INVALID_SCALE;
+ return sf->x_scale_fp != REF_INVALID_SCALE &&
+ sf->y_scale_fp != REF_INVALID_SCALE;
}
static int vp9_is_scaled(const struct scale_factors *sf) {
- return sf->x_scale_fp != VP9_REF_NO_SCALE ||
- sf->y_scale_fp != VP9_REF_NO_SCALE;
+ return sf->x_scale_fp != REF_NO_SCALE ||
+ sf->y_scale_fp != REF_NO_SCALE;
}
#endif // VP9_COMMON_VP9_SCALE_H_
diff --git a/vp9/common/vp9_subpelvar.h b/vp9/common/vp9_subpelvar.h
index 78d42359b..fe75481f6 100644
--- a/vp9/common/vp9_subpelvar.h
+++ b/vp9/common/vp9_subpelvar.h
@@ -81,7 +81,7 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
for (j = 0; j < output_width; j++) {
output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
(int)src_ptr[pixel_step] * vp9_filter[1],
- VP9_FILTER_BITS);
+ FILTER_BITS);
src_ptr++;
}
@@ -133,7 +133,7 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
for (j = 0; j < output_width; j++) {
output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
(int)src_ptr[pixel_step] * vp9_filter[1],
- VP9_FILTER_BITS);
+ FILTER_BITS);
src_ptr++;
}
diff --git a/vp9/decoder/vp9_dboolhuff.c b/vp9/decoder/vp9_dboolhuff.c
index 31b1ae2b0..06acec4db 100644
--- a/vp9/decoder/vp9_dboolhuff.c
+++ b/vp9/decoder/vp9_dboolhuff.c
@@ -16,7 +16,7 @@
// This is meant to be a large, positive constant that can still be efficiently
// loaded as an immediate (on platforms like ARM, for example).
// Even relatively modest values like 100 would work fine.
-#define VP9_LOTS_OF_BITS 0x40000000
+#define LOTS_OF_BITS 0x40000000
int vp9_reader_init(vp9_reader *r, const uint8_t *buffer, size_t size) {
@@ -41,13 +41,13 @@ void vp9_reader_fill(vp9_reader *r) {
const uint8_t *buffer = r->buffer;
VP9_BD_VALUE value = r->value;
int count = r->count;
- int shift = VP9_BD_VALUE_SIZE - 8 - (count + 8);
+ int shift = BD_VALUE_SIZE - 8 - (count + 8);
int loop_end = 0;
const int bits_left = (int)((buffer_end - buffer)*CHAR_BIT);
const int x = shift + CHAR_BIT - bits_left;
if (x >= 0) {
- count += VP9_LOTS_OF_BITS;
+ count += LOTS_OF_BITS;
loop_end = x;
}
@@ -66,7 +66,7 @@ void vp9_reader_fill(vp9_reader *r) {
const uint8_t *vp9_reader_find_end(vp9_reader *r) {
// Find the end of the coded buffer
- while (r->count > CHAR_BIT && r->count < VP9_BD_VALUE_SIZE) {
+ while (r->count > CHAR_BIT && r->count < BD_VALUE_SIZE) {
r->count -= CHAR_BIT;
r->buffer--;
}
@@ -83,10 +83,10 @@ int vp9_reader_has_error(vp9_reader *r) {
//
// When reading a byte from the user's buffer, count is filled with 8 and
// one byte is filled into the value buffer. When we reach the end of the
- // data, count is additionally filled with VP9_LOTS_OF_BITS. So when
- // count == VP9_LOTS_OF_BITS - 1, the user's data has been exhausted.
+ // data, count is additionally filled with LOTS_OF_BITS. So when
+ // count == LOTS_OF_BITS - 1, the user's data has been exhausted.
//
// 1 if we have tried to decode bits after the end of stream was encountered.
// 0 No error.
- return r->count > VP9_BD_VALUE_SIZE && r->count < VP9_LOTS_OF_BITS;
+ return r->count > BD_VALUE_SIZE && r->count < LOTS_OF_BITS;
}
diff --git a/vp9/decoder/vp9_dboolhuff.h b/vp9/decoder/vp9_dboolhuff.h
index c46dd73a3..c86451649 100644
--- a/vp9/decoder/vp9_dboolhuff.h
+++ b/vp9/decoder/vp9_dboolhuff.h
@@ -20,7 +20,7 @@
typedef size_t VP9_BD_VALUE;
-#define VP9_BD_VALUE_SIZE ((int)sizeof(VP9_BD_VALUE)*CHAR_BIT)
+#define BD_VALUE_SIZE ((int)sizeof(VP9_BD_VALUE)*CHAR_BIT)
typedef struct {
const uint8_t *buffer_end;
@@ -52,7 +52,7 @@ static int vp9_read(vp9_reader *br, int probability) {
value = br->value;
count = br->count;
- bigsplit = (VP9_BD_VALUE)split << (VP9_BD_VALUE_SIZE - 8);
+ bigsplit = (VP9_BD_VALUE)split << (BD_VALUE_SIZE - 8);
range = split;
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 747877d80..d1c59c364 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -251,7 +251,7 @@ static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref,
}
static void update_mv(vp9_reader *r, vp9_prob *p) {
- if (vp9_read(r, VP9_NMV_UPDATE_PROB))
+ if (vp9_read(r, NMV_UPDATE_PROB))
*p = (vp9_read_literal(r, 7) << 1) | 1;
}
@@ -345,17 +345,17 @@ static void read_ref_frames(VP9D_COMP *pbi, vp9_reader *r,
static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vp9_reader *r) {
int i, j;
- for (j = 0; j < VP9_SWITCHABLE_FILTERS + 1; ++j)
- for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i)
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ for (j = 0; j < SWITCHABLE_FILTERS + 1; ++j)
+ for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i)
+ if (vp9_read(r, MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &fc->switchable_interp_prob[j][i]);
}
static void read_inter_mode_probs(FRAME_CONTEXT *fc, vp9_reader *r) {
int i, j;
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
- for (j = 0; j < VP9_INTER_MODES - 1; ++j)
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ for (j = 0; j < INTER_MODES - 1; ++j)
+ if (vp9_read(r, MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]);
}
@@ -551,22 +551,14 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
switch (mbmi->mode) {
case NEARMV:
mv0->as_int = nearby.as_int;
- clamp_mv2(&mv0->as_mv, xd);
-
- if (is_compound) {
+ if (is_compound)
mv1->as_int = nearby_second.as_int;
- clamp_mv2(&mv1->as_mv, xd);
- }
break;
case NEARESTMV:
mv0->as_int = nearest.as_int;
- clamp_mv2(&mv0->as_mv, xd);
-
- if (is_compound) {
+ if (is_compound)
mv1->as_int = nearest_second.as_int;
- clamp_mv2(&mv1->as_mv, xd);
- }
break;
case ZEROMV:
@@ -615,20 +607,20 @@ static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) {
if (cm->comp_pred_mode == HYBRID_PREDICTION)
for (i = 0; i < COMP_INTER_CONTEXTS; i++)
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ if (vp9_read(r, MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]);
if (cm->comp_pred_mode != COMP_PREDICTION_ONLY)
for (i = 0; i < REF_CONTEXTS; i++) {
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ if (vp9_read(r, MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][0]);
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ if (vp9_read(r, MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][1]);
}
if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY)
for (i = 0; i < REF_CONTEXTS; i++)
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ if (vp9_read(r, MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]);
}
@@ -639,7 +631,7 @@ void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r) {
// TODO(jkoleszar): does this clear more than MBSKIP_CONTEXTS? Maybe remove.
// vpx_memset(cm->fc.mbskip_probs, 0, sizeof(cm->fc.mbskip_probs));
for (k = 0; k < MBSKIP_CONTEXTS; ++k)
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ if (vp9_read(r, MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &cm->fc.mbskip_probs[k]);
if (cm->frame_type != KEY_FRAME && !cm->intra_only) {
@@ -653,19 +645,19 @@ void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r) {
read_switchable_interp_probs(&cm->fc, r);
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ if (vp9_read(r, MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &cm->fc.intra_inter_prob[i]);
read_comp_pred(cm, r);
for (j = 0; j < BLOCK_SIZE_GROUPS; j++)
- for (i = 0; i < VP9_INTRA_MODES - 1; ++i)
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ for (i = 0; i < INTRA_MODES - 1; ++i)
+ if (vp9_read(r, MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &cm->fc.y_mode_prob[j][i]);
for (j = 0; j < NUM_PARTITION_CONTEXTS; ++j)
for (i = 0; i < PARTITION_TYPES - 1; ++i)
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ if (vp9_read(r, MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &cm->fc.partition_prob[INTER_FRAME][j][i]);
read_mv_probs(r, nmvc, xd->allow_high_precision_mv);
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 5e9d24edc..fd88b6e6c 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -63,17 +63,17 @@ static void read_tx_probs(struct tx_probs *tx_probs, vp9_reader *r) {
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
for (j = 0; j < TX_SIZES - 3; ++j)
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ if (vp9_read(r, MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]);
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
for (j = 0; j < TX_SIZES - 2; ++j)
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ if (vp9_read(r, MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]);
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
for (j = 0; j < TX_SIZES - 1; ++j)
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ if (vp9_read(r, MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]);
}
@@ -592,6 +592,7 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
lf_data->frame_buffer = fb;
lf_data->cm = pc;
lf_data->xd = pbi->mb;
+ lf_data->stop = 0;
lf_data->y_only = 0;
}
vp9_loop_filter_frame_init(pc, pc->lf.filter_level);
@@ -615,6 +616,9 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
if (num_threads > 1) {
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
+ // decoding has completed: finish up the loop filter in this thread.
+ if (mi_row + MI_BLOCK_SIZE >= pc->cur_tile_mi_row_end) continue;
+
vp9_worker_sync(&pbi->lf_worker);
lf_data->start = lf_start;
lf_data->stop = mi_row;
@@ -627,13 +631,17 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
}
if (pbi->do_loopfilter_inline) {
+ int lf_start;
if (num_threads > 1) {
- // TODO(jzern): since the loop filter is delayed one mb row, this will be
- // forced to wait for the last row scheduled in the for loop.
+ LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
+
vp9_worker_sync(&pbi->lf_worker);
+ lf_start = lf_data->stop;
+ } else {
+ lf_start = mi_row - MI_BLOCK_SIZE;
}
vp9_loop_filter_rows(fb, pc, &pbi->mb,
- mi_row - MI_BLOCK_SIZE, pc->mi_rows, 0);
+ lf_start, pc->mi_rows, 0);
}
}
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index f6846e7fe..d7c73b665 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -41,9 +41,9 @@ unsigned __int64 Sectionbits[500];
#endif
#ifdef ENTROPY_STATS
-int intra_mode_stats[VP9_INTRA_MODES]
- [VP9_INTRA_MODES]
- [VP9_INTRA_MODES];
+int intra_mode_stats[INTRA_MODES]
+ [INTRA_MODES]
+ [INTRA_MODES];
vp9_coeff_stats tree_update_hist[TX_SIZES][BLOCK_TYPES];
extern unsigned int active_section;
@@ -54,8 +54,8 @@ extern unsigned int active_section;
int64_t tx_count_32x32p_stats[TX_SIZE_CONTEXTS][TX_SIZES];
int64_t tx_count_16x16p_stats[TX_SIZE_CONTEXTS][TX_SIZES - 1];
int64_t tx_count_8x8p_stats[TX_SIZE_CONTEXTS][TX_SIZES - 2];
-int64_t switchable_interp_stats[VP9_SWITCHABLE_FILTERS+1]
- [VP9_SWITCHABLE_FILTERS];
+int64_t switchable_interp_stats[SWITCHABLE_FILTERS+1]
+ [SWITCHABLE_FILTERS];
void init_tx_count_stats() {
vp9_zero(tx_count_32x32p_stats);
@@ -88,8 +88,8 @@ static void update_tx_count_stats(VP9_COMMON *cm) {
static void update_switchable_interp_stats(VP9_COMMON *cm) {
int i, j;
- for (i = 0; i < VP9_SWITCHABLE_FILTERS+1; ++i)
- for (j = 0; j < VP9_SWITCHABLE_FILTERS; ++j) {
+ for (i = 0; i < SWITCHABLE_FILTERS+1; ++i)
+ for (j = 0; j < SWITCHABLE_FILTERS; ++j) {
switchable_interp_stats[i][j] += cm->fc.switchable_interp_count[i][j];
}
}
@@ -141,11 +141,11 @@ void write_switchable_interp_stats() {
fclose(fp);
printf(
- "vp9_default_switchable_filter_count[VP9_SWITCHABLE_FILTERS+1]"
- "[VP9_SWITCHABLE_FILTERS] = {\n");
- for (i = 0; i < VP9_SWITCHABLE_FILTERS+1; i++) {
+ "vp9_default_switchable_filter_count[SWITCHABLE_FILTERS+1]"
+ "[SWITCHABLE_FILTERS] = {\n");
+ for (i = 0; i < SWITCHABLE_FILTERS+1; i++) {
printf(" { ");
- for (j = 0; j < VP9_SWITCHABLE_FILTERS; j++) {
+ for (j = 0; j < SWITCHABLE_FILTERS; j++) {
printf("%"PRId64", ", switchable_interp_stats[i][j]);
}
printf("},\n");
@@ -181,7 +181,7 @@ static void update_mode(
n--;
for (i = 0; i < n; ++i) {
- vp9_cond_prob_diff_update(w, &Pcur[i], VP9_MODE_UPDATE_PROB, bct[i]);
+ vp9_cond_prob_diff_update(w, &Pcur[i], MODE_UPDATE_PROB, bct[i]);
}
}
@@ -189,11 +189,11 @@ static void update_mbintra_mode_probs(VP9_COMP* const cpi,
vp9_writer* const bc) {
VP9_COMMON *const cm = &cpi->common;
int j;
- vp9_prob pnew[VP9_INTRA_MODES - 1];
- unsigned int bct[VP9_INTRA_MODES - 1][2];
+ vp9_prob pnew[INTRA_MODES - 1];
+ unsigned int bct[INTRA_MODES - 1][2];
for (j = 0; j < BLOCK_SIZE_GROUPS; j++)
- update_mode(bc, VP9_INTRA_MODES, vp9_intra_mode_tree, pnew,
+ update_mode(bc, INTRA_MODES, vp9_intra_mode_tree, pnew,
cm->fc.y_mode_prob[j], bct,
(unsigned int *)cpi->y_mode_count[j]);
}
@@ -228,7 +228,7 @@ void vp9_update_skip_probs(VP9_COMP *cpi, vp9_writer *w) {
for (k = 0; k < MBSKIP_CONTEXTS; ++k)
vp9_cond_prob_diff_update(w, &cm->fc.mbskip_probs[k],
- VP9_MODE_UPDATE_PROB, cm->counts.mbskip[k]);
+ MODE_UPDATE_PROB, cm->counts.mbskip[k]);
}
static void write_intra_mode(vp9_writer *bc, int m, const vp9_prob *p) {
@@ -238,20 +238,20 @@ static void write_intra_mode(vp9_writer *bc, int m, const vp9_prob *p) {
static void update_switchable_interp_probs(VP9_COMP *const cpi,
vp9_writer* const bc) {
VP9_COMMON *const pc = &cpi->common;
- unsigned int branch_ct[VP9_SWITCHABLE_FILTERS + 1]
- [VP9_SWITCHABLE_FILTERS - 1][2];
- vp9_prob new_prob[VP9_SWITCHABLE_FILTERS + 1][VP9_SWITCHABLE_FILTERS - 1];
+ unsigned int branch_ct[SWITCHABLE_FILTERS + 1]
+ [SWITCHABLE_FILTERS - 1][2];
+ vp9_prob new_prob[SWITCHABLE_FILTERS + 1][SWITCHABLE_FILTERS - 1];
int i, j;
- for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) {
+ for (j = 0; j <= SWITCHABLE_FILTERS; ++j) {
vp9_tree_probs_from_distribution(
vp9_switchable_interp_tree,
new_prob[j], branch_ct[j],
pc->counts.switchable_interp[j], 0);
}
- for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) {
- for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i) {
+ for (j = 0; j <= SWITCHABLE_FILTERS; ++j) {
+ for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i) {
vp9_cond_prob_diff_update(bc, &pc->fc.switchable_interp_prob[j][i],
- VP9_MODE_UPDATE_PROB, branch_ct[j][i]);
+ MODE_UPDATE_PROB, branch_ct[j][i]);
}
}
#ifdef MODE_STATS
@@ -264,16 +264,16 @@ static void update_inter_mode_probs(VP9_COMMON *pc, vp9_writer* const bc) {
int i, j;
for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
- unsigned int branch_ct[VP9_INTER_MODES - 1][2];
- vp9_prob new_prob[VP9_INTER_MODES - 1];
+ unsigned int branch_ct[INTER_MODES - 1][2];
+ vp9_prob new_prob[INTER_MODES - 1];
vp9_tree_probs_from_distribution(vp9_inter_mode_tree,
new_prob, branch_ct,
pc->counts.inter_mode[i], NEARESTMV);
- for (j = 0; j < VP9_INTER_MODES - 1; ++j)
+ for (j = 0; j < INTER_MODES - 1; ++j)
vp9_cond_prob_diff_update(bc, &pc->fc.inter_mode_probs[i][j],
- VP9_MODE_UPDATE_PROB, branch_ct[j]);
+ MODE_UPDATE_PROB, branch_ct[j]);
}
}
@@ -1049,7 +1049,7 @@ static void encode_txfm_probs(VP9_COMP *cpi, vp9_writer *w) {
ct_8x8p);
for (j = 0; j < TX_SIZES - 3; j++)
vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p8x8[i][j],
- VP9_MODE_UPDATE_PROB, ct_8x8p[j]);
+ MODE_UPDATE_PROB, ct_8x8p[j]);
}
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
@@ -1057,14 +1057,14 @@ static void encode_txfm_probs(VP9_COMP *cpi, vp9_writer *w) {
ct_16x16p);
for (j = 0; j < TX_SIZES - 2; j++)
vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p16x16[i][j],
- VP9_MODE_UPDATE_PROB, ct_16x16p[j]);
+ MODE_UPDATE_PROB, ct_16x16p[j]);
}
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
tx_counts_to_branch_counts_32x32(cm->counts.tx.p32x32[i], ct_32x32p);
for (j = 0; j < TX_SIZES - 1; j++)
vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p32x32[i][j],
- VP9_MODE_UPDATE_PROB, ct_32x32p[j]);
+ MODE_UPDATE_PROB, ct_32x32p[j]);
}
#ifdef MODE_STATS
if (!cpi->dummy_packing)
@@ -1087,17 +1087,17 @@ static void fix_mcomp_filter_type(VP9_COMP *cpi) {
if (cm->mcomp_filter_type == SWITCHABLE) {
// Check to see if only one of the filters is actually used
- int count[VP9_SWITCHABLE_FILTERS];
+ int count[SWITCHABLE_FILTERS];
int i, j, c = 0;
- for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
+ for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
count[i] = 0;
- for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j)
+ for (j = 0; j <= SWITCHABLE_FILTERS; ++j)
count[i] += cm->counts.switchable_interp[j][i];
c += (count[i] > 0);
}
if (c == 1) {
// Only one filter is used. So set the filter at frame level
- for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
+ for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
if (count[i]) {
cm->mcomp_filter_type = i;
break;
@@ -1386,7 +1386,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
vp9_cond_prob_diff_update(&header_bc, &fc->intra_inter_prob[i],
- VP9_MODE_UPDATE_PROB,
+ MODE_UPDATE_PROB,
cpi->intra_inter_count[i]);
if (cm->allow_comp_inter_inter) {
@@ -1400,7 +1400,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
if (use_hybrid_pred)
for (i = 0; i < COMP_INTER_CONTEXTS; i++)
vp9_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i],
- VP9_MODE_UPDATE_PROB,
+ MODE_UPDATE_PROB,
cpi->comp_inter_count[i]);
}
}
@@ -1408,10 +1408,10 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) {
for (i = 0; i < REF_CONTEXTS; i++) {
vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][0],
- VP9_MODE_UPDATE_PROB,
+ MODE_UPDATE_PROB,
cpi->single_ref_count[i][0]);
vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][1],
- VP9_MODE_UPDATE_PROB,
+ MODE_UPDATE_PROB,
cpi->single_ref_count[i][1]);
}
}
@@ -1419,7 +1419,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY)
for (i = 0; i < REF_CONTEXTS; i++)
vp9_cond_prob_diff_update(&header_bc, &fc->comp_ref_prob[i],
- VP9_MODE_UPDATE_PROB,
+ MODE_UPDATE_PROB,
cpi->comp_ref_count[i]);
update_mbintra_mode_probs(cpi, &header_bc);
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 790b3c22c..9426f44ab 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -48,7 +48,7 @@ typedef struct {
int comp_pred_diff;
int single_pred_diff;
int64_t tx_rd_diff[TX_MODES];
- int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1];
+ int64_t best_filter_diff[SWITCHABLE_FILTERS + 1];
// Bit flag for each mode whether it has high error in comparison to others.
unsigned int modes_with_high_error;
@@ -121,9 +121,9 @@ struct macroblock {
int mbmode_cost[MB_MODE_COUNT];
unsigned inter_mode_cost[INTER_MODE_CONTEXTS][MB_MODE_COUNT - NEARESTMV];
int intra_uv_mode_cost[2][MB_MODE_COUNT];
- int y_mode_costs[VP9_INTRA_MODES][VP9_INTRA_MODES][VP9_INTRA_MODES];
- int switchable_interp_costs[VP9_SWITCHABLE_FILTERS + 1]
- [VP9_SWITCHABLE_FILTERS];
+ int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
+ int switchable_interp_costs[SWITCHABLE_FILTERS + 1]
+ [SWITCHABLE_FILTERS];
// These define limits to motion vector components to prevent them
// from extending outside the UMV borders
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index f8da500e6..360abad77 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -56,7 +56,7 @@ static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x);
* This also avoids the need for divide by zero checks in
* vp9_activity_masking().
*/
-#define VP9_ACTIVITY_AVG_MIN (64)
+#define ACTIVITY_AVG_MIN (64)
/* Motion vector component magnitude threshold for defining fast motion. */
#define FAST_MOTION_MV_THRESH (24)
@@ -131,8 +131,8 @@ static unsigned int mb_activity_measure(MACROBLOCK *x, int mb_row, int mb_col) {
mb_activity = tt_activity_measure(x);
}
- if (mb_activity < VP9_ACTIVITY_AVG_MIN)
- mb_activity = VP9_ACTIVITY_AVG_MIN;
+ if (mb_activity < ACTIVITY_AVG_MIN)
+ mb_activity = ACTIVITY_AVG_MIN;
return mb_activity;
}
@@ -182,8 +182,8 @@ static void calc_av_activity(VP9_COMP *cpi, int64_t activity_sum) {
cpi->activity_avg = (unsigned int) (activity_sum / cpi->common.MBs);
#endif // ACT_MEDIAN
- if (cpi->activity_avg < VP9_ACTIVITY_AVG_MIN)
- cpi->activity_avg = VP9_ACTIVITY_AVG_MIN;
+ if (cpi->activity_avg < ACTIVITY_AVG_MIN)
+ cpi->activity_avg = ACTIVITY_AVG_MIN;
// Experimental code: return fixed value normalized for several clips
if (ALT_ACT_MEASURE)
@@ -448,7 +448,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
cpi->rd_comp_pred_diff[COMP_PREDICTION_ONLY] += ctx->comp_pred_diff;
cpi->rd_comp_pred_diff[HYBRID_PREDICTION] += ctx->hybrid_pred_diff;
- for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
+ for (i = 0; i <= SWITCHABLE_FILTERS; i++)
cpi->rd_filter_diff[i] += ctx->best_filter_diff[i];
}
}
@@ -2414,15 +2414,15 @@ void vp9_encode_frame(VP9_COMP *cpi) {
cpi->rd_filter_threshes[frame_type][1] >
cpi->rd_filter_threshes[frame_type][2] &&
cpi->rd_filter_threshes[frame_type][1] >
- cpi->rd_filter_threshes[frame_type][VP9_SWITCHABLE_FILTERS]) {
+ cpi->rd_filter_threshes[frame_type][SWITCHABLE_FILTERS]) {
filter_type = EIGHTTAP_SMOOTH;
} else if (cpi->rd_filter_threshes[frame_type][2] >
cpi->rd_filter_threshes[frame_type][0] &&
cpi->rd_filter_threshes[frame_type][2] >
- cpi->rd_filter_threshes[frame_type][VP9_SWITCHABLE_FILTERS]) {
+ cpi->rd_filter_threshes[frame_type][SWITCHABLE_FILTERS]) {
filter_type = EIGHTTAP_SHARP;
} else if (cpi->rd_filter_threshes[frame_type][0] >
- cpi->rd_filter_threshes[frame_type][VP9_SWITCHABLE_FILTERS]) {
+ cpi->rd_filter_threshes[frame_type][SWITCHABLE_FILTERS]) {
filter_type = EIGHTTAP;
} else {
filter_type = SWITCHABLE;
@@ -2445,7 +2445,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
cpi->rd_prediction_type_threshes[frame_type][i] >>= 1;
}
- for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {
+ for (i = 0; i <= SWITCHABLE_FILTERS; i++) {
const int64_t diff = cpi->rd_filter_diff[i] / cpi->common.MBs;
cpi->rd_filter_threshes[frame_type][i] =
(cpi->rd_filter_threshes[frame_type][i] + diff) / 2;
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index 2b7cb0b5b..1203c00ab 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -236,22 +236,22 @@ void vp9_write_nmv_probs(VP9_COMP* const cpi, int usehp, vp9_writer* const bc) {
for (j = 0; j < MV_JOINTS - 1; ++j)
update_mv(bc, branch_ct_joint[j], &mvc->joints[j], prob.joints[j],
- VP9_NMV_UPDATE_PROB);
+ NMV_UPDATE_PROB);
for (i = 0; i < 2; ++i) {
update_mv(bc, branch_ct_sign[i], &mvc->comps[i].sign,
- prob.comps[i].sign, VP9_NMV_UPDATE_PROB);
+ prob.comps[i].sign, NMV_UPDATE_PROB);
for (j = 0; j < MV_CLASSES - 1; ++j)
update_mv(bc, branch_ct_classes[i][j], &mvc->comps[i].classes[j],
- prob.comps[i].classes[j], VP9_NMV_UPDATE_PROB);
+ prob.comps[i].classes[j], NMV_UPDATE_PROB);
for (j = 0; j < CLASS0_SIZE - 1; ++j)
update_mv(bc, branch_ct_class0[i][j], &mvc->comps[i].class0[j],
- prob.comps[i].class0[j], VP9_NMV_UPDATE_PROB);
+ prob.comps[i].class0[j], NMV_UPDATE_PROB);
for (j = 0; j < MV_OFFSET_BITS; ++j)
update_mv(bc, branch_ct_bits[i][j], &mvc->comps[i].bits[j],
- prob.comps[i].bits[j], VP9_NMV_UPDATE_PROB);
+ prob.comps[i].bits[j], NMV_UPDATE_PROB);
}
for (i = 0; i < 2; ++i) {
@@ -260,20 +260,20 @@ void vp9_write_nmv_probs(VP9_COMP* const cpi, int usehp, vp9_writer* const bc) {
for (k = 0; k < 3; ++k)
update_mv(bc, branch_ct_class0_fp[i][j][k],
&mvc->comps[i].class0_fp[j][k],
- prob.comps[i].class0_fp[j][k], VP9_NMV_UPDATE_PROB);
+ prob.comps[i].class0_fp[j][k], NMV_UPDATE_PROB);
}
for (j = 0; j < 3; ++j)
update_mv(bc, branch_ct_fp[i][j], &mvc->comps[i].fp[j],
- prob.comps[i].fp[j], VP9_NMV_UPDATE_PROB);
+ prob.comps[i].fp[j], NMV_UPDATE_PROB);
}
if (usehp) {
for (i = 0; i < 2; ++i) {
update_mv(bc, branch_ct_class0_hp[i], &mvc->comps[i].class0_hp,
- prob.comps[i].class0_hp, VP9_NMV_UPDATE_PROB);
+ prob.comps[i].class0_hp, NMV_UPDATE_PROB);
update_mv(bc, branch_ct_hp[i], &mvc->comps[i].hp,
- prob.comps[i].hp, VP9_NMV_UPDATE_PROB);
+ prob.comps[i].hp, NMV_UPDATE_PROB);
}
}
}
diff --git a/vp9/encoder/vp9_modecosts.c b/vp9/encoder/vp9_modecosts.c
index 993aba767..5b2365308 100644
--- a/vp9/encoder/vp9_modecosts.c
+++ b/vp9/encoder/vp9_modecosts.c
@@ -20,8 +20,8 @@ void vp9_init_mode_costs(VP9_COMP *c) {
const vp9_tree_p KT = vp9_intra_mode_tree;
int i, j;
- for (i = 0; i < VP9_INTRA_MODES; i++) {
- for (j = 0; j < VP9_INTRA_MODES; j++) {
+ for (i = 0; i < INTRA_MODES; i++) {
+ for (j = 0; j < INTRA_MODES; j++) {
vp9_cost_tokens((int *)c->mb.y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
KT);
}
@@ -31,12 +31,12 @@ void vp9_init_mode_costs(VP9_COMP *c) {
vp9_cost_tokens(c->mb.mbmode_cost, x->fc.y_mode_prob[1],
vp9_intra_mode_tree);
vp9_cost_tokens(c->mb.intra_uv_mode_cost[1],
- x->fc.uv_mode_prob[VP9_INTRA_MODES - 1], vp9_intra_mode_tree);
+ x->fc.uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree);
vp9_cost_tokens(c->mb.intra_uv_mode_cost[0],
- vp9_kf_uv_mode_prob[VP9_INTRA_MODES - 1],
+ vp9_kf_uv_mode_prob[INTRA_MODES - 1],
vp9_intra_mode_tree);
- for (i = 0; i <= VP9_SWITCHABLE_FILTERS; ++i)
+ for (i = 0; i <= SWITCHABLE_FILTERS; ++i)
vp9_cost_tokens((int *)c->mb.switchable_interp_costs[i],
x->fc.switchable_interp_prob[i],
vp9_switchable_interp_tree);
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 7e44ebd04..34bd43ef1 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -98,9 +98,9 @@ FILE *keyfile;
#ifdef ENTROPY_STATS
-extern int intra_mode_stats[VP9_INTRA_MODES]
- [VP9_INTRA_MODES]
- [VP9_INTRA_MODES];
+extern int intra_mode_stats[INTRA_MODES]
+ [INTRA_MODES]
+ [INTRA_MODES];
#endif
#ifdef MODE_STATS
@@ -444,9 +444,9 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
void vp9_update_mode_context_stats(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
int i, j;
- unsigned int (*inter_mode_counts)[VP9_INTER_MODES - 1][2] =
+ unsigned int (*inter_mode_counts)[INTER_MODES - 1][2] =
cm->fc.inter_mode_counts;
- int64_t (*mv_ref_stats)[VP9_INTER_MODES - 1][2] = cpi->mv_ref_stats;
+ int64_t (*mv_ref_stats)[INTER_MODES - 1][2] = cpi->mv_ref_stats;
FILE *f;
// Read the past stats counters
@@ -460,7 +460,7 @@ void vp9_update_mode_context_stats(VP9_COMP *cpi) {
// Add in the values for this frame
for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
- for (j = 0; j < VP9_INTER_MODES - 1; j++) {
+ for (j = 0; j < INTER_MODES - 1; j++) {
mv_ref_stats[i][j][0] += (int64_t)inter_mode_counts[i][j][0];
mv_ref_stats[i][j][1] += (int64_t)inter_mode_counts[i][j][1];
}
@@ -479,12 +479,12 @@ void print_mode_context(VP9_COMP *cpi) {
fprintf(f, "#include \"vp9_entropy.h\"\n");
fprintf(
f,
- "const int inter_mode_probs[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1] =");
+ "const int inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1] =");
fprintf(f, "{\n");
for (j = 0; j < INTER_MODE_CONTEXTS; j++) {
fprintf(f, " {/* %d */ ", j);
fprintf(f, " ");
- for (i = 0; i < VP9_INTER_MODES - 1; i++) {
+ for (i = 0; i < INTER_MODES - 1; i++) {
int this_prob;
int64_t count = cpi->mv_ref_stats[j][i][0] + cpi->mv_ref_stats[j][i][1];
if (count)
@@ -735,7 +735,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->mode_search_skip_flags = 0;
sf->disable_split_var_thresh = 0;
sf->disable_filter_search_var_thresh = 0;
- sf->last_chroma_intra_mode = TM_PRED;
+ sf->intra_y_mode_mask = ALL_INTRA_MODES;
+ sf->intra_uv_mode_mask = ALL_INTRA_MODES;
sf->use_rd_breakout = 0;
sf->skip_encode_sb = 0;
sf->use_uv_intra_rd_estimate = 0;
@@ -765,7 +766,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->static_segmentation = 0;
#endif
sf->use_avoid_tested_higherror = 1;
- sf->adaptive_rd_thresh = 1;
+ sf->adaptive_rd_thresh = MIN((speed + 1), 4);
if (speed == 1) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
@@ -798,6 +799,9 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
// the main framework of partition search type.
sf->disable_split_var_thresh = 0;
sf->disable_filter_search_var_thresh = 16;
+
+ sf->intra_y_mode_mask = INTRA_DC_TM_H_V;
+ sf->intra_uv_mode_mask = INTRA_DC_TM_H_V;
}
if (speed == 2) {
sf->adjust_thresholds_by_speed = 1;
@@ -819,7 +823,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
FLAG_SKIP_COMP_REFMISMATCH |
FLAG_SKIP_INTRA_LOWVAR |
FLAG_EARLY_TERMINATE;
- sf->last_chroma_intra_mode = DC_PRED;
+ sf->intra_y_mode_mask = INTRA_DC_TM;
+ sf->intra_uv_mode_mask = INTRA_DC_TM;
sf->use_uv_intra_rd_estimate = 1;
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
@@ -859,6 +864,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->subpel_iters_per_step = 1;
sf->disable_split_var_thresh = 64;
sf->disable_filter_search_var_thresh = 64;
+ sf->intra_y_mode_mask = INTRA_DC_ONLY;
+ sf->intra_uv_mode_mask = INTRA_DC_ONLY;
}
if (speed == 4) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
@@ -1395,7 +1402,7 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
}
VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
- int i;
+ int i, j;
volatile union {
VP9_COMP *cpi;
VP9_PTR ptr;
@@ -1597,9 +1604,10 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
vp9_set_speed_features(cpi);
- // Set starting values of RD threshold multipliers (128 = *1)
- for (i = 0; i < MAX_MODES; i++)
- cpi->rd_thresh_mult[i] = 128;
+ // Default rd threshold factors for mode selection
+ for (i = 0; i < BLOCK_SIZES; ++i)
+ for (j = 0; j < MAX_MODES; ++j)
+ cpi->rd_thresh_freq_fact[i][j] = 32;
#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SVFHH, SVFHV, SVFHHV, \
SDX3F, SDX8F, SDX4DF)\
@@ -1800,18 +1808,18 @@ void vp9_remove_compressor(VP9_PTR *ptr) {
fprintf(fmode, "\n#include \"vp9_entropymode.h\"\n\n");
fprintf(fmode, "const unsigned int vp9_kf_default_bmode_counts ");
- fprintf(fmode, "[VP9_INTRA_MODES][VP9_INTRA_MODES]"
- "[VP9_INTRA_MODES] =\n{\n");
+ fprintf(fmode, "[INTRA_MODES][INTRA_MODES]"
+ "[INTRA_MODES] =\n{\n");
- for (i = 0; i < VP9_INTRA_MODES; i++) {
+ for (i = 0; i < INTRA_MODES; i++) {
fprintf(fmode, " { // Above Mode : %d\n", i);
- for (j = 0; j < VP9_INTRA_MODES; j++) {
+ for (j = 0; j < INTRA_MODES; j++) {
fprintf(fmode, " {");
- for (k = 0; k < VP9_INTRA_MODES; k++) {
+ for (k = 0; k < INTRA_MODES; k++) {
if (!intra_mode_stats[i][j][k])
fprintf(fmode, " %5d, ", 1);
else
@@ -2629,8 +2637,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// Set various flags etc to special state if it is a key frame
if (cm->frame_type == KEY_FRAME) {
- int i;
-
// Reset the loop filter deltas and segmentation map
setup_features(cm);
@@ -2643,10 +2649,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// The alternate reference frame cannot be active for a key frame
cpi->source_alt_ref_active = 0;
- // Reset the RD threshold multipliers to default of * 1 (128)
- for (i = 0; i < MAX_MODES; i++)
- cpi->rd_thresh_mult[i] = 128;
-
cm->error_resilient_mode = (cpi->oxcf.error_resilient_mode != 0);
cm->frame_parallel_decoding_mode =
(cpi->oxcf.frame_parallel_decoding_mode != 0);
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index c7b35a8c6..653615949 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -79,15 +79,15 @@ typedef struct {
vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES];
- vp9_prob y_mode_prob[4][VP9_INTRA_MODES - 1];
- vp9_prob uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1];
+ vp9_prob y_mode_prob[4][INTRA_MODES - 1];
+ vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
vp9_prob partition_prob[2][NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1];
- vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
- [VP9_SWITCHABLE_FILTERS - 1];
+ vp9_prob switchable_interp_prob[SWITCHABLE_FILTERS + 1]
+ [SWITCHABLE_FILTERS - 1];
- int inter_mode_counts[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1][2];
- vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1];
+ int inter_mode_counts[INTER_MODE_CONTEXTS][INTER_MODES - 1][2];
+ vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1];
struct tx_probs tx_probs;
vp9_prob mbskip_probs[MBSKIP_CONTEXTS];
@@ -238,6 +238,11 @@ typedef enum {
// Other methods to come
} SUBPEL_SEARCH_METHODS;
+#define ALL_INTRA_MODES 0x3FF
+#define INTRA_DC_ONLY 0x01
+#define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED))
+#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED))
+
typedef struct {
int RD;
SEARCH_METHODS search_method;
@@ -288,7 +293,8 @@ typedef struct {
// A source variance threshold below which filter search is disabled
// Choose a very large value (UINT_MAX) to use 8-tap always
unsigned int disable_filter_search_var_thresh;
- MB_PREDICTION_MODE last_chroma_intra_mode;
+ int intra_y_mode_mask;
+ int intra_uv_mode_mask;
int use_rd_breakout;
int use_uv_intra_rd_estimate;
int use_fast_lpf_pick;
@@ -375,8 +381,6 @@ typedef struct VP9_COMP {
int ref_frame_mask;
int set_ref_frame_mask;
- int rd_thresh_mult[MAX_MODES];
- int rd_baseline_thresh[BLOCK_SIZES][MAX_MODES];
int rd_threshes[BLOCK_SIZES][MAX_MODES];
int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
@@ -392,9 +396,9 @@ typedef struct VP9_COMP {
// FIXME(rbultje) can this overflow?
int rd_tx_select_threshes[4][TX_MODES];
- int64_t rd_filter_diff[VP9_SWITCHABLE_FILTERS + 1];
- int64_t rd_filter_threshes[4][VP9_SWITCHABLE_FILTERS + 1];
- int64_t rd_filter_cache[VP9_SWITCHABLE_FILTERS + 1];
+ int64_t rd_filter_diff[SWITCHABLE_FILTERS + 1];
+ int64_t rd_filter_threshes[4][SWITCHABLE_FILTERS + 1];
+ int64_t rd_filter_cache[SWITCHABLE_FILTERS + 1];
int RDMULT;
int RDDIV;
@@ -469,8 +473,8 @@ typedef struct VP9_COMP {
int cq_target_quality;
- int y_mode_count[4][VP9_INTRA_MODES];
- int y_uv_mode_count[VP9_INTRA_MODES][VP9_INTRA_MODES];
+ int y_mode_count[4][INTRA_MODES];
+ int y_uv_mode_count[INTRA_MODES][INTRA_MODES];
unsigned int partition_count[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
nmv_context_counts NMVcount;
@@ -635,8 +639,8 @@ typedef struct VP9_COMP {
int dummy_packing; /* flag to indicate if packing is dummy */
- unsigned int switchable_interp_count[VP9_SWITCHABLE_FILTERS + 1]
- [VP9_SWITCHABLE_FILTERS];
+ unsigned int switchable_interp_count[SWITCHABLE_FILTERS + 1]
+ [SWITCHABLE_FILTERS];
unsigned int txfm_stepdown_count[TX_SIZES];
@@ -657,7 +661,7 @@ typedef struct VP9_COMP {
#endif
#ifdef ENTROPY_STATS
- int64_t mv_ref_stats[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1][2];
+ int64_t mv_ref_stats[INTER_MODE_CONTEXTS][INTER_MODES - 1][2];
#endif
} VP9_COMP;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 08b0c454a..74282aafe 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -104,9 +104,8 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
{2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};
-#define BASE_RD_THRESH_FREQ_FACT 16
-#define MAX_RD_THRESH_FREQ_FACT 32
-#define MAX_RD_THRESH_FREQ_INC 1
+#define MAX_RD_THRESH_FACT 64
+#define RD_THRESH_INC 1
static void fill_token_costs(vp9_coeff_cost *c,
vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
@@ -212,12 +211,6 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
} else {
cpi->rd_threshes[bsize][i] = INT_MAX;
}
- cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
-
- if (cpi->sf.adaptive_rd_thresh)
- cpi->rd_thresh_freq_fact[bsize][i] = MAX_RD_THRESH_FREQ_FACT;
- else
- cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
}
}
} else {
@@ -236,12 +229,6 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
} else {
cpi->rd_threshes[bsize][i] = INT_MAX;
}
- cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
-
- if (cpi->sf.adaptive_rd_thresh)
- cpi->rd_thresh_freq_fact[bsize][i] = MAX_RD_THRESH_FREQ_FACT;
- else
- cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
}
}
}
@@ -1043,6 +1030,10 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
int64_t this_rd;
int ratey = 0;
+
+ if (!(cpi->sf.intra_y_mode_mask & (1 << mode)))
+ continue;
+
// Only do the oblique modes if the best so far is
// one of the neighboring directional modes
if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
@@ -1228,6 +1219,9 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
int64_t local_tx_cache[TX_MODES];
const int mis = xd->mode_info_stride;
+ if (!(cpi->sf.intra_y_mode_mask & (1 << mode)))
+ continue;
+
if (cpi->common.frame_type == KEY_FRAME) {
const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
const MB_PREDICTION_MODE L = xd->left_available ?
@@ -1325,10 +1319,14 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
int this_rate_tokenonly, this_rate, s;
int64_t this_distortion, this_sse;
- MB_PREDICTION_MODE last_mode = bsize <= BLOCK_8X8 ?
- TM_PRED : cpi->sf.last_chroma_intra_mode;
+ // int mode_mask = (bsize <= BLOCK_8X8)
+ // ? ALL_INTRA_MODES : cpi->sf.intra_uv_mode_mask;
+
+ for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+ // if (!(mode_mask & (1 << mode)))
+ if (!(cpi->sf.intra_uv_mode_mask & (1 << mode)))
+ continue;
- for (mode = DC_PRED; mode <= last_mode; mode++) {
x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
super_block_uvrd(&cpi->common, x, &this_rate_tokenonly,
&this_distortion, &s, &this_sse, bsize, best_rd);
@@ -1599,7 +1597,7 @@ typedef struct {
int64_t sse;
int segment_yrate;
MB_PREDICTION_MODE modes[4];
- SEG_RDSTAT rdstat[4][VP9_INTER_MODES];
+ SEG_RDSTAT rdstat[4][INTER_MODES];
int mvthresh;
} BEST_SEG_INFO;
@@ -1962,7 +1960,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
if (best_rd == INT64_MAX) {
int iy, midx;
for (iy = i + 1; iy < 4; ++iy)
- for (midx = 0; midx < VP9_INTER_MODES; ++midx)
+ for (midx = 0; midx < INTER_MODES; ++midx)
bsi->rdstat[iy][midx].brdcost = INT64_MAX;
bsi->segment_rd = INT64_MAX;
return;
@@ -1986,7 +1984,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
if (this_segment_rd > bsi->segment_rd) {
int iy, midx;
for (iy = i + 1; iy < 4; ++iy)
- for (midx = 0; midx < VP9_INTER_MODES; ++midx)
+ for (midx = 0; midx < INTER_MODES; ++midx)
bsi->rdstat[iy][midx].brdcost = INT64_MAX;
bsi->segment_rd = INT64_MAX;
return;
@@ -2189,7 +2187,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
int_mv *second_ref_mv,
int64_t comp_pred_diff[NB_PREDICTION_TYPES],
int64_t tx_size_diff[TX_MODES],
- int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1]) {
+ int64_t best_filter_diff[SWITCHABLE_FILTERS + 1]) {
MACROBLOCKD *const xd = &x->e_mbd;
// Take a snapshot of the coding context so it can be
@@ -2212,7 +2210,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
// doesn't actually work this way
memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
memcpy(ctx->best_filter_diff, best_filter_diff,
- sizeof(*best_filter_diff) * (VP9_SWITCHABLE_FILTERS + 1));
+ sizeof(*best_filter_diff) * (SWITCHABLE_FILTERS + 1));
}
static void setup_pred_block(const MACROBLOCKD *xd,
@@ -2259,10 +2257,10 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
scale[frame_type].x_offset_q4 =
ROUND_POWER_OF_TWO(mi_col * MI_SIZE * scale[frame_type].x_scale_fp,
- VP9_REF_SCALE_SHIFT) & 0xf;
+ REF_SCALE_SHIFT) & 0xf;
scale[frame_type].y_offset_q4 =
ROUND_POWER_OF_TWO(mi_row * MI_SIZE * scale[frame_type].y_scale_fp,
- VP9_REF_SCALE_SHIFT) & 0xf;
+ REF_SCALE_SHIFT) & 0xf;
// TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
// use the UV scaling factors.
@@ -2747,8 +2745,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int tmp_rate_sum = 0;
int64_t tmp_dist_sum = 0;
- cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
- for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
+ cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX;
+ for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
int j;
int64_t rs_rd;
mbmi->interp_filter = i;
@@ -2759,8 +2757,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (i > 0 && intpel_mv) {
cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
tmp_rate_sum, tmp_dist_sum);
- cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
- MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],
+ cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
+ MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
cpi->rd_filter_cache[i] + rs_rd);
rd = cpi->rd_filter_cache[i];
if (cm->mcomp_filter_type == SWITCHABLE)
@@ -2787,8 +2785,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
rate_sum, dist_sum);
- cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
- MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],
+ cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
+ MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
cpi->rd_filter_cache[i] + rs_rd);
rd = cpi->rd_filter_cache[i];
if (cm->mcomp_filter_type == SWITCHABLE)
@@ -3080,8 +3078,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int64_t best_tx_diff[TX_MODES];
int64_t best_pred_diff[NB_PREDICTION_TYPES];
int64_t best_pred_rd[NB_PREDICTION_TYPES];
- int64_t best_filter_rd[VP9_SWITCHABLE_FILTERS + 1];
- int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1];
+ int64_t best_filter_rd[SWITCHABLE_FILTERS + 1];
+ int64_t best_filter_diff[SWITCHABLE_FILTERS + 1];
MB_MODE_INFO best_mbmode = { 0 };
int j;
int mode_index, best_mode_index = 0;
@@ -3132,7 +3130,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
best_pred_rd[i] = INT64_MAX;
for (i = 0; i < TX_MODES; i++)
best_tx_rd[i] = INT64_MAX;
- for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
+ for (i = 0; i <= SWITCHABLE_FILTERS; i++)
best_filter_rd[i] = INT64_MAX;
for (i = 0; i < TX_SIZES; i++)
rate_uv_intra[i] = INT_MAX;
@@ -3216,7 +3214,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// Test best rd so far against threshold for trying this mode.
if ((best_rd < ((cpi->rd_threshes[bsize][mode_index] *
- cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 4)) ||
+ cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 5)) ||
cpi->rd_threshes[bsize][mode_index] == INT_MAX)
continue;
@@ -3452,7 +3450,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
union b_mode_info tmp_best_bmodes[16];
MB_MODE_INFO tmp_best_mbmode;
PARTITION_INFO tmp_best_partition;
- BEST_SEG_INFO bsi[VP9_SWITCHABLE_FILTERS];
+ BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
int pred_exists = 0;
int uv_skippable;
if (is_comp_pred) {
@@ -3472,7 +3470,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
cpi->rd_threshes[bsize][THR_NEWG] : this_rd_thresh;
xd->mode_info_context->mbmi.txfm_size = TX_4X4;
- cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
+ cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX;
if (cm->mcomp_filter_type != BILINEAR) {
tmp_best_filter = EIGHTTAP;
if (x->source_variance <
@@ -3481,7 +3479,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
vp9_zero(cpi->rd_filter_cache);
} else {
for (switchable_filter_index = 0;
- switchable_filter_index < VP9_SWITCHABLE_FILTERS;
+ switchable_filter_index < SWITCHABLE_FILTERS;
++switchable_filter_index) {
int newbest, rs;
int64_t rs_rd;
@@ -3503,8 +3501,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
rs = get_switchable_rate(x);
rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
- cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
- MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],
+ cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
+ MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
tmp_rd + rs_rd);
if (cm->mcomp_filter_type == SWITCHABLE)
tmp_rd += rs_rd;
@@ -3721,7 +3719,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (!disable_skip && ref_frame == INTRA_FRAME) {
for (i = 0; i < NB_PREDICTION_TYPES; ++i)
best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
- for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
+ for (i = 0; i <= SWITCHABLE_FILTERS; i++)
best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
}
@@ -3777,29 +3775,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
}
}
-#if 0
- // Testing this mode gave rise to an improvement in best error score.
- // Lower threshold a bit for next time
- cpi->rd_thresh_mult[mode_index] =
- (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
- cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
- cpi->rd_threshes[mode_index] =
- (cpi->rd_baseline_thresh[mode_index] >> 7)
- * cpi->rd_thresh_mult[mode_index];
-#endif
- } else {
- // If the mode did not help improve the best error case then
- // raise the threshold for testing that mode next time around.
-#if 0
- cpi->rd_thresh_mult[mode_index] += 4;
-
- if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
- cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
-
- cpi->rd_threshes[mode_index] =
- (cpi->rd_baseline_thresh[mode_index] >> 7)
- * cpi->rd_thresh_mult[mode_index];
-#endif
}
/* keep record of best compound/single-only prediction */
@@ -3832,8 +3807,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
cm->mcomp_filter_type != BILINEAR) {
int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
- VP9_SWITCHABLE_FILTERS : cm->mcomp_filter_type];
- for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {
+ SWITCHABLE_FILTERS : cm->mcomp_filter_type];
+ for (i = 0; i <= SWITCHABLE_FILTERS; i++) {
int64_t adj_rd;
// In cases of poor prediction, filter_cache[] can contain really big
// values, which actually are bigger than this_rd itself. This can
@@ -3942,33 +3917,19 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (cpi->sf.adaptive_rd_thresh) {
for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
if (mode_index == best_mode_index) {
- cpi->rd_thresh_freq_fact[bsize][mode_index] = BASE_RD_THRESH_FREQ_FACT;
+ cpi->rd_thresh_freq_fact[bsize][mode_index] -=
+ (cpi->rd_thresh_freq_fact[bsize][mode_index] >> 3);
} else {
- cpi->rd_thresh_freq_fact[bsize][mode_index] += MAX_RD_THRESH_FREQ_INC;
+ cpi->rd_thresh_freq_fact[bsize][mode_index] += RD_THRESH_INC;
if (cpi->rd_thresh_freq_fact[bsize][mode_index] >
- (cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FREQ_FACT)) {
+ (cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FACT)) {
cpi->rd_thresh_freq_fact[bsize][mode_index] =
- cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FREQ_FACT;
+ cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FACT;
}
}
}
}
- // TODO(rbultje) integrate with RD trd_thresh_freq_facthresholding
-#if 0
- // Reduce the activation RD thresholds for the best choice mode
- if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
- (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
- int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);
-
- cpi->rd_thresh_mult[best_mode_index] =
- (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ?
- cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
- cpi->rd_threshes[best_mode_index] =
- (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index];
- }
-#endif
-
// macroblock modes
*mbmi = best_mbmode;
x->skip |= best_skip2;
@@ -4003,14 +3964,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
if (!x->skip) {
- for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {
+ for (i = 0; i <= SWITCHABLE_FILTERS; i++) {
if (best_filter_rd[i] == INT64_MAX)
best_filter_diff[i] = 0;
else
best_filter_diff[i] = best_rd - best_filter_rd[i];
}
if (cm->mcomp_filter_type == SWITCHABLE)
- assert(best_filter_diff[VP9_SWITCHABLE_FILTERS] == 0);
+ assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
} else {
vpx_memset(best_filter_diff, 0, sizeof(best_filter_diff));
}
diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c
index 325925cbd..155ba8a3e 100644
--- a/vp9/encoder/vp9_variance_c.c
+++ b/vp9/encoder/vp9_variance_c.c
@@ -50,8 +50,8 @@ unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
uint8_t temp2[68 * 64];
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 33, 64, hfilter);
@@ -73,8 +73,8 @@ unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 33, 64, hfilter);
@@ -107,8 +107,8 @@ unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
uint8_t temp2[68 * 64];
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 65, 32, hfilter);
@@ -130,8 +130,8 @@ unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); // compound pred buffer
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 65, 32, hfilter);
@@ -164,8 +164,8 @@ unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
uint8_t temp2[36 * 32];
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 17, 32, hfilter);
@@ -187,8 +187,8 @@ unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); // compound pred buffer
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 17, 32, hfilter);
@@ -221,8 +221,8 @@ unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
uint8_t temp2[36 * 32];
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 33, 16, hfilter);
@@ -244,8 +244,8 @@ unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); // compound pred buffer
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 33, 16, hfilter);
@@ -442,8 +442,8 @@ unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
const int16_t *hfilter, *vfilter;
uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
// First filter 1d Horizontal
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
@@ -468,8 +468,8 @@ unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); // compound pred buffer
uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
// First filter 1d Horizontal
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
@@ -492,8 +492,8 @@ unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
uint8_t temp2[20 * 16];
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 9, 8, hfilter);
@@ -515,8 +515,8 @@ unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); // compound pred buffer
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 9, 8, hfilter);
@@ -536,8 +536,8 @@ unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
uint8_t temp2[20 * 16];
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 17, 16, hfilter);
@@ -559,8 +559,8 @@ unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); // compound pred buffer
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 17, 16, hfilter);
@@ -581,8 +581,8 @@ unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
uint8_t temp2[68 * 64];
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 65, 64, hfilter);
@@ -604,8 +604,8 @@ unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 65, 64, hfilter);
@@ -625,8 +625,8 @@ unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
uint8_t temp2[36 * 32];
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 33, 32, hfilter);
@@ -648,8 +648,8 @@ unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); // compound pred buffer
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 33, 32, hfilter);
@@ -789,8 +789,8 @@ unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
uint8_t temp2[20 * 16];
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 9, 16, hfilter);
@@ -812,8 +812,8 @@ unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); // compound pred buffer
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 9, 16, hfilter);
@@ -833,8 +833,8 @@ unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
uint8_t temp2[20 * 16];
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 17, 8, hfilter);
@@ -856,8 +856,8 @@ unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); // compound pred buffer
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 17, 8, hfilter);
@@ -877,8 +877,8 @@ unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
uint8_t temp2[20 * 16];
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 5, 8, hfilter);
@@ -900,8 +900,8 @@ unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 5, 8, hfilter);
@@ -923,8 +923,8 @@ unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
uint8_t temp2[20 * 16];
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 9, 4, hfilter);
@@ -946,8 +946,8 @@ unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer
const int16_t *hfilter, *vfilter;
- hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+ hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 9, 4, hfilter);
diff --git a/vp9/encoder/x86/vp9_subpel_variance.asm b/vp9/encoder/x86/vp9_subpel_variance.asm
index 19e2feb57..533456b77 100644
--- a/vp9/encoder/x86/vp9_subpel_variance.asm
+++ b/vp9/encoder/x86/vp9_subpel_variance.asm
@@ -270,8 +270,13 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
%if mmsize == 16
movhps m2, [srcq+src_strideq*2]
%else ; mmsize == 8
+%if %1 == 4
+ movh m1, [srcq+src_strideq*2]
+ punpckldq m2, m1
+%else
punpckldq m2, [srcq+src_strideq*2]
%endif
+%endif
movh m1, [dstq]
%if mmsize == 16
movlhps m0, m2
@@ -542,9 +547,16 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
movhps m2, [srcq+src_strideq]
movhps m3, [srcq+src_strideq+1]
%else
+%if %1 == 4
+ movh m1, [srcq+src_strideq]
+ punpckldq m2, m1
+ movh m1, [srcq+src_strideq+1]
+ punpckldq m3, m1
+%else
punpckldq m2, [srcq+src_strideq]
punpckldq m3, [srcq+src_strideq+1]
%endif
+%endif
pavgb m2, m3
%if mmsize == 16
movlhps m0, m2
diff --git a/vp9/encoder/x86/vp9_variance_impl_mmx.asm b/vp9/encoder/x86/vp9_variance_impl_mmx.asm
index d3dbefed8..3501cf1fd 100644
--- a/vp9/encoder/x86/vp9_variance_impl_mmx.asm
+++ b/vp9/encoder/x86/vp9_variance_impl_mmx.asm
@@ -342,8 +342,8 @@ sym(vp9_get4x4var_mmx):
movsxd rdx, dword ptr arg(3) ;[recon_stride]
; Row 1
- movq mm0, [rax] ; Copy eight bytes to mm0
- movq mm1, [rbx] ; Copy eight bytes to mm1
+ movd mm0, [rax] ; Copy 4 bytes to mm0
+ movd mm1, [rbx] ; Copy 4 bytes to mm1
punpcklbw mm0, mm6 ; unpack to higher prrcision
punpcklbw mm1, mm6
psubsw mm0, mm1 ; A-B (low order) to MM0
@@ -351,12 +351,12 @@ sym(vp9_get4x4var_mmx):
pmaddwd mm0, mm0 ; square and accumulate
add rbx,rdx ; Inc pointer into ref data
add rax,rcx ; Inc pointer into the new data
- movq mm1, [rbx] ; Copy eight bytes to mm1
+ movd mm1, [rbx] ; Copy 4 bytes to mm1
paddd mm7, mm0 ; accumulate in mm7
; Row 2
- movq mm0, [rax] ; Copy eight bytes to mm0
+ movd mm0, [rax] ; Copy 4 bytes to mm0
punpcklbw mm0, mm6 ; unpack to higher prrcision
punpcklbw mm1, mm6
psubsw mm0, mm1 ; A-B (low order) to MM0
@@ -365,11 +365,11 @@ sym(vp9_get4x4var_mmx):
pmaddwd mm0, mm0 ; square and accumulate
add rbx,rdx ; Inc pointer into ref data
add rax,rcx ; Inc pointer into the new data
- movq mm1, [rbx] ; Copy eight bytes to mm1
+ movd mm1, [rbx] ; Copy 4 bytes to mm1
paddd mm7, mm0 ; accumulate in mm7
; Row 3
- movq mm0, [rax] ; Copy eight bytes to mm0
+ movd mm0, [rax] ; Copy 4 bytes to mm0
punpcklbw mm0, mm6 ; unpack to higher prrcision
punpcklbw mm1, mm6
psubsw mm0, mm1 ; A-B (low order) to MM0
@@ -378,11 +378,11 @@ sym(vp9_get4x4var_mmx):
pmaddwd mm0, mm0 ; square and accumulate
add rbx,rdx ; Inc pointer into ref data
add rax,rcx ; Inc pointer into the new data
- movq mm1, [rbx] ; Copy eight bytes to mm1
+ movd mm1, [rbx] ; Copy 4 bytes to mm1
paddd mm7, mm0 ; accumulate in mm7
; Row 4
- movq mm0, [rax] ; Copy eight bytes to mm0
+ movd mm0, [rax] ; Copy 4 bytes to mm0
punpcklbw mm0, mm6 ; unpack to higher prrcision
punpcklbw mm1, mm6
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index c6f398101..d5692efb1 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -96,6 +96,7 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_avg_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_dc_only_idct_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct4x4_1_add_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct4x4_add_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct8x8_add_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct16x16_add_neon$(ASM)