-rwxr-xr-xconfigure5
-rw-r--r--test/sad_test.cc16
-rw-r--r--test/vp9_intrapred_test.cc90
-rw-r--r--vp10/common/blockd.h8
-rw-r--r--vp10/common/entropy.h3
-rw-r--r--vp10/common/entropymode.c37
-rw-r--r--vp10/common/entropymode.h7
-rw-r--r--vp10/common/enums.h2
-rw-r--r--vp10/common/thread_common.c11
-rw-r--r--vp10/decoder/decodeframe.c28
-rw-r--r--vp10/decoder/decodemv.c38
-rw-r--r--vp10/encoder/aq_complexity.c2
-rw-r--r--vp10/encoder/aq_variance.c2
-rw-r--r--vp10/encoder/bitstream.c104
-rw-r--r--vp10/encoder/bitstream.h1
-rw-r--r--vp10/encoder/encodeframe.c11
-rw-r--r--vp10/encoder/encoder.c3
-rw-r--r--vp10/encoder/encoder.h2
-rw-r--r--vp10/encoder/rd.c12
-rw-r--r--vp10/encoder/rdopt.c192
-rw-r--r--vp10/encoder/segmentation.c3
-rw-r--r--vp10/encoder/subexp.c9
-rw-r--r--vp10/encoder/subexp.h2
-rw-r--r--vp9/common/vp9_mvref_common.c24
-rw-r--r--vp9/common/vp9_mvref_common.h12
-rw-r--r--vp9/common/vp9_reconinter.c6
-rw-r--r--vp9/decoder/vp9_decodeframe.c19
-rw-r--r--vp9/decoder/vp9_decodemv.c250
-rw-r--r--vp9/encoder/vp9_aq_cyclicrefresh.c20
-rw-r--r--vp9/encoder/vp9_aq_cyclicrefresh.h5
-rw-r--r--vp9/encoder/vp9_denoiser.c20
-rw-r--r--vp9/encoder/vp9_encodeframe.c5
-rw-r--r--vp9/encoder/vp9_noise_estimate.c17
-rw-r--r--vp9/encoder/vp9_pickmode.c16
-rw-r--r--vp9/encoder/vp9_rdopt.c36
-rw-r--r--vp9/encoder/vp9_skin_detection.c14
-rw-r--r--vp9/encoder/vp9_skin_detection.h3
-rw-r--r--vp9/encoder/vp9_speed_features.c2
-rw-r--r--vp9/encoder/vp9_svc_layercontext.c8
-rw-r--r--vpx/vpx_image.h2
-rw-r--r--vpx_dsp/vpx_dsp_rtcd_defs.pl20
-rw-r--r--vpx_dsp/x86/highbd_intrapred_sse2.asm271
-rw-r--r--vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm18
-rw-r--r--vpx_dsp/x86/highbd_variance_sse2.c64
-rw-r--r--vpx_dsp/x86/sad_sse2.asm23
45 files changed, 987 insertions, 456 deletions
diff --git a/configure b/configure
index 428fba343..5fbd2d002 100755
--- a/configure
+++ b/configure
@@ -35,6 +35,9 @@ Advanced options:
${toggle_debug_libs} in/exclude debug version of libraries
${toggle_static_msvcrt} use static MSVCRT (VS builds only)
${toggle_vp9_highbitdepth} use VP9 high bit depth (10/12) profiles
+ ${toggle_better_hw_compatibility}
+ enable encoder to produce streams with better
+ hardware decoder compatibility
${toggle_vp8} VP8 codec support
${toggle_vp9} VP9 codec support
${toggle_vp10} VP10 codec support
@@ -320,6 +323,7 @@ CONFIG_LIST="
vp9_temporal_denoising
coefficient_range_checking
vp9_highbitdepth
+ better_hw_compatibility
experimental
size_limit
${EXPERIMENT_LIST}
@@ -378,6 +382,7 @@ CMDLINE_SELECT="
temporal_denoising
vp9_temporal_denoising
coefficient_range_checking
+ better_hw_compatibility
vp9_highbitdepth
experimental
"
diff --git a/test/sad_test.cc b/test/sad_test.cc
index 9dec3cb49..a144cfce7 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -702,18 +702,6 @@ INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));
#if HAVE_SSE
#if CONFIG_USE_X86INC
-const SadMxNParam sse_tests[] = {
- make_tuple(4, 8, &vpx_sad4x8_sse, -1),
- make_tuple(4, 4, &vpx_sad4x4_sse, -1),
-};
-INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::ValuesIn(sse_tests));
-
-const SadMxNAvgParam avg_sse_tests[] = {
- make_tuple(4, 8, &vpx_sad4x8_avg_sse, -1),
- make_tuple(4, 4, &vpx_sad4x4_avg_sse, -1),
-};
-INSTANTIATE_TEST_CASE_P(SSE, SADavgTest, ::testing::ValuesIn(avg_sse_tests));
-
const SadMxNx4Param x4d_sse_tests[] = {
make_tuple(4, 8, &vpx_sad4x8x4d_sse, -1),
make_tuple(4, 4, &vpx_sad4x4x4d_sse, -1),
@@ -736,6 +724,8 @@ const SadMxNParam sse2_tests[] = {
make_tuple(8, 16, &vpx_sad8x16_sse2, -1),
make_tuple(8, 8, &vpx_sad8x8_sse2, -1),
make_tuple(8, 4, &vpx_sad8x4_sse2, -1),
+ make_tuple(4, 8, &vpx_sad4x8_sse2, -1),
+ make_tuple(4, 4, &vpx_sad4x4_sse2, -1),
#if CONFIG_VP9_HIGHBITDEPTH
make_tuple(64, 64, &vpx_highbd_sad64x64_sse2, 8),
make_tuple(64, 32, &vpx_highbd_sad64x32_sse2, 8),
@@ -786,6 +776,8 @@ const SadMxNAvgParam avg_sse2_tests[] = {
make_tuple(8, 16, &vpx_sad8x16_avg_sse2, -1),
make_tuple(8, 8, &vpx_sad8x8_avg_sse2, -1),
make_tuple(8, 4, &vpx_sad8x4_avg_sse2, -1),
+ make_tuple(4, 8, &vpx_sad4x8_avg_sse2, -1),
+ make_tuple(4, 4, &vpx_sad4x4_avg_sse2, -1),
#if CONFIG_VP9_HIGHBITDEPTH
make_tuple(64, 64, &vpx_highbd_sad64x64_avg_sse2, 8),
make_tuple(64, 32, &vpx_highbd_sad64x32_avg_sse2, 8),
diff --git a/test/vp9_intrapred_test.cc b/test/vp9_intrapred_test.cc
index ad3327e2d..e6198afbd 100644
--- a/test/vp9_intrapred_test.cc
+++ b/test/vp9_intrapred_test.cc
@@ -132,7 +132,6 @@ using std::tr1::make_tuple;
#if HAVE_SSE2
#if CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_USE_X86INC
-#if ARCH_X86_64
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
@@ -141,13 +140,13 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
&vpx_highbd_tm_predictor_16x16_c, 16, 8),
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
&vpx_highbd_tm_predictor_32x32_c, 32, 8),
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 8),
make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
&vpx_highbd_dc_predictor_16x16_c, 16, 8),
- make_tuple(&vpx_highbd_v_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_v_predictor_4x4_sse2,
&vpx_highbd_v_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
&vpx_highbd_v_predictor_8x8_c, 8, 8),
@@ -155,34 +154,11 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
&vpx_highbd_v_predictor_16x16_c, 16, 8),
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32, 8),
- make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 8)));
-#else
-INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
- ::testing::Values(
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
- &vpx_highbd_dc_predictor_4x4_c, 4, 8),
- make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
- &vpx_highbd_dc_predictor_8x8_c, 8, 8),
- make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
- &vpx_highbd_dc_predictor_16x16_c, 16, 8),
- make_tuple(&vpx_highbd_v_predictor_4x4_sse,
- &vpx_highbd_v_predictor_4x4_c, 4, 8),
- make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
- &vpx_highbd_v_predictor_8x8_c, 8, 8),
- make_tuple(&vpx_highbd_v_predictor_16x16_sse2,
- &vpx_highbd_v_predictor_16x16_c, 16, 8),
- make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
- &vpx_highbd_v_predictor_32x32_c, 32, 8),
- make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
- &vpx_highbd_tm_predictor_4x4_c, 4, 8),
- make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
- &vpx_highbd_tm_predictor_8x8_c, 8, 8)));
-#endif // !ARCH_X86_64
-#if ARCH_X86_64
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
@@ -194,14 +170,14 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
&vpx_highbd_tm_predictor_32x32_c, 32,
10),
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 10),
make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
&vpx_highbd_dc_predictor_16x16_c, 16,
10),
- make_tuple(&vpx_highbd_v_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_v_predictor_4x4_sse2,
&vpx_highbd_v_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
&vpx_highbd_v_predictor_8x8_c, 8, 10),
@@ -211,35 +187,11 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32,
10),
- make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
- &vpx_highbd_tm_predictor_4x4_c, 4, 10),
- make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
- &vpx_highbd_tm_predictor_8x8_c, 8, 10)));
-#else
-INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
- ::testing::Values(
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
- &vpx_highbd_dc_predictor_4x4_c, 4, 10),
- make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
- &vpx_highbd_dc_predictor_8x8_c, 8, 10),
- make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
- &vpx_highbd_dc_predictor_16x16_c, 16,
- 10),
- make_tuple(&vpx_highbd_v_predictor_4x4_sse,
- &vpx_highbd_v_predictor_4x4_c, 4, 10),
- make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
- &vpx_highbd_v_predictor_8x8_c, 8, 10),
- make_tuple(&vpx_highbd_v_predictor_16x16_sse2,
- &vpx_highbd_v_predictor_16x16_c, 16, 10),
- make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
- &vpx_highbd_v_predictor_32x32_c, 32, 10),
- make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 10)));
-#endif // !ARCH_X86_64
-#if ARCH_X86_64
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
@@ -251,14 +203,14 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
&vpx_highbd_tm_predictor_32x32_c, 32,
12),
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 12),
make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
&vpx_highbd_dc_predictor_16x16_c, 16,
12),
- make_tuple(&vpx_highbd_v_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_v_predictor_4x4_sse2,
&vpx_highbd_v_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
&vpx_highbd_v_predictor_8x8_c, 8, 12),
@@ -268,33 +220,11 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32,
12),
- make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 12)));
-#else
-INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
- ::testing::Values(
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
- &vpx_highbd_dc_predictor_4x4_c, 4, 12),
- make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
- &vpx_highbd_dc_predictor_8x8_c, 8, 12),
- make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
- &vpx_highbd_dc_predictor_16x16_c, 16,
- 12),
- make_tuple(&vpx_highbd_v_predictor_4x4_sse,
- &vpx_highbd_v_predictor_4x4_c, 4, 12),
- make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
- &vpx_highbd_v_predictor_8x8_c, 8, 12),
- make_tuple(&vpx_highbd_v_predictor_16x16_sse2,
- &vpx_highbd_v_predictor_16x16_c, 16, 12),
- make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
- &vpx_highbd_v_predictor_32x32_c, 32, 12),
- make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
- &vpx_highbd_tm_predictor_4x4_c, 4, 12),
- make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
- &vpx_highbd_tm_predictor_8x8_c, 8, 12)));
-#endif // !ARCH_X86_64
+
#endif // CONFIG_USE_X86INC
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSE2
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index 2f10378a6..fce176796 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -82,6 +82,7 @@ typedef struct {
// Only for INTER blocks
INTERP_FILTER interp_filter;
MV_REFERENCE_FRAME ref_frame[2];
+ TX_TYPE tx_type;
// TODO(slavarnway): Delete and use bmi[3].as_mv[] instead.
int_mv mv[2];
@@ -207,7 +208,7 @@ static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize,
return subsize_lookup[partition][bsize];
}
-static const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = {
+static const TX_TYPE intra_mode_to_tx_type_context[INTRA_MODES] = {
DCT_DCT, // DC
ADST_DCT, // V
DCT_ADST, // H
@@ -225,11 +226,12 @@ static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, const MACROBLOCKD *xd,
const MODE_INFO *const mi = xd->mi[0];
const MB_MODE_INFO *const mbmi = &mi->mbmi;
+ (void) block_idx;
if (plane_type != PLANE_TYPE_Y || xd->lossless[mbmi->segment_id] ||
- is_inter_block(mbmi) || mbmi->tx_size >= TX_32X32)
+ mbmi->tx_size >= TX_32X32)
return DCT_DCT;
- return intra_mode_to_tx_type_lookup[get_y_mode(mi, block_idx)];
+ return mbmi->tx_type;
}
void vp10_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y);
diff --git a/vp10/common/entropy.h b/vp10/common/entropy.h
index 2f93cb31c..9a471c818 100644
--- a/vp10/common/entropy.h
+++ b/vp10/common/entropy.h
@@ -21,7 +21,8 @@
extern "C" {
#endif
-#define DIFF_UPDATE_PROB 252
+#define DIFF_UPDATE_PROB 252
+#define GROUP_DIFF_UPDATE_PROB 252
// Coefficient token alphabet
#define ZERO_TOKEN 0 // 0 Extra Bits 0+0
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index 2bb292a6b..78f3650f8 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -326,6 +326,26 @@ static const struct segmentation_probs default_seg_probs = {
};
#endif
+const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(TX_TYPES)] = {
+ -DCT_DCT, 2,
+ -ADST_ADST, 4,
+ -ADST_DCT, -DCT_ADST
+};
+
+static const vpx_prob default_intra_ext_tx_prob[EXT_TX_SIZES]
+ [TX_TYPES][TX_TYPES - 1] = {
+ {{240, 85, 128}, {4, 1, 248}, {4, 1, 8}, {4, 248, 128}},
+ {{244, 85, 128}, {8, 2, 248}, {8, 2, 8}, {8, 248, 128}},
+ {{248, 85, 128}, {16, 4, 248}, {16, 4, 8}, {16, 248, 128}},
+};
+
+static const vpx_prob default_inter_ext_tx_prob[EXT_TX_SIZES]
+ [TX_TYPES - 1] = {
+ {160, 85, 128},
+ {176, 85, 128},
+ {192, 85, 128},
+};
+
static void init_mode_probs(FRAME_CONTEXT *fc) {
vp10_copy(fc->uv_mode_prob, default_uv_probs);
vp10_copy(fc->y_mode_prob, default_if_y_probs);
@@ -342,6 +362,8 @@ static void init_mode_probs(FRAME_CONTEXT *fc) {
vp10_copy(fc->seg.tree_probs, default_seg_probs.tree_probs);
vp10_copy(fc->seg.pred_probs, default_seg_probs.pred_probs);
#endif
+ vp10_copy(fc->intra_ext_tx_prob, default_intra_ext_tx_prob);
+ vp10_copy(fc->inter_ext_tx_prob, default_inter_ext_tx_prob);
}
const vpx_tree_index vp10_switchable_interp_tree
@@ -431,6 +453,21 @@ void vp10_adapt_intra_frame_probs(VP10_COMMON *cm) {
fc->skip_probs[i] = mode_mv_merge_probs(
pre_fc->skip_probs[i], counts->skip[i]);
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ int j;
+ for (j = 0; j < TX_TYPES; ++j)
+ vpx_tree_merge_probs(vp10_ext_tx_tree,
+ pre_fc->intra_ext_tx_prob[i][j],
+ counts->intra_ext_tx[i][j],
+ fc->intra_ext_tx_prob[i][j]);
+ }
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ vpx_tree_merge_probs(vp10_ext_tx_tree,
+ pre_fc->inter_ext_tx_prob[i],
+ counts->inter_ext_tx[i],
+ fc->inter_ext_tx_prob[i]);
+ }
+
#if CONFIG_MISC_FIXES
if (cm->seg.temporal_update) {
for (i = 0; i < PREDICTION_PROBS; i++)
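
Note: vp10_ext_tx_tree is consumed by the generic tree coder. A minimal sketch of the decode walk (mirroring vpx_read_tree() from vpx_dsp/bitreader.h; the wrapper name here is hypothetical, not part of this patch):

static INLINE TX_TYPE read_ext_tx_type_sketch(
    vpx_reader *r, const vpx_prob probs[TX_TYPES - 1]) {
  vpx_tree_index i = 0;
  /* Each interior node consumes one bit coded with probs[i >> 1];
   * leaves are stored as negated TX_TYPE values. */
  while ((i = vp10_ext_tx_tree[i + vpx_read(r, probs[i >> 1])]) > 0)
    continue;
  return (TX_TYPE)-i;
}

With this tree shape, the first bit separates DCT_DCT from the rest, the second peels off ADST_ADST, and the third chooses between ADST_DCT and DCT_ADST, which is why each probability row holds TX_TYPES - 1 = 3 entries.
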
diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h
index 42fd9207f..611d3ad13 100644
--- a/vp10/common/entropymode.h
+++ b/vp10/common/entropymode.h
@@ -66,6 +66,8 @@ typedef struct frame_contexts {
#if CONFIG_MISC_FIXES
struct segmentation_probs seg;
#endif
+ vpx_prob intra_ext_tx_prob[EXT_TX_SIZES][TX_TYPES][TX_TYPES - 1];
+ vpx_prob inter_ext_tx_prob[EXT_TX_SIZES][TX_TYPES - 1];
int initialized;
} FRAME_CONTEXT;
@@ -90,6 +92,8 @@ typedef struct FRAME_COUNTS {
#if CONFIG_MISC_FIXES
struct seg_counts seg;
#endif
+ unsigned int intra_ext_tx[EXT_TX_SIZES][TX_TYPES][TX_TYPES];
+ unsigned int inter_ext_tx[EXT_TX_SIZES][TX_TYPES];
} FRAME_COUNTS;
extern const vpx_prob vp10_kf_y_mode_prob[INTRA_MODES][INTRA_MODES]
@@ -119,6 +123,9 @@ void vp10_tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p,
void vp10_tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p,
unsigned int (*ct_8x8p)[2]);
+extern const vpx_tree_index
+ vp10_ext_tx_tree[TREE_SIZE(TX_TYPES)];
+
static INLINE int vp10_ceil_log2(int n) {
int i = 1, p = 2;
while (p < n) {
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index a226a2d69..18c7d1629 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -97,6 +97,8 @@ typedef enum {
TX_TYPES = 4
} TX_TYPE;
+#define EXT_TX_SIZES 3 // number of sizes that use extended transforms
+
typedef enum {
VP9_LAST_FLAG = 1 << 0,
VP9_GOLD_FLAG = 1 << 1,
diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c
index e83cb8e67..0c7a1c22a 100644
--- a/vp10/common/thread_common.c
+++ b/vp10/common/thread_common.c
@@ -435,6 +435,17 @@ void vp10_accumulate_frame_counts(VP10_COMMON *cm, FRAME_COUNTS *counts,
comps->fp[i] += comps_t->fp[i];
}
+ for (i = 0; i < EXT_TX_SIZES; i++) {
+ int j;
+ for (j = 0; j < TX_TYPES; ++j)
+ for (k = 0; k < TX_TYPES; k++)
+ cm->counts.intra_ext_tx[i][j][k] += counts->intra_ext_tx[i][j][k];
+ }
+ for (i = 0; i < EXT_TX_SIZES; i++) {
+ for (k = 0; k < TX_TYPES; k++)
+ cm->counts.inter_ext_tx[i][k] += counts->inter_ext_tx[i][k];
+ }
+
#if CONFIG_MISC_FIXES
for (i = 0; i < PREDICTION_PROBS; i++)
for (j = 0; j < 2; j++)
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index c0fbc4949..1c3f18239 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -268,7 +268,7 @@ static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane,
if (eob == 1) {
dqcoeff[0] = 0;
} else {
- if (tx_size <= TX_16X16 && eob <= 10)
+ if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10)
memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0]));
else if (tx_size == TX_32X32 && eob <= 34)
memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
@@ -2109,8 +2109,8 @@ static size_t read_uncompressed_header(VP10Decoder *pbi,
setup_segmentation_dequant(cm);
#if CONFIG_MISC_FIXES
- cm->tx_mode = (xd->lossless[0]) ? ONLY_4X4
- : read_tx_mode(rb);
+ cm->tx_mode = (!cm->seg.enabled && xd->lossless[0]) ? ONLY_4X4
+ : read_tx_mode(rb);
cm->reference_mode = read_frame_reference_mode(cm, rb);
#endif
@@ -2124,6 +2124,23 @@ static size_t read_uncompressed_header(VP10Decoder *pbi,
return sz;
}
+static void read_ext_tx_probs(FRAME_CONTEXT *fc, vpx_reader *r) {
+ int i, j, k;
+ if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ for (j = 0; j < TX_TYPES; ++j)
+ for (k = 0; k < TX_TYPES - 1; ++k)
+ vp10_diff_update_prob(r, &fc->intra_ext_tx_prob[i][j][k]);
+ }
+ }
+ if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ for (k = 0; k < TX_TYPES - 1; ++k)
+ vp10_diff_update_prob(r, &fc->inter_ext_tx_prob[i][k]);
+ }
+ }
+}
+
static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data,
size_t partition_size) {
VP10_COMMON *const cm = &pbi->common;
@@ -2205,6 +2222,7 @@ static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data,
#endif
read_mv_probs(nmvc, cm->allow_high_precision_mv, &r);
+ read_ext_tx_probs(fc, &r);
}
return vpx_reader_has_error(&r);
@@ -2245,6 +2263,10 @@ static void debug_check_frame_counts(const VP10_COMMON *const cm) {
assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx)));
assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip)));
assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv)));
+ assert(!memcmp(cm->counts.intra_ext_tx, zero_counts.intra_ext_tx,
+ sizeof(cm->counts.intra_ext_tx)));
+ assert(!memcmp(cm->counts.inter_ext_tx, zero_counts.inter_ext_tx,
+ sizeof(cm->counts.inter_ext_tx)));
}
#endif // NDEBUG
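
Note: the inverse_transform_block_inter change restricts the partial-clear shortcut to DCT_DCT. A hedged comment sketch of the bound it appears to rely on:

/* Assuming the roughly zig-zag scan used for DCT_DCT: eob <= 10 means the
 * nonzero coefficients lie in the first few anti-diagonals, hence within
 * the first four rows, so clearing 4 * (4 << tx_size) entries suffices.
 * The row/column-biased scans selected for the ADST hybrids give no such
 * guarantee, so those transforms now fall through to the full memset. */
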
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index b516333e0..a28ae5592 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -100,6 +100,8 @@ static TX_SIZE read_tx_size(VP10_COMMON *cm, MACROBLOCKD *xd,
TX_MODE tx_mode = cm->tx_mode;
BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ if (xd->lossless[xd->mi[0]->mbmi.segment_id])
+ return TX_4X4;
if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8)
return read_selected_tx_size(cm, xd, max_tx_size, r);
else
@@ -294,6 +296,20 @@ static void read_intra_frame_mode_info(VP10_COMMON *const cm,
}
mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode);
+
+ if (mbmi->tx_size < TX_32X32 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ FRAME_COUNTS *counts = xd->counts;
+ TX_TYPE tx_type_nom = intra_mode_to_tx_type_context[mbmi->mode];
+ mbmi->tx_type = vpx_read_tree(
+ r, vp10_ext_tx_tree,
+ cm->fc->intra_ext_tx_prob[mbmi->tx_size][tx_type_nom]);
+ if (counts)
+ ++counts->intra_ext_tx[mbmi->tx_size][tx_type_nom][mbmi->tx_type];
+ } else {
+ mbmi->tx_type = DCT_DCT;
+ }
}
static int read_mv_component(vpx_reader *r,
@@ -650,6 +666,28 @@ static void read_inter_frame_mode_info(VP10Decoder *const pbi,
read_inter_block_mode_info(pbi, xd, mi, mi_row, mi_col, r);
else
read_intra_block_mode_info(cm, xd, mi, r);
+
+ if (mbmi->tx_size < TX_32X32 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ FRAME_COUNTS *counts = xd->counts;
+ if (inter_block) {
+ mbmi->tx_type = vpx_read_tree(
+ r, vp10_ext_tx_tree,
+ cm->fc->inter_ext_tx_prob[mbmi->tx_size]);
+ if (counts)
+ ++counts->inter_ext_tx[mbmi->tx_size][mbmi->tx_type];
+ } else {
+ const TX_TYPE tx_type_nom = intra_mode_to_tx_type_context[mbmi->mode];
+ mbmi->tx_type = vpx_read_tree(
+ r, vp10_ext_tx_tree,
+ cm->fc->intra_ext_tx_prob[mbmi->tx_size][tx_type_nom]);
+ if (counts)
+ ++counts->intra_ext_tx[mbmi->tx_size][tx_type_nom][mbmi->tx_type];
+ }
+ } else {
+ mbmi->tx_type = DCT_DCT;
+ }
}
void vp10_read_mode_info(VP10Decoder *const pbi, MACROBLOCKD *xd,
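
Note: the gate deciding whether an explicit tx_type is coded is repeated verbatim here (twice), in bitstream.c, and in encodeframe.c. A hypothetical helper capturing it, not part of this patch:

static INLINE int is_ext_tx_coded(const VP10_COMMON *cm,
                                  const MB_MODE_INFO *mbmi) {
  /* A transform type is signaled only for sizes below 32x32, at nonzero
   * base_qindex, on blocks that are neither skipped nor in a SEG_LVL_SKIP
   * segment; every other case implies DCT_DCT. */
  return mbmi->tx_size < TX_32X32 && cm->base_qindex > 0 && !mbmi->skip &&
         !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);
}
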
diff --git a/vp10/encoder/aq_complexity.c b/vp10/encoder/aq_complexity.c
index 0de044cf9..2506a4e55 100644
--- a/vp10/encoder/aq_complexity.c
+++ b/vp10/encoder/aq_complexity.c
@@ -51,7 +51,7 @@ void vp10_setup_in_frame_q_adj(VP10_COMP *cpi) {
// Make SURE use of floating point in this function is safe.
vpx_clear_system_state();
- if (cm->frame_type == KEY_FRAME ||
+ if (frame_is_intra_only(cm) || cm->error_resilient_mode ||
cpi->refresh_alt_ref_frame ||
(cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
int segment;
diff --git a/vp10/encoder/aq_variance.c b/vp10/encoder/aq_variance.c
index e8e88c3b6..bed5162fb 100644
--- a/vp10/encoder/aq_variance.c
+++ b/vp10/encoder/aq_variance.c
@@ -47,7 +47,7 @@ void vp10_vaq_frame_setup(VP10_COMP *cpi) {
struct segmentation *seg = &cm->seg;
int i;
- if (cm->frame_type == KEY_FRAME ||
+ if (frame_is_intra_only(cm) || cm->error_resilient_mode ||
cpi->refresh_alt_ref_frame ||
(cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
vp10_enable_segmentation(seg);
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 361ac9962..ede8bb370 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -58,6 +58,12 @@ static INLINE void write_uniform(vpx_writer *w, int n, int v) {
}
}
+static struct vp10_token ext_tx_encodings[TX_TYPES];
+
+void vp10_encode_token_init() {
+ vp10_tokens_from_tree(ext_tx_encodings, vp10_ext_tx_tree);
+}
+
static void write_intra_mode(vpx_writer *w, PREDICTION_MODE mode,
const vpx_prob *probs) {
vp10_write_token(w, vp10_intra_mode_tree, probs, &intra_mode_encodings[mode]);
@@ -90,6 +96,24 @@ static void prob_diff_update(const vpx_tree_index *tree,
vp10_cond_prob_diff_update(w, &probs[i], branch_ct[i]);
}
+static int prob_diff_update_savings(const vpx_tree_index *tree,
+ vpx_prob probs[/*n - 1*/],
+ const unsigned int counts[/*n - 1*/],
+ int n) {
+ int i;
+ unsigned int branch_ct[32][2];
+ int savings = 0;
+
+ // Assuming max number of probabilities <= 32
+ assert(n <= 32);
+ vp10_tree_probs_from_distribution(tree, branch_ct, counts);
+ for (i = 0; i < n - 1; ++i) {
+ savings += vp10_cond_prob_diff_update_savings(&probs[i],
+ branch_ct[i]);
+ }
+ return savings;
+}
+
static void write_selected_tx_size(const VP10_COMMON *cm,
const MACROBLOCKD *xd, vpx_writer *w) {
TX_SIZE tx_size = xd->mi[0]->mbmi.tx_size;
@@ -133,6 +157,49 @@ static void update_switchable_interp_probs(VP10_COMMON *cm, vpx_writer *w,
counts->switchable_interp[j], SWITCHABLE_FILTERS, w);
}
+static void update_ext_tx_probs(VP10_COMMON *cm, vpx_writer *w) {
+ const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) -
+ vp10_cost_zero(GROUP_DIFF_UPDATE_PROB);
+ int i, j;
+
+ int savings = 0;
+ int do_update = 0;
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ for (j = 0; j < TX_TYPES; ++j)
+ savings += prob_diff_update_savings(
+ vp10_ext_tx_tree, cm->fc->intra_ext_tx_prob[i][j],
+ cm->counts.intra_ext_tx[i][j], TX_TYPES);
+ }
+ do_update = savings > savings_thresh;
+ vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB);
+ if (do_update) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ for (j = 0; j < TX_TYPES; ++j)
+ prob_diff_update(vp10_ext_tx_tree,
+ cm->fc->intra_ext_tx_prob[i][j],
+ cm->counts.intra_ext_tx[i][j],
+ TX_TYPES, w);
+ }
+ }
+ savings = 0;
+ do_update = 0;
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ savings += prob_diff_update_savings(
+ vp10_ext_tx_tree, cm->fc->inter_ext_tx_prob[i],
+ cm->counts.inter_ext_tx[i], TX_TYPES);
+ }
+ do_update = savings > savings_thresh;
+ vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB);
+ if (do_update) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ prob_diff_update(vp10_ext_tx_tree,
+ cm->fc->inter_ext_tx_prob[i],
+ cm->counts.inter_ext_tx[i],
+ TX_TYPES, w);
+ }
+ }
+}
+
static void pack_mb_tokens(vpx_writer *w,
TOKENEXTRA **tp, const TOKENEXTRA *const stop,
vpx_bit_depth_t bit_depth, const TX_SIZE tx) {
@@ -303,7 +370,7 @@ static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi,
vpx_write(w, is_inter, vp10_get_intra_inter_prob(cm, xd));
if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
- !(is_inter && skip)) {
+ !(is_inter && skip) && !xd->lossless[segment_id]) {
write_selected_tx_size(cm, xd, w);
}
@@ -370,6 +437,25 @@ static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi,
}
}
}
+ if (mbmi->tx_size < TX_32X32 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ if (is_inter) {
+ vp10_write_token(
+ w, vp10_ext_tx_tree,
+ cm->fc->inter_ext_tx_prob[mbmi->tx_size],
+ &ext_tx_encodings[mbmi->tx_type]);
+ } else {
+ vp10_write_token(
+ w, vp10_ext_tx_tree,
+ cm->fc->intra_ext_tx_prob[mbmi->tx_size]
+ [intra_mode_to_tx_type_context[mbmi->mode]],
+ &ext_tx_encodings[mbmi->tx_type]);
+ }
+ } else {
+ if (!mbmi->skip)
+ assert(mbmi->tx_type == DCT_DCT);
+ }
}
static void write_mb_modes_kf(const VP10_COMMON *cm, const MACROBLOCKD *xd,
@@ -391,7 +477,8 @@ static void write_mb_modes_kf(const VP10_COMMON *cm, const MACROBLOCKD *xd,
write_skip(cm, xd, mbmi->segment_id, mi, w);
- if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT)
+ if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
+ !xd->lossless[mbmi->segment_id])
write_selected_tx_size(cm, xd, w);
if (bsize >= BLOCK_8X8) {
@@ -412,6 +499,16 @@ static void write_mb_modes_kf(const VP10_COMMON *cm, const MACROBLOCKD *xd,
}
write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mbmi->mode]);
+
+ if (mbmi->tx_size < TX_32X32 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ vp10_write_token(
+ w, vp10_ext_tx_tree,
+ cm->fc->intra_ext_tx_prob[mbmi->tx_size]
+ [intra_mode_to_tx_type_context[mbmi->mode]],
+ &ext_tx_encodings[mbmi->tx_type]);
+ }
}
static void write_modes_b(VP10_COMP *cpi, const TileInfo *const tile,
@@ -1260,7 +1357,7 @@ static void write_uncompressed_header(VP10_COMP *cpi,
encode_quantization(cm, wb);
encode_segmentation(cm, xd, wb);
#if CONFIG_MISC_FIXES
- if (xd->lossless[0])
+ if (!cm->seg.enabled && xd->lossless[0])
cm->tx_mode = TX_4X4;
else
write_txfm_mode(cm->tx_mode, wb);
@@ -1380,6 +1477,7 @@ static size_t write_compressed_header(VP10_COMP *cpi, uint8_t *data) {
vp10_write_nmv_probs(cm, cm->allow_high_precision_mv, &header_bc,
&counts->mv);
+ update_ext_tx_probs(cm, &header_bc);
}
vpx_stop_encode(&header_bc);
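
Note: update_ext_tx_probs uses a two-level scheme: one group flag coded at GROUP_DIFF_UPDATE_PROB, then the usual per-probability conditional updates. The decision rule, restated as a hedged sketch:

/* Send the group update only if the measured savings exceed the extra
 * cost of coding the flag as "update" rather than "no update". */
const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) -
                           vp10_cost_zero(GROUP_DIFF_UPDATE_PROB);
vpx_write(w, savings > savings_thresh, GROUP_DIFF_UPDATE_PROB);

read_ext_tx_probs() in decodeframe.c mirrors the flag with vpx_read() on the same constant.
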
diff --git a/vp10/encoder/bitstream.h b/vp10/encoder/bitstream.h
index aa0ed2fdf..b1da89f1d 100644
--- a/vp10/encoder/bitstream.h
+++ b/vp10/encoder/bitstream.h
@@ -18,6 +18,7 @@ extern "C" {
#include "vp10/encoder/encoder.h"
+void vp10_encode_token_init();
void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dest, size_t *size);
static INLINE int vp10_preserve_existing_gf(VP10_COMP *cpi) {
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index 9381b653d..26ce5a1eb 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -3024,5 +3024,16 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td,
}
++td->counts->tx.tx_totals[mbmi->tx_size];
++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])];
+ if (mbmi->tx_size < TX_32X32 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ if (is_inter_block(mbmi)) {
+ ++td->counts->inter_ext_tx[mbmi->tx_size][mbmi->tx_type];
+ } else {
+ ++td->counts->intra_ext_tx[mbmi->tx_size]
+ [intra_mode_to_tx_type_context[mbmi->mode]]
+ [mbmi->tx_type];
+ }
+ }
}
}
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index 175c6d855..9e3bec40e 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -328,6 +328,7 @@ void vp10_initialize_enc(void) {
vp10_rc_init_minq_luts();
vp10_entropy_mv_init();
vp10_temporal_filter_init();
+ vp10_encode_token_init();
init_done = 1;
}
}
@@ -2654,7 +2655,7 @@ static void loopfilter_frame(VP10_COMP *cpi, VP10_COMMON *cm) {
MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
struct loopfilter *lf = &cm->lf;
if (is_lossless_requested(&cpi->oxcf)) {
- lf->filter_level = 0;
+ lf->filter_level = 0;
} else {
struct vpx_usec_timer timer;
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index 2a44e4744..bd6a00932 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -467,6 +467,8 @@ typedef struct VP10_COMP {
int multi_arf_enabled;
int multi_arf_last_grp_enabled;
+ int intra_tx_type_costs[EXT_TX_SIZES][TX_TYPES][TX_TYPES];
+ int inter_tx_type_costs[EXT_TX_SIZES][TX_TYPES];
#if CONFIG_VP9_TEMPORAL_DENOISING
VP9_DENOISER denoiser;
#endif
diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c
index 5623a7202..f4fdb2417 100644
--- a/vp10/encoder/rd.c
+++ b/vp10/encoder/rd.c
@@ -83,6 +83,18 @@ static void fill_mode_costs(VP10_COMP *cpi) {
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
vp10_cost_tokens(cpi->switchable_interp_costs[i],
fc->switchable_interp_prob[i], vp10_switchable_interp_tree);
+
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ for (j = 0; j < TX_TYPES; ++j)
+ vp10_cost_tokens(cpi->intra_tx_type_costs[i][j],
+ fc->intra_ext_tx_prob[i][j],
+ vp10_ext_tx_tree);
+ }
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ vp10_cost_tokens(cpi->inter_tx_type_costs[i],
+ fc->inter_ext_tx_prob[i],
+ vp10_ext_tx_tree);
+ }
}
static void fill_token_costs(vp10_coeff_cost *c,
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index bbddc1d29..90a716d2c 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -54,6 +54,8 @@
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8
+const double ext_tx_th = 0.99;
+
typedef struct {
PREDICTION_MODE mode;
MV_REFERENCE_FRAME ref_frame[2];
@@ -598,11 +600,56 @@ static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ TX_TYPE tx_type, best_tx_type = DCT_DCT;
+ int r, s;
+ int64_t d, psse, this_rd, best_rd = INT64_MAX;
+ vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
+ int s0 = vp10_cost_bit(skip_prob, 0);
+ int s1 = vp10_cost_bit(skip_prob, 1);
+ const int is_inter = is_inter_block(mbmi);
+
mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size);
+ if (mbmi->tx_size < TX_32X32 &&
+ !xd->lossless[mbmi->segment_id]) {
+ for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+ mbmi->tx_type = tx_type;
+ txfm_rd_in_plane(x, &r, &d, &s,
+ &psse, ref_best_rd, 0, bs, mbmi->tx_size,
+ cpi->sf.use_fast_coef_costing);
+ if (r == INT_MAX)
+ continue;
+ if (is_inter)
+ r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
+ else
+ r += cpi->intra_tx_type_costs[mbmi->tx_size]
+ [intra_mode_to_tx_type_context[mbmi->mode]]
+ [mbmi->tx_type];
+ if (s)
+ this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse);
+ else
+ this_rd = RDCOST(x->rdmult, x->rddiv, r + s0, d);
+ if (is_inter && !xd->lossless[mbmi->segment_id] && !s)
+ this_rd = VPXMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, psse));
+ if (this_rd < ((best_tx_type == DCT_DCT) ? ext_tx_th : 1) * best_rd) {
+ best_rd = this_rd;
+ best_tx_type = mbmi->tx_type;
+ }
+ }
+ }
+ mbmi->tx_type = best_tx_type;
txfm_rd_in_plane(x, rate, distortion, skip,
sse, ref_best_rd, 0, bs,
mbmi->tx_size, cpi->sf.use_fast_coef_costing);
+ if (mbmi->tx_size < TX_32X32 && !xd->lossless[mbmi->segment_id] &&
+ *rate != INT_MAX) {
+ if (is_inter)
+ *rate += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
+ else
+ *rate += cpi->intra_tx_type_costs[mbmi->tx_size]
+ [intra_mode_to_tx_type_context[mbmi->mode]]
+ [mbmi->tx_type];
+ }
}
static void choose_smallest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
@@ -632,87 +679,115 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x,
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
- int r[TX_SIZES][2], s[TX_SIZES];
- int64_t d[TX_SIZES], sse[TX_SIZES];
- int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
- {INT64_MAX, INT64_MAX},
- {INT64_MAX, INT64_MAX},
- {INT64_MAX, INT64_MAX}};
+ int r, s;
+ int64_t d, sse;
+ int64_t rd = INT64_MAX;
int n, m;
int s0, s1;
- int64_t best_rd = INT64_MAX;
+ int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
TX_SIZE best_tx = max_tx_size;
int start_tx, end_tx;
+ const int tx_select = cm->tx_mode == TX_MODE_SELECT;
+ TX_TYPE tx_type, best_tx_type = DCT_DCT;
+ const int is_inter = is_inter_block(mbmi);
const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
assert(skip_prob > 0);
s0 = vp10_cost_bit(skip_prob, 0);
s1 = vp10_cost_bit(skip_prob, 1);
- if (cm->tx_mode == TX_MODE_SELECT) {
+ if (tx_select) {
start_tx = max_tx_size;
end_tx = 0;
} else {
- TX_SIZE chosen_tx_size = VPXMIN(max_tx_size,
- tx_mode_to_biggest_tx_size[cm->tx_mode]);
+ const TX_SIZE chosen_tx_size =
+ VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]);
start_tx = chosen_tx_size;
end_tx = chosen_tx_size;
}
- for (n = start_tx; n >= end_tx; n--) {
- int r_tx_size = 0;
- for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
- if (m == n)
- r_tx_size += vp10_cost_zero(tx_probs[m]);
- else
- r_tx_size += vp10_cost_one(tx_probs[m]);
- }
- txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
- &sse[n], ref_best_rd, 0, bs, n,
- cpi->sf.use_fast_coef_costing);
- r[n][1] = r[n][0];
- if (r[n][0] < INT_MAX) {
- r[n][1] += r_tx_size;
- }
- if (d[n] == INT64_MAX || r[n][0] == INT_MAX) {
- rd[n][0] = rd[n][1] = INT64_MAX;
- } else if (s[n]) {
- if (is_inter_block(mbmi)) {
- rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
- r[n][1] -= r_tx_size;
- } else {
- rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
- rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]);
+ *distortion = INT64_MAX;
+ *rate = INT_MAX;
+ *skip = 0;
+ *psse = INT64_MAX;
+
+ for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
+ last_rd = INT64_MAX;
+ for (n = start_tx; n >= end_tx; --n) {
+ int r_tx_size = 0;
+ for (m = 0; m <= n - (n == (int) max_tx_size); ++m) {
+ if (m == n)
+ r_tx_size += vp10_cost_zero(tx_probs[m]);
+ else
+ r_tx_size += vp10_cost_one(tx_probs[m]);
}
- } else {
- rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
- rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
- }
- if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] &&
- !s[n] && sse[n] != INT64_MAX) {
- rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
- rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
- }
+ if (n >= TX_32X32 && tx_type != DCT_DCT) {
+ continue;
+ }
+ mbmi->tx_type = tx_type;
+ txfm_rd_in_plane(x, &r, &d, &s,
+ &sse, ref_best_rd, 0, bs, n,
+ cpi->sf.use_fast_coef_costing);
+ if (n < TX_32X32 &&
+ !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
+ r != INT_MAX) {
+ if (is_inter)
+ r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
+ else
+ r += cpi->intra_tx_type_costs[mbmi->tx_size]
+ [intra_mode_to_tx_type_context[mbmi->mode]]
+ [mbmi->tx_type];
+ }
- // Early termination in transform size search.
- if (cpi->sf.tx_size_search_breakout &&
- (rd[n][1] == INT64_MAX ||
- (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
- s[n] == 1))
- break;
+ if (r == INT_MAX)
+ continue;
- if (rd[n][1] < best_rd) {
- best_tx = n;
- best_rd = rd[n][1];
+ if (s) {
+ if (is_inter) {
+ rd = RDCOST(x->rdmult, x->rddiv, s1, sse);
+ } else {
+ rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select, sse);
+ }
+ } else {
+ rd = RDCOST(x->rdmult, x->rddiv, r + s0 + r_tx_size * tx_select, d);
+ }
+
+ if (tx_select && !(s && is_inter))
+ r += r_tx_size;
+
+ if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !s)
+ rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, sse));
+
+ // Early termination in transform size search.
+ if (cpi->sf.tx_size_search_breakout &&
+ (rd == INT64_MAX ||
+ (s == 1 && tx_type != DCT_DCT && n < start_tx) ||
+ (n < (int) max_tx_size && rd > last_rd)))
+ break;
+
+ last_rd = rd;
+ if (rd <
+ (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) *
+ best_rd) {
+ best_tx = n;
+ best_rd = rd;
+ *distortion = d;
+ *rate = r;
+ *skip = s;
+ *psse = sse;
+ best_tx_type = mbmi->tx_type;
+ }
}
}
- mbmi->tx_size = best_tx;
- *distortion = d[mbmi->tx_size];
- *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
- *skip = s[mbmi->tx_size];
- *psse = sse[mbmi->tx_size];
+ mbmi->tx_size = best_tx;
+ mbmi->tx_type = best_tx_type;
+ if (mbmi->tx_size >= TX_32X32)
+ assert(mbmi->tx_type == DCT_DCT);
+ txfm_rd_in_plane(x, &r, &d, &s,
+ &sse, ref_best_rd, 0, bs, best_tx,
+ cpi->sf.use_fast_coef_costing);
}
static void super_block_yrd(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
@@ -1065,6 +1140,7 @@ static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
int this_rate, this_rate_tokenonly, s;
int64_t this_distortion, this_rd;
TX_SIZE best_tx = TX_4X4;
+ TX_TYPE best_tx_type = DCT_DCT;
int *bmode_costs;
const MODE_INFO *above_mi = xd->above_mi;
const MODE_INFO *left_mi = xd->left_mi;
@@ -1091,6 +1167,7 @@ static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
mode_selected = mode;
best_rd = this_rd;
best_tx = mic->mbmi.tx_size;
+ best_tx_type = mic->mbmi.tx_type;
*rate = this_rate;
*rate_tokenonly = this_rate_tokenonly;
*distortion = this_distortion;
@@ -1100,6 +1177,7 @@ static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
mic->mbmi.mode = mode_selected;
mic->mbmi.tx_size = best_tx;
+ mic->mbmi.tx_type = best_tx_type;
return best_rd;
}
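
Note: ext_tx_th = 0.99 biases both transform-type searches toward DCT_DCT: a competing type must win by more than 1%, presumably to avoid flipping types on rate-distortion noise. A worked comment with hypothetical numbers:

/* With best_tx_type == DCT_DCT and best_rd == 1000, a candidate is kept
 * only if this_rd < 0.99 * 1000 = 990. Once a non-DCT type holds the
 * best slot, the factor reverts to 1 and a plain comparison applies. */
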
diff --git a/vp10/encoder/segmentation.c b/vp10/encoder/segmentation.c
index 6a20ee47d..677910fa3 100644
--- a/vp10/encoder/segmentation.c
+++ b/vp10/encoder/segmentation.c
@@ -273,7 +273,7 @@ void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd) {
no_pred_cost = cost_segmap(no_pred_segcounts, no_pred_tree);
// Key frames cannot use temporal prediction
- if (!frame_is_intra_only(cm)) {
+ if (!frame_is_intra_only(cm) && !cm->error_resilient_mode) {
// Work out probability tree for coding those segments not
// predicted using the temporal method and the cost.
calc_segtree_probs(t_unpred_seg_counts, t_pred_tree, segp->tree_probs);
@@ -300,6 +300,7 @@ void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd) {
// Now choose which coding method to use.
if (t_pred_cost < no_pred_cost) {
+ assert(!cm->error_resilient_mode);
seg->temporal_update = 1;
#if !CONFIG_MISC_FIXES
memcpy(segp->tree_probs, t_pred_tree, sizeof(t_pred_tree));
diff --git a/vp10/encoder/subexp.c b/vp10/encoder/subexp.c
index 67e820b1f..d4074775b 100644
--- a/vp10/encoder/subexp.c
+++ b/vp10/encoder/subexp.c
@@ -212,3 +212,12 @@ void vp10_cond_prob_diff_update(vpx_writer *w, vpx_prob *oldp,
vpx_write(w, 0, upd);
}
}
+
+int vp10_cond_prob_diff_update_savings(vpx_prob *oldp,
+ const unsigned int ct[2]) {
+ const vpx_prob upd = DIFF_UPDATE_PROB;
+ vpx_prob newp = get_binary_prob(ct[0], ct[1]);
+ const int savings = vp10_prob_diff_update_savings_search(ct, *oldp, &newp,
+ upd);
+ return savings;
+}
diff --git a/vp10/encoder/subexp.h b/vp10/encoder/subexp.h
index 04b96c0bd..091334f1f 100644
--- a/vp10/encoder/subexp.h
+++ b/vp10/encoder/subexp.h
@@ -37,6 +37,8 @@ int vp10_prob_diff_update_savings_search_model(const unsigned int *ct,
vpx_prob upd,
int stepsize);
+int vp10_cond_prob_diff_update_savings(vpx_prob *oldp,
+ const unsigned int ct[2]);
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index 77d1ff459..9545729fb 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -11,7 +11,7 @@
#include "vp9/common/vp9_mvref_common.h"
-// This function searches the neighbourhood of a given MB/SB
+// This function searches the neighborhood of a given MB/SB
// to try and find candidate reference vectors.
static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
@@ -24,7 +24,7 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
int different_ref_found = 0;
int context_counter = 0;
- const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ?
+ const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ?
cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL;
const TileInfo *const tile = &xd->tile;
@@ -59,8 +59,8 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
for (; i < MVREF_NEIGHBOURS; ++i) {
const POSITION *const mv_ref = &mv_ref_search[i];
if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
- const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row *
- xd->mi_stride]->mbmi;
+ const MB_MODE_INFO *const candidate =
+ &xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]->mbmi;
different_ref_found = 1;
if (candidate->ref_frame[0] == ref_frame)
@@ -71,7 +71,7 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
}
// TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast
- // on windows platform. The sync here is unncessary if use_perv_frame_mvs
+ // on windows platform. The sync here is unnecessary if use_prev_frame_mvs
// is 0. But after removing it, there will be hang in the unit test on windows
// due to several threads waiting for a thread's signal.
#if defined(_WIN32) && !HAVE_PTHREAD_H
@@ -101,8 +101,8 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
const POSITION *mv_ref = &mv_ref_search[i];
if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
- const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row
- * xd->mi_stride]->mbmi;
+ const MB_MODE_INFO *const candidate =
+ &xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]->mbmi;
// If the candidate is INTRA we don't want to consider its mv.
IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias,
@@ -156,16 +156,6 @@ void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
mi_row, mi_col, sync, data, mode_context);
}
-static void lower_mv_precision(MV *mv, int allow_hp) {
- const int use_hp = allow_hp && vp9_use_mv_hp(mv);
- if (!use_hp) {
- if (mv->row & 1)
- mv->row += (mv->row > 0 ? -1 : 1);
- if (mv->col & 1)
- mv->col += (mv->col > 0 ? -1 : 1);
- }
-}
-
void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp,
int_mv *mvlist, int_mv *nearest_mv,
int_mv *near_mv) {
diff --git a/vp9/common/vp9_mvref_common.h b/vp9/common/vp9_mvref_common.h
index bd216d433..22fbaf857 100644
--- a/vp9/common/vp9_mvref_common.h
+++ b/vp9/common/vp9_mvref_common.h
@@ -157,7 +157,7 @@ static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
// This macro is used to add a motion vector mv_ref list if it isn't
// already in the list. If it's the second motion vector it will also
-// skip all additional processing and jump to done!
+// skip all additional processing and jump to Done!
#define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done) \
do { \
if (refmv_count) { \
@@ -207,6 +207,16 @@ static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
}
+static INLINE void lower_mv_precision(MV *mv, int allow_hp) {
+ const int use_hp = allow_hp && vp9_use_mv_hp(mv);
+ if (!use_hp) {
+ if (mv->row & 1)
+ mv->row += (mv->row > 0 ? -1 : 1);
+ if (mv->col & 1)
+ mv->col += (mv->col > 0 ? -1 : 1);
+ }
+}
+
typedef void (*find_mv_refs_sync)(void *const data, int mi_row);
void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
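
Note: lower_mv_precision moves into the header so the decoder's new fast path (dec_find_best_ref_mvs in vp9/decoder/vp9_decodemv.c) can share it. A small usage sketch with concrete values:

MV mv = { 5, -3 };           /* 1/8-pel units */
lower_mv_precision(&mv, 0);  /* high-precision MVs not allowed */
/* mv is now { 4, -2 }: each odd component is nudged one 1/8-pel step
 * toward zero, leaving even (i.e. 1/4-pel) values. With allow_hp == 1 and
 * a small vector, vp9_use_mv_hp() returns 1 and the MV is left intact. */
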
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index d8c14ecc8..37658dc94 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -190,6 +190,12 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
// Co-ordinate of containing block to pixel precision.
const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
+#if CONFIG_BETTER_HW_COMPATIBILITY
+ assert(xd->mi[0]->mbmi.sb_type != BLOCK_4X8 &&
+ xd->mi[0]->mbmi.sb_type != BLOCK_8X4);
+ assert(mv_q4.row == mv.row * (1 << (1 - pd->subsampling_y)) &&
+ mv_q4.col == mv.col * (1 << (1 - pd->subsampling_x)));
+#endif
if (plane == 0)
pre_buf->buf = xd->block_refs[ref]->buf->y_buffer;
else if (plane == 1)
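
Note: under CONFIG_BETTER_HW_COMPATIBILITY these asserts pin down two stream properties that are awkward for hardware decoders: 4x8/8x4 partitions are excluded, and each plane's MV must be a pure scaling of the block MV. A hedged reading of the second assert:

/* With 4:2:0 chroma (subsampling_x == subsampling_y == 1) the factor
 * 1 << (1 - 1) is 1, so mv_q4 must equal mv exactly; for an unsubsampled
 * plane the factor is 2. Any mismatch means the vector was altered on the
 * way to mv_q4 (presumably by the UMV border clamp), which this flag
 * forbids the encoder from producing. */
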
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 39e4dcfe3..e27634cdd 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -587,7 +587,12 @@ static void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd,
// Co-ordinate of containing block to pixel precision.
int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
-
+#if CONFIG_BETTER_HW_COMPATIBILITY
+ assert(xd->mi[0]->mbmi.sb_type != BLOCK_4X8 &&
+ xd->mi[0]->mbmi.sb_type != BLOCK_8X4);
+ assert(mv_q4.row == mv->row * (1 << (1 - pd->subsampling_y)) &&
+ mv_q4.col == mv->col * (1 << (1 - pd->subsampling_x)));
+#endif
// Co-ordinate of the block to 1/16th pixel precision.
x0_16 = (x_start + x) << SUBPEL_BITS;
y0_16 = (y_start + y) << SUBPEL_BITS;
@@ -714,6 +719,18 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi,
const InterpKernel *kernel = vp9_filter_kernels[mi->mbmi.interp_filter];
const BLOCK_SIZE sb_type = mi->mbmi.sb_type;
const int is_compound = has_second_ref(&mi->mbmi);
+ int ref;
+
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ const MV_REFERENCE_FRAME frame = mi->mbmi.ref_frame[ref];
+ RefBuffer *ref_buf = &pbi->common.frame_refs[frame - LAST_FRAME];
+
+ xd->block_refs[ref] = ref_buf;
+ if (!vp9_is_valid_scale(&ref_buf->sf))
+ vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
+ "Reference frame has invalid dimensions");
+ vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col, &ref_buf->sf);
+ }
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
struct macroblockd_plane *const pd = &xd->plane[plane];
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index d3ca7b3fe..42f554591 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -284,12 +284,19 @@ static int read_mv_component(vpx_reader *r,
return sign ? -mag : mag;
}
+// TODO(slavarnway): move to vp9_entropymv.h and replace vp9_use_mv_hp
+#define COMPANDED_MVREF_THRESH 8
+static int use_mv_hp(const MV *ref) {
+ return (abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH &&
+ (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH;
+}
+
static INLINE void read_mv(vpx_reader *r, MV *mv, const MV *ref,
const nmv_context *ctx,
nmv_context_counts *counts, int allow_hp) {
const MV_JOINT_TYPE joint_type =
(MV_JOINT_TYPE)vpx_read_tree(r, vp9_mv_joint_tree, ctx->joints);
- const int use_hp = allow_hp && vp9_use_mv_hp(ref);
+ const int use_hp = allow_hp && use_mv_hp(ref);
MV diff = {0, 0};
if (mv_joint_vertical(joint_type))
@@ -476,12 +483,203 @@ static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
}
}
+static void dec_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, int_mv *mvlist,
+ int_mv *nearest_mv, int_mv *near_mv,
+ int refmv_count) {
+ int i;
+
+ // Make sure all the candidates are properly clamped etc
+ for (i = 0; i < refmv_count; ++i) {
+ lower_mv_precision(&mvlist[i].as_mv, allow_hp);
+ clamp_mv2(&mvlist[i].as_mv, xd);
+ }
+ *nearest_mv = mvlist[0];
+ *near_mv = mvlist[1];
+}
+
static void fpm_sync(void *const data, int mi_row) {
VP9Decoder *const pbi = (VP9Decoder *)data;
vp9_frameworker_wait(pbi->frame_worker_owner, pbi->common.prev_frame,
mi_row << MI_BLOCK_SIZE_LOG2);
}
+// This macro is used to add a motion vector mv_ref list if it isn't
+// already in the list. If it's the second motion vector or early_break
+// it will also skip all additional processing and jump to Done!
+#define ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done) \
+ do { \
+ if (refmv_count) { \
+ if ((mv).as_int != (mv_ref_list)[0].as_int) { \
+ (mv_ref_list)[(refmv_count)] = (mv); \
+ refmv_count++; \
+ goto Done; \
+ } \
+ } else { \
+ (mv_ref_list)[(refmv_count)++] = (mv); \
+ if (early_break) \
+ goto Done; \
+ } \
+ } while (0)
+
+// If either reference frame is different, not INTRA, and they
+// are different from each other scale and add the mv to our list.
+#define IF_DIFF_REF_FRAME_ADD_MV_EB(mbmi, ref_frame, ref_sign_bias, \
+ refmv_count, mv_ref_list, Done) \
+ do { \
+ if (is_inter_block(mbmi)) { \
+ if ((mbmi)->ref_frame[0] != ref_frame) \
+ ADD_MV_REF_LIST_EB(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \
+ refmv_count, mv_ref_list, Done); \
+ if (has_second_ref(mbmi) && \
+ (mbmi)->ref_frame[1] != ref_frame && \
+ (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \
+ ADD_MV_REF_LIST_EB(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \
+ refmv_count, mv_ref_list, Done); \
+ } \
+ } while (0)
+
+// This function searches the neighborhood of a given MB/SB
+// to try and find candidate reference vectors.
+static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+ MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+ const POSITION *const mv_ref_search,
+ int_mv *mv_ref_list,
+ int mi_row, int mi_col,
+ find_mv_refs_sync sync, void *const data) {
+ const int *ref_sign_bias = cm->ref_frame_sign_bias;
+ int i, refmv_count = 0;
+ int different_ref_found = 0;
+ const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ?
+ cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL;
+ const TileInfo *const tile = &xd->tile;
+ // If mode is nearestmv or newmv (uses nearestmv as a reference) then stop
+ // searching after the first mv is found.
+ const int early_break = (mi->mbmi.mode == NEARESTMV) ||
+ (mi->mbmi.mode == NEWMV);
+
+ // Blank the reference vector list
+ memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
+
+ // Check the rest of the neighbors in much the same way
+ // as before except we don't need to keep track of sub blocks or
+ // mode counts.
+ for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
+ const POSITION *const mv_ref = &mv_ref_search[i];
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
+ const MB_MODE_INFO *const candidate =
+ &xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]->mbmi;
+ different_ref_found = 1;
+
+ if (candidate->ref_frame[0] == ref_frame)
+ ADD_MV_REF_LIST_EB(candidate->mv[0], refmv_count, mv_ref_list, Done);
+ else if (candidate->ref_frame[1] == ref_frame)
+ ADD_MV_REF_LIST_EB(candidate->mv[1], refmv_count, mv_ref_list, Done);
+ }
+ }
+
+ // TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast
+ // on windows platform. The sync here is unnecessary if use_prev_frame_mvs
+ // is 0. But after removing it, there will be hang in the unit test on windows
+ // due to several threads waiting for a thread's signal.
+#if defined(_WIN32) && !HAVE_PTHREAD_H
+ if (cm->frame_parallel_decode && sync != NULL) {
+ sync(data, mi_row);
+ }
+#endif
+
+ // Check the last frame's mode and mv info.
+ if (prev_frame_mvs) {
+ // Synchronize here for frame parallel decode if sync function is provided.
+ if (cm->frame_parallel_decode && sync != NULL) {
+ sync(data, mi_row);
+ }
+
+ if (prev_frame_mvs->ref_frame[0] == ref_frame) {
+ ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done);
+ } else if (prev_frame_mvs->ref_frame[1] == ref_frame) {
+ ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done);
+ }
+ }
+
+ // Since we couldn't find 2 mvs from the same reference frame
+ // go back through the neighbors and find motion vectors from
+ // different reference frames.
+ if (different_ref_found) {
+ for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
+ const POSITION *mv_ref = &mv_ref_search[i];
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
+ const MB_MODE_INFO *const candidate =
+ &xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]->mbmi;
+
+ // If the candidate is INTRA we don't want to consider its mv.
+ IF_DIFF_REF_FRAME_ADD_MV_EB(candidate, ref_frame, ref_sign_bias,
+ refmv_count, mv_ref_list, Done);
+ }
+ }
+ }
+
+ // Since we still don't have a candidate we'll try the last frame.
+ if (prev_frame_mvs) {
+ if (prev_frame_mvs->ref_frame[0] != ref_frame &&
+ prev_frame_mvs->ref_frame[0] > INTRA_FRAME) {
+ int_mv mv = prev_frame_mvs->mv[0];
+ if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] !=
+ ref_sign_bias[ref_frame]) {
+ mv.as_mv.row *= -1;
+ mv.as_mv.col *= -1;
+ }
+ ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done);
+ }
+
+ if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME &&
+ prev_frame_mvs->ref_frame[1] != ref_frame &&
+ prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) {
+ int_mv mv = prev_frame_mvs->mv[1];
+ if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] !=
+ ref_sign_bias[ref_frame]) {
+ mv.as_mv.row *= -1;
+ mv.as_mv.col *= -1;
+ }
+ ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done);
+ }
+ }
+
+ if (mi->mbmi.mode == NEARMV)
+ refmv_count = MAX_MV_REF_CANDIDATES;
+ else
+ // we only care about the nearestmv for the remaining modes
+ refmv_count = 1;
+
+ Done:
+ // Clamp vectors
+ for (i = 0; i < refmv_count; ++i)
+ clamp_mv_ref(&mv_ref_list[i].as_mv, xd);
+
+ return refmv_count;
+}
+
+static uint8_t get_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+ const POSITION *const mv_ref_search,
+ int mi_row, int mi_col) {
+ int i;
+ int context_counter = 0;
+ const TileInfo *const tile = &xd->tile;
+
+ // Get mode count from nearest 2 blocks
+ for (i = 0; i < 2; ++i) {
+ const POSITION *const mv_ref = &mv_ref_search[i];
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
+ const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row *
+ xd->mi_stride];
+ const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+ // Keep counts for entropy encoding.
+ context_counter += mode_2_counter[candidate->mode];
+ }
+ }
+
+ return counter_to_context[context_counter];
+}
+
static void read_inter_block_mode_info(VP9Decoder *const pbi,
MACROBLOCKD *const xd,
MODE_INFO *const mi,
@@ -491,26 +689,13 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
const BLOCK_SIZE bsize = mbmi->sb_type;
const int allow_hp = cm->allow_high_precision_mv;
int_mv nearestmv[2], nearmv[2];
- int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
int ref, is_compound;
- uint8_t inter_mode_ctx[MAX_REF_FRAMES];
+ uint8_t inter_mode_ctx;
+ const POSITION *const mv_ref_search = mv_ref_blocks[bsize];
read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame);
is_compound = has_second_ref(mbmi);
-
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
- RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
-
- xd->block_refs[ref] = ref_buf;
- if ((!vp9_is_valid_scale(&ref_buf->sf)))
- vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col,
- &ref_buf->sf);
- vp9_find_mv_refs(cm, xd, mi, frame, ref_mvs[frame],
- mi_row, mi_col, fpm_sync, (void *)pbi, inter_mode_ctx);
- }
+ inter_mode_ctx = get_mode_context(cm, xd, mv_ref_search, mi_row, mi_col);
if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
mbmi->mode = ZEROMV;
@@ -521,14 +706,27 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
}
} else {
if (bsize >= BLOCK_8X8)
- mbmi->mode = read_inter_mode(cm, xd, r,
- inter_mode_ctx[mbmi->ref_frame[0]]);
- }
-
- if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) {
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- vp9_find_best_ref_mvs(xd, allow_hp, ref_mvs[mbmi->ref_frame[ref]],
- &nearestmv[ref], &nearmv[ref]);
+ mbmi->mode = read_inter_mode(cm, xd, r, inter_mode_ctx);
+ else
+ // Sub 8x8 blocks use the nearestmv as a ref_mv if the b_mode is NEWMV.
+ // Setting mode to NEARESTMV forces the search to stop after the nearestmv
+ // has been found. After b_modes have been read, mode will be overwritten
+ // by the last b_mode.
+ mbmi->mode = NEARESTMV;
+
+ if (mbmi->mode != ZEROMV) {
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ int_mv ref_mvs[MAX_MV_REF_CANDIDATES];
+ const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
+ int refmv_count;
+
+ refmv_count = dec_find_mv_refs(cm, xd, mi, frame, mv_ref_search,
+ ref_mvs, mi_row, mi_col, fpm_sync,
+ (void *)pbi);
+
+ dec_find_best_ref_mvs(xd, allow_hp, ref_mvs, &nearestmv[ref],
+ &nearmv[ref], refmv_count);
+ }
}
}
@@ -546,7 +744,7 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
for (idx = 0; idx < 2; idx += num_4x4_w) {
int_mv block[2];
const int j = idy * 2 + idx;
- b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx[mbmi->ref_frame[0]]);
+ b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx);
if (b_mode == NEARESTMV || b_mode == NEARMV) {
uint8_t dummy_mode_ctx[MAX_REF_FRAMES];
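
A note on the previous-frame fallback added above: when the candidate's
reference frame and the target reference frame sit on opposite temporal sides
of the current frame (different sign bias), the borrowed vector must be
negated. A minimal C sketch of that flip, with int_mv simplified from
vp9/common/vp9_mv.h and a hypothetical helper name (the real logic is inlined
in dec_find_mv_refs above):

#include <stdint.h>

typedef union int_mv {
  uint32_t as_int;
  struct { int16_t row, col; } as_mv;
} int_mv;  /* simplified stand-in for the decoder's type */

/* Negate the candidate MV when the two references' sign biases differ. */
static int_mv flip_mv_for_sign_bias(int_mv mv, int candidate_sign_bias,
                                    int target_sign_bias) {
  if (candidate_sign_bias != target_sign_bias) {
    mv.as_mv.row *= -1;
    mv.as_mv.col *= -1;
  }
  return mv;
}
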
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 0def2cf1f..63db214d1 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -191,7 +191,8 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
BLOCK_SIZE bsize,
int64_t rate,
int64_t dist,
- int skip) {
+ int skip,
+ struct macroblock_plane *const p) {
const VP9_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
const int bw = num_8x8_blocks_wide_lookup[bsize];
@@ -199,12 +200,25 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
const int block_index = mi_row * cm->mi_cols + mi_col;
- const int refresh_this_block = candidate_refresh_aq(cr, mbmi, rate, dist,
- bsize);
+ int refresh_this_block = candidate_refresh_aq(cr, mbmi, rate, dist, bsize);
// Default is to not update the refresh map.
int new_map_value = cr->map[block_index];
int x = 0; int y = 0;
+ int is_skin = 0;
+ if (refresh_this_block == 0 &&
+ bsize <= BLOCK_16X16 &&
+ cpi->oxcf.content != VP9E_CONTENT_SCREEN) {
+ is_skin = vp9_compute_skin_block(p[0].src.buf,
+ p[1].src.buf,
+ p[2].src.buf,
+ p[0].src.stride,
+ p[1].src.stride,
+ bsize);
+ if (is_skin)
+ refresh_this_block = 1;
+ }
+
// If this block is labeled for refresh, check if we should reset the
// segment_id.
if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) {
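
Restated as a single predicate (a hedged paraphrase, not code from the patch;
the function name is invented), the new block above only pays for the skin
classification when candidate_refresh_aq() has already declined the refresh:

/* Hypothetical restatement of the refresh decision after this change. */
static int should_refresh_block(int candidate_ok, int bsize_le_16x16,
                                int is_screen_content, int is_skin_block) {
  if (candidate_ok)
    return 1;  /* the base heuristic already wants a refresh */
  /* Otherwise force a refresh for small skin blocks on camera content. */
  return bsize_le_16x16 && !is_screen_content && is_skin_block;
}
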
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.h b/vp9/encoder/vp9_aq_cyclicrefresh.h
index a5b38138b..edf0a973e 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.h
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.h
@@ -14,6 +14,8 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_blockd.h"
+#include "vp9/encoder/vp9_block.h"
+#include "vp9/encoder/vp9_skin_detection.h"
#ifdef __cplusplus
extern "C" {
@@ -93,7 +95,8 @@ int vp9_cyclic_refresh_rc_bits_per_mb(const struct VP9_COMP *cpi, int i,
void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi,
MB_MODE_INFO *const mbmi,
int mi_row, int mi_col, BLOCK_SIZE bsize,
- int64_t rate, int64_t dist, int skip);
+ int64_t rate, int64_t dist, int skip,
+ struct macroblock_plane *const p);
void vp9_cyclic_refresh_update_sb_postencode(struct VP9_COMP *const cpi,
const MB_MODE_INFO *const mbmi,
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c
index 93aa40ae9..6533902b3 100644
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -333,20 +333,12 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
int is_skin = 0;
if (bs <= BLOCK_16X16 && denoiser->denoising_level >= kDenLow) {
- // Take center pixel in block to determine is_skin.
- const int y_width_shift = (4 << b_width_log2_lookup[bs]) >> 1;
- const int y_height_shift = (4 << b_height_log2_lookup[bs]) >> 1;
- const int uv_width_shift = y_width_shift >> 1;
- const int uv_height_shift = y_height_shift >> 1;
- const int stride = mb->plane[0].src.stride;
- const int strideuv = mb->plane[1].src.stride;
- const uint8_t ysource =
- mb->plane[0].src.buf[y_height_shift * stride + y_width_shift];
- const uint8_t usource =
- mb->plane[1].src.buf[uv_height_shift * strideuv + uv_width_shift];
- const uint8_t vsource =
- mb->plane[2].src.buf[uv_height_shift * strideuv + uv_width_shift];
- is_skin = vp9_skin_pixel(ysource, usource, vsource);
+ is_skin = vp9_compute_skin_block(mb->plane[0].src.buf,
+ mb->plane[1].src.buf,
+ mb->plane[2].src.buf,
+ mb->plane[0].src.stride,
+ mb->plane[1].src.stride,
+ bs);
}
mv_col = ctx->best_sse_mv.as_mv.col;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index cc4d1f14e..c07eee969 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1045,7 +1045,7 @@ static void update_state(VP9_COMP *cpi, ThreadData *td,
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, mi_row,
mi_col, bsize, ctx->rate, ctx->dist,
- x->skip);
+ x->skip, p);
}
}
@@ -1705,6 +1705,7 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mi = xd->mi[0];
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ struct macroblock_plane *const p = x->plane;
const struct segmentation *const seg = &cm->seg;
const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type];
const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type];
@@ -1725,7 +1726,7 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
} else {
// Setting segmentation map for cyclic_refresh.
vp9_cyclic_refresh_update_segment(cpi, mbmi, mi_row, mi_col, bsize,
- ctx->rate, ctx->dist, x->skip);
+ ctx->rate, ctx->dist, x->skip, p);
}
vp9_init_plane_quantizers(cpi, x);
}
diff --git a/vp9/encoder/vp9_noise_estimate.c b/vp9/encoder/vp9_noise_estimate.c
index 4befbb066..008a40afc 100644
--- a/vp9/encoder/vp9_noise_estimate.c
+++ b/vp9/encoder/vp9_noise_estimate.c
@@ -145,10 +145,6 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
const uint8_t *src_u = cpi->Source->u_buffer;
const uint8_t *src_v = cpi->Source->v_buffer;
const int src_uvstride = cpi->Source->uv_stride;
- const int y_width_shift = (4 << b_width_log2_lookup[bsize]) >> 1;
- const int y_height_shift = (4 << b_height_log2_lookup[bsize]) >> 1;
- const int uv_width_shift = y_width_shift >> 1;
- const int uv_height_shift = y_height_shift >> 1;
int mi_row, mi_col;
int num_low_motion = 0;
int frame_low_motion = 1;
@@ -173,13 +169,12 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
// been encoded as zero/low motion x (= thresh_consec_zeromv) frames
// in a row. consec_zero_mv[] defined for 8x8 blocks, so consider all
// 4 sub-blocks for 16x16 block. Also, avoid skin blocks.
- const uint8_t ysource =
- src_y[y_height_shift * src_ystride + y_width_shift];
- const uint8_t usource =
- src_u[uv_height_shift * src_uvstride + uv_width_shift];
- const uint8_t vsource =
- src_v[uv_height_shift * src_uvstride + uv_width_shift];
- int is_skin = vp9_skin_pixel(ysource, usource, vsource);
+ int is_skin = vp9_compute_skin_block(src_y,
+ src_u,
+ src_v,
+ src_ystride,
+ src_uvstride,
+ bsize);
if (frame_low_motion &&
cr->consec_zero_mv[bl_index] > thresh_consec_zeromv &&
cr->consec_zero_mv[bl_index1] > thresh_consec_zeromv &&
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index b929758ca..71b8bdba0 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1263,9 +1263,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
ref_frame = ref_mode_set[idx].ref_frame;
if (cpi->use_svc) {
ref_frame = ref_mode_set_svc[idx].ref_frame;
- if (svc_force_zero_mode[ref_frame - 1] &&
- frame_mv[this_mode][ref_frame].as_int != 0)
- continue;
}
if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
@@ -1273,6 +1270,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (const_motion[ref_frame] && this_mode == NEARMV)
continue;
+ if (cpi->use_svc) {
+ if (svc_force_zero_mode[ref_frame - 1] &&
+ frame_mv[this_mode][ref_frame].as_int != 0)
+ continue;
+ }
+
if (!(frame_mv[this_mode][ref_frame].as_int == 0 &&
ref_frame == LAST_FRAME)) {
i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
@@ -1855,6 +1858,13 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
if (ref_frame_skip_mask & (1 << ref_frame))
continue;
+#if CONFIG_BETTER_HW_COMPATIBILITY
+ if ((bsize == BLOCK_8X4 || bsize == BLOCK_4X8) &&
+ ref_frame > INTRA_FRAME &&
+ vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
+ continue;
+#endif
+
// TODO(jingning, agrange): Scaling reference frame not supported for
// sub8x8 blocks. Is this supported now?
if (ref_frame > INTRA_FRAME &&
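
This guard and the compound-reference version added to vp9_rdopt.c below
perform the same test; a hedged helper-style restatement (the helper name is
invented, and the rationale is inferred from the configure flag's description
of better hardware decoder compatibility):

/* Hypothetical equivalent of the two CONFIG_BETTER_HW_COMPATIBILITY guards:
 * drop 8X4/4X8 partitions whenever any active reference frame is scaled. */
static int skip_sub8x8_with_scaled_ref(const VP9_COMMON *cm,
                                       BLOCK_SIZE bsize,
                                       MV_REFERENCE_FRAME ref,
                                       MV_REFERENCE_FRAME second_ref) {
  int ref_scaled;
  if (bsize != BLOCK_8X4 && bsize != BLOCK_4X8)
    return 0;
  ref_scaled = ref > INTRA_FRAME &&
               vp9_is_scaled(&cm->frame_refs[ref - 1].sf);
  if (second_ref > INTRA_FRAME)
    ref_scaled += vp9_is_scaled(&cm->frame_refs[second_ref - 1].sf);
  return ref_scaled;
}
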
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 2a6b70703..b8d17205d 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1349,11 +1349,25 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
const InterpKernel *kernel = vp9_filter_kernels[mi->mbmi.interp_filter];
for (ref = 0; ref < 1 + is_compound; ++ref) {
- const uint8_t *pre = &pd->pre[ref].buf[vp9_raster_block_offset(BLOCK_8X8, i,
- pd->pre[ref].stride)];
+ const int bw = b_width_log2_lookup[BLOCK_8X8];
+ const int h = 4 * (i >> bw);
+ const int w = 4 * (i & ((1 << bw) - 1));
+ const struct scale_factors *sf = &xd->block_refs[ref]->sf;
+ int y_stride = pd->pre[ref].stride;
+ uint8_t *pre = pd->pre[ref].buf + (h * pd->pre[ref].stride + w);
+
+ if (vp9_is_scaled(sf)) {
+ const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
+ const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
+
+ y_stride = xd->block_refs[ref]->buf->y_stride;
+ pre = xd->block_refs[ref]->buf->y_buffer;
+ pre += scaled_buffer_offset(x_start + w, y_start + h,
+ y_stride, sf);
+ }
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- vp9_highbd_build_inter_predictor(pre, pd->pre[ref].stride,
+ vp9_highbd_build_inter_predictor(pre, y_stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
&xd->block_refs[ref]->sf, width, height,
@@ -1361,7 +1375,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
mi_col * MI_SIZE + 4 * (i % 2),
mi_row * MI_SIZE + 4 * (i / 2), xd->bd);
} else {
- vp9_build_inter_predictor(pre, pd->pre[ref].stride,
+ vp9_build_inter_predictor(pre, y_stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
&xd->block_refs[ref]->sf, width, height, ref,
@@ -1370,7 +1384,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
mi_row * MI_SIZE + 4 * (i / 2));
}
#else
- vp9_build_inter_predictor(pre, pd->pre[ref].stride,
+ vp9_build_inter_predictor(pre, y_stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
&xd->block_refs[ref]->sf, width, height, ref,
@@ -3021,7 +3035,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
// Skip checking missing references in both single and compound reference
- // modes. Note that a mode will be skipped iff both reference frames
+ // modes. Note that a mode will be skipped if both reference frames
// are masked out.
ref_frame_skip_mask[0] |= (1 << ref_frame);
ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
@@ -3804,6 +3818,16 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
ref_frame = vp9_ref_order[ref_index].ref_frame[0];
second_ref_frame = vp9_ref_order[ref_index].ref_frame[1];
+#if CONFIG_BETTER_HW_COMPATIBILITY
+    // Forbid 8X4 and 4X8 partitions if any reference frame is scaled.
+ if (bsize == BLOCK_8X4 || bsize == BLOCK_4X8) {
+ int ref_scaled = vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf);
+ if (second_ref_frame > INTRA_FRAME)
+ ref_scaled += vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf);
+ if (ref_scaled)
+ continue;
+ }
+#endif
// Look at the reference frame of the best mode so far and set the
// skip mask to look at a subset of the remaining modes.
if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) {
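
The encode_inter_mb_segment() change above stops offsetting pd->pre when the
reference is scaled and instead rebuilds the source pointer from the reference
buffer itself. The helper it leans on is, paraphrased from
vp9/common/vp9_reconinter.h, a scale-then-flatten operation:

/* Paraphrased sketch: map a current-frame pixel offset through the
 * reference's scale factors, then flatten it to a buffer offset. */
static int scaled_buffer_offset(int x_offset, int y_offset, int stride,
                                const struct scale_factors *sf) {
  const int x = sf ? sf->scale_value_x(x_offset, sf) : x_offset;
  const int y = sf ? sf->scale_value_y(y_offset, sf) : y_offset;
  return y * stride + x;
}

Without this, the sub-block offset h * stride + w would be applied in
current-frame coordinates to a buffer laid out in scaled-reference
coordinates, reading the wrong samples.
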
diff --git a/vp9/encoder/vp9_skin_detection.c b/vp9/encoder/vp9_skin_detection.c
index c2763b7da..0ca166536 100644
--- a/vp9/encoder/vp9_skin_detection.c
+++ b/vp9/encoder/vp9_skin_detection.c
@@ -48,6 +48,20 @@ int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr) {
return (evaluate_skin_color_difference(cb, cr) < skin_threshold);
}
+int vp9_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,
+ int stride, int strideuv, int bsize) {
+  // Take the center pixel in the block to determine is_skin.
+ const int y_width_shift = (4 << b_width_log2_lookup[bsize]) >> 1;
+ const int y_height_shift = (4 << b_height_log2_lookup[bsize]) >> 1;
+ const int uv_width_shift = y_width_shift >> 1;
+ const int uv_height_shift = y_height_shift >> 1;
+ const uint8_t ysource = y[y_height_shift * stride + y_width_shift];
+ const uint8_t usource = u[uv_height_shift * strideuv + uv_width_shift];
+ const uint8_t vsource = v[uv_height_shift * strideuv + uv_width_shift];
+ return vp9_skin_pixel(ysource, usource, vsource);
+}
+
+
#ifdef OUTPUT_YUV_SKINMAP
// For viewing skin map on input source.
void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) {
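
A worked instance of the center-pixel arithmetic, assuming 4:2:0 subsampling:
for bsize = BLOCK_16X16 both log2 lookups return 2, so

/* BLOCK_16X16: y_width_shift = y_height_shift = (4 << 2) >> 1 = 8,
 * uv_width_shift = uv_height_shift = 4, i.e. the classifier samples
 * y[8 * stride + 8], u[4 * strideuv + 4] and v[4 * strideuv + 4]. */
int is_skin = vp9_compute_skin_block(y, u, v, stride, strideuv,
                                     BLOCK_16X16);
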
diff --git a/vp9/encoder/vp9_skin_detection.h b/vp9/encoder/vp9_skin_detection.h
index 0a87ef9f4..73f7c39d9 100644
--- a/vp9/encoder/vp9_skin_detection.h
+++ b/vp9/encoder/vp9_skin_detection.h
@@ -23,6 +23,9 @@ struct VP9_COMP;
int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr);
+int vp9_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,
+ int stride, int strideuv, int bsize);
+
#ifdef OUTPUT_YUV_SKINMAP
// For viewing skin map on input source.
void vp9_compute_skin_map(struct VP9_COMP *const cpi, FILE *yuv_skinmap_file);
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 318d8100c..c5f0bad8f 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -394,7 +394,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->intra_y_mode_bsize_mask[i] = INTRA_DC_TM_H_V;
} else {
for (i = 0; i < BLOCK_SIZES; ++i)
- if (i >= BLOCK_16X16)
+ if (i > BLOCK_16X16)
sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
else
// Use H and V intra mode for block sizes <= 16X16.
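
The change from >= to > moves BLOCK_16X16 itself out of the DC-only bucket:
only blocks strictly larger than 16x16 are now restricted to DC prediction. A
hedged restatement of the resulting assignment (assuming the else branch
assigns INTRA_DC_H_V, as its comment indicates):

for (i = 0; i < BLOCK_SIZES; ++i)
  sf->intra_y_mode_bsize_mask[i] =
      (i > BLOCK_16X16) ? INTRA_DC : INTRA_DC_H_V;
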
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index a4e7eb19e..30a7d1013 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -36,6 +36,12 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
svc->current_superframe = 0;
for (i = 0; i < REF_FRAMES; ++i)
svc->ref_frame_index[i] = -1;
+ for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
+ cpi->svc.ext_frame_flags[sl] = 0;
+ cpi->svc.ext_lst_fb_idx[sl] = 0;
+ cpi->svc.ext_gld_fb_idx[sl] = 1;
+ cpi->svc.ext_alt_fb_idx[sl] = 2;
+ }
if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img,
@@ -566,6 +572,8 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
// Note that the check (cpi->ext_refresh_frame_flags_pending == 0) is
// needed to support the case where the frame flags may be passed in via
// vpx_codec_encode(), which can be used for the temporal-only svc case.
+ // TODO(marpan): Consider adding an enc_config parameter to better handle
+ // this case.
if (cpi->ext_refresh_frame_flags_pending == 0) {
int sl;
cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
diff --git a/vpx/vpx_image.h b/vpx/vpx_image.h
index e9e952c48..7958c6980 100644
--- a/vpx/vpx_image.h
+++ b/vpx/vpx_image.h
@@ -28,7 +28,7 @@ extern "C" {
* types, removing or reassigning enums, adding/removing/rearranging
* fields to structures
*/
-#define VPX_IMAGE_ABI_VERSION (3) /**<\hideinitializer*/
+#define VPX_IMAGE_ABI_VERSION (4) /**<\hideinitializer*/
#define VPX_IMG_FMT_PLANAR 0x100 /**< Image is a planar format. */
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index f71769918..6c6f15e51 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -288,13 +288,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_d153_predictor_4x4/;
add_proto qw/void vpx_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_v_predictor_4x4/, "$sse_x86inc";
+ specialize qw/vpx_highbd_v_predictor_4x4/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse_x86inc";
+ specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse_x86inc";
+ specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_top_predictor_4x4/;
@@ -387,7 +387,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_v_predictor_16x16/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_tm_predictor_16x16/, "$sse2_x86_64_x86inc";
+ specialize qw/vpx_highbd_tm_predictor_16x16/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_predictor_16x16/, "$sse2_x86inc";
@@ -435,10 +435,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_v_predictor_32x32/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86_64_x86inc";
+ specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86_64_x86inc";
+ specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_top_predictor_32x32/;
@@ -990,10 +990,10 @@ add_proto qw/unsigned int vpx_sad8x4/, "const uint8_t *src_ptr, int src_stride,
specialize qw/vpx_sad8x4 msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad4x8 msa/, "$sse_x86inc";
+specialize qw/vpx_sad4x8 msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad4x4 mmx neon msa/, "$sse_x86inc";
+specialize qw/vpx_sad4x4 mmx neon msa/, "$sse2_x86inc";
#
# Avg
@@ -1061,10 +1061,10 @@ add_proto qw/unsigned int vpx_sad8x4_avg/, "const uint8_t *src_ptr, int src_stri
specialize qw/vpx_sad8x4_avg msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad4x8_avg msa/, "$sse_x86inc";
+specialize qw/vpx_sad4x8_avg msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad4x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad4x4_avg msa/, "$sse_x86inc";
+specialize qw/vpx_sad4x4_avg msa/, "$sse2_x86inc";
#
# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
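
For context on what these specialize lines control: the generated
vpx_dsp_rtcd.h turns each symbol into a function pointer that runtime CPU
detection retargets. A hedged, abbreviated sketch of the generated code for
vpx_sad4x4 after this change (the exact generated output may differ):

unsigned int vpx_sad4x4_c(const uint8_t *src_ptr, int src_stride,
                          const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad4x4_sse2(const uint8_t *src_ptr, int src_stride,
                             const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad4x4)(const uint8_t *src_ptr, int src_stride,
                                       const uint8_t *ref_ptr, int ref_stride);

static void setup_rtcd_internal(void) {
  const int flags = x86_simd_caps();
  vpx_sad4x4 = vpx_sad4x4_c;
  if (flags & HAS_SSE2) vpx_sad4x4 = vpx_sad4x4_sse2;
}
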
diff --git a/vpx_dsp/x86/highbd_intrapred_sse2.asm b/vpx_dsp/x86/highbd_intrapred_sse2.asm
index b12d29c0a..c61b62104 100644
--- a/vpx_dsp/x86/highbd_intrapred_sse2.asm
+++ b/vpx_dsp/x86/highbd_intrapred_sse2.asm
@@ -17,24 +17,20 @@ pw_16: times 4 dd 16
pw_32: times 4 dd 32
SECTION .text
-INIT_MMX sse
+INIT_XMM sse2
cglobal highbd_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset
GET_GOT goffsetq
movq m0, [aboveq]
movq m2, [leftq]
- DEFINE_ARGS dst, stride, one
- mov oned, 0x0001
- pxor m1, m1
- movd m3, oned
- pshufw m3, m3, 0x0
paddw m0, m2
- pmaddwd m0, m3
- packssdw m0, m1
- pmaddwd m0, m3
+ pshuflw m1, m0, 0xe
+ paddw m0, m1
+ pshuflw m1, m0, 0x1
+ paddw m0, m1
paddw m0, [GLOBAL(pw_4)]
psraw m0, 3
- pshufw m0, m0, 0x0
+ pshuflw m0, m0, 0x0
movq [dstq ], m0
movq [dstq+strideq*2], m0
lea dstq, [dstq+strideq*4]
@@ -122,30 +118,29 @@ cglobal highbd_dc_predictor_16x16, 4, 5, 5, dst, stride, above, left, goffset
RESTORE_GOT
REP_RET
-%if ARCH_X86_64
INIT_XMM sse2
-cglobal highbd_dc_predictor_32x32, 4, 5, 9, dst, stride, above, left, goffset
+cglobal highbd_dc_predictor_32x32, 4, 5, 7, dst, stride, above, left, goffset
GET_GOT goffsetq
- pxor m1, m1
mova m0, [aboveq]
mova m2, [aboveq+16]
mova m3, [aboveq+32]
mova m4, [aboveq+48]
- mova m5, [leftq]
- mova m6, [leftq+16]
- mova m7, [leftq+32]
- mova m8, [leftq+48]
+ paddw m0, m2
+ paddw m3, m4
+ mova m2, [leftq]
+ mova m4, [leftq+16]
+ mova m5, [leftq+32]
+ mova m6, [leftq+48]
+ paddw m2, m4
+ paddw m5, m6
+ paddw m0, m3
+ paddw m2, m5
+ pxor m1, m1
+ paddw m0, m2
DEFINE_ARGS dst, stride, stride3, lines4
lea stride3q, [strideq*3]
mov lines4d, 8
- paddw m0, m2
- paddw m0, m3
- paddw m0, m4
- paddw m0, m5
- paddw m0, m6
- paddw m0, m7
- paddw m0, m8
movhlps m2, m0
paddw m0, m2
punpcklwd m0, m1
@@ -181,9 +176,8 @@ cglobal highbd_dc_predictor_32x32, 4, 5, 9, dst, stride, above, left, goffset
RESTORE_GOT
REP_RET
-%endif
-INIT_MMX sse
+INIT_XMM sse2
cglobal highbd_v_predictor_4x4, 3, 3, 1, dst, stride, above
movq m0, [aboveq]
movq [dstq ], m0
@@ -261,43 +255,44 @@ cglobal highbd_v_predictor_32x32, 3, 4, 4, dst, stride, above
jnz .loop
REP_RET
-INIT_MMX sse
-cglobal highbd_tm_predictor_4x4, 5, 6, 5, dst, stride, above, left, bps, one
+INIT_XMM sse2
+cglobal highbd_tm_predictor_4x4, 5, 5, 6, dst, stride, above, left, bps
movd m1, [aboveq-2]
movq m0, [aboveq]
- pshufw m1, m1, 0x0
+ pshuflw m1, m1, 0x0
+ movlhps m0, m0 ; t1 t2 t3 t4 t1 t2 t3 t4
+ movlhps m1, m1 ; tl tl tl tl tl tl tl tl
; Get the values to compute the maximum value at this bit depth
- mov oned, 1
- movd m3, oned
+ pcmpeqw m3, m3
movd m4, bpsd
- pshufw m3, m3, 0x0
- DEFINE_ARGS dst, stride, line, left
- mov lineq, -2
- mova m2, m3
+ psubw m0, m1 ; t1-tl t2-tl t3-tl t4-tl
psllw m3, m4
- add leftq, 8
- psubw m3, m2 ; max possible value
- pxor m4, m4 ; min possible value
- psubw m0, m1
-.loop:
- movq m1, [leftq+lineq*4]
- movq m2, [leftq+lineq*4+2]
- pshufw m1, m1, 0x0
- pshufw m2, m2, 0x0
- paddw m1, m0
+ pcmpeqw m2, m2
+ pxor m4, m4 ; min possible value
+ pxor m3, m2 ; max possible value
+ mova m1, [leftq]
+ pshuflw m2, m1, 0x0
+ pshuflw m5, m1, 0x55
+ movlhps m2, m5 ; l1 l1 l1 l1 l2 l2 l2 l2
paddw m2, m0
;Clamp to the bit-depth
- pminsw m1, m3
pminsw m2, m3
- pmaxsw m1, m4
pmaxsw m2, m4
;Store the values
- movq [dstq ], m1
- movq [dstq+strideq*2], m2
+ movq [dstq ], m2
+ movhpd [dstq+strideq*2], m2
lea dstq, [dstq+strideq*4]
- inc lineq
- jnz .loop
- REP_RET
+ pshuflw m2, m1, 0xaa
+ pshuflw m5, m1, 0xff
+ movlhps m2, m5
+ paddw m2, m0
+ ;Clamp to the bit-depth
+ pminsw m2, m3
+ pmaxsw m2, m4
+ ;Store the values
+ movq [dstq ], m2
+ movhpd [dstq+strideq*2], m2
+ RET
INIT_XMM sse2
cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one
@@ -343,63 +338,55 @@ cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one
jnz .loop
REP_RET
-%if ARCH_X86_64
INIT_XMM sse2
-cglobal highbd_tm_predictor_16x16, 5, 6, 9, dst, stride, above, left, bps, one
+cglobal highbd_tm_predictor_16x16, 5, 5, 8, dst, stride, above, left, bps
movd m2, [aboveq-2]
mova m0, [aboveq]
mova m1, [aboveq+16]
pshuflw m2, m2, 0x0
; Get the values to compute the maximum value at this bit depth
- mov oned, 1
- pxor m7, m7
- pxor m8, m8
- pinsrw m7, oned, 0
- pinsrw m8, bpsd, 0
- pshuflw m7, m7, 0x0
+ pcmpeqw m3, m3
+ movd m4, bpsd
+ punpcklqdq m2, m2
+ psllw m3, m4
+ pcmpeqw m5, m5
+ pxor m4, m4 ; min possible value
+ pxor m3, m5 ; max possible value
DEFINE_ARGS dst, stride, line, left
- punpcklqdq m7, m7
mov lineq, -8
- mova m5, m7
- punpcklqdq m2, m2
- psllw m7, m8
- add leftq, 32
- psubw m7, m5 ; max possible value
- pxor m8, m8 ; min possible value
psubw m0, m2
psubw m1, m2
.loop:
- movd m2, [leftq+lineq*4]
- movd m3, [leftq+lineq*4+2]
- pshuflw m2, m2, 0x0
- pshuflw m3, m3, 0x0
- punpcklqdq m2, m2
- punpcklqdq m3, m3
- paddw m4, m2, m0
- paddw m5, m3, m0
+ movd m7, [leftq]
+ pshuflw m5, m7, 0x0
+ pshuflw m2, m7, 0x55
+ punpcklqdq m5, m5 ; l1 l1 l1 l1 l1 l1 l1 l1
+ punpcklqdq m2, m2 ; l2 l2 l2 l2 l2 l2 l2 l2
+    paddw                 m6, m5, m0 ; t1-tl+l1 to t8-tl+l1
+    paddw                 m5, m1     ; t9-tl+l1 to t16-tl+l1
+ pminsw m6, m3
+ pminsw m5, m3
+ pmaxsw m6, m4 ; Clamp to the bit-depth
+ pmaxsw m5, m4
+ mova [dstq ], m6
+ mova [dstq +16], m5
+ paddw m6, m2, m0
paddw m2, m1
- paddw m3, m1
- ;Clamp to the bit-depth
- pminsw m4, m7
- pminsw m5, m7
- pminsw m2, m7
- pminsw m3, m7
- pmaxsw m4, m8
- pmaxsw m5, m8
- pmaxsw m2, m8
- pmaxsw m3, m8
- ;Store the values
- mova [dstq ], m4
- mova [dstq+strideq*2 ], m5
- mova [dstq +16], m2
- mova [dstq+strideq*2+16], m3
+ pminsw m6, m3
+ pminsw m2, m3
+ pmaxsw m6, m4
+ pmaxsw m2, m4
+ mova [dstq+strideq*2 ], m6
+ mova [dstq+strideq*2+16], m2
lea dstq, [dstq+strideq*4]
inc lineq
+ lea leftq, [leftq+4]
+
jnz .loop
REP_RET
INIT_XMM sse2
-cglobal highbd_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one
+cglobal highbd_tm_predictor_32x32, 5, 5, 8, dst, stride, above, left, bps
movd m0, [aboveq-2]
mova m1, [aboveq]
mova m2, [aboveq+16]
@@ -407,70 +394,60 @@ cglobal highbd_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one
mova m4, [aboveq+48]
pshuflw m0, m0, 0x0
; Get the values to compute the maximum value at this bit depth
- mov oned, 1
- pxor m10, m10
- pxor m11, m11
- pinsrw m10, oned, 0
- pinsrw m11, bpsd, 0
- pshuflw m10, m10, 0x0
+ pcmpeqw m5, m5
+ movd m6, bpsd
+ psllw m5, m6
+ pcmpeqw m7, m7
+ pxor m6, m6 ; min possible value
+ pxor m5, m7 ; max possible value
+ punpcklqdq m0, m0
DEFINE_ARGS dst, stride, line, left
- punpcklqdq m10, m10
mov lineq, -16
- mova m5, m10
- punpcklqdq m0, m0
- psllw m10, m11
- add leftq, 64
- psubw m10, m5 ; max possible value
- pxor m11, m11 ; min possible value
psubw m1, m0
psubw m2, m0
psubw m3, m0
psubw m4, m0
.loop:
- movd m5, [leftq+lineq*4]
- movd m6, [leftq+lineq*4+2]
- pshuflw m5, m5, 0x0
- pshuflw m6, m6, 0x0
- punpcklqdq m5, m5
- punpcklqdq m6, m6
- paddw m7, m5, m1
- paddw m8, m5, m2
- paddw m9, m5, m3
- paddw m5, m4
- ;Clamp these values to the bit-depth
- pminsw m7, m10
- pminsw m8, m10
- pminsw m9, m10
- pminsw m5, m10
- pmaxsw m7, m11
- pmaxsw m8, m11
- pmaxsw m9, m11
- pmaxsw m5, m11
- ;Store these values
- mova [dstq ], m7
- mova [dstq +16], m8
- mova [dstq +32], m9
- mova [dstq +48], m5
- paddw m7, m6, m1
- paddw m8, m6, m2
- paddw m9, m6, m3
- paddw m6, m4
- ;Clamp these values to the bit-depth
- pminsw m7, m10
- pminsw m8, m10
- pminsw m9, m10
- pminsw m6, m10
- pmaxsw m7, m11
- pmaxsw m8, m11
- pmaxsw m9, m11
- pmaxsw m6, m11
- ;Store these values
- mova [dstq+strideq*2 ], m7
- mova [dstq+strideq*2+16], m8
- mova [dstq+strideq*2+32], m9
- mova [dstq+strideq*2+48], m6
+ movd m7, [leftq]
+ pshuflw m7, m7, 0x0
+ punpcklqdq m7, m7 ; l1 l1 l1 l1 l1 l1 l1 l1
+ paddw m0, m7, m1
+ pminsw m0, m5
+ pmaxsw m0, m6
+ mova [dstq ], m0
+ paddw m0, m7, m2
+ pminsw m0, m5
+ pmaxsw m0, m6
+ mova [dstq +16], m0
+ paddw m0, m7, m3
+ pminsw m0, m5
+ pmaxsw m0, m6
+ mova [dstq +32], m0
+ paddw m0, m7, m4
+ pminsw m0, m5
+ pmaxsw m0, m6
+ mova [dstq +48], m0
+ movd m7, [leftq+2]
+ pshuflw m7, m7, 0x0
+ punpcklqdq m7, m7 ; l2 l2 l2 l2 l2 l2 l2 l2
+ paddw m0, m7, m1
+ pminsw m0, m5
+ pmaxsw m0, m6
+ mova [dstq+strideq*2 ], m0
+ paddw m0, m7, m2
+ pminsw m0, m5
+ pmaxsw m0, m6
+ mova [dstq+strideq*2+16], m0
+ paddw m0, m7, m3
+ pminsw m0, m5
+ pmaxsw m0, m6
+ mova [dstq+strideq*2+32], m0
+ paddw m0, m7, m4
+ pminsw m0, m5
+ pmaxsw m0, m6
+ mova [dstq+strideq*2+48], m0
lea dstq, [dstq+strideq*4]
+ lea leftq, [leftq+4]
inc lineq
jnz .loop
REP_RET
-%endif
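
The MMX-era pmaddwd reduction in the 4x4 DC predictor is replaced by a
pshuflw/paddw ladder that stays in one xmm register. The same reduction in
intrinsics form, as a sketch rather than the shipped asm:

#include <emmintrin.h>  /* SSE2 */
#include <stdint.h>

/* Sum four 'above' and four 'left' 16-bit pixels, round, divide by 8, and
 * broadcast the DC value across the low four lanes, mirroring
 * highbd_dc_predictor_4x4 above. */
static __m128i highbd_dc_4x4(const uint16_t *above, const uint16_t *left) {
  __m128i sum = _mm_add_epi16(_mm_loadl_epi64((const __m128i *)above),
                              _mm_loadl_epi64((const __m128i *)left));
  sum = _mm_add_epi16(sum, _mm_shufflelo_epi16(sum, 0x0e)); /* lanes 2,3 -> 0,1 */
  sum = _mm_add_epi16(sum, _mm_shufflelo_epi16(sum, 0x01)); /* lane 1 -> 0 */
  sum = _mm_add_epi16(sum, _mm_set1_epi16(4));  /* rounding, as pw_4 */
  sum = _mm_srai_epi16(sum, 3);                 /* (sum + 4) >> 3 */
  return _mm_shufflelo_epi16(sum, 0x00);        /* broadcast the DC word */
}
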
diff --git a/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm b/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
index 22d52a2af..30ee81b68 100644
--- a/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
+++ b/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
@@ -79,20 +79,13 @@ SECTION .text
%macro INC_SRC_BY_SRC_STRIDE 0
%if ARCH_X86=1 && CONFIG_PIC=1
- lea srcq, [srcq + src_stridemp*2]
+ add srcq, src_stridemp
+ add srcq, src_stridemp
%else
lea srcq, [srcq + src_strideq*2]
%endif
%endmacro
-%macro INC_SRC_BY_SRC_2STRIDE 0
-%if ARCH_X86=1 && CONFIG_PIC=1
- lea srcq, [srcq + src_stridemp*4]
-%else
- lea srcq, [srcq + src_strideq*4]
-%endif
-%endmacro
-
%macro SUBPEL_VARIANCE 1-2 0 ; W
%define bilin_filter_m bilin_filter_m_sse2
%define filter_idx_shift 5
@@ -984,8 +977,9 @@ SECTION .text
.x_other_y_other_loop:
movu m2, [srcq]
movu m4, [srcq+2]
- movu m3, [srcq+src_strideq*2]
- movu m5, [srcq+src_strideq*2+2]
+ INC_SRC_BY_SRC_STRIDE
+ movu m3, [srcq]
+ movu m5, [srcq+2]
pmullw m2, filter_x_a
pmullw m4, filter_x_b
paddw m2, filter_rnd
@@ -1018,7 +1012,7 @@ SECTION .text
SUM_SSE m0, m2, m4, m3, m6, m7
mova m0, m5
- INC_SRC_BY_SRC_2STRIDE
+ INC_SRC_BY_SRC_STRIDE
lea dstq, [dstq + dst_strideq * 4]
%if %2 == 1 ; avg
add secq, sec_str
diff --git a/vpx_dsp/x86/highbd_variance_sse2.c b/vpx_dsp/x86/highbd_variance_sse2.c
index b45331caa..81ec5dbdb 100644
--- a/vpx_dsp/x86/highbd_variance_sse2.c
+++ b/vpx_dsp/x86/highbd_variance_sse2.c
@@ -243,13 +243,18 @@ unsigned int vpx_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride,
}
#if CONFIG_USE_X86INC
+// The 2 unused parameters are placeholders for the PIC-enabled build.
+// These definitions are for functions defined in
+// highbd_subpel_variance_impl_sse2.asm
#define DECL(w, opt) \
int vpx_highbd_sub_pixel_variance##w##xh_##opt(const uint16_t *src, \
ptrdiff_t src_stride, \
int x_offset, int y_offset, \
const uint16_t *dst, \
ptrdiff_t dst_stride, \
- int height, unsigned int *sse);
+ int height, \
+ unsigned int *sse, \
+ void *unused0, void *unused);
#define DECLS(opt1, opt2) \
DECL(8, opt1); \
DECL(16, opt1)
@@ -274,7 +279,7 @@ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src8, \
int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src, src_stride, \
x_offset, y_offset, \
dst, dst_stride, h, \
- &sse); \
+ &sse, NULL, NULL); \
if (w > wf) { \
unsigned int sse2; \
int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 16, \
@@ -282,19 +287,20 @@ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src8, \
x_offset, y_offset, \
dst + 16, \
dst_stride, \
- h, &sse2); \
+ h, &sse2, \
+ NULL, NULL); \
se += se2; \
sse += sse2; \
if (w > wf * 2) { \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \
x_offset, y_offset, \
dst + 32, dst_stride, \
- h, &sse2); \
+ h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + 48, src_stride, x_offset, y_offset, \
- dst + 48, dst_stride, h, &sse2); \
+ dst + 48, dst_stride, h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
} \
@@ -312,7 +318,7 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src, src_stride, \
x_offset, y_offset, \
dst, dst_stride, \
- h, &sse); \
+ h, &sse, NULL, NULL); \
if (w > wf) { \
uint32_t sse2; \
int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 16, \
@@ -320,20 +326,21 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
x_offset, y_offset, \
dst + 16, \
dst_stride, \
- h, &sse2); \
+ h, &sse2, \
+ NULL, NULL); \
se += se2; \
sse += sse2; \
if (w > wf * 2) { \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \
x_offset, y_offset, \
dst + 32, dst_stride, \
- h, &sse2); \
+ h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \
x_offset, y_offset, \
dst + 48, dst_stride, \
- h, &sse2); \
+ h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
} \
@@ -359,27 +366,27 @@ uint32_t vpx_highbd_12_sub_pixel_variance##w##x##h##_##opt( \
int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + (start_row * src_stride), src_stride, \
x_offset, y_offset, dst + (start_row * dst_stride), \
- dst_stride, height, &sse2); \
+ dst_stride, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
if (w > wf) { \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + 16 + (start_row * src_stride), src_stride, \
x_offset, y_offset, dst + 16 + (start_row * dst_stride), \
- dst_stride, height, &sse2); \
+ dst_stride, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
if (w > wf * 2) { \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + 32 + (start_row * src_stride), src_stride, \
x_offset, y_offset, dst + 32 + (start_row * dst_stride), \
- dst_stride, height, &sse2); \
+ dst_stride, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + 48 + (start_row * src_stride), src_stride, \
x_offset, y_offset, dst + 48 + (start_row * dst_stride), \
- dst_stride, height, &sse2); \
+ dst_stride, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
}\
@@ -410,6 +417,7 @@ FNS(sse2, sse);
#undef FNS
#undef FN
+// The 2 unused parameters are placeholders for the PIC-enabled build.
#define DECL(w, opt) \
int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt(const uint16_t *src, \
ptrdiff_t src_stride, \
@@ -419,7 +427,8 @@ int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt(const uint16_t *src, \
const uint16_t *sec, \
ptrdiff_t sec_stride, \
int height, \
- unsigned int *sse);
+ unsigned int *sse, \
+ void *unused0, void *unused);
#define DECLS(opt1) \
DECL(16, opt1) \
DECL(8, opt1)
@@ -439,23 +448,23 @@ uint32_t vpx_highbd_8_sub_pixel_avg_variance##w##x##h##_##opt( \
uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \
int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src, src_stride, x_offset, \
- y_offset, dst, dst_stride, sec, w, h, &sse); \
+ y_offset, dst, dst_stride, sec, w, h, &sse, NULL, NULL); \
if (w > wf) { \
uint32_t sse2; \
int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 16, src_stride, x_offset, y_offset, \
- dst + 16, dst_stride, sec + 16, w, h, &sse2); \
+ dst + 16, dst_stride, sec + 16, w, h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
if (w > wf * 2) { \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 32, src_stride, x_offset, y_offset, \
- dst + 32, dst_stride, sec + 32, w, h, &sse2); \
+ dst + 32, dst_stride, sec + 32, w, h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 48, src_stride, x_offset, y_offset, \
- dst + 48, dst_stride, sec + 48, w, h, &sse2); \
+ dst + 48, dst_stride, sec + 48, w, h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
} \
@@ -475,14 +484,15 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src, src_stride, x_offset, \
y_offset, dst, dst_stride, \
- sec, w, h, &sse); \
+ sec, w, h, &sse, NULL, NULL); \
if (w > wf) { \
uint32_t sse2; \
int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 16, src_stride, \
x_offset, y_offset, \
dst + 16, dst_stride, \
- sec + 16, w, h, &sse2); \
+ sec + 16, w, h, &sse2, \
+ NULL, NULL); \
se += se2; \
sse += sse2; \
if (w > wf * 2) { \
@@ -490,14 +500,16 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
src + 32, src_stride, \
x_offset, y_offset, \
dst + 32, dst_stride, \
- sec + 32, w, h, &sse2); \
+ sec + 32, w, h, &sse2, \
+ NULL, NULL); \
se += se2; \
sse += sse2; \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 48, src_stride, \
x_offset, y_offset, \
dst + 48, dst_stride, \
- sec + 48, w, h, &sse2); \
+ sec + 48, w, h, &sse2, \
+ NULL, NULL); \
se += se2; \
sse += sse2; \
} \
@@ -525,7 +537,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + (start_row * src_stride), src_stride, x_offset, \
y_offset, dst + (start_row * dst_stride), dst_stride, \
- sec + (start_row * w), w, height, &sse2); \
+ sec + (start_row * w), w, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
if (w > wf) { \
@@ -533,7 +545,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
src + 16 + (start_row * src_stride), src_stride, \
x_offset, y_offset, \
dst + 16 + (start_row * dst_stride), dst_stride, \
- sec + 16 + (start_row * w), w, height, &sse2); \
+ sec + 16 + (start_row * w), w, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
if (w > wf * 2) { \
@@ -541,14 +553,14 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
src + 32 + (start_row * src_stride), src_stride, \
x_offset, y_offset, \
dst + 32 + (start_row * dst_stride), dst_stride, \
- sec + 32 + (start_row * w), w, height, &sse2); \
+ sec + 32 + (start_row * w), w, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 48 + (start_row * src_stride), src_stride, \
x_offset, y_offset, \
dst + 48 + (start_row * dst_stride), dst_stride, \
- sec + 48 + (start_row * w), w, height, &sse2); \
+ sec + 48 + (start_row * w), w, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
} \
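
The trailing NULL, NULL arguments only keep the C-side calls in sync with the
asm, which per the comment above reserves two extra argument slots for the
32-bit PIC build. As an example, DECL(8, sse2) now declares (sketch of the
macro expansion; the last two pointers are never dereferenced):

int vpx_highbd_sub_pixel_variance8xh_sse2(const uint16_t *src,
                                          ptrdiff_t src_stride,
                                          int x_offset, int y_offset,
                                          const uint16_t *dst,
                                          ptrdiff_t dst_stride,
                                          int height, unsigned int *sse,
                                          void *unused0, void *unused);
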
diff --git a/vpx_dsp/x86/sad_sse2.asm b/vpx_dsp/x86/sad_sse2.asm
index 0defe1b6d..1ec906c23 100644
--- a/vpx_dsp/x86/sad_sse2.asm
+++ b/vpx_dsp/x86/sad_sse2.asm
@@ -17,7 +17,7 @@ SECTION .text
%if %3 == 5
cglobal sad%1x%2, 4, %3, 5, src, src_stride, ref, ref_stride, n_rows
%else ; %3 == 7
-cglobal sad%1x%2, 4, %3, 5, src, src_stride, ref, ref_stride, \
+cglobal sad%1x%2, 4, %3, 6, src, src_stride, ref, ref_stride, \
src_stride3, ref_stride3, n_rows
%endif ; %3 == 5/7
%else ; avg
@@ -25,7 +25,7 @@ cglobal sad%1x%2, 4, %3, 5, src, src_stride, ref, ref_stride, \
cglobal sad%1x%2_avg, 5, 1 + %3, 5, src, src_stride, ref, ref_stride, \
second_pred, n_rows
%else ; %3 == 7
-cglobal sad%1x%2_avg, 5, ARCH_X86_64 + %3, 5, src, src_stride, \
+cglobal sad%1x%2_avg, 5, ARCH_X86_64 + %3, 6, src, src_stride, \
ref, ref_stride, \
second_pred, \
src_stride3, ref_stride3
@@ -222,8 +222,8 @@ SAD8XN 16, 1 ; sad8x16_avg_sse2
SAD8XN 8, 1 ; sad8x8_avg_sse2
SAD8XN 4, 1 ; sad8x4_avg_sse2
-; unsigned int vpx_sad4x{4, 8}_sse(uint8_t *src, int src_stride,
-; uint8_t *ref, int ref_stride);
+; unsigned int vpx_sad4x{4, 8}_sse2(uint8_t *src, int src_stride,
+; uint8_t *ref, int ref_stride);
%macro SAD4XN 1-2 0
SAD_FN 4, %1, 7, %2
mov n_rowsd, %1/4
@@ -236,31 +236,32 @@ SAD8XN 4, 1 ; sad8x4_avg_sse2
movd m4, [refq+ref_stride3q]
punpckldq m1, m2
punpckldq m3, m4
+ movlhps m1, m3
%if %2 == 1
pavgb m1, [second_predq+mmsize*0]
- pavgb m3, [second_predq+mmsize*1]
- lea second_predq, [second_predq+mmsize*2]
+ lea second_predq, [second_predq+mmsize*1]
%endif
movd m2, [srcq]
movd m5, [srcq+src_strideq]
movd m4, [srcq+src_strideq*2]
- movd m6, [srcq+src_stride3q]
+ movd m3, [srcq+src_stride3q]
punpckldq m2, m5
- punpckldq m4, m6
+ punpckldq m4, m3
+ movlhps m2, m4
psadbw m1, m2
- psadbw m3, m4
lea refq, [refq+ref_strideq*4]
paddd m0, m1
lea srcq, [srcq+src_strideq*4]
- paddd m0, m3
dec n_rowsd
jg .loop
+ movhlps m1, m0
+ paddd m0, m1
movd eax, m0
RET
%endmacro
-INIT_MMX sse
+INIT_XMM sse2
SAD4XN  8 ; sad4x8_sse2
SAD4XN  4 ; sad4x4_sse2
SAD4XN  8, 1 ; sad4x8_avg_sse2
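
The restructured loop packs two pairs of 4-pixel rows into a single xmm
register and keeps one running SAD, folding the two halves only once at the
end. The same scheme in intrinsics form (a sketch, not the shipped asm; plain
casts stand in for unaligned 4-byte loads):

#include <emmintrin.h>  /* SSE2 */
#include <stdint.h>

static unsigned int sad4x4_sketch(const uint8_t *src, int src_stride,
                                  const uint8_t *ref, int ref_stride) {
  /* Gather four 4-byte rows of ref and src into one xmm register each,
   * like the punpckldq + movlhps sequence above. */
  const __m128i r = _mm_unpacklo_epi64(
      _mm_unpacklo_epi32(
          _mm_cvtsi32_si128(*(const int *)(ref + 0 * ref_stride)),
          _mm_cvtsi32_si128(*(const int *)(ref + 1 * ref_stride))),
      _mm_unpacklo_epi32(
          _mm_cvtsi32_si128(*(const int *)(ref + 2 * ref_stride)),
          _mm_cvtsi32_si128(*(const int *)(ref + 3 * ref_stride))));
  const __m128i s = _mm_unpacklo_epi64(
      _mm_unpacklo_epi32(
          _mm_cvtsi32_si128(*(const int *)(src + 0 * src_stride)),
          _mm_cvtsi32_si128(*(const int *)(src + 1 * src_stride))),
      _mm_unpacklo_epi32(
          _mm_cvtsi32_si128(*(const int *)(src + 2 * src_stride)),
          _mm_cvtsi32_si128(*(const int *)(src + 3 * src_stride))));
  /* psadbw leaves one partial sum per 8-byte half ... */
  __m128i sad = _mm_sad_epu8(r, s);
  /* ... which the final movhlps + paddd folds together. */
  sad = _mm_add_epi32(sad, _mm_srli_si128(sad, 8));
  return (unsigned int)_mm_cvtsi128_si32(sad);
}
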