summaryrefslogtreecommitdiff
path: root/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'vp9')
-rw-r--r--vp9/common/vp9_blockd.h2
-rw-r--r--vp9/common/vp9_idct.h3
-rw-r--r--vp9/common/vp9_rtcd_defs.pl14
-rw-r--r--vp9/common/x86/vp9_idct_intrin_sse2.c26
-rw-r--r--vp9/common/x86/vp9_idct_intrin_sse2.h26
-rw-r--r--vp9/common/x86/vp9_idct_intrin_ssse3.c487
-rw-r--r--vp9/decoder/vp9_decodeframe.c50
-rw-r--r--vp9/encoder/vp9_aq_complexity.c16
-rw-r--r--vp9/encoder/vp9_block.h1
-rw-r--r--vp9/encoder/vp9_encodeframe.c45
-rw-r--r--vp9/encoder/vp9_encodemb.c6
-rw-r--r--vp9/encoder/vp9_encoder.c79
-rw-r--r--vp9/encoder/vp9_encoder.h12
-rw-r--r--vp9/encoder/vp9_firstpass.c44
-rw-r--r--vp9/encoder/vp9_mcomp.c3
-rw-r--r--vp9/encoder/vp9_pickmode.c3
-rw-r--r--vp9/encoder/vp9_ratectrl.c111
-rw-r--r--vp9/encoder/vp9_ratectrl.h9
-rw-r--r--vp9/encoder/vp9_rdopt.c52
-rw-r--r--vp9/encoder/vp9_rdopt.h65
-rw-r--r--vp9/encoder/vp9_speed_features.c63
-rw-r--r--vp9/encoder/vp9_speed_features.h4
-rw-r--r--vp9/encoder/vp9_svc_layercontext.c39
-rw-r--r--vp9/encoder/vp9_svc_layercontext.h3
-rw-r--r--vp9/encoder/vp9_variance.c13
-rw-r--r--vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm14
-rw-r--r--vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm4
-rw-r--r--vp9/vp9_cx_iface.c16
-rw-r--r--vp9/vp9_dx_iface.c43
29 files changed, 810 insertions, 443 deletions
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 8ca356dd6..9088b0bde 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -228,8 +228,6 @@ typedef struct macroblockd {
DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]);
int lossless;
- /* Inverse transform function pointers. */
- void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob);
int corrupted;
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index d86877622..3253bcbf4 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -33,6 +33,9 @@ extern "C" {
#define pair_set_epi16(a, b) \
_mm_set_epi16(b, a, b, a, b, a, b, a)
+#define dual_set_epi16(a, b) \
+ _mm_set_epi16(b, b, b, b, a, a, a, a)
+
// Constants:
// for (int i = 1; i< 32; ++i)
// printf("static const int cospi_%d_64 = %.0f;\n", i,
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index c300cde62..09ce72ef2 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -360,7 +360,7 @@ specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/;
$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon;
add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct16x16_10_add sse2 neon_asm dspr2/;
+specialize qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/;
$vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon;
add_proto qw/void vp9_idct32x32_1024_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
@@ -422,10 +422,6 @@ specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc";
add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
-add_proto qw/void vp9_get_sse_sum_16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-specialize qw/vp9_get_sse_sum_16x16 sse2/;
-$vp9_get_sse_sum_16x16_sse2=vp9_get16x16var_sse2;
-
add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc";
@@ -435,9 +431,11 @@ specialize qw/vp9_variance8x16 mmx/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance8x8 mmx/, "$sse2_x86inc";
-add_proto qw/void vp9_get_sse_sum_8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-specialize qw/vp9_get_sse_sum_8x8 sse2/;
-$vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2;
+add_proto qw/void vp9_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+specialize qw/vp9_get8x8var mmx/, "$sse2_x86inc";
+
+add_proto qw/void vp9_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+specialize qw/vp9_get16x16var avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance8x4/, "$sse2_x86inc";
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index ff9c43221..b60f8a06d 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -375,15 +375,6 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
out3 = _mm_unpackhi_epi64(tr1_2, tr1_6); \
}
-#define TRANSPOSE_8X4(in0, in1, in2, in3, out0, out1) \
- { \
- const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \
- const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \
- \
- in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); /* i1 i0 */ \
- in1 = _mm_unpackhi_epi32(tr0_0, tr0_1); /* i3 i2 */ \
- }
-
#define TRANSPOSE_8X8_10(in0, in1, in2, in3, out0, out1) \
{ \
const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \
@@ -612,23 +603,6 @@ void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
RECON_AND_STORE(dest, dc_value);
}
-static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) {
- const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);
- const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
- const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]);
- const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]);
-
- const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
- const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
- const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
- const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
-
- out[0] = _mm_unpacklo_epi64(tr1_0, tr1_4);
- out[1] = _mm_unpackhi_epi64(tr1_0, tr1_4);
- out[2] = _mm_unpacklo_epi64(tr1_2, tr1_6);
- out[3] = _mm_unpackhi_epi64(tr1_2, tr1_6);
-}
-
static void idct8_sse2(__m128i *in) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.h b/vp9/common/x86/vp9_idct_intrin_sse2.h
index 1c62e3272..0f179b49a 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.h
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.h
@@ -45,6 +45,32 @@ static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7);
}
+#define TRANSPOSE_8X4(in0, in1, in2, in3, out0, out1) \
+ { \
+ const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \
+ const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \
+ \
+ in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); /* i1 i0 */ \
+ in1 = _mm_unpackhi_epi32(tr0_0, tr0_1); /* i3 i2 */ \
+ }
+
+static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) {
+ const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);
+ const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
+ const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]);
+ const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]);
+
+ const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+ const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+ const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+ const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+
+ out[0] = _mm_unpacklo_epi64(tr1_0, tr1_4);
+ out[1] = _mm_unpackhi_epi64(tr1_0, tr1_4);
+ out[2] = _mm_unpacklo_epi64(tr1_2, tr1_6);
+ out[3] = _mm_unpackhi_epi64(tr1_2, tr1_6);
+}
+
static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) {
__m128i tbuf[8];
array_transpose_8x8(res0, res0);
diff --git a/vp9/common/x86/vp9_idct_intrin_ssse3.c b/vp9/common/x86/vp9_idct_intrin_ssse3.c
index e5d3cb5f4..73bf5d1d7 100644
--- a/vp9/common/x86/vp9_idct_intrin_ssse3.c
+++ b/vp9/common/x86/vp9_idct_intrin_ssse3.c
@@ -16,7 +16,7 @@
#include <tmmintrin.h> // SSSE3
#include "vp9/common/x86/vp9_idct_intrin_sse2.h"
-static void idct16_8col(__m128i *in) {
+static void idct16_8col(__m128i *in, int round) {
const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64);
@@ -36,6 +36,8 @@ static void idct16_8col(__m128i *in) {
const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i k__cospi_p16_p16_x2 = pair_set_epi16(23170, 23170);
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+ const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);
__m128i v[16], u[16], s[16], t[16];
@@ -266,28 +268,80 @@ static void idct16_8col(__m128i *in) {
t[15] = _mm_add_epi16(s[12], s[15]);
// stage 6
- s[0] = _mm_add_epi16(t[0], t[7]);
- s[1] = _mm_add_epi16(t[1], t[6]);
- s[2] = _mm_add_epi16(t[2], t[5]);
- s[3] = _mm_add_epi16(t[3], t[4]);
- s[4] = _mm_sub_epi16(t[3], t[4]);
- s[5] = _mm_sub_epi16(t[2], t[5]);
- s[6] = _mm_sub_epi16(t[1], t[6]);
- s[7] = _mm_sub_epi16(t[0], t[7]);
- s[8] = t[8];
- s[9] = t[9];
-
- u[0] = _mm_sub_epi16(t[13], t[10]);
- u[1] = _mm_add_epi16(t[13], t[10]);
- u[2] = _mm_sub_epi16(t[12], t[11]);
- u[3] = _mm_add_epi16(t[12], t[11]);
-
- s[10] = _mm_mulhrs_epi16(u[0], k__cospi_p16_p16_x2);
- s[13] = _mm_mulhrs_epi16(u[1], k__cospi_p16_p16_x2);
- s[11] = _mm_mulhrs_epi16(u[2], k__cospi_p16_p16_x2);
- s[12] = _mm_mulhrs_epi16(u[3], k__cospi_p16_p16_x2);
- s[14] = t[14];
- s[15] = t[15];
+ if (round == 1) {
+ s[0] = _mm_add_epi16(t[0], t[7]);
+ s[1] = _mm_add_epi16(t[1], t[6]);
+ s[2] = _mm_add_epi16(t[2], t[5]);
+ s[3] = _mm_add_epi16(t[3], t[4]);
+ s[4] = _mm_sub_epi16(t[3], t[4]);
+ s[5] = _mm_sub_epi16(t[2], t[5]);
+ s[6] = _mm_sub_epi16(t[1], t[6]);
+ s[7] = _mm_sub_epi16(t[0], t[7]);
+ s[8] = t[8];
+ s[9] = t[9];
+
+ u[0] = _mm_unpacklo_epi16(t[10], t[13]);
+ u[1] = _mm_unpackhi_epi16(t[10], t[13]);
+ u[2] = _mm_unpacklo_epi16(t[11], t[12]);
+ u[3] = _mm_unpackhi_epi16(t[11], t[12]);
+
+ v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_m16_p16);
+ v[2] = _mm_madd_epi16(u[0], k__cospi_p16_p16);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p16_p16);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_m16_p16);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_m16_p16);
+ v[6] = _mm_madd_epi16(u[2], k__cospi_p16_p16);
+ v[7] = _mm_madd_epi16(u[3], k__cospi_p16_p16);
+
+ u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+ u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+ u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+ u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+ u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+ u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+
+ s[10] = _mm_packs_epi32(u[0], u[1]);
+ s[13] = _mm_packs_epi32(u[2], u[3]);
+ s[11] = _mm_packs_epi32(u[4], u[5]);
+ s[12] = _mm_packs_epi32(u[6], u[7]);
+ s[14] = t[14];
+ s[15] = t[15];
+ } else {
+ s[0] = _mm_add_epi16(t[0], t[7]);
+ s[1] = _mm_add_epi16(t[1], t[6]);
+ s[2] = _mm_add_epi16(t[2], t[5]);
+ s[3] = _mm_add_epi16(t[3], t[4]);
+ s[4] = _mm_sub_epi16(t[3], t[4]);
+ s[5] = _mm_sub_epi16(t[2], t[5]);
+ s[6] = _mm_sub_epi16(t[1], t[6]);
+ s[7] = _mm_sub_epi16(t[0], t[7]);
+ s[8] = t[8];
+ s[9] = t[9];
+
+ u[0] = _mm_sub_epi16(t[13], t[10]);
+ u[1] = _mm_add_epi16(t[13], t[10]);
+ u[2] = _mm_sub_epi16(t[12], t[11]);
+ u[3] = _mm_add_epi16(t[12], t[11]);
+
+ s[10] = _mm_mulhrs_epi16(u[0], k__cospi_p16_p16_x2);
+ s[13] = _mm_mulhrs_epi16(u[1], k__cospi_p16_p16_x2);
+ s[11] = _mm_mulhrs_epi16(u[2], k__cospi_p16_p16_x2);
+ s[12] = _mm_mulhrs_epi16(u[3], k__cospi_p16_p16_x2);
+ s[14] = t[14];
+ s[15] = t[15];
+ }
// stage 7
in[0] = _mm_add_epi16(s[0], s[15]);
@@ -308,10 +362,10 @@ static void idct16_8col(__m128i *in) {
in[15] = _mm_sub_epi16(s[0], s[15]);
}
-static void idct16_sse2(__m128i *in0, __m128i *in1) {
+static void idct16_sse2(__m128i *in0, __m128i *in1, int round) {
array_transpose_16x16(in0, in1);
- idct16_8col(in0);
- idct16_8col(in1);
+ idct16_8col(in0, round);
+ idct16_8col(in1, round);
}
void vp9_idct16x16_256_add_ssse3(const int16_t *input, uint8_t *dest,
@@ -322,10 +376,387 @@ void vp9_idct16x16_256_add_ssse3(const int16_t *input, uint8_t *dest,
input += 8;
load_buffer_8x16(input, in1);
- idct16_sse2(in0, in1);
- idct16_sse2(in0, in1);
+ idct16_sse2(in0, in1, 0);
+ idct16_sse2(in0, in1, 1);
write_buffer_8x16(dest, in0, stride);
dest += 8;
write_buffer_8x16(dest, in1, stride);
}
+
+static void idct16_10_r1(__m128i *in, __m128i *l) {
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i zero = _mm_setzero_si128();
+
+ const __m128i stg2_01 = dual_set_epi16(3212, 32610);
+ const __m128i stg2_67 = dual_set_epi16(-9512, 31358);
+ const __m128i stg3_01 = dual_set_epi16(6392, 32138);
+ const __m128i stg4_01 = dual_set_epi16(23170, 23170);
+
+
+
+ const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+ const __m128i stg4_7 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+
+ __m128i stp1_0, stp1_1, stp1_4, stp1_6,
+ stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15;
+ __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7,
+ stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13;
+ __m128i tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ // Stage2
+ {
+ const __m128i lo_1_15 = _mm_unpackhi_epi64(in[0], in[0]);
+ const __m128i lo_13_3 = _mm_unpackhi_epi64(in[1], in[1]);
+
+ stp2_8 = _mm_mulhrs_epi16(lo_1_15, stg2_01);
+ stp2_11 = _mm_mulhrs_epi16(lo_13_3, stg2_67);
+ }
+
+ // Stage3
+ {
+ const __m128i lo_2_14 = _mm_unpacklo_epi64(in[1], in[1]);
+ stp1_4 = _mm_mulhrs_epi16(lo_2_14, stg3_01);
+
+ stp1_13 = _mm_unpackhi_epi64(stp2_11, zero);
+ stp1_14 = _mm_unpackhi_epi64(stp2_8, zero);
+ }
+
+ // Stage4
+ {
+ const __m128i lo_0_8 = _mm_unpacklo_epi64(in[0], in[0]);
+ const __m128i lo_9_14 = _mm_unpacklo_epi16(stp2_8, stp1_14);
+ const __m128i lo_10_13 = _mm_unpacklo_epi16(stp2_11, stp1_13);
+
+ tmp0 = _mm_mulhrs_epi16(lo_0_8, stg4_01);
+ tmp1 = _mm_madd_epi16(lo_9_14, stg4_4);
+ tmp3 = _mm_madd_epi16(lo_9_14, stg4_5);
+ tmp2 = _mm_madd_epi16(lo_10_13, stg4_6);
+ tmp4 = _mm_madd_epi16(lo_10_13, stg4_7);
+
+ tmp1 = _mm_add_epi32(tmp1, rounding);
+ tmp3 = _mm_add_epi32(tmp3, rounding);
+ tmp2 = _mm_add_epi32(tmp2, rounding);
+ tmp4 = _mm_add_epi32(tmp4, rounding);
+
+ tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS);
+ tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS);
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+ tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS);
+
+ stp1_0 = _mm_unpacklo_epi64(tmp0, tmp0);
+ stp1_1 = _mm_unpackhi_epi64(tmp0, tmp0);
+ stp2_9 = _mm_packs_epi32(tmp1, tmp3);
+ stp2_10 = _mm_packs_epi32(tmp2, tmp4);
+
+ stp2_6 = _mm_unpackhi_epi64(stp1_4, zero);
+ }
+
+ // Stage5 and Stage6
+ {
+ tmp0 = _mm_add_epi16(stp2_8, stp2_11);
+ tmp1 = _mm_sub_epi16(stp2_8, stp2_11);
+ tmp2 = _mm_add_epi16(stp2_9, stp2_10);
+ tmp3 = _mm_sub_epi16(stp2_9, stp2_10);
+
+ stp1_9 = _mm_unpacklo_epi64(tmp2, zero);
+ stp1_10 = _mm_unpacklo_epi64(tmp3, zero);
+ stp1_8 = _mm_unpacklo_epi64(tmp0, zero);
+ stp1_11 = _mm_unpacklo_epi64(tmp1, zero);
+
+ stp1_13 = _mm_unpackhi_epi64(tmp3, zero);
+ stp1_14 = _mm_unpackhi_epi64(tmp2, zero);
+ stp1_12 = _mm_unpackhi_epi64(tmp1, zero);
+ stp1_15 = _mm_unpackhi_epi64(tmp0, zero);
+ }
+
+ // Stage6
+ {
+ const __m128i lo_6_5 = _mm_add_epi16(stp2_6, stp1_4);
+ const __m128i lo_6_6 = _mm_sub_epi16(stp2_6, stp1_4);
+ const __m128i lo_10_13 = _mm_sub_epi16(stp1_13, stp1_10);
+ const __m128i lo_10_14 = _mm_add_epi16(stp1_13, stp1_10);
+ const __m128i lo_11_12 = _mm_sub_epi16(stp1_12, stp1_11);
+ const __m128i lo_11_13 = _mm_add_epi16(stp1_12, stp1_11);
+
+ tmp1 = _mm_unpacklo_epi64(lo_6_5, lo_6_6);
+ tmp0 = _mm_unpacklo_epi64(lo_10_13, lo_10_14);
+ tmp4 = _mm_unpacklo_epi64(lo_11_12, lo_11_13);
+
+ stp1_6 = _mm_mulhrs_epi16(tmp1, stg4_01);
+ tmp0 = _mm_mulhrs_epi16(tmp0, stg4_01);
+ tmp4 = _mm_mulhrs_epi16(tmp4, stg4_01);
+
+ stp2_10 = _mm_unpacklo_epi64(tmp0, zero);
+ stp2_13 = _mm_unpackhi_epi64(tmp0, zero);
+ stp2_11 = _mm_unpacklo_epi64(tmp4, zero);
+ stp2_12 = _mm_unpackhi_epi64(tmp4, zero);
+
+ tmp0 = _mm_add_epi16(stp1_0, stp1_4);
+ tmp1 = _mm_sub_epi16(stp1_0, stp1_4);
+ tmp2 = _mm_add_epi16(stp1_1, stp1_6);
+ tmp3 = _mm_sub_epi16(stp1_1, stp1_6);
+
+ stp2_0 = _mm_unpackhi_epi64(tmp0, zero);
+ stp2_1 = _mm_unpacklo_epi64(tmp2, zero);
+ stp2_2 = _mm_unpackhi_epi64(tmp2, zero);
+ stp2_3 = _mm_unpacklo_epi64(tmp0, zero);
+ stp2_4 = _mm_unpacklo_epi64(tmp1, zero);
+ stp2_5 = _mm_unpackhi_epi64(tmp3, zero);
+ stp2_6 = _mm_unpacklo_epi64(tmp3, zero);
+ stp2_7 = _mm_unpackhi_epi64(tmp1, zero);
+ }
+
+ // Stage7. Left 8x16 only.
+ l[0] = _mm_add_epi16(stp2_0, stp1_15);
+ l[1] = _mm_add_epi16(stp2_1, stp1_14);
+ l[2] = _mm_add_epi16(stp2_2, stp2_13);
+ l[3] = _mm_add_epi16(stp2_3, stp2_12);
+ l[4] = _mm_add_epi16(stp2_4, stp2_11);
+ l[5] = _mm_add_epi16(stp2_5, stp2_10);
+ l[6] = _mm_add_epi16(stp2_6, stp1_9);
+ l[7] = _mm_add_epi16(stp2_7, stp1_8);
+ l[8] = _mm_sub_epi16(stp2_7, stp1_8);
+ l[9] = _mm_sub_epi16(stp2_6, stp1_9);
+ l[10] = _mm_sub_epi16(stp2_5, stp2_10);
+ l[11] = _mm_sub_epi16(stp2_4, stp2_11);
+ l[12] = _mm_sub_epi16(stp2_3, stp2_12);
+ l[13] = _mm_sub_epi16(stp2_2, stp2_13);
+ l[14] = _mm_sub_epi16(stp2_1, stp1_14);
+ l[15] = _mm_sub_epi16(stp2_0, stp1_15);
+}
+
+static void idct16_10_r2(__m128i *in) {
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+
+ const __m128i stg2_0 = dual_set_epi16(3212, 3212);
+ const __m128i stg2_1 = dual_set_epi16(32610, 32610);
+ const __m128i stg2_6 = dual_set_epi16(-9512, -9512);
+ const __m128i stg2_7 = dual_set_epi16(31358, 31358);
+ const __m128i stg3_0 = dual_set_epi16(6392, 6392);
+ const __m128i stg3_1 = dual_set_epi16(32138, 32138);
+ const __m128i stg4_01 = dual_set_epi16(23170, 23170);
+
+ const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+ const __m128i stg4_7 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+
+ __m128i stp1_0, stp1_2, stp1_3, stp1_5, stp1_6,
+ stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15,
+ stp1_8_0, stp1_12_0;
+ __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7,
+ stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14;
+ __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+
+ /* Stage2 */
+ {
+ stp1_8_0 = _mm_mulhrs_epi16(in[1], stg2_0);
+ stp1_15 = _mm_mulhrs_epi16(in[1], stg2_1);
+ stp1_11 = _mm_mulhrs_epi16(in[3], stg2_6);
+ stp1_12_0 = _mm_mulhrs_epi16(in[3], stg2_7);
+ }
+
+ /* Stage3 */
+ {
+ stp2_4 = _mm_mulhrs_epi16(in[2], stg3_0);
+ stp2_7 = _mm_mulhrs_epi16(in[2], stg3_1);
+
+ stp1_9 = stp1_8_0;
+ stp1_10 = stp1_11;
+
+ stp1_13 = stp1_12_0;
+ stp1_14 = stp1_15;
+ }
+
+ /* Stage4 */
+ {
+ const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14);
+ const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14);
+ const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13);
+ const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13);
+
+ stp1_0 = _mm_mulhrs_epi16(in[0], stg4_01);
+
+ stp2_5 = stp2_4;
+ stp2_6 = stp2_7;
+
+
+ tmp0 = _mm_madd_epi16(lo_9_14, stg4_4);
+ tmp1 = _mm_madd_epi16(hi_9_14, stg4_4);
+ tmp2 = _mm_madd_epi16(lo_9_14, stg4_5);
+ tmp3 = _mm_madd_epi16(hi_9_14, stg4_5);
+ tmp4 = _mm_madd_epi16(lo_10_13, stg4_6);
+ tmp5 = _mm_madd_epi16(hi_10_13, stg4_6);
+ tmp6 = _mm_madd_epi16(lo_10_13, stg4_7);
+ tmp7 = _mm_madd_epi16(hi_10_13, stg4_7);
+
+ tmp0 = _mm_add_epi32(tmp0, rounding);
+ tmp1 = _mm_add_epi32(tmp1, rounding);
+ tmp2 = _mm_add_epi32(tmp2, rounding);
+ tmp3 = _mm_add_epi32(tmp3, rounding);
+ tmp4 = _mm_add_epi32(tmp4, rounding);
+ tmp5 = _mm_add_epi32(tmp5, rounding);
+ tmp6 = _mm_add_epi32(tmp6, rounding);
+ tmp7 = _mm_add_epi32(tmp7, rounding);
+
+ tmp0 = _mm_srai_epi32(tmp0, 14);
+ tmp1 = _mm_srai_epi32(tmp1, 14);
+ tmp2 = _mm_srai_epi32(tmp2, 14);
+ tmp3 = _mm_srai_epi32(tmp3, 14);
+ tmp4 = _mm_srai_epi32(tmp4, 14);
+ tmp5 = _mm_srai_epi32(tmp5, 14);
+ tmp6 = _mm_srai_epi32(tmp6, 14);
+ tmp7 = _mm_srai_epi32(tmp7, 14);
+
+ stp2_9 = _mm_packs_epi32(tmp0, tmp1);
+ stp2_14 = _mm_packs_epi32(tmp2, tmp3);
+ stp2_10 = _mm_packs_epi32(tmp4, tmp5);
+ stp2_13 = _mm_packs_epi32(tmp6, tmp7);
+ }
+
+ /* Stage5 */
+ {
+ stp1_2 = stp1_0;
+ stp1_3 = stp1_0;
+
+ tmp0 = _mm_sub_epi16(stp2_6, stp2_5);
+ tmp1 = _mm_add_epi16(stp2_6, stp2_5);
+
+ stp1_5 = _mm_mulhrs_epi16(tmp0, stg4_01);
+ stp1_6 = _mm_mulhrs_epi16(tmp1, stg4_01);
+
+ stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11);
+ stp1_9 = _mm_add_epi16(stp2_9, stp2_10);
+ stp1_10 = _mm_sub_epi16(stp2_9, stp2_10);
+ stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11);
+
+ stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0);
+ stp1_13 = _mm_sub_epi16(stp2_14, stp2_13);
+ stp1_14 = _mm_add_epi16(stp2_14, stp2_13);
+ stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0);
+ }
+
+ /* Stage6 */
+ {
+ stp2_0 = _mm_add_epi16(stp1_0, stp2_7);
+ stp2_1 = _mm_add_epi16(stp1_0, stp1_6);
+ stp2_2 = _mm_add_epi16(stp1_2, stp1_5);
+ stp2_3 = _mm_add_epi16(stp1_3, stp2_4);
+
+ tmp0 = _mm_sub_epi16(stp1_13, stp1_10);
+ tmp1 = _mm_add_epi16(stp1_13, stp1_10);
+ tmp2 = _mm_sub_epi16(stp1_12, stp1_11);
+ tmp3 = _mm_add_epi16(stp1_12, stp1_11);
+
+ stp2_4 = _mm_sub_epi16(stp1_3, stp2_4);
+ stp2_5 = _mm_sub_epi16(stp1_2, stp1_5);
+ stp2_6 = _mm_sub_epi16(stp1_0, stp1_6);
+ stp2_7 = _mm_sub_epi16(stp1_0, stp2_7);
+
+ stp2_10 = _mm_mulhrs_epi16(tmp0, stg4_01);
+ stp2_13 = _mm_mulhrs_epi16(tmp1, stg4_01);
+ stp2_11 = _mm_mulhrs_epi16(tmp2, stg4_01);
+ stp2_12 = _mm_mulhrs_epi16(tmp3, stg4_01);
+ }
+
+ // Stage7
+ in[0] = _mm_add_epi16(stp2_0, stp1_15);
+ in[1] = _mm_add_epi16(stp2_1, stp1_14);
+ in[2] = _mm_add_epi16(stp2_2, stp2_13);
+ in[3] = _mm_add_epi16(stp2_3, stp2_12);
+ in[4] = _mm_add_epi16(stp2_4, stp2_11);
+ in[5] = _mm_add_epi16(stp2_5, stp2_10);
+ in[6] = _mm_add_epi16(stp2_6, stp1_9);
+ in[7] = _mm_add_epi16(stp2_7, stp1_8);
+ in[8] = _mm_sub_epi16(stp2_7, stp1_8);
+ in[9] = _mm_sub_epi16(stp2_6, stp1_9);
+ in[10] = _mm_sub_epi16(stp2_5, stp2_10);
+ in[11] = _mm_sub_epi16(stp2_4, stp2_11);
+ in[12] = _mm_sub_epi16(stp2_3, stp2_12);
+ in[13] = _mm_sub_epi16(stp2_2, stp2_13);
+ in[14] = _mm_sub_epi16(stp2_1, stp1_14);
+ in[15] = _mm_sub_epi16(stp2_0, stp1_15);
+}
+
+void vp9_idct16x16_10_add_ssse3(const int16_t *input, uint8_t *dest,
+ int stride) {
+ const __m128i final_rounding = _mm_set1_epi16(1<<5);
+ const __m128i zero = _mm_setzero_si128();
+ __m128i in[16], l[16];
+
+ int i;
+ // First 1-D inverse DCT
+ // Load input data.
+ in[0] = _mm_load_si128((const __m128i *)input);
+ in[1] = _mm_load_si128((const __m128i *)(input + 8 * 2));
+ in[2] = _mm_load_si128((const __m128i *)(input + 8 * 4));
+ in[3] = _mm_load_si128((const __m128i *)(input + 8 * 6));
+
+ TRANSPOSE_8X4(in[0], in[1], in[2], in[3], in[0], in[1]);
+
+ idct16_10_r1(in, l);
+
+ // Second 1-D inverse transform, performed per 8x16 block
+ for (i = 0; i < 2; i++) {
+ array_transpose_4X8(l + 8*i, in);
+
+ idct16_10_r2(in);
+
+ // Final rounding and shift
+ in[0] = _mm_adds_epi16(in[0], final_rounding);
+ in[1] = _mm_adds_epi16(in[1], final_rounding);
+ in[2] = _mm_adds_epi16(in[2], final_rounding);
+ in[3] = _mm_adds_epi16(in[3], final_rounding);
+ in[4] = _mm_adds_epi16(in[4], final_rounding);
+ in[5] = _mm_adds_epi16(in[5], final_rounding);
+ in[6] = _mm_adds_epi16(in[6], final_rounding);
+ in[7] = _mm_adds_epi16(in[7], final_rounding);
+ in[8] = _mm_adds_epi16(in[8], final_rounding);
+ in[9] = _mm_adds_epi16(in[9], final_rounding);
+ in[10] = _mm_adds_epi16(in[10], final_rounding);
+ in[11] = _mm_adds_epi16(in[11], final_rounding);
+ in[12] = _mm_adds_epi16(in[12], final_rounding);
+ in[13] = _mm_adds_epi16(in[13], final_rounding);
+ in[14] = _mm_adds_epi16(in[14], final_rounding);
+ in[15] = _mm_adds_epi16(in[15], final_rounding);
+
+ in[0] = _mm_srai_epi16(in[0], 6);
+ in[1] = _mm_srai_epi16(in[1], 6);
+ in[2] = _mm_srai_epi16(in[2], 6);
+ in[3] = _mm_srai_epi16(in[3], 6);
+ in[4] = _mm_srai_epi16(in[4], 6);
+ in[5] = _mm_srai_epi16(in[5], 6);
+ in[6] = _mm_srai_epi16(in[6], 6);
+ in[7] = _mm_srai_epi16(in[7], 6);
+ in[8] = _mm_srai_epi16(in[8], 6);
+ in[9] = _mm_srai_epi16(in[9], 6);
+ in[10] = _mm_srai_epi16(in[10], 6);
+ in[11] = _mm_srai_epi16(in[11], 6);
+ in[12] = _mm_srai_epi16(in[12], 6);
+ in[13] = _mm_srai_epi16(in[13], 6);
+ in[14] = _mm_srai_epi16(in[14], 6);
+ in[15] = _mm_srai_epi16(in[15], 6);
+
+ RECON_AND_STORE(dest, in[0]);
+ RECON_AND_STORE(dest, in[1]);
+ RECON_AND_STORE(dest, in[2]);
+ RECON_AND_STORE(dest, in[3]);
+ RECON_AND_STORE(dest, in[4]);
+ RECON_AND_STORE(dest, in[5]);
+ RECON_AND_STORE(dest, in[6]);
+ RECON_AND_STORE(dest, in[7]);
+ RECON_AND_STORE(dest, in[8]);
+ RECON_AND_STORE(dest, in[9]);
+ RECON_AND_STORE(dest, in[10]);
+ RECON_AND_STORE(dest, in[11]);
+ RECON_AND_STORE(dest, in[12]);
+ RECON_AND_STORE(dest, in[13]);
+ RECON_AND_STORE(dest, in[14]);
+ RECON_AND_STORE(dest, in[15]);
+
+ dest += 8 - (stride * 16);
+ }
+}
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 121b1f2cd..fc70035f2 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -195,30 +195,32 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
struct macroblockd_plane *const pd = &xd->plane[plane];
if (eob > 0) {
TX_TYPE tx_type;
- const PLANE_TYPE plane_type = pd->plane_type;
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- switch (tx_size) {
- case TX_4X4:
- tx_type = get_tx_type_4x4(plane_type, xd, block);
- if (tx_type == DCT_DCT)
- xd->itxm_add(dqcoeff, dst, stride, eob);
- else
- vp9_iht4x4_16_add(dqcoeff, dst, stride, tx_type);
- break;
- case TX_8X8:
- tx_type = get_tx_type(plane_type, xd);
- vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob);
- break;
- case TX_16X16:
- tx_type = get_tx_type(plane_type, xd);
- vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob);
- break;
- case TX_32X32:
- tx_type = DCT_DCT;
- vp9_idct32x32_add(dqcoeff, dst, stride, eob);
- break;
- default:
- assert(0 && "Invalid transform size");
+ if (xd->lossless) {
+ tx_type = DCT_DCT;
+ vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
+ } else {
+ const PLANE_TYPE plane_type = pd->plane_type;
+ switch (tx_size) {
+ case TX_4X4:
+ tx_type = get_tx_type_4x4(plane_type, xd, block);
+ vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob);
+ break;
+ case TX_8X8:
+ tx_type = get_tx_type(plane_type, xd);
+ vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob);
+ break;
+ case TX_16X16:
+ tx_type = get_tx_type(plane_type, xd);
+ vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob);
+ break;
+ case TX_32X32:
+ tx_type = DCT_DCT;
+ vp9_idct32x32_add(dqcoeff, dst, stride, eob);
+ break;
+ default:
+ assert(0 && "Invalid transform size");
+ }
}
if (eob == 1) {
@@ -588,8 +590,6 @@ static void setup_quantization(VP9_COMMON *const cm, MACROBLOCKD *const xd,
cm->y_dc_delta_q == 0 &&
cm->uv_dc_delta_q == 0 &&
cm->uv_ac_delta_q == 0;
-
- xd->itxm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
}
static INTERP_FILTER read_interp_filter(struct vp9_read_bit_buffer *rb) {
diff --git a/vp9/encoder/vp9_aq_complexity.c b/vp9/encoder/vp9_aq_complexity.c
index 47ad8d8cc..0d6b41d15 100644
--- a/vp9/encoder/vp9_aq_complexity.c
+++ b/vp9/encoder/vp9_aq_complexity.c
@@ -47,11 +47,21 @@ void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) {
// Use some of the segments for in frame Q adjustment.
for (segment = 1; segment < 2; segment++) {
- const int qindex_delta =
+ int qindex_delta =
vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex,
in_frame_q_adj_ratio[segment]);
- vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q);
- vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta);
+
+ // For AQ mode 2, we dont allow Q0 in a segment if the base Q is not 0.
+ // Q0 (lossless) implies 4x4 only and in AQ mode 2 a segment Q delta
+ // is sometimes applied without going back around the rd loop.
+ // This could lead to an illegal combination of partition size and q.
+ if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) {
+ qindex_delta = -cm->base_qindex + 1;
+ }
+ if ((cm->base_qindex + qindex_delta) > 0) {
+ vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q);
+ vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta);
+ }
}
}
}
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index c406860a0..c3cd93b78 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -109,6 +109,7 @@ struct macroblock {
MV pred_mv[MAX_REF_FRAMES];
void (*fwd_txm4x4)(const int16_t *input, int16_t *output, int stride);
+ void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob);
};
#ifdef __cplusplus
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 6cbc38d79..c1db8263e 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -478,8 +478,8 @@ static void choose_partitioning(VP9_COMP *cpi,
unsigned int sse = 0;
int sum = 0;
if (x_idx < pixels_wide && y_idx < pixels_high)
- vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,
- d + y_idx * dp + x_idx, dp, &sse, &sum);
+ vp9_get8x8var(s + y_idx * sp + x_idx, sp,
+ d + y_idx * dp + x_idx, dp, &sse, &sum);
fill_variance(sse, sum, 64, &vst->split[k].part_variances.none);
}
}
@@ -1214,9 +1214,9 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
int b_offset = b_mi_row * MI_SIZE * src_stride +
b_mi_col * MI_SIZE;
- vp9_get_sse_sum_16x16(src + b_offset, src_stride,
- pre_src + b_offset, pre_stride,
- &d16[j].sse, &d16[j].sum);
+ vp9_get16x16var(src + b_offset, src_stride,
+ pre_src + b_offset, pre_stride,
+ &d16[j].sse, &d16[j].sum);
d16[j].var = d16[j].sse -
(((uint32_t)d16[j].sum * d16[j].sum) >> 8);
@@ -2369,22 +2369,6 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
sizeof(*xd->above_seg_context) * aligned_mi_cols);
}
-static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
- if (lossless) {
- // printf("Switching to lossless\n");
- cpi->mb.fwd_txm4x4 = vp9_fwht4x4;
- cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add;
- cpi->mb.optimize = 0;
- cpi->common.lf.filter_level = 0;
- cpi->zbin_mode_boost_enabled = 0;
- cpi->common.tx_mode = ONLY_4X4;
- } else {
- // printf("Not lossless\n");
- cpi->mb.fwd_txm4x4 = vp9_fdct4x4;
- cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add;
- }
-}
-
static int check_dual_ref_flags(VP9_COMP *cpi) {
const int ref_flags = cpi->ref_frame_flags;
@@ -2421,7 +2405,7 @@ static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) {
}
static TX_MODE select_tx_mode(const VP9_COMP *cpi) {
- if (cpi->oxcf.lossless) {
+ if (cpi->mb.e_mbd.lossless) {
return ONLY_4X4;
} else if (cpi->common.current_video_frame == 0) {
return TX_MODE_SELECT;
@@ -3011,13 +2995,21 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vp9_zero(rd_opt->tx_select_diff);
vp9_zero(rd_opt->tx_select_threshes);
- cm->tx_mode = select_tx_mode(cpi);
-
cpi->mb.e_mbd.lossless = cm->base_qindex == 0 &&
cm->y_dc_delta_q == 0 &&
cm->uv_dc_delta_q == 0 &&
cm->uv_ac_delta_q == 0;
- switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless);
+
+ cm->tx_mode = select_tx_mode(cpi);
+
+ cpi->mb.fwd_txm4x4 = cpi->mb.e_mbd.lossless ? vp9_fwht4x4 : vp9_fdct4x4;
+ cpi->mb.itxm_add = cpi->mb.e_mbd.lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
+
+ if (cpi->mb.e_mbd.lossless) {
+ cpi->mb.optimize = 0;
+ cpi->common.lf.filter_level = 0;
+ cpi->zbin_mode_boost_enabled = 0;
+ }
vp9_frame_init_quantizer(cpi);
@@ -3357,7 +3349,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8));
} else {
mbmi->skip = 1;
- if (output_enabled)
+ if (output_enabled &&
+ !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
cm->counts.skip[vp9_get_skip_context(xd)][1]++;
reset_skip_context(xd, MAX(bsize, BLOCK_8X8));
}
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 3b231b7f2..8581e6117 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -406,7 +406,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
// this is like vp9_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
// case.
- xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
+ x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
break;
default:
assert(0 && "Invalid transform size");
@@ -428,7 +428,7 @@ static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
if (p->eobs[block] > 0)
- xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
+ x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
}
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
@@ -574,7 +574,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
// this is like vp9_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
// case.
- xd->itxm_add(dqcoeff, dst, dst_stride, *eob);
+ x->itxm_add(dqcoeff, dst, dst_stride, *eob);
else
vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
}
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 2ce5483d6..0ebc93638 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -393,11 +393,6 @@ static void set_speed_features(VP9_COMP *cpi) {
// Set rd thresholds based on mode and speed setting
vp9_set_rd_speed_thresholds(cpi);
vp9_set_rd_speed_thresholds_sub8x8(cpi);
-
- cpi->mb.fwd_txm4x4 = vp9_fdct4x4;
- if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) {
- cpi->mb.fwd_txm4x4 = vp9_fwht4x4;
- }
}
static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
@@ -596,16 +591,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
if (cpi->oxcf.mode == REALTIME)
cpi->oxcf.play_alternate = 0;
- cpi->oxcf.lossless = oxcf->lossless;
- if (cpi->oxcf.lossless) {
- // In lossless mode, make sure right quantizer range and correct transform
- // is set.
- cpi->oxcf.worst_allowed_q = 0;
- cpi->oxcf.best_allowed_q = 0;
- cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add;
- } else {
- cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add;
- }
rc->baseline_gf_interval = DEFAULT_GF_INTERVAL;
cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
@@ -627,33 +612,30 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
// local file playback mode == really big buffer
if (cpi->oxcf.rc_mode == RC_MODE_VBR) {
- cpi->oxcf.starting_buffer_level = 60000;
- cpi->oxcf.optimal_buffer_level = 60000;
- cpi->oxcf.maximum_buffer_size = 240000;
+ cpi->oxcf.starting_buffer_level_ms = 60000;
+ cpi->oxcf.optimal_buffer_level_ms = 60000;
+ cpi->oxcf.maximum_buffer_size_ms = 240000;
}
- cpi->oxcf.starting_buffer_level =
- vp9_rescale(cpi->oxcf.starting_buffer_level,
- cpi->oxcf.target_bandwidth, 1000);
+ rc->starting_buffer_level = vp9_rescale(cpi->oxcf.starting_buffer_level_ms,
+ cpi->oxcf.target_bandwidth, 1000);
// Set or reset optimal and maximum buffer levels.
- if (cpi->oxcf.optimal_buffer_level == 0)
- cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
+ if (cpi->oxcf.optimal_buffer_level_ms == 0)
+ rc->optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
else
- cpi->oxcf.optimal_buffer_level =
- vp9_rescale(cpi->oxcf.optimal_buffer_level,
- cpi->oxcf.target_bandwidth, 1000);
+ rc->optimal_buffer_level = vp9_rescale(cpi->oxcf.optimal_buffer_level_ms,
+ cpi->oxcf.target_bandwidth, 1000);
- if (cpi->oxcf.maximum_buffer_size == 0)
- cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8;
+ if (cpi->oxcf.maximum_buffer_size_ms == 0)
+ rc->maximum_buffer_size = cpi->oxcf.target_bandwidth / 8;
else
- cpi->oxcf.maximum_buffer_size =
- vp9_rescale(cpi->oxcf.maximum_buffer_size,
- cpi->oxcf.target_bandwidth, 1000);
+ rc->maximum_buffer_size = vp9_rescale(cpi->oxcf.maximum_buffer_size_ms,
+ cpi->oxcf.target_bandwidth, 1000);
// Under a configuration change, where maximum_buffer_size may change,
// keep buffer level clipped to the maximum allowed buffer size.
- rc->bits_off_target = MIN(rc->bits_off_target, cpi->oxcf.maximum_buffer_size);
- rc->buffer_level = MIN(rc->buffer_level, cpi->oxcf.maximum_buffer_size);
+ rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size);
+ rc->buffer_level = MIN(rc->buffer_level, rc->maximum_buffer_size);
// Set up frame rate and related parameters rate control values.
vp9_new_framerate(cpi, cpi->oxcf.framerate);
@@ -1439,21 +1421,6 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
vp8_yv12_extend_frame_borders_c(dst);
}
-static int find_fp_qindex() {
- int i;
-
- for (i = 0; i < QINDEX_RANGE; i++) {
- if (vp9_convert_qindex_to_q(i) >= 30.0) {
- break;
- }
- }
-
- if (i == QINDEX_RANGE)
- i--;
-
- return i;
-}
-
#define WRITE_RECON_BUFFER 0
#if WRITE_RECON_BUFFER
void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) {
@@ -2308,17 +2275,6 @@ static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
encode_frame_to_data_rate(cpi, size, dest, frame_flags);
}
-static void Pass1Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
- unsigned int *frame_flags) {
- (void) size;
- (void) dest;
- (void) frame_flags;
-
- vp9_rc_get_first_pass_params(cpi);
- vp9_set_quantizer(&cpi->common, find_fp_qindex());
- vp9_first_pass(cpi);
-}
-
static void Pass2Encode(VP9_COMP *cpi, size_t *size,
uint8_t *dest, unsigned int *frame_flags) {
cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
@@ -2658,7 +2614,10 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
if (cpi->pass == 1 &&
(!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) {
- Pass1Encode(cpi, size, dest, frame_flags);
+ const int lossless = is_lossless_requested(&cpi->oxcf);
+ cpi->mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4;
+ cpi->mb.itxm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
+ vp9_first_pass(cpi);
} else if (cpi->pass == 2 &&
(!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) {
Pass2Encode(cpi, size, dest, frame_flags);
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 47c901975..c69a345d0 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -216,9 +216,9 @@ typedef struct VP9EncoderConfig {
int over_shoot_pct;
// buffering parameters
- int64_t starting_buffer_level; // in seconds
- int64_t optimal_buffer_level;
- int64_t maximum_buffer_size;
+ int64_t starting_buffer_level_ms;
+ int64_t optimal_buffer_level_ms;
+ int64_t maximum_buffer_size_ms;
// Frame drop threshold.
int drop_frames_water_mark;
@@ -228,7 +228,6 @@ typedef struct VP9EncoderConfig {
int worst_allowed_q;
int best_allowed_q;
int cq_level;
- int lossless;
AQ_MODE aq_mode; // Adaptive Quantization mode
// Internal frame size scaling.
@@ -257,7 +256,6 @@ typedef struct VP9EncoderConfig {
// these parameters aren't to be used in final build don't use!!!
int play_alternate;
- int alt_freq;
int encode_breakout; // early breakout : for video conf recommend 800
@@ -286,6 +284,10 @@ typedef struct VP9EncoderConfig {
vp8e_tuning tuning;
} VP9EncoderConfig;
+static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
+ return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0;
+}
+
static INLINE int is_best_mode(MODE mode) {
return mode == ONE_PASS_BEST || mode == TWO_PASS_SECOND_BEST;
}
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 0d4f2c72c..dc3832b16 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -398,6 +398,32 @@ static BLOCK_SIZE get_bsize(const VP9_COMMON *cm, int mb_row, int mb_col) {
}
}
+static int find_fp_qindex() {
+ int i;
+
+ for (i = 0; i < QINDEX_RANGE; ++i)
+ if (vp9_convert_qindex_to_q(i) >= 30.0)
+ break;
+
+ if (i == QINDEX_RANGE)
+ i--;
+
+ return i;
+}
+
+static void set_first_pass_params(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ if (!cpi->refresh_alt_ref_frame &&
+ (cm->current_video_frame == 0 ||
+ (cpi->frame_flags & FRAMEFLAGS_KEY))) {
+ cm->frame_type = KEY_FRAME;
+ } else {
+ cm->frame_type = INTER_FRAME;
+ }
+ // Do not use periodic key frames.
+ cpi->rc.frames_to_key = INT_MAX;
+}
+
void vp9_first_pass(VP9_COMP *cpi) {
int mb_row, mb_col;
MACROBLOCK *const x = &cpi->mb;
@@ -438,6 +464,9 @@ void vp9_first_pass(VP9_COMP *cpi) {
vp9_clear_system_state();
+ set_first_pass_params(cpi);
+ vp9_set_quantizer(cm, find_fp_qindex());
+
if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
MV_REFERENCE_FRAME ref_frame = LAST_FRAME;
const YV12_BUFFER_CONFIG *scaled_ref_buf = NULL;
@@ -1576,7 +1605,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Break out conditions.
if (
- // Break at cpi->max_gf_interval unless almost totally static.
+ // Break at active_max_gf_interval unless almost totally static.
(i >= active_max_gf_interval && (zero_motion_accumulator < 0.995)) ||
(
// Don't break out with a very short interval.
@@ -2051,19 +2080,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->modified_error_left -= kf_group_err;
}
-void vp9_rc_get_first_pass_params(VP9_COMP *cpi) {
- VP9_COMMON *const cm = &cpi->common;
- if (!cpi->refresh_alt_ref_frame &&
- (cm->current_video_frame == 0 ||
- (cpi->frame_flags & FRAMEFLAGS_KEY))) {
- cm->frame_type = KEY_FRAME;
- } else {
- cm->frame_type = INTER_FRAME;
- }
- // Do not use periodic key frames.
- cpi->rc.frames_to_key = INT_MAX;
-}
-
// For VBR...adjustment to the frame target based on error from previous frames
void vbr_rate_correction(int * this_frame_target,
const int64_t vbr_bits_off_target) {
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 9d2b2a497..dbd19a2d6 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -524,7 +524,8 @@ static int vp9_pattern_search(const MACROBLOCK *x,
// Work out the start point for the search
bestsad = vfp->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, ref_mv), in_what->stride);
+ get_buf_from_mv(in_what, ref_mv), in_what->stride) +
+ mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
// Search all possible scales upto the search param around the center point
// pick the scale of the point that is best as the starting scale of
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 11633a73d..913b8ead4 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -280,8 +280,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
int rate_mv = 0;
- if (cpi->sf.disable_inter_mode_mask[bsize] &
- (1 << INTER_OFFSET(this_mode)))
+ if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode)))
continue;
if (rd_less_than_thresh(best_rd, rd_threshes[mode_idx[this_mode]],
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 6f646ea0e..0163fd1e8 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -159,7 +159,7 @@ static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) {
lrc->bits_off_target += bits_off_for_this_layer;
// Clip buffer level to maximum buffer size for the layer.
- lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size);
+ lrc->bits_off_target = MIN(lrc->bits_off_target, lrc->maximum_buffer_size);
lrc->buffer_level = lrc->bits_off_target;
}
}
@@ -167,7 +167,6 @@ static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) {
// Update the buffer level: leaky bucket model.
static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
const VP9_COMMON *const cm = &cpi->common;
- const VP9EncoderConfig *oxcf = &cpi->oxcf;
RATE_CONTROL *const rc = &cpi->rc;
// Non-viewable frames are a special case and are treated as pure overhead.
@@ -178,7 +177,7 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
}
// Clip the buffer level to the maximum specified buffer size.
- rc->bits_off_target = MIN(rc->bits_off_target, oxcf->maximum_buffer_size);
+ rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size);
rc->buffer_level = rc->bits_off_target;
if (cpi->use_svc && cpi->oxcf.rc_mode == RC_MODE_CBR) {
@@ -188,23 +187,20 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
if (pass == 0 && oxcf->rc_mode == RC_MODE_CBR) {
- rc->avg_frame_qindex[0] = oxcf->worst_allowed_q;
- rc->avg_frame_qindex[1] = oxcf->worst_allowed_q;
- rc->avg_frame_qindex[2] = oxcf->worst_allowed_q;
+ rc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q;
+ rc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q;
} else {
- rc->avg_frame_qindex[0] = (oxcf->worst_allowed_q +
- oxcf->best_allowed_q) / 2;
- rc->avg_frame_qindex[1] = (oxcf->worst_allowed_q +
- oxcf->best_allowed_q) / 2;
- rc->avg_frame_qindex[2] = (oxcf->worst_allowed_q +
- oxcf->best_allowed_q) / 2;
+ rc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q +
+ oxcf->best_allowed_q) / 2;
+ rc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q +
+ oxcf->best_allowed_q) / 2;
}
rc->last_q[KEY_FRAME] = oxcf->best_allowed_q;
rc->last_q[INTER_FRAME] = oxcf->best_allowed_q;
- rc->buffer_level = oxcf->starting_buffer_level;
- rc->bits_off_target = oxcf->starting_buffer_level;
+ rc->buffer_level = rc->starting_buffer_level;
+ rc->bits_off_target = rc->starting_buffer_level;
rc->rolling_target_bits = rc->avg_frame_bandwidth;
rc->rolling_actual_bits = rc->avg_frame_bandwidth;
@@ -250,7 +246,7 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) {
// If buffer is below drop_mark, for now just drop every other frame
// (starting with the next frame) until it increases back over drop_mark.
int drop_mark = (int)(oxcf->drop_frames_water_mark *
- oxcf->optimal_buffer_level / 100);
+ rc->optimal_buffer_level / 100);
if ((rc->buffer_level > drop_mark) &&
(rc->decimation_factor > 0)) {
--rc->decimation_factor;
@@ -444,10 +440,9 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
// ambient Q (at buffer = optimal level) to worst_quality level
// (at buffer = critical level).
const VP9_COMMON *const cm = &cpi->common;
- const VP9EncoderConfig *oxcf = &cpi->oxcf;
const RATE_CONTROL *rc = &cpi->rc;
// Buffer level below which we push active_worst to worst_quality.
- int64_t critical_level = oxcf->optimal_buffer_level >> 2;
+ int64_t critical_level = rc->optimal_buffer_level >> 2;
int64_t buff_lvl_step = 0;
int adjustment = 0;
int active_worst_quality;
@@ -459,26 +454,26 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
else
active_worst_quality = MIN(rc->worst_quality,
rc->avg_frame_qindex[KEY_FRAME] * 3 / 2);
- if (rc->buffer_level > oxcf->optimal_buffer_level) {
+ if (rc->buffer_level > rc->optimal_buffer_level) {
// Adjust down.
// Maximum limit for down adjustment, ~30%.
int max_adjustment_down = active_worst_quality / 3;
if (max_adjustment_down) {
- buff_lvl_step = ((oxcf->maximum_buffer_size -
- oxcf->optimal_buffer_level) / max_adjustment_down);
+ buff_lvl_step = ((rc->maximum_buffer_size -
+ rc->optimal_buffer_level) / max_adjustment_down);
if (buff_lvl_step)
- adjustment = (int)((rc->buffer_level - oxcf->optimal_buffer_level) /
+ adjustment = (int)((rc->buffer_level - rc->optimal_buffer_level) /
buff_lvl_step);
active_worst_quality -= adjustment;
}
} else if (rc->buffer_level > critical_level) {
// Adjust up from ambient Q.
if (critical_level) {
- buff_lvl_step = (oxcf->optimal_buffer_level - critical_level);
+ buff_lvl_step = (rc->optimal_buffer_level - critical_level);
if (buff_lvl_step) {
adjustment =
(int)((rc->worst_quality - rc->avg_frame_qindex[INTER_FRAME]) *
- (oxcf->optimal_buffer_level - rc->buffer_level) /
+ (rc->optimal_buffer_level - rc->buffer_level) /
buff_lvl_step);
}
active_worst_quality = rc->avg_frame_qindex[INTER_FRAME] + adjustment;
@@ -1086,21 +1081,21 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
rc->last_q[KEY_FRAME] = qindex;
rc->avg_frame_qindex[KEY_FRAME] =
ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2);
- } else if (!rc->is_src_frame_alt_ref &&
- (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) &&
- !(cpi->use_svc && oxcf->rc_mode == RC_MODE_CBR)) {
- rc->avg_frame_qindex[2] =
- ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[2] + qindex, 2);
} else {
- rc->last_q[INTER_FRAME] = qindex;
- rc->avg_frame_qindex[INTER_FRAME] =
+ if (rc->is_src_frame_alt_ref ||
+ !(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) ||
+ (cpi->use_svc && oxcf->rc_mode == RC_MODE_CBR)) {
+ rc->last_q[INTER_FRAME] = qindex;
+ rc->avg_frame_qindex[INTER_FRAME] =
ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2);
- rc->ni_frames++;
- rc->tot_q += vp9_convert_qindex_to_q(qindex);
- rc->avg_q = rc->tot_q / rc->ni_frames;
- // Calculate the average Q for normal inter frames (not key or GFU frames).
- rc->ni_tot_qi += qindex;
- rc->ni_av_qi = rc->ni_tot_qi / rc->ni_frames;
+ rc->ni_frames++;
+ rc->tot_q += vp9_convert_qindex_to_q(qindex);
+ rc->avg_q = rc->tot_q / rc->ni_frames;
+ // Calculate the average Q for normal inter frames (not key or GFU
+ // frames).
+ rc->ni_tot_qi += qindex;
+ rc->ni_av_qi = rc->ni_tot_qi / rc->ni_frames;
+ }
}
// Keep record of last boosted (KF/KF/ARF) Q value.
@@ -1227,8 +1222,8 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
const VP9EncoderConfig *oxcf = &cpi->oxcf;
const RATE_CONTROL *rc = &cpi->rc;
const SVC *const svc = &cpi->svc;
- const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level;
- const int64_t one_pct_bits = 1 + oxcf->optimal_buffer_level / 100;
+ const int64_t diff = rc->optimal_buffer_level - rc->buffer_level;
+ const int64_t one_pct_bits = 1 + rc->optimal_buffer_level / 100;
int min_frame_target = MAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS);
int target = rc->avg_frame_bandwidth;
if (svc->number_temporal_layers > 1 &&
@@ -1259,8 +1254,8 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
const SVC *const svc = &cpi->svc;
int target;
if (cpi->common.current_video_frame == 0) {
- target = ((cpi->oxcf.starting_buffer_level / 2) > INT_MAX)
- ? INT_MAX : (int)(cpi->oxcf.starting_buffer_level / 2);
+ target = ((rc->starting_buffer_level / 2) > INT_MAX)
+ ? INT_MAX : (int)(rc->starting_buffer_level / 2);
} else {
int kf_boost = 32;
double framerate = oxcf->framerate;
@@ -1388,6 +1383,24 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
return target_index - qindex;
}
+void vp9_rc_set_gf_max_interval(const VP9EncoderConfig *const oxcf,
+ RATE_CONTROL *const rc) {
+ // Set Maximum gf/arf interval
+ rc->max_gf_interval = 16;
+
+ // Extended interval for genuinely static scenes
+ rc->static_scene_max_gf_interval = oxcf->key_freq >> 1;
+
+ // Special conditions when alt ref frame enabled
+ if (oxcf->play_alternate && oxcf->lag_in_frames) {
+ if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
+ rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
+ }
+
+ if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
+ rc->max_gf_interval = rc->static_scene_max_gf_interval;
+}
+
void vp9_rc_update_framerate(VP9_COMP *cpi) {
const VP9_COMMON *const cm = &cpi->common;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
@@ -1412,21 +1425,5 @@ void vp9_rc_update_framerate(VP9_COMP *cpi) {
rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P),
vbr_max_bits);
- // Set Maximum gf/arf interval
- rc->max_gf_interval = 16;
-
- // Extended interval for genuinely static scenes
- rc->static_scene_max_gf_interval = cpi->oxcf.key_freq >> 1;
-
- // Special conditions when alt ref frame enabled in lagged compress mode
- if (oxcf->play_alternate && oxcf->lag_in_frames) {
- if (rc->max_gf_interval > oxcf->lag_in_frames - 1)
- rc->max_gf_interval = oxcf->lag_in_frames - 1;
-
- if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
- rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
- }
-
- if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
- rc->max_gf_interval = rc->static_scene_max_gf_interval;
+ vp9_rc_set_gf_max_interval(oxcf, rc);
}
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index 614078eef..f1a4a3f6d 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -61,7 +61,7 @@ typedef struct {
int ni_av_qi;
int ni_tot_qi;
int ni_frames;
- int avg_frame_qindex[3]; // 0 - KEY, 1 - INTER, 2 - ARF/GF
+ int avg_frame_qindex[FRAME_TYPES];
double tot_q;
double avg_q;
@@ -84,6 +84,10 @@ typedef struct {
int worst_quality;
int best_quality;
+
+ int64_t starting_buffer_level;
+ int64_t optimal_buffer_level;
+ int64_t maximum_buffer_size;
// int active_best_quality;
} RATE_CONTROL;
@@ -178,6 +182,9 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
void vp9_rc_update_framerate(struct VP9_COMP *cpi);
+void vp9_rc_set_gf_max_interval(const struct VP9EncoderConfig *const oxcf,
+ RATE_CONTROL *const rc);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index d402d7b40..f68aa2738 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1675,9 +1675,9 @@ static INLINE int mv_has_subpel(const MV *mv) {
static int check_best_zero_mv(
const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
- int disable_inter_mode_mask, int this_mode,
+ int inter_mode_mask, int this_mode,
const MV_REFERENCE_FRAME ref_frames[2]) {
- if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
+ if ((inter_mode_mask & (1 << ZEROMV)) &&
(this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
(ref_frames[1] == NONE ||
@@ -1743,7 +1743,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
ENTROPY_CONTEXT t_above[2], t_left[2];
int subpelmv = 1, have_ref = 0;
const int has_second_rf = has_second_ref(mbmi);
- const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
+ const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
vp9_zero(*bsi);
@@ -1792,11 +1792,11 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
mode_idx = INTER_OFFSET(this_mode);
bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
- if (disable_inter_mode_mask & (1 << mode_idx))
+ if (!(inter_mode_mask & (1 << this_mode)))
continue;
if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
- disable_inter_mode_mask,
+ inter_mode_mask,
this_mode, mbmi->ref_frame))
continue;
@@ -3063,7 +3063,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
const int intra_y_mode_mask =
cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
- int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
+ int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
vp9_zero(best_mbmode);
x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
@@ -3130,7 +3130,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
const int inter_non_zero_mode_mask = 0x1F7F7;
mode_skip_mask |= inter_non_zero_mode_mask;
mode_skip_mask &= ~(1 << THR_ZEROMV);
- disable_inter_mode_mask = ~(1 << INTER_OFFSET(ZEROMV));
+ inter_mode_mask = (1 << ZEROMV);
}
// Disable this drop out case if the ref frame
@@ -3182,7 +3182,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
mode_index = THR_ZEROMV;
mode_skip_mask = ~(1 << mode_index);
mode_skip_start = MAX_MODES;
- disable_inter_mode_mask = 0;
+ inter_mode_mask = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) |
+ (1 << NEWMV);
}
for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
@@ -3229,8 +3230,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
this_mode = vp9_mode_order[mode_index].mode;
ref_frame = vp9_mode_order[mode_index].ref_frame[0];
- if (ref_frame != INTRA_FRAME &&
- disable_inter_mode_mask & (1 << INTER_OFFSET(this_mode)))
+ if (ref_frame != INTRA_FRAME && !(inter_mode_mask & (1 << this_mode)))
continue;
second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
@@ -3279,7 +3279,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
!vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
- disable_inter_mode_mask, this_mode, ref_frames))
+ inter_mode_mask, this_mode, ref_frames))
continue;
}
}
@@ -3665,7 +3665,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
int_mv seg_mvs[4][MAX_REF_FRAMES];
b_mode_info best_bmodes[4];
int best_skip2 = 0;
- int ref_frame_mask = 0;
int mode_skip_mask = 0;
x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
@@ -3700,17 +3699,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
frame_mv[ZEROMV][ref_frame].as_int = 0;
}
- for (ref_frame = LAST_FRAME;
- ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) {
- int i;
- for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
- if ((x->pred_mv_sad[ref_frame] >> 1) > x->pred_mv_sad[i]) {
- ref_frame_mask |= (1 << ref_frame);
- break;
- }
- }
- }
-
for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
int mode_excluded = 0;
int64_t this_rd = INT64_MAX;
@@ -3805,11 +3793,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
(int)ref_frame) {
continue;
- // If the segment skip feature is enabled....
- // then do nothing if the current mode is not allowed..
- } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
- ref_frame != INTRA_FRAME) {
- continue;
// Disable this drop out case if the ref frame
// segment level feature is enabled for this segment. This is to
// prevent the possibility that we end up unable to pick any mode.
@@ -4034,15 +4017,10 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
}
if (!disable_skip) {
- // Test for the condition where skip block will be activated
- // because there are no non zero coefficients and make any
- // necessary adjustment for rate. Ignore if skip is coded at
- // segment level as the cost wont have been added in.
- // Is Mb level skip allowed (i.e. not coded at segment level).
- const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
- SEG_LVL_SKIP);
+ // Skip is never coded at the segment level for sub8x8 blocks and instead
+ // always coded in the bitstream at the mode info level.
- if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
+ if (ref_frame != INTRA_FRAME && !xd->lossless) {
if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
// Add in the cost of the no skip flag.
@@ -4057,7 +4035,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
rate_uv = 0;
this_skip2 = 1;
}
- } else if (mb_skip_allowed) {
+ } else {
// Add in the cost of the no skip flag.
rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
}
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index 5ea09a8a7..e85d08a6d 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -99,41 +99,44 @@ static INLINE int full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
int step_param, int error_per_bit,
const MV *ref_mv, MV *tmp_mv,
int var_max, int rd) {
+ const SPEED_FEATURES *const sf = &cpi->sf;
+ const SEARCH_METHODS method = sf->search_method;
+ vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
int var = 0;
- if (cpi->sf.search_method == FAST_DIAMOND) {
- var = vp9_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
- &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv);
- if (rd && var < var_max)
- var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1);
- } else if (cpi->sf.search_method == FAST_HEX) {
- var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
- &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv);
- if (rd && var < var_max)
- var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1);
- } else if (cpi->sf.search_method == HEX) {
- var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1,
- &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv);
- if (rd && var < var_max)
- var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1);
- } else if (cpi->sf.search_method == SQUARE) {
- var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1,
- &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv);
- if (rd && var < var_max)
- var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1);
- } else if (cpi->sf.search_method == BIGDIA) {
- var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1,
- &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv);
- if (rd && var < var_max)
- var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1);
- } else {
- int further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
-
- var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
- further_steps, 1, &cpi->fn_ptr[bsize],
- ref_mv, tmp_mv);
+ switch (method) {
+ case FAST_DIAMOND:
+ var = vp9_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
+ fn_ptr, 1, ref_mv, tmp_mv);
+ break;
+ case FAST_HEX:
+ var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
+ fn_ptr, 1, ref_mv, tmp_mv);
+ break;
+ case HEX:
+ var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1,
+ fn_ptr, 1, ref_mv, tmp_mv);
+ break;
+ case SQUARE:
+ var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1,
+ fn_ptr, 1, ref_mv, tmp_mv);
+ break;
+ case BIGDIA:
+ var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1,
+ fn_ptr, 1, ref_mv, tmp_mv);
+ break;
+ case NSTEP:
+ var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
+ (sf->max_step_search_steps - 1) - step_param,
+ 1, fn_ptr, ref_mv, tmp_mv);
+ break;
+ default:
+ assert(!"Invalid search method.");
}
+ if (method != NSTEP && rd && var < var_max)
+ var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1);
+
return var;
}
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 7c3abd5d7..b7f839747 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -14,20 +14,23 @@
#include "vp9/encoder/vp9_speed_features.h"
enum {
- ALL_INTRA_MODES = (1 << DC_PRED) |
+ INTRA_ALL = (1 << DC_PRED) |
(1 << V_PRED) | (1 << H_PRED) |
(1 << D45_PRED) | (1 << D135_PRED) |
(1 << D117_PRED) | (1 << D153_PRED) |
(1 << D207_PRED) | (1 << D63_PRED) |
(1 << TM_PRED),
-
- INTRA_DC_ONLY = (1 << DC_PRED),
-
- INTRA_DC_TM = (1 << TM_PRED) | (1 << DC_PRED),
-
+ INTRA_DC = (1 << DC_PRED),
+ INTRA_DC_TM = (1 << DC_PRED) | (1 << TM_PRED),
INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED),
+ INTRA_DC_TM_H_V = (1 << DC_PRED) | (1 << TM_PRED) | (1 << V_PRED) |
+ (1 << H_PRED)
+};
- INTRA_DC_TM_H_V = INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED)
+enum {
+ INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV),
+ INTER_NEAREST = (1 << NEARESTMV),
+ INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV)
};
enum {
@@ -140,8 +143,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
sf->search_method = HEX;
sf->disable_filter_search_var_thresh = 500;
for (i = 0; i < TX_SIZES; ++i) {
- sf->intra_y_mode_mask[i] = INTRA_DC_ONLY;
- sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY;
+ sf->intra_y_mode_mask[i] = INTRA_DC;
+ sf->intra_uv_mode_mask[i] = INTRA_DC;
}
cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
}
@@ -156,7 +159,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->adaptive_rd_thresh = 1;
sf->use_fast_coef_costing = 1;
- if (speed == 1) {
+ if (speed >= 1) {
sf->use_square_partition_only = !frame_is_intra_only(cm);
sf->less_rectangular_check = 1;
sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD
@@ -179,13 +182,9 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
}
if (speed >= 2) {
- sf->use_square_partition_only = !frame_is_intra_only(cm);
- sf->less_rectangular_check = 1;
- sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD
- : USE_LARGESTALL;
if (MIN(cm->width, cm->height) >= 720)
- sf->disable_split_mask = cm->show_frame ?
- DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
+ sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
+ : DISABLE_ALL_INTER_SPLIT;
else
sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
@@ -193,28 +192,18 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_INTRA_LOWVAR;
- sf->use_rd_breakout = 1;
- sf->adaptive_motion_search = 1;
sf->adaptive_pred_interp_filter = 2;
- sf->auto_mv_step_size = 1;
sf->reference_masking = 1;
-
sf->disable_filter_search_var_thresh = 50;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
-
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
sf->lf_motion_threshold = LOW_MOITION_THRESHOLD;
sf->adjust_partitioning_from_last_frame = 1;
sf->last_partitioning_redo_frequency = 3;
-
- sf->adaptive_rd_thresh = 2;
sf->use_lp32x32fdct = 1;
sf->mode_skip_start = 11;
- sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
- sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
- sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
}
if (speed >= 3) {
@@ -246,15 +235,15 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->subpel_force_stop = 1;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
- sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY;
+ sf->intra_uv_mode_mask[i] = INTRA_DC;
}
- sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY;
+ sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->frame_parameter_update = 0;
sf->search_method = FAST_HEX;
- sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV);
- sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV));
- sf->disable_inter_mode_mask[BLOCK_64X32] = ~(1 << INTER_OFFSET(NEARESTMV));
- sf->disable_inter_mode_mask[BLOCK_64X64] = ~(1 << INTER_OFFSET(NEARESTMV));
+ sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW;
+ sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST;
+ sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST;
+ sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST;
sf->max_intra_bsize = BLOCK_32X32;
sf->allow_skip_recode = 1;
}
@@ -285,7 +274,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
if (speed >= 7) {
int i;
for (i = 0; i < BLOCK_SIZES; ++i)
- sf->disable_inter_mode_mask[i] = ~(1 << INTER_OFFSET(NEARESTMV));
+ sf->inter_mode_mask[i] = INTER_NEAREST;
}
}
@@ -302,7 +291,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->subpel_search_method = SUBPEL_TREE;
sf->subpel_iters_per_step = 2;
sf->subpel_force_stop = 0;
- sf->optimize_coefficients = !oxcf->lossless;
+ sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf);
sf->reduce_first_step_size = 0;
sf->auto_mv_step_size = 0;
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
@@ -330,8 +319,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->disable_split_var_thresh = 0;
sf->disable_filter_search_var_thresh = 0;
for (i = 0; i < TX_SIZES; i++) {
- sf->intra_y_mode_mask[i] = ALL_INTRA_MODES;
- sf->intra_uv_mode_mask[i] = ALL_INTRA_MODES;
+ sf->intra_y_mode_mask[i] = INTRA_ALL;
+ sf->intra_uv_mode_mask[i] = INTRA_ALL;
}
sf->use_rd_breakout = 0;
sf->skip_encode_sb = 0;
@@ -343,7 +332,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set
sf->use_nonrd_pick_mode = 0;
for (i = 0; i < BLOCK_SIZES; ++i)
- sf->disable_inter_mode_mask[i] = 0;
+ sf->inter_mode_mask[i] = INTER_ALL;
sf->max_intra_bsize = BLOCK_64X64;
// This setting only takes effect when partition_search_type is set
// to FIXED_PARTITION.
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index a54599e6a..3e7cd27d8 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -331,8 +331,8 @@ typedef struct SPEED_FEATURES {
int use_nonrd_pick_mode;
// A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV
- // modes are disabled in order from LSB to MSB for each BLOCK_SIZE.
- int disable_inter_mode_mask[BLOCK_SIZES];
+ // modes are used in order from LSB to MSB for each BLOCK_SIZE.
+ int inter_mode_mask[BLOCK_SIZES];
// This feature controls whether we do the expensive context update and
// calculation in the rd coefficient costing loop.
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 95ea1072d..1b995757a 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -54,7 +54,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q;
}
- lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level),
+ lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level_ms),
lc->target_bandwidth, 1000);
lrc->bits_off_target = lrc->buffer_level;
}
@@ -87,14 +87,14 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi,
}
bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth;
// Update buffer-related quantities.
- lc->starting_buffer_level =
- (int64_t)(oxcf->starting_buffer_level * bitrate_alloc);
- lc->optimal_buffer_level =
- (int64_t)(oxcf->optimal_buffer_level * bitrate_alloc);
- lc->maximum_buffer_size =
- (int64_t)(oxcf->maximum_buffer_size * bitrate_alloc);
- lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size);
- lrc->buffer_level = MIN(lrc->buffer_level, lc->maximum_buffer_size);
+ lrc->starting_buffer_level =
+ (int64_t)(rc->starting_buffer_level * bitrate_alloc);
+ lrc->optimal_buffer_level =
+ (int64_t)(rc->optimal_buffer_level * bitrate_alloc);
+ lrc->maximum_buffer_size =
+ (int64_t)(rc->maximum_buffer_size * bitrate_alloc);
+ lrc->bits_off_target = MIN(lrc->bits_off_target, lrc->maximum_buffer_size);
+ lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size);
// Update framerate-related quantities.
if (svc->number_temporal_layers > 1) {
lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer];
@@ -149,20 +149,7 @@ void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) {
oxcf->two_pass_vbrmin_section / 100);
lrc->max_frame_bandwidth = (int)(((int64_t)lrc->avg_frame_bandwidth *
oxcf->two_pass_vbrmax_section) / 100);
- lrc->max_gf_interval = 16;
-
- lrc->static_scene_max_gf_interval = cpi->oxcf.key_freq >> 1;
-
- if (oxcf->play_alternate && oxcf->lag_in_frames) {
- if (lrc->max_gf_interval > oxcf->lag_in_frames - 1)
- lrc->max_gf_interval = oxcf->lag_in_frames - 1;
-
- if (lrc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
- lrc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
- }
-
- if (lrc->max_gf_interval > lrc->static_scene_max_gf_interval)
- lrc->max_gf_interval = lrc->static_scene_max_gf_interval;
+ vp9_rc_set_gf_max_interval(oxcf, lrc);
}
void vp9_restore_layer_context(VP9_COMP *const cpi) {
@@ -173,9 +160,6 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) {
cpi->rc = lc->rc;
cpi->twopass = lc->twopass;
cpi->oxcf.target_bandwidth = lc->target_bandwidth;
- cpi->oxcf.starting_buffer_level = lc->starting_buffer_level;
- cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level;
- cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size;
// Reset the frames_since_key and frames_to_key counters to their values
// before the layer restore. Keep these defined for the stream (not layer).
if (cpi->svc.number_temporal_layers > 1) {
@@ -191,9 +175,6 @@ void vp9_save_layer_context(VP9_COMP *const cpi) {
lc->rc = cpi->rc;
lc->twopass = cpi->twopass;
lc->target_bandwidth = (int)oxcf->target_bandwidth;
- lc->starting_buffer_level = oxcf->starting_buffer_level;
- lc->optimal_buffer_level = oxcf->optimal_buffer_level;
- lc->maximum_buffer_size = oxcf->maximum_buffer_size;
}
void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) {
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index 6881ce1e7..36e2027fd 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -22,9 +22,6 @@ extern "C" {
typedef struct {
RATE_CONTROL rc;
int target_bandwidth;
- int64_t starting_buffer_level;
- int64_t optimal_buffer_level;
- int64_t maximum_buffer_size;
double framerate;
int avg_frame_size;
TWO_PASS twopass;
diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c
index 02bed8988..eb5ae2e41 100644
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -156,16 +156,15 @@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \
return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \
}
-
-void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride,
- const uint8_t *ref_ptr, int ref_stride,
- unsigned int *sse, int *sum) {
+void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ unsigned int *sse, int *sum) {
variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum);
}
-void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride,
- const uint8_t *ref_ptr, int ref_stride,
- unsigned int *sse, int *sum) {
+void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ unsigned int *sse, int *sum) {
variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum);
}
diff --git a/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
index 8723a7114..28458dcdd 100644
--- a/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
+++ b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
@@ -23,6 +23,7 @@ pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2
pw_%2_m%1: dw %2, -%1, %2, -%1, %2, -%1, %2, -%1
%endmacro
+TRANSFORM_COEFFS 11585, 11585
TRANSFORM_COEFFS 15137, 6270
TRANSFORM_COEFFS 16069, 3196
TRANSFORM_COEFFS 9102, 13623
@@ -83,7 +84,7 @@ SECTION .text
%endmacro
; 1D forward 8x8 DCT transform
-%macro FDCT8_1D 0
+%macro FDCT8_1D 1
SUM_SUB 0, 7, 9
SUM_SUB 1, 6, 9
SUM_SUB 2, 5, 9
@@ -92,14 +93,21 @@ SECTION .text
SUM_SUB 0, 3, 9
SUM_SUB 1, 2, 9
SUM_SUB 6, 5, 9
+%if %1 == 0
SUM_SUB 0, 1, 9
+%endif
BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10
pmulhrsw m6, m12
pmulhrsw m5, m12
+%if %1 == 0
pmulhrsw m0, m12
pmulhrsw m1, m12
+%else
+ BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10
+ SWAP 0, 1
+%endif
SUM_SUB 4, 5, 9
SUM_SUB 7, 6, 9
@@ -150,10 +158,10 @@ cglobal fdct8x8, 3, 5, 13, input, output, stride
psllw m7, 2
; column transform
- FDCT8_1D
+ FDCT8_1D 0
TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
- FDCT8_1D
+ FDCT8_1D 1
TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
DIVIDE_ROUND_2X 0, 1, 9, 10
diff --git a/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm b/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm
index 673e0b3a6..21aaa9383 100644
--- a/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm
+++ b/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm
@@ -43,9 +43,9 @@ sym(vp9_temporal_filter_apply_sse2):
mov [rsp + rbp_backup], rbp
; end prolog
- mov rdx, arg(3)
+ mov edx, arg(3)
mov [rsp + block_width], rdx
- mov rdx, arg(4)
+ mov edx, arg(4)
mov [rsp + block_height], rdx
movd xmm6, arg(5)
movdqa [rsp + strength], xmm6 ; where strength is used, all 16 bytes are read
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index fb0fe58d3..72768e11e 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -331,8 +331,10 @@ static vpx_codec_err_t set_encoder_config(
oxcf->target_bandwidth = 1000 * cfg->rc_target_bitrate;
oxcf->rc_max_intra_bitrate_pct = extra_cfg->rc_max_intra_bitrate_pct;
- oxcf->best_allowed_q = vp9_quantizer_to_qindex(cfg->rc_min_quantizer);
- oxcf->worst_allowed_q = vp9_quantizer_to_qindex(cfg->rc_max_quantizer);
+ oxcf->best_allowed_q =
+ extra_cfg->lossless ? 0 : vp9_quantizer_to_qindex(cfg->rc_min_quantizer);
+ oxcf->worst_allowed_q =
+ extra_cfg->lossless ? 0 : vp9_quantizer_to_qindex(cfg->rc_max_quantizer);
oxcf->cq_level = vp9_quantizer_to_qindex(extra_cfg->cq_level);
oxcf->fixed_q = -1;
@@ -343,9 +345,9 @@ static vpx_codec_err_t set_encoder_config(
oxcf->scaled_frame_width = cfg->rc_scaled_width;
oxcf->scaled_frame_height = cfg->rc_scaled_height;
- oxcf->maximum_buffer_size = cfg->rc_buf_sz;
- oxcf->starting_buffer_level = cfg->rc_buf_initial_sz;
- oxcf->optimal_buffer_level = cfg->rc_buf_optimal_sz;
+ oxcf->maximum_buffer_size_ms = cfg->rc_buf_sz;
+ oxcf->starting_buffer_level_ms = cfg->rc_buf_initial_sz;
+ oxcf->optimal_buffer_level_ms = cfg->rc_buf_optimal_sz;
oxcf->drop_frames_water_mark = cfg->rc_dropframe_thresh;
@@ -376,8 +378,6 @@ static vpx_codec_err_t set_encoder_config(
oxcf->tile_columns = extra_cfg->tile_columns;
oxcf->tile_rows = extra_cfg->tile_rows;
- oxcf->lossless = extra_cfg->lossless;
-
oxcf->error_resilient_mode = cfg->g_error_resilient;
oxcf->frame_parallel_decoding_mode = extra_cfg->frame_parallel_decoding_mode;
@@ -1262,7 +1262,7 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
VPX_VBR, // rc_end_usage
#if VPX_ENCODER_ABI_VERSION > (1 + VPX_CODEC_ABI_VERSION)
- {0}, // rc_twopass_stats_in
+ {NULL, 0}, // rc_twopass_stats_in
#endif
256, // rc_target_bandwidth
0, // rc_min_quantizer
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 734ec4658..48110b414 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -38,7 +38,6 @@ struct vpx_codec_alg_priv {
vpx_decrypt_cb decrypt_cb;
void *decrypt_state;
vpx_image_t img;
- int img_avail;
int invert_tile_order;
// External frame buffer info to save for VP9 common.
@@ -48,10 +47,12 @@ struct vpx_codec_alg_priv {
};
static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx,
- vpx_codec_priv_enc_mr_cfg_t *data) {
+ vpx_codec_priv_enc_mr_cfg_t *data) {
// This function only allocates space for the vpx_codec_alg_priv_t
// structure. More memory may be required at the time the stream
// information becomes known.
+ (void)data;
+
if (!ctx->priv) {
vpx_codec_alg_priv_t *alg_priv = vpx_memalign(32, sizeof(*alg_priv));
if (alg_priv == NULL)
@@ -243,14 +244,11 @@ static void init_decoder(vpx_codec_alg_priv_t *ctx) {
static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
const uint8_t **data, unsigned int data_sz,
void *user_priv, int64_t deadline) {
- YV12_BUFFER_CONFIG sd = { 0 };
vp9_ppflags_t flags = {0};
VP9_COMMON *cm = NULL;
(void)deadline;
- ctx->img_avail = 0;
-
// Determine the stream parameters. Note that we rely on peek_si to
// validate that we have a buffer that does not wrap around the top
// of the heap.
@@ -285,13 +283,6 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
set_ppflags(ctx, &flags);
- if (vp9_get_raw_frame(ctx->pbi, &sd, &flags))
- return update_error_state(ctx, &cm->error);
-
- yuvconfig2image(&ctx->img, &sd, user_priv);
- ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv;
- ctx->img_avail = 1;
-
return VPX_CODEC_OK;
}
@@ -420,15 +411,20 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx,
vpx_codec_iter_t *iter) {
vpx_image_t *img = NULL;
- if (ctx->img_avail) {
- // iter acts as a flip flop, so an image is only returned on the first
- // call to get_frame.
- if (!(*iter)) {
+ // iter acts as a flip flop, so an image is only returned on the first
+ // call to get_frame.
+ if (*iter == NULL && ctx->pbi != NULL) {
+ YV12_BUFFER_CONFIG sd;
+ vp9_ppflags_t flags = {0, 0, 0};
+
+ if (vp9_get_raw_frame(ctx->pbi, &sd, &flags) == 0) {
+ VP9_COMMON *cm = &ctx->pbi->common;
+ yuvconfig2image(&ctx->img, &sd, NULL);
+ ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv;
img = &ctx->img;
*iter = img;
}
}
- ctx->img_avail = 0;
return img;
}
@@ -631,11 +627,12 @@ CODEC_INTERFACE(vpx_codec_vp9_dx) = {
decoder_set_fb_fn, // vpx_codec_set_fb_fn_t
},
{ // NOLINT
- NOT_IMPLEMENTED,
- NOT_IMPLEMENTED,
- NOT_IMPLEMENTED,
- NOT_IMPLEMENTED,
- NOT_IMPLEMENTED,
- NOT_IMPLEMENTED
+ NOT_IMPLEMENTED, // vpx_codec_enc_cfg_map_t
+ NOT_IMPLEMENTED, // vpx_codec_encode_fn_t
+ NOT_IMPLEMENTED, // vpx_codec_get_cx_data_fn_t
+ NOT_IMPLEMENTED, // vpx_codec_enc_config_set_fn_t
+ NOT_IMPLEMENTED, // vpx_codec_get_global_headers_fn_t
+ NOT_IMPLEMENTED, // vpx_codec_get_preview_frame_fn_t
+ NOT_IMPLEMENTED // vpx_codec_enc_mr_get_mem_loc_fn_t
}
};