Diffstat (limited to 'vp9')
-rw-r--r--  vp9/common/vp9_blockd.h | 2
-rw-r--r--  vp9/common/vp9_pragmas.h | 30
-rw-r--r--  vp9/common/vp9_rtcd_defs.pl | 60
-rw-r--r--  vp9/common/x86/vp9_idct_intrin_sse2.c | 140
-rw-r--r--  vp9/common/x86/vp9_idct_intrin_sse2.h | 149
-rw-r--r--  vp9/common/x86/vp9_idct_intrin_ssse3.c | 385
-rw-r--r--  vp9/common/x86/vp9_idct_ssse3_x86_64.asm (renamed from vp9/common/x86/vp9_idct_ssse3.asm) | 0
-rw-r--r--  vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c | 32
-rw-r--r--  vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c | 36
-rw-r--r--  vp9/common/x86/vp9_subpixel_8t_ssse3.asm | 9
-rw-r--r--  vp9/decoder/vp9_decodeframe.c | 74
-rw-r--r--  vp9/decoder/vp9_decodeframe.h | 6
-rw-r--r--  vp9/decoder/vp9_decoder.c | 11
-rw-r--r--  vp9/decoder/vp9_decoder.h | 2
-rw-r--r--  vp9/encoder/vp9_aq_complexity.c | 16
-rw-r--r--  vp9/encoder/vp9_bitstream.c | 1
-rw-r--r--  vp9/encoder/vp9_block.h | 1
-rw-r--r--  vp9/encoder/vp9_encodeframe.c | 48
-rw-r--r--  vp9/encoder/vp9_encodemb.c | 6
-rw-r--r--  vp9/encoder/vp9_encoder.c | 9
-rw-r--r--  vp9/encoder/vp9_firstpass.c | 365
-rw-r--r--  vp9/encoder/vp9_firstpass.h | 16
-rw-r--r--  vp9/encoder/vp9_mbgraph.c | 11
-rw-r--r--  vp9/encoder/vp9_mcomp.c | 65
-rw-r--r--  vp9/encoder/vp9_pickmode.c | 8
-rw-r--r--  vp9/encoder/vp9_ratectrl.c | 61
-rw-r--r--  vp9/encoder/vp9_ratectrl.h | 5
-rw-r--r--  vp9/encoder/vp9_rdopt.c | 78
-rw-r--r--  vp9/encoder/vp9_rdopt.h | 7
-rw-r--r--  vp9/encoder/vp9_sad.c | 12
-rw-r--r--  vp9/encoder/vp9_speed_features.c | 88
-rw-r--r--  vp9/encoder/vp9_speed_features.h | 13
-rw-r--r--  vp9/encoder/vp9_svc_layercontext.c | 31
-rw-r--r--  vp9/encoder/vp9_variance.c | 12
-rw-r--r--  vp9/encoder/vp9_variance.h | 6
-rw-r--r--  vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm (renamed from vp9/encoder/x86/vp9_dct_ssse3.asm) | 0
-rw-r--r--  vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm (renamed from vp9/encoder/x86/vp9_quantize_ssse3.asm) | 0
-rw-r--r--  vp9/encoder/x86/vp9_ssim_opt_x86_64.asm (renamed from vp9/encoder/x86/vp9_ssim_opt.asm) | 0
-rw-r--r--  vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm | 4
-rw-r--r--  vp9/encoder/x86/vp9_variance_avx2.c | 1
-rw-r--r--  vp9/encoder/x86/vp9_variance_mmx.c | 1
-rw-r--r--  vp9/encoder/x86/vp9_variance_sse2.c | 330
-rw-r--r--  vp9/vp9_common.mk | 6
-rw-r--r--  vp9/vp9_cx_iface.c | 274
-rw-r--r--  vp9/vp9_dx_iface.c | 69
-rw-r--r--  vp9/vp9cx.mk | 6
46 files changed, 1372 insertions, 1114 deletions
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 8ca356dd6..9088b0bde 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -228,8 +228,6 @@ typedef struct macroblockd {
DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]);
int lossless;
- /* Inverse transform function pointers. */
- void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob);
int corrupted;
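The deleted itxm_add member was a per-frame function pointer installed by the decoder's setup_quantization() and consulted only for the 4x4 DCT_DCT/lossless case. The vp9_decodeframe.c hunks further down replace it with a direct branch on xd->lossless. A minimal sketch of the before/after dispatch, using only names that appear in this patch:

    /* before: pointer selected once per frame in setup_quantization() */
    xd->itxm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
    /* ... later, per block ... */
    xd->itxm_add(dqcoeff, dst, stride, eob);

    /* after: branch at each inverse-transform call site */
    if (xd->lossless)
      vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
    else
      vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob);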
diff --git a/vp9/common/vp9_pragmas.h b/vp9/common/vp9_pragmas.h
deleted file mode 100644
index 0efc713ca..000000000
--- a/vp9/common/vp9_pragmas.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP9_COMMON_VP9_PRAGMAS_H_
-#define VP9_COMMON_VP9_PRAGMAS_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef __INTEL_COMPILER
-#pragma warning(disable:997 1011 170)
-#endif
-
-#ifdef _MSC_VER
-#pragma warning(disable:4799)
-#endif
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // VP9_COMMON_VP9_PRAGMAS_H_
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 182739620..a18ca8ea6 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -356,7 +356,7 @@ specialize qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/;
$vp9_idct16x16_1_add_neon_asm=vp9_idct16x16_1_add_neon;
add_proto qw/void vp9_idct16x16_256_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct16x16_256_add sse2 neon_asm dspr2/;
+specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/;
$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon;
add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
@@ -431,6 +431,12 @@ specialize qw/vp9_variance8x16 mmx/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance8x8 mmx/, "$sse2_x86inc";
+add_proto qw/void vp9_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+specialize qw/vp9_get8x8var mmx/, "$sse2_x86inc";
+
+add_proto qw/void vp9_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+specialize qw/vp9_get16x16var avx2/, "$sse2_x86inc";
+
add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance8x4/, "$sse2_x86inc";
@@ -520,82 +526,82 @@ specialize qw/vp9_sub_pixel_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
-add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad64x64/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad32x64/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad64x32/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad32x16/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad16x32/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad32x32/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad16x16 mmx/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad16x8 mmx/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad8x16 mmx/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad8x8 mmx/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad8x4/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad4x8/, "$sse_x86inc";
-add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad4x4 mmx/, "$sse_x86inc";
-add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad64x64_avg/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad32x64_avg/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad64x32_avg/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad32x16_avg/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad32x32_avg/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad16x8_avg/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad8x16_avg/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad8x8_avg/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad8x4_avg/, "$sse2_x86inc";
-add_proto qw/unsigned int vp9_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad4x8_avg/, "$sse_x86inc";
-add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad4x4_avg/, "$sse_x86inc";
add_proto qw/void vp9_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
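Two interface changes run through this file. First, the new vp9_get8x8var/vp9_get16x16var prototypes expose the raw SSE and sum of differences so a caller can compose variance as var = sse - sum^2 / N, where N is the pixel count. A hedged sketch of such a wrapper for the 16x16 case (illustrative, not part of this patch):

    static unsigned int variance16x16(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride) {
      unsigned int sse;
      int sum;
      vp9_get16x16var(src, src_stride, ref, ref_stride, &sse, &sum);
      return sse - (((unsigned int)sum * sum) >> 8);  /* N = 16*16 = 2^8 */
    }

Second, every vp9_sadWxH and vp9_sadWxH_avg prototype loses its trailing max_sad argument: the early-termination bound is dropped from the interface, and the functions now always return the full SAD for the block.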
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index 0231726dc..ff9c43221 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -8,12 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include <assert.h>
-#include <emmintrin.h> // SSE2
-#include "./vpx_config.h"
-#include "vpx/vpx_integer.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_idct.h"
+#include "vp9/common/x86/vp9_idct_intrin_sse2.h"
#define RECON_AND_STORE4X4(dest, in_x) \
{ \
@@ -527,16 +522,6 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
out7 = _mm_subs_epi16(stp1_0, stp2_7); \
}
-#define RECON_AND_STORE(dest, in_x) \
- { \
- __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \
- d0 = _mm_unpacklo_epi8(d0, zero); \
- d0 = _mm_add_epi16(in_x, d0); \
- d0 = _mm_packus_epi16(d0, d0); \
- _mm_storel_epi64((__m128i *)(dest), d0); \
- dest += stride; \
- }
-
void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
@@ -627,36 +612,6 @@ void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
RECON_AND_STORE(dest, dc_value);
}
-// perform 8x8 transpose
-static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
- const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);
- const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
- const __m128i tr0_2 = _mm_unpackhi_epi16(in[0], in[1]);
- const __m128i tr0_3 = _mm_unpackhi_epi16(in[2], in[3]);
- const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]);
- const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]);
- const __m128i tr0_6 = _mm_unpackhi_epi16(in[4], in[5]);
- const __m128i tr0_7 = _mm_unpackhi_epi16(in[6], in[7]);
-
- const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
- const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_4, tr0_5);
- const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
- const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_4, tr0_5);
- const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_2, tr0_3);
- const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
- const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_2, tr0_3);
- const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
-
- res[0] = _mm_unpacklo_epi64(tr1_0, tr1_1);
- res[1] = _mm_unpackhi_epi64(tr1_0, tr1_1);
- res[2] = _mm_unpacklo_epi64(tr1_2, tr1_3);
- res[3] = _mm_unpackhi_epi64(tr1_2, tr1_3);
- res[4] = _mm_unpacklo_epi64(tr1_4, tr1_5);
- res[5] = _mm_unpackhi_epi64(tr1_4, tr1_5);
- res[6] = _mm_unpacklo_epi64(tr1_6, tr1_7);
- res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7);
-}
-
static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) {
const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);
const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
@@ -1573,23 +1528,6 @@ void vp9_idct16x16_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
}
}
-static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) {
- __m128i tbuf[8];
- array_transpose_8x8(res0, res0);
- array_transpose_8x8(res1, tbuf);
- array_transpose_8x8(res0 + 8, res1);
- array_transpose_8x8(res1 + 8, res1 + 8);
-
- res0[8] = tbuf[0];
- res0[9] = tbuf[1];
- res0[10] = tbuf[2];
- res0[11] = tbuf[3];
- res0[12] = tbuf[4];
- res0[13] = tbuf[5];
- res0[14] = tbuf[6];
- res0[15] = tbuf[7];
-}
-
static void iadst16_8col(__m128i *in) {
// perform 16x16 1-D ADST for 8 columns
__m128i s[16], x[16], u[32], v[32];
@@ -2416,82 +2354,6 @@ static void iadst16_sse2(__m128i *in0, __m128i *in1) {
iadst16_8col(in1);
}
-static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) {
- in[0] = _mm_load_si128((const __m128i *)(input + 0 * 16));
- in[1] = _mm_load_si128((const __m128i *)(input + 1 * 16));
- in[2] = _mm_load_si128((const __m128i *)(input + 2 * 16));
- in[3] = _mm_load_si128((const __m128i *)(input + 3 * 16));
- in[4] = _mm_load_si128((const __m128i *)(input + 4 * 16));
- in[5] = _mm_load_si128((const __m128i *)(input + 5 * 16));
- in[6] = _mm_load_si128((const __m128i *)(input + 6 * 16));
- in[7] = _mm_load_si128((const __m128i *)(input + 7 * 16));
-
- in[8] = _mm_load_si128((const __m128i *)(input + 8 * 16));
- in[9] = _mm_load_si128((const __m128i *)(input + 9 * 16));
- in[10] = _mm_load_si128((const __m128i *)(input + 10 * 16));
- in[11] = _mm_load_si128((const __m128i *)(input + 11 * 16));
- in[12] = _mm_load_si128((const __m128i *)(input + 12 * 16));
- in[13] = _mm_load_si128((const __m128i *)(input + 13 * 16));
- in[14] = _mm_load_si128((const __m128i *)(input + 14 * 16));
- in[15] = _mm_load_si128((const __m128i *)(input + 15 * 16));
-}
-
-static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) {
- const __m128i final_rounding = _mm_set1_epi16(1<<5);
- const __m128i zero = _mm_setzero_si128();
- // Final rounding and shift
- in[0] = _mm_adds_epi16(in[0], final_rounding);
- in[1] = _mm_adds_epi16(in[1], final_rounding);
- in[2] = _mm_adds_epi16(in[2], final_rounding);
- in[3] = _mm_adds_epi16(in[3], final_rounding);
- in[4] = _mm_adds_epi16(in[4], final_rounding);
- in[5] = _mm_adds_epi16(in[5], final_rounding);
- in[6] = _mm_adds_epi16(in[6], final_rounding);
- in[7] = _mm_adds_epi16(in[7], final_rounding);
- in[8] = _mm_adds_epi16(in[8], final_rounding);
- in[9] = _mm_adds_epi16(in[9], final_rounding);
- in[10] = _mm_adds_epi16(in[10], final_rounding);
- in[11] = _mm_adds_epi16(in[11], final_rounding);
- in[12] = _mm_adds_epi16(in[12], final_rounding);
- in[13] = _mm_adds_epi16(in[13], final_rounding);
- in[14] = _mm_adds_epi16(in[14], final_rounding);
- in[15] = _mm_adds_epi16(in[15], final_rounding);
-
- in[0] = _mm_srai_epi16(in[0], 6);
- in[1] = _mm_srai_epi16(in[1], 6);
- in[2] = _mm_srai_epi16(in[2], 6);
- in[3] = _mm_srai_epi16(in[3], 6);
- in[4] = _mm_srai_epi16(in[4], 6);
- in[5] = _mm_srai_epi16(in[5], 6);
- in[6] = _mm_srai_epi16(in[6], 6);
- in[7] = _mm_srai_epi16(in[7], 6);
- in[8] = _mm_srai_epi16(in[8], 6);
- in[9] = _mm_srai_epi16(in[9], 6);
- in[10] = _mm_srai_epi16(in[10], 6);
- in[11] = _mm_srai_epi16(in[11], 6);
- in[12] = _mm_srai_epi16(in[12], 6);
- in[13] = _mm_srai_epi16(in[13], 6);
- in[14] = _mm_srai_epi16(in[14], 6);
- in[15] = _mm_srai_epi16(in[15], 6);
-
- RECON_AND_STORE(dest, in[0]);
- RECON_AND_STORE(dest, in[1]);
- RECON_AND_STORE(dest, in[2]);
- RECON_AND_STORE(dest, in[3]);
- RECON_AND_STORE(dest, in[4]);
- RECON_AND_STORE(dest, in[5]);
- RECON_AND_STORE(dest, in[6]);
- RECON_AND_STORE(dest, in[7]);
- RECON_AND_STORE(dest, in[8]);
- RECON_AND_STORE(dest, in[9]);
- RECON_AND_STORE(dest, in[10]);
- RECON_AND_STORE(dest, in[11]);
- RECON_AND_STORE(dest, in[12]);
- RECON_AND_STORE(dest, in[13]);
- RECON_AND_STORE(dest, in[14]);
- RECON_AND_STORE(dest, in[15]);
-}
-
void vp9_iht16x16_256_add_sse2(const int16_t *input, uint8_t *dest, int stride,
int tx_type) {
__m128i in0[16], in1[16];
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.h b/vp9/common/x86/vp9_idct_intrin_sse2.h
new file mode 100644
index 000000000..1c62e3272
--- /dev/null
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <emmintrin.h> // SSE2
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_idct.h"
+
+// perform 8x8 transpose
+static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
+ const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);
+ const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
+ const __m128i tr0_2 = _mm_unpackhi_epi16(in[0], in[1]);
+ const __m128i tr0_3 = _mm_unpackhi_epi16(in[2], in[3]);
+ const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]);
+ const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]);
+ const __m128i tr0_6 = _mm_unpackhi_epi16(in[4], in[5]);
+ const __m128i tr0_7 = _mm_unpackhi_epi16(in[6], in[7]);
+
+ const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+ const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+ const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+ const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+ const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_2, tr0_3);
+ const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
+ const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_2, tr0_3);
+ const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
+
+ res[0] = _mm_unpacklo_epi64(tr1_0, tr1_1);
+ res[1] = _mm_unpackhi_epi64(tr1_0, tr1_1);
+ res[2] = _mm_unpacklo_epi64(tr1_2, tr1_3);
+ res[3] = _mm_unpackhi_epi64(tr1_2, tr1_3);
+ res[4] = _mm_unpacklo_epi64(tr1_4, tr1_5);
+ res[5] = _mm_unpackhi_epi64(tr1_4, tr1_5);
+ res[6] = _mm_unpacklo_epi64(tr1_6, tr1_7);
+ res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7);
+}
+
+static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) {
+ __m128i tbuf[8];
+ array_transpose_8x8(res0, res0);
+ array_transpose_8x8(res1, tbuf);
+ array_transpose_8x8(res0 + 8, res1);
+ array_transpose_8x8(res1 + 8, res1 + 8);
+
+ res0[8] = tbuf[0];
+ res0[9] = tbuf[1];
+ res0[10] = tbuf[2];
+ res0[11] = tbuf[3];
+ res0[12] = tbuf[4];
+ res0[13] = tbuf[5];
+ res0[14] = tbuf[6];
+ res0[15] = tbuf[7];
+}
+
+static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) {
+ in[0] = _mm_load_si128((const __m128i *)(input + 0 * 16));
+ in[1] = _mm_load_si128((const __m128i *)(input + 1 * 16));
+ in[2] = _mm_load_si128((const __m128i *)(input + 2 * 16));
+ in[3] = _mm_load_si128((const __m128i *)(input + 3 * 16));
+ in[4] = _mm_load_si128((const __m128i *)(input + 4 * 16));
+ in[5] = _mm_load_si128((const __m128i *)(input + 5 * 16));
+ in[6] = _mm_load_si128((const __m128i *)(input + 6 * 16));
+ in[7] = _mm_load_si128((const __m128i *)(input + 7 * 16));
+
+ in[8] = _mm_load_si128((const __m128i *)(input + 8 * 16));
+ in[9] = _mm_load_si128((const __m128i *)(input + 9 * 16));
+ in[10] = _mm_load_si128((const __m128i *)(input + 10 * 16));
+ in[11] = _mm_load_si128((const __m128i *)(input + 11 * 16));
+ in[12] = _mm_load_si128((const __m128i *)(input + 12 * 16));
+ in[13] = _mm_load_si128((const __m128i *)(input + 13 * 16));
+ in[14] = _mm_load_si128((const __m128i *)(input + 14 * 16));
+ in[15] = _mm_load_si128((const __m128i *)(input + 15 * 16));
+}
+
+#define RECON_AND_STORE(dest, in_x) \
+ { \
+ __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \
+ d0 = _mm_unpacklo_epi8(d0, zero); \
+ d0 = _mm_add_epi16(in_x, d0); \
+ d0 = _mm_packus_epi16(d0, d0); \
+ _mm_storel_epi64((__m128i *)(dest), d0); \
+ dest += stride; \
+ }
+
+static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) {
+ const __m128i final_rounding = _mm_set1_epi16(1<<5);
+ const __m128i zero = _mm_setzero_si128();
+ // Final rounding and shift
+ in[0] = _mm_adds_epi16(in[0], final_rounding);
+ in[1] = _mm_adds_epi16(in[1], final_rounding);
+ in[2] = _mm_adds_epi16(in[2], final_rounding);
+ in[3] = _mm_adds_epi16(in[3], final_rounding);
+ in[4] = _mm_adds_epi16(in[4], final_rounding);
+ in[5] = _mm_adds_epi16(in[5], final_rounding);
+ in[6] = _mm_adds_epi16(in[6], final_rounding);
+ in[7] = _mm_adds_epi16(in[7], final_rounding);
+ in[8] = _mm_adds_epi16(in[8], final_rounding);
+ in[9] = _mm_adds_epi16(in[9], final_rounding);
+ in[10] = _mm_adds_epi16(in[10], final_rounding);
+ in[11] = _mm_adds_epi16(in[11], final_rounding);
+ in[12] = _mm_adds_epi16(in[12], final_rounding);
+ in[13] = _mm_adds_epi16(in[13], final_rounding);
+ in[14] = _mm_adds_epi16(in[14], final_rounding);
+ in[15] = _mm_adds_epi16(in[15], final_rounding);
+
+ in[0] = _mm_srai_epi16(in[0], 6);
+ in[1] = _mm_srai_epi16(in[1], 6);
+ in[2] = _mm_srai_epi16(in[2], 6);
+ in[3] = _mm_srai_epi16(in[3], 6);
+ in[4] = _mm_srai_epi16(in[4], 6);
+ in[5] = _mm_srai_epi16(in[5], 6);
+ in[6] = _mm_srai_epi16(in[6], 6);
+ in[7] = _mm_srai_epi16(in[7], 6);
+ in[8] = _mm_srai_epi16(in[8], 6);
+ in[9] = _mm_srai_epi16(in[9], 6);
+ in[10] = _mm_srai_epi16(in[10], 6);
+ in[11] = _mm_srai_epi16(in[11], 6);
+ in[12] = _mm_srai_epi16(in[12], 6);
+ in[13] = _mm_srai_epi16(in[13], 6);
+ in[14] = _mm_srai_epi16(in[14], 6);
+ in[15] = _mm_srai_epi16(in[15], 6);
+
+ RECON_AND_STORE(dest, in[0]);
+ RECON_AND_STORE(dest, in[1]);
+ RECON_AND_STORE(dest, in[2]);
+ RECON_AND_STORE(dest, in[3]);
+ RECON_AND_STORE(dest, in[4]);
+ RECON_AND_STORE(dest, in[5]);
+ RECON_AND_STORE(dest, in[6]);
+ RECON_AND_STORE(dest, in[7]);
+ RECON_AND_STORE(dest, in[8]);
+ RECON_AND_STORE(dest, in[9]);
+ RECON_AND_STORE(dest, in[10]);
+ RECON_AND_STORE(dest, in[11]);
+ RECON_AND_STORE(dest, in[12]);
+ RECON_AND_STORE(dest, in[13]);
+ RECON_AND_STORE(dest, in[14]);
+ RECON_AND_STORE(dest, in[15]);
+}
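The new header collects, verbatim, the SSE2 helpers that vp9_idct_intrin_sse2.c previously kept as file-local statics, so the SSSE3 file added below can share them. write_buffer_8x16() plus RECON_AND_STORE implement the usual reconstruction step: the second-pass idct output carries six fractional bits, so each value is rounded (add 1 << 5, arithmetic shift right 6) and added to the prediction with unsigned saturation. A scalar model of one pixel (illustrative; clip_pixel() from vp9_common.h clamps to [0, 255], which is what _mm_packus_epi16 does per lane):

    int residual = (coeff + 32) >> 6;          /* round, drop 6 fraction bits */
    dest[x] = clip_pixel(dest[x] + residual);  /* saturating add to prediction */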
diff --git a/vp9/common/x86/vp9_idct_intrin_ssse3.c b/vp9/common/x86/vp9_idct_intrin_ssse3.c
new file mode 100644
index 000000000..0930e7805
--- /dev/null
+++ b/vp9/common/x86/vp9_idct_intrin_ssse3.c
@@ -0,0 +1,385 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#if defined(_MSC_VER) && _MSC_VER <= 1500
+// Need to include math.h before including tmmintrin.h/intrin.h
+// in certain versions of MSVS.
+#include <math.h>
+#endif
+#include <tmmintrin.h> // SSSE3
+#include "vp9/common/x86/vp9_idct_intrin_sse2.h"
+
+static void idct16_8col(__m128i *in, int round) {
+ const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+ const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
+ const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64);
+ const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64);
+ const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64);
+ const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64);
+ const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+ const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64);
+ const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+ const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64);
+ const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64);
+ const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64);
+ const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i k__cospi_p16_p16_x2 = pair_set_epi16(23170, 23170);
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+ const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+
+ __m128i v[16], u[16], s[16], t[16];
+
+ // stage 1
+ s[0] = in[0];
+ s[1] = in[8];
+ s[2] = in[4];
+ s[3] = in[12];
+ s[4] = in[2];
+ s[5] = in[10];
+ s[6] = in[6];
+ s[7] = in[14];
+ s[8] = in[1];
+ s[9] = in[9];
+ s[10] = in[5];
+ s[11] = in[13];
+ s[12] = in[3];
+ s[13] = in[11];
+ s[14] = in[7];
+ s[15] = in[15];
+
+ // stage 2
+ u[0] = _mm_unpacklo_epi16(s[8], s[15]);
+ u[1] = _mm_unpackhi_epi16(s[8], s[15]);
+ u[2] = _mm_unpacklo_epi16(s[9], s[14]);
+ u[3] = _mm_unpackhi_epi16(s[9], s[14]);
+ u[4] = _mm_unpacklo_epi16(s[10], s[13]);
+ u[5] = _mm_unpackhi_epi16(s[10], s[13]);
+ u[6] = _mm_unpacklo_epi16(s[11], s[12]);
+ u[7] = _mm_unpackhi_epi16(s[11], s[12]);
+
+ v[0] = _mm_madd_epi16(u[0], k__cospi_p30_m02);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_p30_m02);
+ v[2] = _mm_madd_epi16(u[0], k__cospi_p02_p30);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p02_p30);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_p14_m18);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_p14_m18);
+ v[6] = _mm_madd_epi16(u[2], k__cospi_p18_p14);
+ v[7] = _mm_madd_epi16(u[3], k__cospi_p18_p14);
+ v[8] = _mm_madd_epi16(u[4], k__cospi_p22_m10);
+ v[9] = _mm_madd_epi16(u[5], k__cospi_p22_m10);
+ v[10] = _mm_madd_epi16(u[4], k__cospi_p10_p22);
+ v[11] = _mm_madd_epi16(u[5], k__cospi_p10_p22);
+ v[12] = _mm_madd_epi16(u[6], k__cospi_p06_m26);
+ v[13] = _mm_madd_epi16(u[7], k__cospi_p06_m26);
+ v[14] = _mm_madd_epi16(u[6], k__cospi_p26_p06);
+ v[15] = _mm_madd_epi16(u[7], k__cospi_p26_p06);
+
+ u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+ u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+ u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+ u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+ u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+ u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING);
+ u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING);
+ u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING);
+ u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING);
+ u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING);
+ u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING);
+ u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING);
+ u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+ u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+ u[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS);
+ u[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS);
+ u[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS);
+ u[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS);
+ u[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS);
+ u[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS);
+ u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS);
+ u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS);
+
+ s[8] = _mm_packs_epi32(u[0], u[1]);
+ s[15] = _mm_packs_epi32(u[2], u[3]);
+ s[9] = _mm_packs_epi32(u[4], u[5]);
+ s[14] = _mm_packs_epi32(u[6], u[7]);
+ s[10] = _mm_packs_epi32(u[8], u[9]);
+ s[13] = _mm_packs_epi32(u[10], u[11]);
+ s[11] = _mm_packs_epi32(u[12], u[13]);
+ s[12] = _mm_packs_epi32(u[14], u[15]);
+
+ // stage 3
+ t[0] = s[0];
+ t[1] = s[1];
+ t[2] = s[2];
+ t[3] = s[3];
+ u[0] = _mm_unpacklo_epi16(s[4], s[7]);
+ u[1] = _mm_unpackhi_epi16(s[4], s[7]);
+ u[2] = _mm_unpacklo_epi16(s[5], s[6]);
+ u[3] = _mm_unpackhi_epi16(s[5], s[6]);
+
+ v[0] = _mm_madd_epi16(u[0], k__cospi_p28_m04);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_p28_m04);
+ v[2] = _mm_madd_epi16(u[0], k__cospi_p04_p28);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p04_p28);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_p12_m20);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_p12_m20);
+ v[6] = _mm_madd_epi16(u[2], k__cospi_p20_p12);
+ v[7] = _mm_madd_epi16(u[3], k__cospi_p20_p12);
+
+ u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+ u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+ u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+ u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+ u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+ u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+
+ t[4] = _mm_packs_epi32(u[0], u[1]);
+ t[7] = _mm_packs_epi32(u[2], u[3]);
+ t[5] = _mm_packs_epi32(u[4], u[5]);
+ t[6] = _mm_packs_epi32(u[6], u[7]);
+ t[8] = _mm_add_epi16(s[8], s[9]);
+ t[9] = _mm_sub_epi16(s[8], s[9]);
+ t[10] = _mm_sub_epi16(s[11], s[10]);
+ t[11] = _mm_add_epi16(s[10], s[11]);
+ t[12] = _mm_add_epi16(s[12], s[13]);
+ t[13] = _mm_sub_epi16(s[12], s[13]);
+ t[14] = _mm_sub_epi16(s[15], s[14]);
+ t[15] = _mm_add_epi16(s[14], s[15]);
+
+ // stage 4
+ u[0] = _mm_add_epi16(t[0], t[1]);
+ u[1] = _mm_sub_epi16(t[0], t[1]);
+ u[2] = _mm_unpacklo_epi16(t[2], t[3]);
+ u[3] = _mm_unpackhi_epi16(t[2], t[3]);
+ u[4] = _mm_unpacklo_epi16(t[9], t[14]);
+ u[5] = _mm_unpackhi_epi16(t[9], t[14]);
+ u[6] = _mm_unpacklo_epi16(t[10], t[13]);
+ u[7] = _mm_unpackhi_epi16(t[10], t[13]);
+
+ s[0] = _mm_mulhrs_epi16(u[0], k__cospi_p16_p16_x2);
+ s[1] = _mm_mulhrs_epi16(u[1], k__cospi_p16_p16_x2);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_p24_m08);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_p24_m08);
+ v[6] = _mm_madd_epi16(u[2], k__cospi_p08_p24);
+ v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24);
+ v[8] = _mm_madd_epi16(u[4], k__cospi_m08_p24);
+ v[9] = _mm_madd_epi16(u[5], k__cospi_m08_p24);
+ v[10] = _mm_madd_epi16(u[4], k__cospi_p24_p08);
+ v[11] = _mm_madd_epi16(u[5], k__cospi_p24_p08);
+ v[12] = _mm_madd_epi16(u[6], k__cospi_m24_m08);
+ v[13] = _mm_madd_epi16(u[7], k__cospi_m24_m08);
+ v[14] = _mm_madd_epi16(u[6], k__cospi_m08_p24);
+ v[15] = _mm_madd_epi16(u[7], k__cospi_m08_p24);
+
+ u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+ u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+ u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+ u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+ u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING);
+ u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING);
+ u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING);
+ u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING);
+ u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING);
+ u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING);
+ u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING);
+ u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING);
+
+ u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+ u[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS);
+ u[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS);
+ u[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS);
+ u[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS);
+ u[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS);
+ u[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS);
+ u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS);
+ u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS);
+
+ s[2] = _mm_packs_epi32(u[4], u[5]);
+ s[3] = _mm_packs_epi32(u[6], u[7]);
+ s[4] = _mm_add_epi16(t[4], t[5]);
+ s[5] = _mm_sub_epi16(t[4], t[5]);
+ s[6] = _mm_sub_epi16(t[7], t[6]);
+ s[7] = _mm_add_epi16(t[6], t[7]);
+ s[8] = t[8];
+ s[15] = t[15];
+ s[9] = _mm_packs_epi32(u[8], u[9]);
+ s[14] = _mm_packs_epi32(u[10], u[11]);
+ s[10] = _mm_packs_epi32(u[12], u[13]);
+ s[13] = _mm_packs_epi32(u[14], u[15]);
+ s[11] = t[11];
+ s[12] = t[12];
+
+ // stage 5
+ t[0] = _mm_add_epi16(s[0], s[3]);
+ t[1] = _mm_add_epi16(s[1], s[2]);
+ t[2] = _mm_sub_epi16(s[1], s[2]);
+ t[3] = _mm_sub_epi16(s[0], s[3]);
+ t[4] = s[4];
+ t[7] = s[7];
+
+ u[0] = _mm_sub_epi16(s[6], s[5]);
+ u[1] = _mm_add_epi16(s[6], s[5]);
+ t[5] = _mm_mulhrs_epi16(u[0], k__cospi_p16_p16_x2);
+ t[6] = _mm_mulhrs_epi16(u[1], k__cospi_p16_p16_x2);
+
+ t[8] = _mm_add_epi16(s[8], s[11]);
+ t[9] = _mm_add_epi16(s[9], s[10]);
+ t[10] = _mm_sub_epi16(s[9], s[10]);
+ t[11] = _mm_sub_epi16(s[8], s[11]);
+ t[12] = _mm_sub_epi16(s[15], s[12]);
+ t[13] = _mm_sub_epi16(s[14], s[13]);
+ t[14] = _mm_add_epi16(s[13], s[14]);
+ t[15] = _mm_add_epi16(s[12], s[15]);
+
+ // stage 6
+ if (round == 1) {
+ s[0] = _mm_add_epi16(t[0], t[7]);
+ s[1] = _mm_add_epi16(t[1], t[6]);
+ s[2] = _mm_add_epi16(t[2], t[5]);
+ s[3] = _mm_add_epi16(t[3], t[4]);
+ s[4] = _mm_sub_epi16(t[3], t[4]);
+ s[5] = _mm_sub_epi16(t[2], t[5]);
+ s[6] = _mm_sub_epi16(t[1], t[6]);
+ s[7] = _mm_sub_epi16(t[0], t[7]);
+ s[8] = t[8];
+ s[9] = t[9];
+
+ u[0] = _mm_unpacklo_epi16(t[10], t[13]);
+ u[1] = _mm_unpackhi_epi16(t[10], t[13]);
+ u[2] = _mm_unpacklo_epi16(t[11], t[12]);
+ u[3] = _mm_unpackhi_epi16(t[11], t[12]);
+
+ v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_m16_p16);
+ v[2] = _mm_madd_epi16(u[0], k__cospi_p16_p16);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p16_p16);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_m16_p16);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_m16_p16);
+ v[6] = _mm_madd_epi16(u[2], k__cospi_p16_p16);
+ v[7] = _mm_madd_epi16(u[3], k__cospi_p16_p16);
+
+ u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+ u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+ u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+ u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+ u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+ u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+
+ s[10] = _mm_packs_epi32(u[0], u[1]);
+ s[13] = _mm_packs_epi32(u[2], u[3]);
+ s[11] = _mm_packs_epi32(u[4], u[5]);
+ s[12] = _mm_packs_epi32(u[6], u[7]);
+ s[14] = t[14];
+ s[15] = t[15];
+ } else {
+ s[0] = _mm_add_epi16(t[0], t[7]);
+ s[1] = _mm_add_epi16(t[1], t[6]);
+ s[2] = _mm_add_epi16(t[2], t[5]);
+ s[3] = _mm_add_epi16(t[3], t[4]);
+ s[4] = _mm_sub_epi16(t[3], t[4]);
+ s[5] = _mm_sub_epi16(t[2], t[5]);
+ s[6] = _mm_sub_epi16(t[1], t[6]);
+ s[7] = _mm_sub_epi16(t[0], t[7]);
+ s[8] = t[8];
+ s[9] = t[9];
+
+ u[0] = _mm_sub_epi16(t[13], t[10]);
+ u[1] = _mm_add_epi16(t[13], t[10]);
+ u[2] = _mm_sub_epi16(t[12], t[11]);
+ u[3] = _mm_add_epi16(t[12], t[11]);
+
+ s[10] = _mm_mulhrs_epi16(u[0], k__cospi_p16_p16_x2);
+ s[13] = _mm_mulhrs_epi16(u[1], k__cospi_p16_p16_x2);
+ s[11] = _mm_mulhrs_epi16(u[2], k__cospi_p16_p16_x2);
+ s[12] = _mm_mulhrs_epi16(u[3], k__cospi_p16_p16_x2);
+ s[14] = t[14];
+ s[15] = t[15];
+ }
+
+ // stage 7
+ in[0] = _mm_add_epi16(s[0], s[15]);
+ in[1] = _mm_add_epi16(s[1], s[14]);
+ in[2] = _mm_add_epi16(s[2], s[13]);
+ in[3] = _mm_add_epi16(s[3], s[12]);
+ in[4] = _mm_add_epi16(s[4], s[11]);
+ in[5] = _mm_add_epi16(s[5], s[10]);
+ in[6] = _mm_add_epi16(s[6], s[9]);
+ in[7] = _mm_add_epi16(s[7], s[8]);
+ in[8] = _mm_sub_epi16(s[7], s[8]);
+ in[9] = _mm_sub_epi16(s[6], s[9]);
+ in[10] = _mm_sub_epi16(s[5], s[10]);
+ in[11] = _mm_sub_epi16(s[4], s[11]);
+ in[12] = _mm_sub_epi16(s[3], s[12]);
+ in[13] = _mm_sub_epi16(s[2], s[13]);
+ in[14] = _mm_sub_epi16(s[1], s[14]);
+ in[15] = _mm_sub_epi16(s[0], s[15]);
+}
+
+static void idct16_sse2(__m128i *in0, __m128i *in1, int round) {
+ array_transpose_16x16(in0, in1);
+ idct16_8col(in0, round);
+ idct16_8col(in1, round);
+}
+
+void vp9_idct16x16_256_add_ssse3(const int16_t *input, uint8_t *dest,
+ int stride) {
+ __m128i in0[16], in1[16];
+
+ load_buffer_8x16(input, in0);
+ input += 8;
+ load_buffer_8x16(input, in1);
+
+ idct16_sse2(in0, in1, 0);
+ idct16_sse2(in0, in1, 1);
+
+ write_buffer_8x16(dest, in0, stride);
+ dest += 8;
+ write_buffer_8x16(dest, in1, stride);
+}
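The SSSE3 win here is _mm_mulhrs_epi16 (PMULHRSW), which folds the multiply, DCT rounding, and shift for the +/-cospi_16_64 butterflies into one 16-bit instruction, with no widening to 32 bits. Per lane it computes (a * b + (1 << 14)) >> 15; with b = k__cospi_p16_p16_x2 = 23170 = 2 * cospi_16_64 that is exactly the madd/add/srai sequence the SSE2 path performs, assuming cospi_16_64 == 11585 and DCT_CONST_BITS == 14 as defined in vp9_idct.h:

    /* identity behind the shortcut, per 16-bit lane:
     *   mulhrs(a, 23170) == (a * 23170 + (1 << 14)) >> 15
     *                    == (a * 11585 + (1 << 13)) >> 14
     *                    == ROUND_POWER_OF_TWO(a * cospi_16_64, DCT_CONST_BITS)
     */

The round argument selects which form stage 6 uses for s[10..13]: the exact 32-bit madd path when round == 1, the mulhrs shortcut when round == 0; vp9_idct16x16_256_add_ssse3 passes 0 for the first pass and 1 for the second. The vp9_rtcd_defs.pl hunk above wires this in as the ssse3 specialization of vp9_idct16x16_256_add.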
diff --git a/vp9/common/x86/vp9_idct_ssse3.asm b/vp9/common/x86/vp9_idct_ssse3_x86_64.asm
index 2c1060710..2c1060710 100644
--- a/vp9/common/x86/vp9_idct_ssse3.asm
+++ b/vp9/common/x86/vp9_idct_ssse3_x86_64.asm
diff --git a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
index b84db970e..d109e136a 100644
--- a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
+++ b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
@@ -111,21 +111,21 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,
// filter the source buffer
srcRegFilt32b1_1= _mm256_shuffle_epi8(srcReg32b1, filt1Reg);
- srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt2Reg);
+ srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt4Reg);
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters);
- srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, secondFilters);
+ srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters);
// add and saturate the results together
srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, srcRegFilt32b2);
// filter the source buffer
- srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b1, filt4Reg);
+ srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b1, filt2Reg);
srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt3Reg);
// multiply 2 adjacent elements with the filter and add the result
- srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, forthFilters);
+ srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);
srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
// add and saturate the results together
@@ -146,21 +146,21 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,
// filter the source buffer
srcRegFilt32b2_1 = _mm256_shuffle_epi8(srcReg32b2, filt1Reg);
- srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt2Reg);
+ srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt4Reg);
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt32b2_1 = _mm256_maddubs_epi16(srcRegFilt32b2_1, firstFilters);
- srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, secondFilters);
+ srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters);
// add and saturate the results together
srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, srcRegFilt32b2);
// filter the source buffer
- srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b2, filt4Reg);
+ srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b2, filt2Reg);
srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b2, filt3Reg);
// multiply 2 adjacent elements with the filter and add the result
- srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, forthFilters);
+ srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);
srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
// add and saturate the results together
@@ -208,26 +208,26 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,
srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1,
_mm256_castsi256_si128(filt1Reg));
srcRegFilt2 = _mm_shuffle_epi8(srcReg1,
- _mm256_castsi256_si128(filt2Reg));
+ _mm256_castsi256_si128(filt4Reg));
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt1_1 = _mm_maddubs_epi16(srcRegFilt1_1,
_mm256_castsi256_si128(firstFilters));
srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2,
- _mm256_castsi256_si128(secondFilters));
+ _mm256_castsi256_si128(forthFilters));
// add and saturate the results together
srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2);
// filter the source buffer
srcRegFilt3= _mm_shuffle_epi8(srcReg1,
- _mm256_castsi256_si128(filt4Reg));
+ _mm256_castsi256_si128(filt2Reg));
srcRegFilt2= _mm_shuffle_epi8(srcReg1,
_mm256_castsi256_si128(filt3Reg));
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3,
- _mm256_castsi256_si128(forthFilters));
+ _mm256_castsi256_si128(secondFilters));
srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2,
_mm256_castsi256_si128(thirdFilters));
@@ -247,26 +247,26 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,
srcRegFilt2_1 = _mm_shuffle_epi8(srcReg2,
_mm256_castsi256_si128(filt1Reg));
srcRegFilt2 = _mm_shuffle_epi8(srcReg2,
- _mm256_castsi256_si128(filt2Reg));
+ _mm256_castsi256_si128(filt4Reg));
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt2_1 = _mm_maddubs_epi16(srcRegFilt2_1,
_mm256_castsi256_si128(firstFilters));
srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2,
- _mm256_castsi256_si128(secondFilters));
+ _mm256_castsi256_si128(forthFilters));
// add and saturate the results together
srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, srcRegFilt2);
// filter the source buffer
srcRegFilt3 = _mm_shuffle_epi8(srcReg2,
- _mm256_castsi256_si128(filt4Reg));
+ _mm256_castsi256_si128(filt2Reg));
srcRegFilt2 = _mm_shuffle_epi8(srcReg2,
_mm256_castsi256_si128(filt3Reg));
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3,
- _mm256_castsi256_si128(forthFilters));
+ _mm256_castsi256_si128(secondFilters));
srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2,
_mm256_castsi256_si128(thirdFilters));
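No filter coefficients change in the AVX2 hunks above; the shuffle/madd pairs are only re-ordered so that the first saturating add combines the two outer tap pairs (filt1Reg/firstFilters for k0,k1 with filt4Reg/forthFilters for k6,k7), and the middle pairs (k2..k5) are folded in afterwards. A sketch of the new pairing in 128-bit intrinsics:

    __m128i outer = _mm_adds_epi16(
        _mm_maddubs_epi16(_mm_shuffle_epi8(src, filt1Reg), firstFilters),   /* k0,k1 */
        _mm_maddubs_epi16(_mm_shuffle_epi8(src, filt4Reg), forthFilters));  /* k6,k7 */

Since VP9's 8-tap sub-pixel kernels concentrate their weight on the middle taps, the outer-tap sum stays far from the int16_t rails; the SSSE3 intrinsics and asm files below add the matching min/max ordering for the two middle products.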
diff --git a/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c b/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
index cf28d8d2b..c4efa6565 100644
--- a/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
+++ b/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
@@ -44,7 +44,7 @@ void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr,
unsigned int output_pitch,
unsigned int output_height,
int16_t *filter) {
- __m128i firstFilters, secondFilters, thirdFilters, forthFilters;
+ __m128i firstFilters, secondFilters, shuffle1, shuffle2;
__m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4;
__m128i addFilterReg64, filtersReg, srcReg, minReg;
unsigned int i;
@@ -61,20 +61,22 @@ void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr,
// duplicate only the third 16 bit in the filter into the first lane
secondFilters = _mm_shufflelo_epi16(filtersReg, 0xAAu);
// duplicate only the second 16 bits in the filter into the second lane
+ // firstFilters: k0 k1 k0 k1 k0 k1 k0 k1 k2 k3 k2 k3 k2 k3 k2 k3
firstFilters = _mm_shufflehi_epi16(firstFilters, 0x55u);
// duplicate only the fourth 16 bits in the filter into the second lane
+ // secondFilters: k4 k5 k4 k5 k4 k5 k4 k5 k6 k7 k6 k7 k6 k7 k6 k7
secondFilters = _mm_shufflehi_epi16(secondFilters, 0xFFu);
// loading the local filters
- thirdFilters =_mm_load_si128((__m128i const *)filt1_4_h8);
- forthFilters = _mm_load_si128((__m128i const *)filt2_4_h8);
+ shuffle1 =_mm_load_si128((__m128i const *)filt1_4_h8);
+ shuffle2 = _mm_load_si128((__m128i const *)filt2_4_h8);
for (i = 0; i < output_height; i++) {
srcReg = _mm_loadu_si128((__m128i *)(src_ptr-3));
// filter the source buffer
- srcRegFilt1= _mm_shuffle_epi8(srcReg, thirdFilters);
- srcRegFilt2= _mm_shuffle_epi8(srcReg, forthFilters);
+ srcRegFilt1= _mm_shuffle_epi8(srcReg, shuffle1);
+ srcRegFilt2= _mm_shuffle_epi8(srcReg, shuffle2);
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
@@ -164,12 +166,12 @@ void vp9_filter_block1d8_h8_intrin_ssse3(unsigned char *src_ptr,
srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, forthFilters);
// add and saturate all the results together
- minReg = _mm_min_epi16(srcRegFilt4, srcRegFilt3);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2);
+ minReg = _mm_min_epi16(srcRegFilt2, srcRegFilt3);
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4);
- srcRegFilt4= _mm_max_epi16(srcRegFilt4, srcRegFilt3);
+ srcRegFilt2= _mm_max_epi16(srcRegFilt2, srcRegFilt3);
srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4);
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2);
srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
// shift by 7 bit each 16 bits
@@ -229,21 +231,21 @@ void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr,
// filter the source buffer
srcRegFilt1_1= _mm_shuffle_epi8(srcReg1, filt1Reg);
- srcRegFilt2= _mm_shuffle_epi8(srcReg1, filt2Reg);
+ srcRegFilt2= _mm_shuffle_epi8(srcReg1, filt4Reg);
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt1_1 = _mm_maddubs_epi16(srcRegFilt1_1, firstFilters);
- srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters);
+ srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, forthFilters);
// add and saturate the results together
srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2);
// filter the source buffer
- srcRegFilt3= _mm_shuffle_epi8(srcReg1, filt4Reg);
+ srcRegFilt3= _mm_shuffle_epi8(srcReg1, filt2Reg);
srcRegFilt2= _mm_shuffle_epi8(srcReg1, filt3Reg);
// multiply 2 adjacent elements with the filter and add the result
- srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, forthFilters);
+ srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters);
srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters);
// add and saturate the results together
@@ -260,21 +262,21 @@ void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr,
// filter the source buffer
srcRegFilt2_1= _mm_shuffle_epi8(srcReg2, filt1Reg);
- srcRegFilt2= _mm_shuffle_epi8(srcReg2, filt2Reg);
+ srcRegFilt2= _mm_shuffle_epi8(srcReg2, filt4Reg);
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt2_1 = _mm_maddubs_epi16(srcRegFilt2_1, firstFilters);
- srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters);
+ srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, forthFilters);
// add and saturate the results together
srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, srcRegFilt2);
// filter the source buffer
- srcRegFilt3= _mm_shuffle_epi8(srcReg2, filt4Reg);
+ srcRegFilt3= _mm_shuffle_epi8(srcReg2, filt2Reg);
srcRegFilt2= _mm_shuffle_epi8(srcReg2, filt3Reg);
// multiply 2 adjacent elements with the filter and add the result
- srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, forthFilters);
+ srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters);
srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters);
// add and saturate the results together
diff --git a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
index 634fa7746..fd781d4bc 100644
--- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
+++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
@@ -272,22 +272,23 @@
punpcklbw xmm2, xmm3 ;C D
punpcklbw xmm4, xmm5 ;E F
-
movq xmm6, [rsi + rbx + 8] ;G
movq xmm7, [rax + rbx + 8] ;H
punpcklbw xmm6, xmm7 ;G H
-
pmaddubsw xmm0, k0k1
pmaddubsw xmm2, k2k3
pmaddubsw xmm4, k4k5
pmaddubsw xmm6, k6k7
paddsw xmm0, xmm6
- paddsw xmm0, xmm2
+ movdqa xmm1, xmm2
+ pmaxsw xmm2, xmm4
+ pminsw xmm4, xmm1
paddsw xmm0, xmm4
- paddsw xmm0, krd
+ paddsw xmm0, xmm2
+ paddsw xmm0, krd
psraw xmm0, 7
packuswb xmm0, xmm0
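The intrinsics and asm changes in these last two files are the same fix: with saturating adds, a + b + c + d depends on evaluation order, so the filters now sum the outer-tap products first and then add the min and the max of the two middle products, in that order, keeping the running total away from the saturation rails until the large positive middle term lands. A C-intrinsics model of the asm block above (names illustrative; k0k1..k6k7 stand for the four pmaddubsw results):

    __m128i mid_lo = _mm_min_epi16(k2k3, k4k5);   /* pminsw */
    __m128i mid_hi = _mm_max_epi16(k2k3, k4k5);   /* pmaxsw */
    __m128i sum    = _mm_adds_epi16(k0k1, k6k7);  /* small outer taps first */
    sum = _mm_adds_epi16(sum, mid_lo);            /* then the smaller middle term */
    sum = _mm_adds_epi16(sum, mid_hi);            /* then the larger one */
    sum = _mm_adds_epi16(sum, krd);               /* rounding constant, then psraw 7 */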
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index de58939fc..fc70035f2 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -195,30 +195,32 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
struct macroblockd_plane *const pd = &xd->plane[plane];
if (eob > 0) {
TX_TYPE tx_type;
- const PLANE_TYPE plane_type = pd->plane_type;
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- switch (tx_size) {
- case TX_4X4:
- tx_type = get_tx_type_4x4(plane_type, xd, block);
- if (tx_type == DCT_DCT)
- xd->itxm_add(dqcoeff, dst, stride, eob);
- else
- vp9_iht4x4_16_add(dqcoeff, dst, stride, tx_type);
- break;
- case TX_8X8:
- tx_type = get_tx_type(plane_type, xd);
- vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob);
- break;
- case TX_16X16:
- tx_type = get_tx_type(plane_type, xd);
- vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob);
- break;
- case TX_32X32:
- tx_type = DCT_DCT;
- vp9_idct32x32_add(dqcoeff, dst, stride, eob);
- break;
- default:
- assert(0 && "Invalid transform size");
+ if (xd->lossless) {
+ tx_type = DCT_DCT;
+ vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
+ } else {
+ const PLANE_TYPE plane_type = pd->plane_type;
+ switch (tx_size) {
+ case TX_4X4:
+ tx_type = get_tx_type_4x4(plane_type, xd, block);
+ vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob);
+ break;
+ case TX_8X8:
+ tx_type = get_tx_type(plane_type, xd);
+ vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob);
+ break;
+ case TX_16X16:
+ tx_type = get_tx_type(plane_type, xd);
+ vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob);
+ break;
+ case TX_32X32:
+ tx_type = DCT_DCT;
+ vp9_idct32x32_add(dqcoeff, dst, stride, eob);
+ break;
+ default:
+ assert(0 && "Invalid transform size");
+ }
}
if (eob == 1) {
@@ -588,8 +590,6 @@ static void setup_quantization(VP9_COMMON *const cm, MACROBLOCKD *const xd,
cm->y_dc_delta_q == 0 &&
cm->uv_dc_delta_q == 0 &&
cm->uv_ac_delta_q == 0;
-
- xd->itxm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
}
static INTERP_FILTER read_interp_filter(struct vp9_read_bit_buffer *rb) {
@@ -802,7 +802,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
CHECK_MEM_ERROR(
cm,
pbi->tile_data,
- vpx_malloc(tile_cols * tile_rows * (sizeof(*pbi->tile_data))));
+ vpx_memalign(32, tile_cols * tile_rows * (sizeof(*pbi->tile_data))));
pbi->total_tiles = tile_rows * tile_cols;
}
@@ -1317,16 +1317,15 @@ static struct vp9_read_bit_buffer* init_read_bit_buffer(
return rb;
}
-int vp9_decode_frame(VP9Decoder *pbi,
- const uint8_t *data, const uint8_t *data_end,
- const uint8_t **p_data_end) {
+void vp9_decode_frame(VP9Decoder *pbi,
+ const uint8_t *data, const uint8_t *data_end,
+ const uint8_t **p_data_end) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
struct vp9_read_bit_buffer rb = { 0 };
uint8_t clear_data[MAX_VP9_HEADER_SIZE];
const size_t first_partition_size = read_uncompressed_header(pbi,
init_read_bit_buffer(pbi, &rb, data, data_end, clear_data));
- const int keyframe = cm->frame_type == KEY_FRAME;
const int tile_rows = 1 << cm->log2_tile_rows;
const int tile_cols = 1 << cm->log2_tile_cols;
YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
@@ -1335,12 +1334,9 @@ int vp9_decode_frame(VP9Decoder *pbi,
if (!first_partition_size) {
// showing a frame directly
*p_data_end = data + 1;
- return 0;
+ return;
}
- if (!pbi->decoded_key_frame && !keyframe)
- return -1;
-
data += vp9_rb_bytes_read(&rb);
if (!read_is_valid(data, first_partition_size, data_end))
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
@@ -1377,14 +1373,6 @@ int vp9_decode_frame(VP9Decoder *pbi,
new_fb->corrupted |= xd->corrupted;
- if (!pbi->decoded_key_frame) {
- if (keyframe && !new_fb->corrupted)
- pbi->decoded_key_frame = 1;
- else
- vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
- "A stream must start with a complete key frame");
- }
-
if (!new_fb->corrupted) {
if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
vp9_adapt_coef_probs(cm);
@@ -1400,6 +1388,4 @@ int vp9_decode_frame(VP9Decoder *pbi,
if (cm->refresh_frame_context)
cm->frame_contexts[cm->frame_context_idx] = cm->fc;
-
- return 0;
}
diff --git a/vp9/decoder/vp9_decodeframe.h b/vp9/decoder/vp9_decodeframe.h
index 8a19dafc5..fb15645a9 100644
--- a/vp9/decoder/vp9_decodeframe.h
+++ b/vp9/decoder/vp9_decodeframe.h
@@ -21,9 +21,9 @@ struct VP9Decoder;
void vp9_init_dequantizer(struct VP9Common *cm);
-int vp9_decode_frame(struct VP9Decoder *pbi,
- const uint8_t *data, const uint8_t *data_end,
- const uint8_t **p_data_end);
+void vp9_decode_frame(struct VP9Decoder *pbi,
+ const uint8_t *data, const uint8_t *data_end,
+ const uint8_t **p_data_end);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 8902f179d..13d79ff44 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -67,7 +67,6 @@ VP9Decoder *vp9_decoder_create() {
cm->current_video_frame = 0;
pbi->ready_for_new_data = 1;
- pbi->decoded_key_frame = 0;
// vp9_init_dequantizer() is first called here. Add check in
// frame_init_dequantizer() to avoid unnecessary calling of
@@ -267,15 +266,7 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,
cm->error.setjmp = 1;
- retcode = vp9_decode_frame(pbi, source, source + size, psource);
-
- if (retcode < 0) {
- cm->error.error_code = VPX_CODEC_ERROR;
- cm->error.setjmp = 0;
- if (cm->frame_bufs[cm->new_fb_idx].ref_count > 0)
- cm->frame_bufs[cm->new_fb_idx].ref_count--;
- return retcode;
- }
+ vp9_decode_frame(pbi, source, source + size, psource);
swap_frame_buffers(pbi);
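Dropping the return code is safe because decode failures are reported through vpx_internal_error(), which longjmp()s back into the caller's setjmp() guard. A simplified sketch of the surrounding pattern in vp9_receive_compressed_data(), with the buffer-release details elided:

  if (setjmp(cm->error.jmp)) {
    /* vpx_internal_error() lands here; cm->error.error_code is set. */
    cm->error.setjmp = 0;
    return -1;
  }
  cm->error.setjmp = 1;
  vp9_decode_frame(pbi, source, source + size, psource);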
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index 1a5576e5a..a6edf0cbd 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -43,8 +43,6 @@ typedef struct VP9Decoder {
int refresh_frame_flags;
- int decoded_key_frame;
-
VP9Worker lf_worker;
VP9Worker *tile_workers;
int num_tile_workers;
diff --git a/vp9/encoder/vp9_aq_complexity.c b/vp9/encoder/vp9_aq_complexity.c
index 47ad8d8cc..0d6b41d15 100644
--- a/vp9/encoder/vp9_aq_complexity.c
+++ b/vp9/encoder/vp9_aq_complexity.c
@@ -47,11 +47,21 @@ void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) {
// Use some of the segments for in frame Q adjustment.
for (segment = 1; segment < 2; segment++) {
- const int qindex_delta =
+ int qindex_delta =
vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex,
in_frame_q_adj_ratio[segment]);
- vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q);
- vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta);
+
+ // For AQ mode 2, we don't allow Q0 in a segment if the base Q is not 0.
+ // Q0 (lossless) implies 4x4 only, and in AQ mode 2 a segment Q delta
+ // is sometimes applied without going back around the rd loop.
+ // This could lead to an illegal combination of partition size and q.
+ if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) {
+ qindex_delta = -cm->base_qindex + 1;
+ }
+ if ((cm->base_qindex + qindex_delta) > 0) {
+ vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q);
+ vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta);
+ }
}
}
}
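A worked example of the clamp above, with hypothetical numbers:

  #include <stdio.h>

  int main(void) {
    int base_qindex = 20;
    int qindex_delta = -20;              /* would put the segment at Q0 */
    if (base_qindex != 0 && base_qindex + qindex_delta == 0)
      qindex_delta = -base_qindex + 1;   /* adjusted to -19 */
    printf("delta %d, segment q %d\n",   /* delta -19, segment q 1 */
           qindex_delta, base_qindex + qindex_delta);
    return 0;
  }

The segment lands on qindex 1 instead of the lossless qindex 0, and the "greater than 0" guard still enables the segment feature.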
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 8ef2b2eed..76f5e7bbe 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -20,7 +20,6 @@
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_mvref_common.h"
-#include "vp9/common/vp9_pragmas.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_systemdependent.h"
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index c406860a0..c3cd93b78 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -109,6 +109,7 @@ struct macroblock {
MV pred_mv[MAX_REF_FRAMES];
void (*fwd_txm4x4)(const int16_t *input, int16_t *output, int stride);
+ void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob);
};
#ifdef __cplusplus
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index ef33fcaf1..37fb0f3c6 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -76,18 +76,6 @@ typedef struct {
unsigned int var;
} diff;
-static void get_sse_sum_8x8(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse, int *sum) {
- variance(src, src_stride, ref, ref_stride, 8, 8, sse, sum);
-}
-
-static void get_sse_sum_16x16(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse, int *sum) {
- variance(src, src_stride, ref, ref_stride, 16, 16, sse, sum);
-}
-
static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,
const struct buf_2d *ref,
BLOCK_SIZE bs) {
@@ -490,8 +478,8 @@ static void choose_partitioning(VP9_COMP *cpi,
unsigned int sse = 0;
int sum = 0;
if (x_idx < pixels_wide && y_idx < pixels_high)
- get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,
- d + y_idx * dp + x_idx, dp, &sse, &sum);
+ vp9_get8x8var(s + y_idx * sp + x_idx, sp,
+ d + y_idx * dp + x_idx, dp, &sse, &sum);
fill_variance(sse, sum, 64, &vst->split[k].part_variances.none);
}
}
@@ -1226,9 +1214,9 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
int b_offset = b_mi_row * MI_SIZE * src_stride +
b_mi_col * MI_SIZE;
- get_sse_sum_16x16(src + b_offset, src_stride,
- pre_src + b_offset, pre_stride,
- &d16[j].sse, &d16[j].sum);
+ vp9_get16x16var(src + b_offset, src_stride,
+ pre_src + b_offset, pre_stride,
+ &d16[j].sse, &d16[j].sum);
d16[j].var = d16[j].sse -
(((uint32_t)d16[j].sum * d16[j].sum) >> 8);
@@ -1303,14 +1291,14 @@ static int is_background(VP9_COMP *cpi, const TileInfo *const tile,
if (row8x8_remaining >= MI_BLOCK_SIZE &&
col8x8_remaining >= MI_BLOCK_SIZE) {
this_sad = cpi->fn_ptr[BLOCK_64X64].sdf(src, src_stride,
- pre, pre_stride, 0x7fffffff);
+ pre, pre_stride);
threshold = (1 << 12);
} else {
int r, c;
for (r = 0; r < row8x8_remaining; r += 2)
for (c = 0; c < col8x8_remaining; c += 2)
- this_sad += cpi->fn_ptr[BLOCK_16X16].sdf(src, src_stride, pre,
- pre_stride, 0x7fffffff);
+ this_sad += cpi->fn_ptr[BLOCK_16X16].sdf(src, src_stride,
+ pre, pre_stride);
threshold = (row8x8_remaining * col8x8_remaining) << 6;
}
@@ -1318,7 +1306,8 @@ static int is_background(VP9_COMP *cpi, const TileInfo *const tile,
return x->in_static_area;
}
-static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) {
+static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8,
+ const int motion_thresh) {
const int mis = cm->mi_stride;
int block_row, block_col;
@@ -1327,8 +1316,8 @@ static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) {
for (block_col = 0; block_col < 8; ++block_col) {
const MODE_INFO *prev_mi = prev_mi_8x8[block_row * mis + block_col];
if (prev_mi) {
- if (abs(prev_mi->mbmi.mv[0].as_mv.row) >= 8 ||
- abs(prev_mi->mbmi.mv[0].as_mv.col) >= 8)
+ if (abs(prev_mi->mbmi.mv[0].as_mv.row) > motion_thresh ||
+ abs(prev_mi->mbmi.mv[0].as_mv.col) > motion_thresh)
return 1;
}
}
@@ -2324,7 +2313,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
|| cpi->rc.is_src_frame_alt_ref
|| ((sf->use_lastframe_partitioning ==
LAST_FRAME_PARTITION_LOW_MOTION) &&
- sb_has_motion(cm, prev_mi))) {
+ sb_has_motion(cm, prev_mi, sf->lf_motion_threshold))) {
// If required set upper and lower partition size limits
if (sf->auto_min_max_partition_size) {
set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
@@ -2337,7 +2326,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
cpi->pc_root);
} else {
if (sf->constrain_copy_partition &&
- sb_has_motion(cm, prev_mi))
+ sb_has_motion(cm, prev_mi, sf->lf_motion_threshold))
constrain_copy_partitioning(cpi, tile, mi, prev_mi,
mi_row, mi_col, BLOCK_16X16);
else
@@ -2384,7 +2373,7 @@ static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
if (lossless) {
// printf("Switching to lossless\n");
cpi->mb.fwd_txm4x4 = vp9_fwht4x4;
- cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add;
+ cpi->mb.itxm_add = vp9_iwht4x4_add;
cpi->mb.optimize = 0;
cpi->common.lf.filter_level = 0;
cpi->zbin_mode_boost_enabled = 0;
@@ -2392,7 +2381,7 @@ static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
} else {
// printf("Not lossless\n");
cpi->mb.fwd_txm4x4 = vp9_fdct4x4;
- cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add;
+ cpi->mb.itxm_add = vp9_idct4x4_add;
}
}
@@ -2869,7 +2858,7 @@ static void nonrd_use_partition(VP9_COMP *cpi,
if (mi_row + hbs < cm->mi_rows) {
nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col,
&rate, &dist, subsize);
- pc_tree->horizontal[1].mic.mbmi = mi[0]->mbmi;
+ pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
if (rate != INT_MAX && dist != INT64_MAX &&
*totrate != INT_MAX && *totdist != INT64_MAX) {
*totrate += rate;
@@ -3368,7 +3357,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8));
} else {
mbmi->skip = 1;
- if (output_enabled)
+ if (output_enabled &&
+ !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
cm->counts.skip[vp9_get_skip_context(xd)][1]++;
reset_skip_context(xd, MAX(bsize, BLOCK_8X8));
}
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 3b231b7f2..8581e6117 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -406,7 +406,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
// this is like vp9_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
// case.
- xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
+ x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
break;
default:
assert(0 && "Invalid transform size");
@@ -428,7 +428,7 @@ static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
if (p->eobs[block] > 0)
- xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
+ x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
}
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
@@ -574,7 +574,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
// this is like vp9_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
// case.
- xd->itxm_add(dqcoeff, dst, dst_stride, *eob);
+ x->itxm_add(dqcoeff, dst, dst_stride, *eob);
else
vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
}
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 645503103..c955d27c0 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -602,9 +602,9 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
// is set.
cpi->oxcf.worst_allowed_q = 0;
cpi->oxcf.best_allowed_q = 0;
- cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add;
+ cpi->mb.itxm_add = vp9_iwht4x4_add;
} else {
- cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add;
+ cpi->mb.itxm_add = vp9_idct4x4_add;
}
rc->baseline_gf_interval = DEFAULT_GF_INTERVAL;
cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
@@ -632,9 +632,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
cpi->oxcf.maximum_buffer_size = 240000;
}
- // Convert target bandwidth from Kbit/s to Bit/s
- cpi->oxcf.target_bandwidth *= 1000;
-
cpi->oxcf.starting_buffer_level =
vp9_rescale(cpi->oxcf.starting_buffer_level,
cpi->oxcf.target_bandwidth, 1000);
@@ -2103,8 +2100,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
vp9_clear_system_state();
- vp9_zero(cpi->rd.tx_select_threshes);
-
#if CONFIG_VP9_POSTPROC
if (cpi->oxcf.noise_sensitivity > 0) {
int l = 0;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index fce5b28f3..dc3832b16 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -64,8 +64,7 @@
#define MIN_GF_INTERVAL 4
#endif
-
-// #define LONG_TERM_VBR_CORRECTION
+#define LONG_TERM_VBR_CORRECTION
static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) {
YV12_BUFFER_CONFIG temp = *a;
@@ -170,7 +169,6 @@ static void zero_stats(FIRSTPASS_STATS *section) {
section->intra_error = 0.0;
section->coded_error = 0.0;
section->sr_coded_error = 0.0;
- section->ssim_weighted_pred_err = 0.0;
section->pcnt_inter = 0.0;
section->pcnt_motion = 0.0;
section->pcnt_second_ref = 0.0;
@@ -195,7 +193,6 @@ static void accumulate_stats(FIRSTPASS_STATS *section,
section->intra_error += frame->intra_error;
section->coded_error += frame->coded_error;
section->sr_coded_error += frame->sr_coded_error;
- section->ssim_weighted_pred_err += frame->ssim_weighted_pred_err;
section->pcnt_inter += frame->pcnt_inter;
section->pcnt_motion += frame->pcnt_motion;
section->pcnt_second_ref += frame->pcnt_second_ref;
@@ -218,7 +215,6 @@ static void subtract_stats(FIRSTPASS_STATS *section,
section->intra_error -= frame->intra_error;
section->coded_error -= frame->coded_error;
section->sr_coded_error -= frame->sr_coded_error;
- section->ssim_weighted_pred_err -= frame->ssim_weighted_pred_err;
section->pcnt_inter -= frame->pcnt_inter;
section->pcnt_motion -= frame->pcnt_motion;
section->pcnt_second_ref -= frame->pcnt_second_ref;
@@ -242,7 +238,6 @@ static void avg_stats(FIRSTPASS_STATS *section) {
section->intra_error /= section->count;
section->coded_error /= section->count;
section->sr_coded_error /= section->count;
- section->ssim_weighted_pred_err /= section->count;
section->pcnt_inter /= section->count;
section->pcnt_second_ref /= section->count;
section->pcnt_neutral /= section->count;
@@ -259,86 +254,18 @@ static void avg_stats(FIRSTPASS_STATS *section) {
// Calculate a modified Error used in distributing bits between easier and
// harder frames.
-static double calculate_modified_err(const VP9_COMP *cpi,
+static double calculate_modified_err(const TWO_PASS *twopass,
+ const VP9EncoderConfig *oxcf,
const FIRSTPASS_STATS *this_frame) {
- const TWO_PASS *twopass = &cpi->twopass;
- const SVC *const svc = &cpi->svc;
- const FIRSTPASS_STATS *stats;
- double av_err;
- double modified_error;
-
- if (svc->number_spatial_layers > 1 &&
- svc->number_temporal_layers == 1) {
- twopass = &svc->layer_context[svc->spatial_layer_id].twopass;
- }
-
- stats = &twopass->total_stats;
- av_err = stats->ssim_weighted_pred_err / stats->count;
- modified_error = av_err * pow(this_frame->ssim_weighted_pred_err /
- DOUBLE_DIVIDE_CHECK(av_err),
- cpi->oxcf.two_pass_vbrbias / 100.0);
-
+ const FIRSTPASS_STATS *const stats = &twopass->total_stats;
+ const double av_err = stats->coded_error / stats->count;
+ const double modified_error = av_err *
+ pow(this_frame->coded_error / DOUBLE_DIVIDE_CHECK(av_err),
+ oxcf->two_pass_vbrbias / 100.0);
return fclamp(modified_error,
twopass->modified_error_min, twopass->modified_error_max);
}
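With ssim_weighted_pred_err removed, the bias now acts on coded_error directly. In the code's notation:

  \mathrm{modified\_err} = \mathrm{clamp}\!\left(\bar{e}\,\Big(\tfrac{e}{\bar{e}}\Big)^{b/100},\; e_{\min},\; e_{\max}\right), \qquad \bar{e} = \frac{\texttt{total\_stats.coded\_error}}{\texttt{total\_stats.count}}

where e is the frame's coded_error and b is two_pass_vbrbias. A bias of 100 reproduces the raw error, while smaller values pull each frame's score toward the average, flattening the bit distribution.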
-static const double weight_table[256] = {
- 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000,
- 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000,
- 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000,
- 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000,
- 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.031250, 0.062500,
- 0.093750, 0.125000, 0.156250, 0.187500, 0.218750, 0.250000, 0.281250,
- 0.312500, 0.343750, 0.375000, 0.406250, 0.437500, 0.468750, 0.500000,
- 0.531250, 0.562500, 0.593750, 0.625000, 0.656250, 0.687500, 0.718750,
- 0.750000, 0.781250, 0.812500, 0.843750, 0.875000, 0.906250, 0.937500,
- 0.968750, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
- 1.000000, 1.000000, 1.000000, 1.000000
-};
-
-static double simple_weight(const YV12_BUFFER_CONFIG *buf) {
- int i, j;
- double sum = 0.0;
- const int w = buf->y_crop_width;
- const int h = buf->y_crop_height;
- const uint8_t *row = buf->y_buffer;
-
- for (i = 0; i < h; ++i) {
- const uint8_t *pixel = row;
- for (j = 0; j < w; ++j)
- sum += weight_table[*pixel++];
- row += buf->y_stride;
- }
-
- return MAX(0.1, sum / (w * h));
-}
-
// This function returns the maximum target rate per frame.
static int frame_max_bits(const RATE_CONTROL *rc,
const VP9EncoderConfig *oxcf) {
@@ -533,6 +460,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
TWO_PASS *twopass = &cpi->twopass;
const MV zero_mv = {0, 0};
const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
+ FIRSTPASS_STATS fps;
vp9_clear_system_state();
@@ -830,14 +758,11 @@ void vp9_first_pass(VP9_COMP *cpi) {
vp9_clear_system_state();
{
- FIRSTPASS_STATS fps;
-
fps.frame = cm->current_video_frame;
fps.spatial_layer_id = cpi->svc.spatial_layer_id;
fps.intra_error = (double)(intra_error >> 8);
fps.coded_error = (double)(coded_error >> 8);
fps.sr_coded_error = (double)(sr_coded_error >> 8);
- fps.ssim_weighted_pred_err = fps.coded_error * simple_weight(cpi->Source);
fps.count = 1.0;
fps.pcnt_inter = (double)intercount / cm->MBs;
fps.pcnt_second_ref = (double)second_ref_count / cm->MBs;
@@ -871,8 +796,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
fps.duration = (double)(cpi->source->ts_end - cpi->source->ts_start);
-    // Don't want to do output stats with a stack variable!
- twopass->this_frame_stats = fps;
- output_stats(&twopass->this_frame_stats, cpi->output_pkt_list);
+ output_stats(&fps, cpi->output_pkt_list);
accumulate_stats(&twopass->total_stats, &fps);
}
@@ -880,9 +804,9 @@ void vp9_first_pass(VP9_COMP *cpi) {
// the prediction is good enough... but also don't allow it to lag too far.
if ((twopass->sr_update_lag > 3) ||
((cm->current_video_frame > 0) &&
- (twopass->this_frame_stats.pcnt_inter > 0.20) &&
- ((twopass->this_frame_stats.intra_error /
- DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) {
+ (fps.pcnt_inter > 0.20) &&
+ ((fps.intra_error /
+ DOUBLE_DIVIDE_CHECK(fps.coded_error)) > 2.0))) {
if (gld_yv12 != NULL) {
vp8_yv12_copy_frame(lst_yv12, gld_yv12);
}
@@ -1043,25 +967,19 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
// Scan the first pass file and calculate a modified total error based upon
// the bias/power function used to allocate bits.
{
- const FIRSTPASS_STATS *const start_pos = twopass->stats_in;
- FIRSTPASS_STATS this_frame;
- const double av_error = stats->ssim_weighted_pred_err /
- DOUBLE_DIVIDE_CHECK(stats->count);
-
-
- twopass->modified_error_total = 0.0;
- twopass->modified_error_min =
- (av_error * oxcf->two_pass_vbrmin_section) / 100;
- twopass->modified_error_max =
- (av_error * oxcf->two_pass_vbrmax_section) / 100;
-
- while (input_stats(twopass, &this_frame) != EOF) {
- twopass->modified_error_total +=
- calculate_modified_err(cpi, &this_frame);
+ const double avg_error = stats->coded_error /
+ DOUBLE_DIVIDE_CHECK(stats->count);
+ const FIRSTPASS_STATS *s = twopass->stats_in;
+ double modified_error_total = 0.0;
+ twopass->modified_error_min = (avg_error *
+ oxcf->two_pass_vbrmin_section) / 100;
+ twopass->modified_error_max = (avg_error *
+ oxcf->two_pass_vbrmax_section) / 100;
+ while (s < twopass->stats_in_end) {
+ modified_error_total += calculate_modified_err(twopass, oxcf, s);
+ ++s;
}
- twopass->modified_error_left = twopass->modified_error_total;
-
- reset_fpf_position(twopass, start_pos);
+ twopass->modified_error_left = modified_error_total;
}
// Reset the vbr bits off target counter
@@ -1182,16 +1100,17 @@ static void accumulate_frame_motion_stats(
}
// Calculate a baseline boost number for the current frame.
-static double calc_frame_boost(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame,
+static double calc_frame_boost(const TWO_PASS *twopass,
+ const FIRSTPASS_STATS *this_frame,
double this_frame_mv_in_out) {
double frame_boost;
// Underlying boost factor is based on inter intra error ratio.
- if (this_frame->intra_error > cpi->twopass.gf_intra_err_min)
+ if (this_frame->intra_error > twopass->gf_intra_err_min)
frame_boost = (IIFACTOR * this_frame->intra_error /
DOUBLE_DIVIDE_CHECK(this_frame->coded_error));
else
- frame_boost = (IIFACTOR * cpi->twopass.gf_intra_err_min /
+ frame_boost = (IIFACTOR * twopass->gf_intra_err_min /
DOUBLE_DIVIDE_CHECK(this_frame->coded_error));
  // Increase boost for frames where new data is coming into the frame
  // (e.g. zoom out).
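In the code's notation, the baseline boost above is the intra/inter error ratio with a floor on the intra term:

  \mathrm{frame\_boost} = \mathrm{IIFACTOR} \cdot \frac{\max(E_{\mathrm{intra}},\ \texttt{gf\_intra\_err\_min})}{E_{\mathrm{coded}}}

so frames that are much cheaper to predict than to intra-code earn a larger boost; the mv-in-out adjustment noted above then increases it further for zoom-out-like motion.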
@@ -1244,8 +1163,8 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
? MIN_DECAY_FACTOR : decay_accumulator;
}
- boost_score += (decay_accumulator *
- calc_frame_boost(cpi, &this_frame, this_frame_mv_in_out));
+ boost_score += decay_accumulator * calc_frame_boost(twopass, &this_frame,
+ this_frame_mv_in_out);
}
*f_boost = (int)boost_score;
@@ -1281,8 +1200,8 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
? MIN_DECAY_FACTOR : decay_accumulator;
}
- boost_score += (decay_accumulator *
- calc_frame_boost(cpi, &this_frame, this_frame_mv_in_out));
+ boost_score += decay_accumulator * calc_frame_boost(twopass, &this_frame,
+ this_frame_mv_in_out);
}
*b_boost = (int)boost_score;
@@ -1500,6 +1419,66 @@ static int calculate_boost_bits(int frame_count,
return MAX((int)(((int64_t)boost * total_group_bits) / allocation_chunks), 0);
}
+static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
+ double group_error, int gf_arf_bits) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+ TWO_PASS *twopass = &cpi->twopass;
+ FIRSTPASS_STATS frame_stats;
+ int i;
+ int group_frame_index = 1;
+ int target_frame_size;
+ int key_frame;
+ const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf);
+ int64_t total_group_bits = gf_group_bits;
+ double modified_err = 0.0;
+ double err_fraction;
+
+ key_frame = cpi->common.frame_type == KEY_FRAME ||
+ vp9_is_upper_layer_key_frame(cpi);
+
+ // For key frames the frame target rate is already set and the key
+ // frame is also the golden frame.
+ // NOTE: We don't bother to check for the special case of ARF overlay
+ // frames here, as there is clamping code for this in the function
+ // vp9_rc_clamp_pframe_target_size(), which applies to one and two pass
+ // encodes.
+ if (!key_frame) {
+ twopass->gf_group_bit_allocation[0] = gf_arf_bits;
+
+ // Step over the golden frame / overlay frame
+ if (EOF == input_stats(twopass, &frame_stats))
+ return;
+ }
+
+ // Store the bits to spend on the ARF if there is one.
+ if (rc->source_alt_ref_pending) {
+ twopass->gf_group_bit_allocation[group_frame_index++] = gf_arf_bits;
+ }
+
+ // Deduct the boost bits for arf or gf if it is not a key frame.
+ if (rc->source_alt_ref_pending || !key_frame)
+ total_group_bits -= gf_arf_bits;
+
+ // Allocate bits to the other frames in the group.
+ for (i = 0; i < rc->baseline_gf_interval - 1; ++i) {
+ if (EOF == input_stats(twopass, &frame_stats))
+ break;
+
+ modified_err = calculate_modified_err(twopass, oxcf, &frame_stats);
+
+ if (group_error > 0)
+ err_fraction = modified_err / DOUBLE_DIVIDE_CHECK(group_error);
+ else
+ err_fraction = 0.0;
+
+ target_frame_size = (int)((double)total_group_bits * err_fraction);
+ target_frame_size = clamp(target_frame_size, 0,
+ MIN(max_bits, (int)total_group_bits));
+
+ twopass->gf_group_bit_allocation[group_frame_index++] = target_frame_size;
+ }
+}
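A worked pass through the allocation loop above, with hypothetical numbers: a group of three ordinary frames with modified errors 2.0, 1.0 and 1.0, a group error of 4.0, and 400000 bits left after the boost deduction.

  #include <stdio.h>

  int main(void) {
    const double err[3] = { 2.0, 1.0, 1.0 };
    const double group_error = 4.0;
    const long long total_group_bits = 400000;
    int i;
    for (i = 0; i < 3; ++i) {
      const double err_fraction = err[i] / group_error;
      /* Prints 200000, 100000, 100000 (max_bits clamp omitted). */
      printf("frame %d: %d bits\n", i,
             (int)(total_group_bits * err_fraction));
    }
    return 0;
  }

Each frame's share of the group budget is simply its share of the group's modified error.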
// Analyse and define a gf/arf group.
static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
@@ -1509,6 +1488,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
FIRSTPASS_STATS next_frame;
const FIRSTPASS_STATS *const start_pos = twopass->stats_in;
int i;
+
double boost_score = 0.0;
double old_boost_score = 0.0;
double gf_group_err = 0.0;
@@ -1532,14 +1512,24 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
int b_boost = 0;
int flash_detected;
int active_max_gf_interval;
+ int64_t gf_group_bits;
+ double gf_group_error_left;
+ int gf_arf_bits;
+
+ // Reset the GF group data structures unless this is a key
+ // frame, in which case it will already have been done.
+ if (cpi->common.frame_type != KEY_FRAME) {
+ twopass->gf_group_index = 0;
+ vp9_zero(twopass->gf_group_bit_allocation);
+ }
vp9_clear_system_state();
vp9_zero(next_frame);
- twopass->gf_group_bits = 0;
+ gf_group_bits = 0;
// Load stats for the current frame.
- mod_frame_err = calculate_modified_err(cpi, this_frame);
+ mod_frame_err = calculate_modified_err(twopass, oxcf, this_frame);
// Note the error of the frame at the start of the group. This will be
// the GF frame error if we code a normal gf.
@@ -1571,7 +1561,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
++i;
// Accumulate error score of frames in this gf group.
- mod_frame_err = calculate_modified_err(cpi, this_frame);
+ mod_frame_err = calculate_modified_err(twopass, oxcf, this_frame);
gf_group_err += mod_frame_err;
if (EOF == input_stats(twopass, &next_frame))
@@ -1610,12 +1600,12 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
// Calculate a boost number for this frame.
- boost_score += (decay_accumulator *
- calc_frame_boost(cpi, &next_frame, this_frame_mv_in_out));
+ boost_score += decay_accumulator * calc_frame_boost(twopass, &next_frame,
+ this_frame_mv_in_out);
// Break out conditions.
if (
- // Break at cpi->max_gf_interval unless almost totally static.
+ // Break at active_max_gf_interval unless almost totally static.
(i >= active_max_gf_interval && (zero_motion_accumulator < 0.995)) ||
(
// Don't break out with a very short interval.
@@ -1646,7 +1636,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
break;
if (i < rc->frames_to_key) {
- mod_frame_err = calculate_modified_err(cpi, this_frame);
+ mod_frame_err = calculate_modified_err(twopass, oxcf, this_frame);
gf_group_err += mod_frame_err;
}
}
@@ -1742,7 +1732,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
reset_fpf_position(twopass, start_pos);
// Calculate the bits to be allocated to the gf/arf group as a whole
- twopass->gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);
+ gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);
// Calculate the extra bits to be used for boosted frame(s)
{
@@ -1753,19 +1743,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
boost = clamp(boost, 125, (rc->baseline_gf_interval + 1) * 200);
// Calculate the extra bits to be used for boosted frame(s)
- twopass->gf_bits = calculate_boost_bits(rc->baseline_gf_interval,
- boost, twopass->gf_group_bits);
-
-
- // For key frames the frame target rate is set already.
- // NOTE: We dont bother to check for the special case of ARF overlay
- // frames here, as there is clamping code for this in the function
- // vp9_rc_clamp_pframe_target_size(), which applies to one and two pass
- // encodes.
- if (cpi->common.frame_type != KEY_FRAME &&
- !vp9_is_upper_layer_key_frame(cpi)) {
- vp9_rc_set_frame_target(cpi, twopass->gf_bits);
- }
+ gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval,
+ boost, gf_group_bits);
}
// Adjust KF group bits and error remaining.
@@ -1778,14 +1757,19 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// For normal GFs remove the score for the GF itself unless this is
// also a key frame in which case it has already been accounted for.
if (rc->source_alt_ref_pending) {
- twopass->gf_group_error_left = (int64_t)(gf_group_err - mod_frame_err);
+ gf_group_error_left = gf_group_err - mod_frame_err;
} else if (cpi->common.frame_type != KEY_FRAME) {
- twopass->gf_group_error_left = (int64_t)(gf_group_err
- - gf_first_frame_err);
+ gf_group_error_left = gf_group_err - gf_first_frame_err;
} else {
- twopass->gf_group_error_left = (int64_t)gf_group_err;
+ gf_group_error_left = gf_group_err;
}
+ // Allocate bits to each of the frames in the GF group.
+ allocate_gf_group_bits(cpi, gf_group_bits, gf_group_error_left, gf_arf_bits);
+
+ // Reset the file position.
+ reset_fpf_position(twopass, start_pos);
+
// Calculate a section intra ratio used in setting max loop filter.
if (cpi->common.frame_type != KEY_FRAME) {
twopass->section_intra_rating =
@@ -1794,37 +1778,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
}
-// Allocate bits to a normal frame that is neither a gf an arf or a key frame.
-static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
- TWO_PASS *twopass = &cpi->twopass;
- // For a single frame.
- const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf);
- // Calculate modified prediction error used in bit allocation.
- const double modified_err = calculate_modified_err(cpi, this_frame);
- int target_frame_size;
- double err_fraction;
-
- if (twopass->gf_group_error_left > 0)
- // What portion of the remaining GF group error is used by this frame.
- err_fraction = modified_err / twopass->gf_group_error_left;
- else
- err_fraction = 0.0;
-
- // How many of those bits available for allocation should we give it?
- target_frame_size = (int)((double)twopass->gf_group_bits * err_fraction);
-
- // Clip target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at
- // the top end.
- target_frame_size = clamp(target_frame_size, 0,
- MIN(max_bits, (int)twopass->gf_group_bits));
-
- // Adjust error and bits remaining.
- twopass->gf_group_error_left -= (int64_t)modified_err;
-
- // Per frame bit target for this frame.
- vp9_rc_set_frame_target(cpi, target_frame_size);
-}
-
static int test_candidate_kf(TWO_PASS *twopass,
const FIRSTPASS_STATS *last_frame,
const FIRSTPASS_STATS *this_frame,
@@ -1906,10 +1859,12 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
int i, j;
RATE_CONTROL *const rc = &cpi->rc;
TWO_PASS *const twopass = &cpi->twopass;
+ const VP9EncoderConfig *const oxcf = &cpi->oxcf;
const FIRSTPASS_STATS first_frame = *this_frame;
const FIRSTPASS_STATS *const start_position = twopass->stats_in;
FIRSTPASS_STATS next_frame;
FIRSTPASS_STATS last_frame;
+ int kf_bits = 0;
double decay_accumulator = 1.0;
double zero_motion_accumulator = 1.0;
double boost_score = 0.0;
@@ -1921,6 +1876,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
cpi->common.frame_type = KEY_FRAME;
+ // Reset the GF group data structures.
+ twopass->gf_group_index = 0;
+ vp9_zero(twopass->gf_group_bit_allocation);
+
// Is this a key frame forced by interval?
rc->this_key_frame_forced = rc->next_key_frame_forced;
@@ -1935,14 +1894,14 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->kf_group_bits = 0; // Total bits available to kf group
twopass->kf_group_error_left = 0; // Group modified error score.
- kf_mod_err = calculate_modified_err(cpi, this_frame);
+ kf_mod_err = calculate_modified_err(twopass, oxcf, this_frame);
// Find the next keyframe.
i = 0;
while (twopass->stats_in < twopass->stats_in_end &&
rc->frames_to_key < cpi->oxcf.key_freq) {
// Accumulate kf group error.
- kf_group_err += calculate_modified_err(cpi, this_frame);
+ kf_group_err += calculate_modified_err(twopass, oxcf, this_frame);
// Load the next frame's stats.
last_frame = *this_frame;
@@ -2004,7 +1963,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Rescan to get the correct error data for the forced kf group.
for (i = 0; i < rc->frames_to_key; ++i) {
- kf_group_err += calculate_modified_err(cpi, &tmp_frame);
+ kf_group_err += calculate_modified_err(twopass, oxcf, &tmp_frame);
input_stats(twopass, &tmp_frame);
}
rc->next_key_frame_forced = 1;
@@ -2018,7 +1977,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Special case for the last key frame of the file.
if (twopass->stats_in >= twopass->stats_in_end) {
// Accumulate kf group error.
- kf_group_err += calculate_modified_err(cpi, this_frame);
+ kf_group_err += calculate_modified_err(twopass, oxcf, this_frame);
}
// Calculate the number of bits that should be assigned to the kf group.
@@ -2086,6 +2045,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
}
+ reset_fpf_position(twopass, start_position);
+
// Store the zero motion percentage
twopass->kf_zeromotion_pct = (int)(zero_motion_accumulator * 100.0);
@@ -2102,13 +2063,13 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
if (rc->kf_boost < MIN_KF_BOOST)
rc->kf_boost = MIN_KF_BOOST;
- twopass->kf_bits = calculate_boost_bits((rc->frames_to_key - 1),
- rc->kf_boost, twopass->kf_group_bits);
+ kf_bits = calculate_boost_bits((rc->frames_to_key - 1),
+ rc->kf_boost, twopass->kf_group_bits);
- twopass->kf_group_bits -= twopass->kf_bits;
+ twopass->kf_group_bits -= kf_bits;
- // Per frame bit target for this frame.
- vp9_rc_set_frame_target(cpi, twopass->kf_bits);
+ // Save the bits to spend on the key frame.
+ twopass->gf_group_bit_allocation[0] = kf_bits;
// Note the total error score of the kf group minus the key frame itself.
twopass->kf_group_error_left = (int)(kf_group_err - kf_mod_err);
@@ -2144,7 +2105,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
FIRSTPASS_STATS this_frame;
FIRSTPASS_STATS this_frame_copy;
- int target;
+ int target_rate;
LAYER_CONTEXT *lc = NULL;
const int is_spatial_svc = (cpi->use_svc &&
cpi->svc.number_temporal_layers == 1);
@@ -2160,16 +2121,23 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
if (!twopass->stats_in)
return;
+ // Increment the gf group index.
+ ++twopass->gf_group_index;
+
+ // If this is an arf frame then we don't want to read the stats file or
+ // advance the input pointer as we already have what we need.
if (cpi->refresh_alt_ref_frame) {
- int modified_target = twopass->gf_bits;
- rc->base_frame_target = twopass->gf_bits;
- cm->frame_type = INTER_FRAME;
+ int target_rate;
+ target_rate = twopass->gf_group_bit_allocation[twopass->gf_group_index];
+ target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate);
+ rc->base_frame_target = target_rate;
#ifdef LONG_TERM_VBR_CORRECTION
// Correction to rate target based on prior over or under shoot.
if (cpi->oxcf.rc_mode == RC_MODE_VBR)
- vbr_rate_correction(&modified_target, rc->vbr_bits_off_target);
+ vbr_rate_correction(&target_rate, rc->vbr_bits_off_target);
#endif
- vp9_rc_set_frame_target(cpi, modified_target);
+ vp9_rc_set_frame_target(cpi, target_rate);
+ cm->frame_type = INTER_FRAME;
return;
}
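Together with allocate_gf_group_bits(), rate control is now table-driven: group definition precomputes every frame's share up front, and each call here just consumes the next entry. A condensed sketch, with the index bookkeeping simplified:

  /* At group definition time: */
  twopass->gf_group_index = 0;
  twopass->gf_group_bit_allocation[0] = gf_arf_bits;  /* GF/KF share */
  /* ...per-frame shares fill the subsequent entries... */

  /* Per encoded frame: */
  ++twopass->gf_group_index;
  target_rate = twopass->gf_group_bit_allocation[twopass->gf_group_index];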
@@ -2197,11 +2165,13 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
if (EOF == input_stats(twopass, &this_frame))
return;
+ // Local copy of the current frame's first pass stats.
+ this_frame_copy = this_frame;
+
// Keyframe and section processing.
if (rc->frames_to_key == 0 ||
(cpi->frame_flags & FRAMEFLAGS_KEY)) {
// Define next KF group and assign bits to it.
- this_frame_copy = this_frame;
find_next_key_frame(cpi, &this_frame_copy);
} else {
cm->frame_type = INTER_FRAME;
@@ -2220,11 +2190,8 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
}
}
- // Is this frame a GF / ARF? (Note: a key frame is always also a GF).
+ // Define a new GF/ARF group. (Should always enter here for key frames).
if (rc->frames_till_gf_update_due == 0) {
- // Define next gf group and assign bits to it.
- this_frame_copy = this_frame;
-
#if CONFIG_MULTIPLE_ARF
if (cpi->multi_arf_enabled) {
define_fixed_arf_period(cpi);
@@ -2247,11 +2214,6 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
cpi->refresh_golden_frame = 1;
- } else {
- // Otherwise this is an ordinary frame.
- // Assign bits from those allocated to the GF group.
- this_frame_copy = this_frame;
- assign_std_frame_bits(cpi, &this_frame_copy);
}
{
@@ -2262,18 +2224,19 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
}
}
+ target_rate = twopass->gf_group_bit_allocation[twopass->gf_group_index];
if (cpi->common.frame_type == KEY_FRAME)
- target = vp9_rc_clamp_iframe_target_size(cpi, rc->this_frame_target);
+ target_rate = vp9_rc_clamp_iframe_target_size(cpi, target_rate);
else
- target = vp9_rc_clamp_pframe_target_size(cpi, rc->this_frame_target);
+ target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate);
- rc->base_frame_target = target;
+ rc->base_frame_target = target_rate;
#ifdef LONG_TERM_VBR_CORRECTION
// Correction to rate target based on prior over or under shoot.
if (cpi->oxcf.rc_mode == RC_MODE_VBR)
- vbr_rate_correction(&target, rc->vbr_bits_off_target);
+ vbr_rate_correction(&target_rate, rc->vbr_bits_off_target);
#endif
- vp9_rc_set_frame_target(cpi, target);
+ vp9_rc_set_frame_target(cpi, target_rate);
// Update the total stats remaining structure.
subtract_stats(&twopass->total_left_stats, &this_frame);
@@ -2322,8 +2285,6 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) {
} else {
#endif
twopass->kf_group_bits -= bits_used;
- twopass->gf_group_bits -= bits_used;
- twopass->gf_group_bits = MAX(twopass->gf_group_bits, 0);
}
twopass->kf_group_bits = MAX(twopass->kf_group_bits, 0);
}
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index d84793e28..c89cfaf8d 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -11,6 +11,8 @@
#ifndef VP9_ENCODER_VP9_FIRSTPASS_H_
#define VP9_ENCODER_VP9_FIRSTPASS_H_
+#include "vp9/encoder/vp9_lookahead.h"
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -42,7 +44,6 @@ typedef struct {
unsigned int section_intra_rating;
unsigned int next_iiratio;
FIRSTPASS_STATS total_stats;
- FIRSTPASS_STATS this_frame_stats;
const FIRSTPASS_STATS *stats_in;
const FIRSTPASS_STATS *stats_in_start;
const FIRSTPASS_STATS *stats_in_end;
@@ -51,31 +52,24 @@ typedef struct {
int64_t bits_left;
double modified_error_min;
double modified_error_max;
- double modified_error_total;
double modified_error_left;
double kf_intra_err_min;
double gf_intra_err_min;
- int kf_bits;
- // Remaining error from uncoded frames in a gf group. Two pass use only
- int64_t gf_group_error_left;
// Projected total bits available for a key frame group of frames
int64_t kf_group_bits;
// Error score of frames still to be coded in kf group
int64_t kf_group_error_left;
-
- // Projected Bits available for a group of frames including 1 GF or ARF
- int64_t gf_group_bits;
- // Bits for the golden frame or ARF - 2 pass only
- int gf_bits;
-
int sr_update_lag;
int kf_zeromotion_pct;
int gf_zeromotion_pct;
int active_worst_quality;
+
+ int gf_group_index;
+ int gf_group_bit_allocation[MAX_LAG_BUFFERS * 2];
} TWO_PASS;
struct VP9_COMP;
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 041e583fd..842bc5b9d 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -72,8 +72,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
x->mv_row_max = tmp_row_max;
return vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
- xd->plane[0].dst.buf, xd->plane[0].dst.stride,
- INT_MAX);
+ xd->plane[0].dst.buf, xd->plane[0].dst.stride);
}
static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv,
@@ -86,8 +85,7 @@ static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv,
// Try zero MV first
// FIXME should really use something like near/nearest MV and/or MV prediction
err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
- xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride,
- INT_MAX);
+ xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride);
dst_mv->as_int = 0;
// Test last reference frame using the previous best mv as the
@@ -123,8 +121,7 @@ static int do_16x16_zerozero_search(VP9_COMP *cpi, int_mv *dst_mv) {
// Try zero MV first
// FIXME should really use something like near/nearest MV and/or MV prediction
err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
- xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride,
- INT_MAX);
+ xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride);
dst_mv->as_int = 0;
@@ -147,7 +144,7 @@ static int find_best_16x16_intra(VP9_COMP *cpi, PREDICTION_MODE *pbest_mode) {
xd->plane[0].dst.buf, xd->plane[0].dst.stride,
0, 0, 0);
err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
- xd->plane[0].dst.buf, xd->plane[0].dst.stride, best_err);
+ xd->plane[0].dst.buf, xd->plane[0].dst.stride);
// find best
if (err < best_err) {
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 4f7d6f17c..dbd19a2d6 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -524,9 +524,8 @@ static int vp9_pattern_search(const MACROBLOCK *x,
// Work out the start point for the search
bestsad = vfp->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, ref_mv), in_what->stride,
- 0x7fffffff) + mvsad_err_cost(x, ref_mv, &fcenter_mv,
- sad_per_bit);
+ get_buf_from_mv(in_what, ref_mv), in_what->stride) +
+ mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
// Search all possible scales up to the search param around the center point
// pick the scale of the point that is best as the starting scale of
@@ -542,7 +541,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
bc + candidates[t][i].col};
thissad = vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv),
- in_what->stride, bestsad);
+ in_what->stride);
CHECK_BETTER
}
} else {
@@ -553,7 +552,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
continue;
thissad = vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv),
- in_what->stride, bestsad);
+ in_what->stride);
CHECK_BETTER
}
}
@@ -585,7 +584,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
bc + candidates[s][i].col};
thissad = vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv),
- in_what->stride, bestsad);
+ in_what->stride);
CHECK_BETTER
}
} else {
@@ -596,7 +595,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
continue;
thissad = vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv),
- in_what->stride, bestsad);
+ in_what->stride);
CHECK_BETTER
}
}
@@ -623,7 +622,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
bc + candidates[s][next_chkpts_indices[i]].col};
thissad = vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv),
- in_what->stride, bestsad);
+ in_what->stride);
CHECK_BETTER
}
} else {
@@ -634,7 +633,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
continue;
thissad = vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv),
- in_what->stride, bestsad);
+ in_what->stride);
CHECK_BETTER
}
}
@@ -661,7 +660,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
bc + neighbors[i].col};
thissad = vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv),
- in_what->stride, bestsad);
+ in_what->stride);
CHECK_BETTER
}
} else {
@@ -672,7 +671,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
continue;
thissad = vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv),
- in_what->stride, bestsad);
+ in_what->stride);
CHECK_BETTER
}
}
@@ -894,8 +893,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x,
*best_mv = *ref_mv;
*num00 = 11;
best_sad = fn_ptr->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, ref_mv), in_what->stride,
- 0x7fffffff) +
+ get_buf_from_mv(in_what, ref_mv), in_what->stride) +
mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
start_row = MAX(-range, x->mv_row_min - ref_mv->row);
start_col = MAX(-range, x->mv_col_min - ref_mv->col);
@@ -929,7 +927,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x,
for (i = 0; i < end_col - c; ++i) {
const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &mv), in_what->stride, best_sad);
+ get_buf_from_mv(in_what, &mv), in_what->stride);
if (sad < best_sad) {
sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
if (sad < best_sad) {
@@ -975,7 +973,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
// Check the starting position
best_sad = fn_ptr->sdf(what->buf, what->stride,
- best_address, in_what->stride, 0x7fffffff) +
+ best_address, in_what->stride) +
mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
i = 1;
@@ -986,8 +984,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
best_mv->col + ss[i].mv.col};
if (is_mv_in(x, &mv)) {
int sad = fn_ptr->sdf(what->buf, what->stride,
- best_address + ss[i].offset, in_what->stride,
- best_sad);
+ best_address + ss[i].offset, in_what->stride);
if (sad < best_sad) {
sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
if (sad < best_sad) {
@@ -1012,7 +1009,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
if (is_mv_in(x, &this_mv)) {
int sad = fn_ptr->sdf(what->buf, what->stride,
best_address + ss[best_site].offset,
- in_what->stride, best_sad);
+ in_what->stride);
if (sad < best_sad) {
sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
if (sad < best_sad) {
@@ -1077,7 +1074,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
best_address = in_what;
// Check the starting position
- bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
+ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
+ mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
i = 1;
@@ -1129,7 +1126,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
if (is_mv_in(x, &this_mv)) {
const uint8_t *const check_here = ss[i].offset + best_address;
unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
- in_what_stride, bestsad);
+ in_what_stride);
if (thissad < bestsad) {
thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
@@ -1154,7 +1151,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
if (is_mv_in(x, &this_mv)) {
const uint8_t *const check_here = ss[best_site].offset + best_address;
unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
- in_what_stride, bestsad);
+ in_what_stride);
if (thissad < bestsad) {
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
mvjsadcost, mvsadcost, sad_per_bit);
@@ -1253,7 +1250,7 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
int best_sad = fn_ptr->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
+ get_buf_from_mv(in_what, ref_mv), in_what->stride) +
mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
*best_mv = *ref_mv;
@@ -1261,7 +1258,7 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
for (c = col_min; c < col_max; ++c) {
const MV mv = {r, c};
const int sad = fn_ptr->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) +
+ get_buf_from_mv(in_what, &mv), in_what->stride) +
mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
if (sad < best_sad) {
best_sad = sad;
@@ -1286,7 +1283,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
+ get_buf_from_mv(in_what, ref_mv), in_what->stride) +
mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
*best_mv = *ref_mv;
@@ -1320,7 +1317,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
while (c < col_max) {
unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
- check_here, in_what->stride, best_sad);
+ check_here, in_what->stride);
if (sad < best_sad) {
const MV mv = {r, c};
sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
@@ -1351,7 +1348,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
+ get_buf_from_mv(in_what, ref_mv), in_what->stride) +
mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
*best_mv = *ref_mv;
@@ -1409,7 +1406,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
while (c < col_max) {
unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
- check_here, in_what->stride, best_sad);
+ check_here, in_what->stride);
if (sad < best_sad) {
const MV mv = {r, c};
sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
@@ -1438,7 +1435,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x,
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, ref_mv),
- in_what->stride, 0x7fffffff) +
+ in_what->stride) +
mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
int i, j;
@@ -1450,7 +1447,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x,
ref_mv->col + neighbors[j].col};
if (is_mv_in(x, &mv)) {
unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &mv), in_what->stride, best_sad);
+ get_buf_from_mv(in_what, &mv), in_what->stride);
if (sad < best_sad) {
sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
if (sad < best_sad) {
@@ -1483,7 +1480,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x,
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address,
- in_what->stride, 0x7fffffff) +
+ in_what->stride) +
mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
int i, j;
@@ -1524,7 +1521,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x,
if (is_mv_in(x, &mv)) {
unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &mv),
- in_what->stride, best_sad);
+ in_what->stride);
if (sad < best_sad) {
sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
if (sad < best_sad) {
@@ -1563,8 +1560,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride,
- get_buf_from_mv(in_what, ref_mv), in_what->stride,
- second_pred, 0x7fffffff) +
+ get_buf_from_mv(in_what, ref_mv), in_what->stride, second_pred) +
mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
int i, j;
@@ -1577,8 +1573,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
if (is_mv_in(x, &mv)) {
unsigned int sad = fn_ptr->sdaf(what->buf, what->stride,
- get_buf_from_mv(in_what, &mv), in_what->stride,
- second_pred, best_sad);
+ get_buf_from_mv(in_what, &mv), in_what->stride, second_pred);
if (sad < best_sad) {
sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
if (sad < best_sad) {
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 437b68078..913b8ead4 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -280,13 +280,11 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
int rate_mv = 0;
- if (cpi->sf.disable_inter_mode_mask[bsize] &
- (1 << INTER_OFFSET(this_mode)))
+ if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode)))
continue;
- if (best_rd < ((int64_t)rd_threshes[mode_idx[this_mode]] *
- rd_thresh_freq_fact[this_mode] >> 5) ||
- rd_threshes[mode_idx[this_mode]] == INT_MAX)
+ if (rd_less_than_thresh(best_rd, rd_threshes[mode_idx[this_mode]],
+ rd_thresh_freq_fact[this_mode]))
continue;
if (this_mode == NEWMV) {
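The mask also flips polarity here: the old disable_inter_mode_mask listed modes to skip, indexed relative to NEARESTMV via INTER_OFFSET(), while the new inter_mode_mask lists modes to search, indexed by the mode value itself. A sketch of the two tests (enum values as in vp9's common headers, where NEARESTMV is 10):

  /* Old: a set bit disables; bits 0..3 stand for NEARESTMV..NEWMV. */
  skip = disable_inter_mode_mask[bsize] & (1 << (this_mode - NEARESTMV));
  /* New: a set bit enables; indexed by the enum value directly. */
  skip = !(inter_mode_mask[bsize] & (1 << this_mode));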
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index a04622c8c..b58eac981 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -200,9 +200,8 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
oxcf->best_allowed_q) / 2;
}
- rc->last_q[0] = oxcf->best_allowed_q;
- rc->last_q[1] = oxcf->best_allowed_q;
- rc->last_q[2] = oxcf->best_allowed_q;
+ rc->last_q[KEY_FRAME] = oxcf->best_allowed_q;
+ rc->last_q[INTER_FRAME] = oxcf->best_allowed_q;
rc->buffer_level = oxcf->starting_buffer_level;
rc->bits_off_target = oxcf->starting_buffer_level;
@@ -213,6 +212,7 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
rc->long_rolling_actual_bits = rc->avg_frame_bandwidth;
rc->total_actual_bits = 0;
+ rc->total_target_bits = 0;
rc->total_target_vs_actual = 0;
rc->baseline_gf_interval = DEFAULT_GF_INTERVAL;
@@ -607,13 +607,27 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
return q;
}
+static int get_active_cq_level(const RATE_CONTROL *rc,
+ const VP9EncoderConfig *const oxcf) {
+ static const double cq_adjust_threshold = 0.5;
+ int active_cq_level = oxcf->cq_level;
+ if (oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY &&
+ rc->total_target_bits > 0) {
+ const double x = (double)rc->total_actual_bits / rc->total_target_bits;
+ if (x < cq_adjust_threshold) {
+ active_cq_level = (int)(active_cq_level * x / cq_adjust_threshold);
+ }
+ }
+ return active_cq_level;
+}
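A worked example with hypothetical numbers: with cq_level 40 and only 30% of the target bits spent so far, x = 0.3 < 0.5 and the active level drops to 40 * 0.3 / 0.5 = 24. Once actual spending reaches at least half the target, the configured level is used unchanged:

  \mathrm{active\_cq} = \begin{cases} cq \cdot x / 0.5, & x < 0.5 \\ cq, & x \ge 0.5 \end{cases} \qquad x = \frac{\text{total actual bits}}{\text{total target bits}}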
+
static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
int *bottom_index,
int *top_index) {
const VP9_COMMON *const cm = &cpi->common;
const RATE_CONTROL *const rc = &cpi->rc;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
- const int cq_level = oxcf->cq_level;
+ const int cq_level = get_active_cq_level(rc, oxcf);
int active_best_quality;
int active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi);
int q;
@@ -791,7 +805,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
const VP9_COMMON *const cm = &cpi->common;
const RATE_CONTROL *const rc = &cpi->rc;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
- const int cq_level = oxcf->cq_level;
+ const int cq_level = get_active_cq_level(rc, oxcf);
int active_best_quality;
int active_worst_quality = cpi->twopass.active_worst_quality;
int q;
@@ -1075,7 +1089,6 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
} else if (!rc->is_src_frame_alt_ref &&
(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) &&
!(cpi->use_svc && oxcf->rc_mode == RC_MODE_CBR)) {
- rc->last_q[2] = qindex;
rc->avg_frame_qindex[2] =
ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[2] + qindex, 2);
} else {
@@ -1375,6 +1388,24 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
return target_index - qindex;
}
+void vp9_rc_set_gf_max_interval(const VP9EncoderConfig *const oxcf,
+ RATE_CONTROL *const rc) {
+ // Set maximum gf/arf interval
+ rc->max_gf_interval = 16;
+
+ // Extended interval for genuinely static scenes
+ rc->static_scene_max_gf_interval = oxcf->key_freq >> 1;
+
+ // Special conditions when alt ref frame enabled
+ if (oxcf->play_alternate && oxcf->lag_in_frames) {
+ if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
+ rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
+ }
+
+ if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
+ rc->max_gf_interval = rc->static_scene_max_gf_interval;
+}
+
void vp9_rc_update_framerate(VP9_COMP *cpi) {
const VP9_COMMON *const cm = &cpi->common;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
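Note: the capping logic moves into vp9_rc_set_gf_max_interval() so the SVC
path (see the vp9_svc_layercontext.c hunk below) can share it. Dropping the
old direct cap of max_gf_interval by lag_in_frames - 1 does not change the
result, because static_scene_max_gf_interval is itself capped to
lag_in_frames - 1 before the final MIN. Worked example with hypothetical
settings key_freq = 150, play_alternate = 1, lag_in_frames = 10:

    /* static_scene_max_gf_interval = MIN(150 >> 1, 10 - 1) = 9 */
    /* max_gf_interval              = MIN(16, 9)            = 9 */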
@@ -1399,21 +1430,5 @@ void vp9_rc_update_framerate(VP9_COMP *cpi) {
rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P),
vbr_max_bits);
- // Set Maximum gf/arf interval
- rc->max_gf_interval = 16;
-
- // Extended interval for genuinely static scenes
- rc->static_scene_max_gf_interval = cpi->oxcf.key_freq >> 1;
-
- // Special conditions when alt ref frame enabled in lagged compress mode
- if (oxcf->play_alternate && oxcf->lag_in_frames) {
- if (rc->max_gf_interval > oxcf->lag_in_frames - 1)
- rc->max_gf_interval = oxcf->lag_in_frames - 1;
-
- if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
- rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
- }
-
- if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
- rc->max_gf_interval = rc->static_scene_max_gf_interval;
+ vp9_rc_set_gf_max_interval(oxcf, rc);
}
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index b1cc67609..d6a0151b6 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -30,7 +30,7 @@ typedef struct {
int this_frame_target; // Actual frame target after rc adjustment.
int projected_frame_size;
int sb64_target_rate;
- int last_q[3]; // Separate values for Intra/Inter/ARF-GF
+ int last_q[FRAME_TYPES]; // Separate values for Intra/Inter
int last_boosted_qindex; // Last boosted GF/KF/ARF q
int gfu_boost;
@@ -178,6 +178,9 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
void vp9_rc_update_framerate(struct VP9_COMP *cpi);
+void vp9_rc_set_gf_max_interval(const struct VP9EncoderConfig *const oxcf,
+ RATE_CONTROL *const rc);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 601e64d39..f68aa2738 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -9,7 +9,6 @@
*/
#include <assert.h>
-#include <limits.h>
#include <math.h>
#include <stdio.h>
@@ -22,7 +21,6 @@
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
-#include "vp9/common/vp9_pragmas.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconinter.h"
@@ -1677,9 +1675,9 @@ static INLINE int mv_has_subpel(const MV *mv) {
static int check_best_zero_mv(
const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
- int disable_inter_mode_mask, int this_mode,
+ int inter_mode_mask, int this_mode,
const MV_REFERENCE_FRAME ref_frames[2]) {
- if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
+ if ((inter_mode_mask & (1 << ZEROMV)) &&
(this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
(ref_frames[1] == NONE ||
@@ -1745,7 +1743,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
ENTROPY_CONTEXT t_above[2], t_left[2];
int subpelmv = 1, have_ref = 0;
const int has_second_rf = has_second_ref(mbmi);
- const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
+ const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
vp9_zero(*bsi);
@@ -1794,11 +1792,11 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
mode_idx = INTER_OFFSET(this_mode);
bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
- if (disable_inter_mode_mask & (1 << mode_idx))
+ if (!(inter_mode_mask & (1 << this_mode)))
continue;
if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
- disable_inter_mode_mask,
+ inter_mode_mask,
this_mode, mbmi->ref_frame))
continue;
@@ -2131,8 +2129,7 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
// Find sad for current vector.
this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
- ref_y_ptr, ref_y_stride,
- 0x7fffffff);
+ ref_y_ptr, ref_y_stride);
// Note if it is the best so far.
if (this_sad < best_sad) {
@@ -2210,8 +2207,6 @@ static void estimate_ref_frame_costs(const VP9_COMMON *cm,
static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
int mode_index,
- int_mv *ref_mv,
- int_mv *second_ref_mv,
int64_t comp_pred_diff[REFERENCE_MODES],
const int64_t tx_size_diff[TX_MODES],
int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) {
@@ -2991,11 +2986,6 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
ctx->mic = *xd->mi[0];
}
-static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
- int thresh_fact) {
- return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
-}
-
// Updating rd_thresh_freq_fact[] here means that the different
// partition/block sizes are handled independently based on the best
// choice for the current partition. It may well be better to keep a scaled
@@ -3073,7 +3063,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
const int intra_y_mode_mask =
cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
- int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
+ int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
vp9_zero(best_mbmode);
x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
@@ -3139,6 +3129,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
const int inter_non_zero_mode_mask = 0x1F7F7;
mode_skip_mask |= inter_non_zero_mode_mask;
+ mode_skip_mask &= ~(1 << THR_ZEROMV);
+ inter_mode_mask = (1 << ZEROMV);
}
// Disable this drop out case if the ref frame
@@ -3172,7 +3164,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
if (bsize > cpi->sf.max_intra_bsize) {
- mode_skip_mask |= 0xFF30808;
+ const int all_intra_modes = (1 << THR_DC) | (1 << THR_TM) |
+ (1 << THR_H_PRED) | (1 << THR_V_PRED) | (1 << THR_D135_PRED) |
+ (1 << THR_D207_PRED) | (1 << THR_D153_PRED) | (1 << THR_D63_PRED) |
+ (1 << THR_D117_PRED) | (1 << THR_D45_PRED);
+ mode_skip_mask |= all_intra_modes;
}
if (!x->in_active_map) {
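Note: the opaque constant 0xFF30808 becomes an explicit OR of the intra
THR_* bits, making the intent (skip every intra mode) readable and robust
against reordering of the THR_MODES enum. Whether the named form is
bit-for-bit identical to the old magic value depends on that ordering, so it
is best read as an intent-preserving rather than a literal translation.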
@@ -3186,7 +3182,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
mode_index = THR_ZEROMV;
mode_skip_mask = ~(1 << mode_index);
mode_skip_start = MAX_MODES;
- disable_inter_mode_mask = 0;
+ inter_mode_mask = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) |
+ (1 << NEWMV);
}
for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
@@ -3228,13 +3225,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// Test best rd so far against threshold for trying this mode.
if (rd_less_than_thresh(best_rd, rd_threshes[mode_index],
- rd_thresh_freq_fact[mode_index]))
+ rd_thresh_freq_fact[mode_index]))
continue;
this_mode = vp9_mode_order[mode_index].mode;
ref_frame = vp9_mode_order[mode_index].ref_frame[0];
- if (ref_frame != INTRA_FRAME &&
- disable_inter_mode_mask & (1 << INTER_OFFSET(this_mode)))
+ if (ref_frame != INTRA_FRAME && !(inter_mode_mask & (1 << this_mode)))
continue;
second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
@@ -3283,7 +3279,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
!vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
- disable_inter_mode_mask, this_mode, ref_frames))
+ inter_mode_mask, this_mode, ref_frames))
continue;
}
}
@@ -3621,9 +3617,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
store_coding_context(x, ctx, best_mode_index,
- &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
- &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
- mbmi->ref_frame[1]][0],
best_pred_diff, best_tx_diff, best_filter_diff);
return best_rd;
@@ -3672,7 +3665,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
int_mv seg_mvs[4][MAX_REF_FRAMES];
b_mode_info best_bmodes[4];
int best_skip2 = 0;
- int ref_frame_mask = 0;
int mode_skip_mask = 0;
x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
@@ -3707,17 +3699,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
frame_mv[ZEROMV][ref_frame].as_int = 0;
}
- for (ref_frame = LAST_FRAME;
- ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) {
- int i;
- for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
- if ((x->pred_mv_sad[ref_frame] >> 1) > x->pred_mv_sad[i]) {
- ref_frame_mask |= (1 << ref_frame);
- break;
- }
- }
- }
-
for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
int mode_excluded = 0;
int64_t this_rd = INT64_MAX;
@@ -3812,11 +3793,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
(int)ref_frame) {
continue;
- // If the segment skip feature is enabled....
- // then do nothing if the current mode is not allowed..
- } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
- ref_frame != INTRA_FRAME) {
- continue;
// Disable this drop out case if the ref frame
// segment level feature is enabled for this segment. This is to
// prevent the possibility that we end up unable to pick any mode.
@@ -4041,15 +4017,10 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
}
if (!disable_skip) {
- // Test for the condition where skip block will be activated
- // because there are no non zero coefficients and make any
- // necessary adjustment for rate. Ignore if skip is coded at
- // segment level as the cost wont have been added in.
- // Is Mb level skip allowed (i.e. not coded at segment level).
- const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
- SEG_LVL_SKIP);
+ // Skip is never coded at the segment level for sub8x8 blocks; it is
+ // always coded in the bitstream at the mode info level.
- if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
+ if (ref_frame != INTRA_FRAME && !xd->lossless) {
if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
// Add in the cost of the no skip flag.
@@ -4064,7 +4035,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
rate_uv = 0;
this_skip2 = 1;
}
- } else if (mb_skip_allowed) {
+ } else {
// Add in the cost of the no skip flag.
rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
}
@@ -4254,9 +4225,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
store_coding_context(x, ctx, best_ref_index,
- &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
- &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
- mbmi->ref_frame[1]][0],
best_pred_diff, best_tx_diff, best_filter_diff);
return best_rd;
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index b6b51e553..5ea09a8a7 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -11,6 +11,8 @@
#ifndef VP9_ENCODER_VP9_RDOPT_H_
#define VP9_ENCODER_VP9_RDOPT_H_
+#include <limits.h>
+
#include "vp9/encoder/vp9_encoder.h"
#ifdef __cplusplus
@@ -87,6 +89,11 @@ void vp9_set_rd_speed_thresholds(VP9_COMP *cpi);
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi);
+static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
+ int thresh_fact) {
+ return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
+}
+
static INLINE int full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, MV *mvp_full,
int step_param, int error_per_bit,
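Note: rd_less_than_thresh() moves here so vp9_pickmode.c can reuse it (hence
the new <limits.h> include). thresh_fact is a fixed-point scale with 5
fractional bits, so the effective threshold is thresh * thresh_fact / 32.
Worked example: thresh = 1000 with thresh_fact = 32 keeps the threshold at
1000, while thresh_fact = 48 raises it to 1500; thresh == INT_MAX marks a
disabled mode and always reports "below threshold", so that mode is skipped
regardless of best_rd.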
diff --git a/vp9/encoder/vp9_sad.c b/vp9/encoder/vp9_sad.c
index 892e90551..d06263676 100644
--- a/vp9/encoder/vp9_sad.c
+++ b/vp9/encoder/vp9_sad.c
@@ -35,14 +35,12 @@ static INLINE unsigned int sad(const uint8_t *a, int a_stride,
#define sadMxN(m, n) \
unsigned int vp9_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- unsigned int max_sad) { \
+ const uint8_t *ref, int ref_stride) { \
return sad(src, src_stride, ref, ref_stride, m, n); \
} \
unsigned int vp9_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \
const uint8_t *ref, int ref_stride, \
- const uint8_t *second_pred, \
- unsigned int max_sad) { \
+ const uint8_t *second_pred) { \
uint8_t comp_pred[m * n]; \
vp9_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \
return sad(src, src_stride, comp_pred, m, m, n); \
@@ -54,8 +52,7 @@ void vp9_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \
unsigned int *sads) { \
int i; \
for (i = 0; i < k; ++i) \
- sads[i] = vp9_sad##m##x##n##_c(src, src_stride, &ref[i], ref_stride, \
- 0x7fffffff); \
+ sads[i] = vp9_sad##m##x##n##_c(src, src_stride, &ref[i], ref_stride); \
}
#define sadMxNx4D(m, n) \
@@ -64,8 +61,7 @@ void vp9_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \
unsigned int *sads) { \
int i; \
for (i = 0; i < 4; ++i) \
- sads[i] = vp9_sad##m##x##n##_c(src, src_stride, refs[i], ref_stride, \
- 0x7fffffff); \
+ sads[i] = vp9_sad##m##x##n##_c(src, src_stride, refs[i], ref_stride); \
}
// 64x64
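Note: with the max_sad early-out removed, the SAD kernels always compute the
full sum and every 0x7fffffff sentinel disappears from the call sites (see
mv_pred above). A minimal usage sketch of the new four-argument C signature,
with hypothetical buffers (vp9_sad16x16_c is one of the instances generated
by sadMxN):

    uint8_t src[16 * 16], ref[16 * 16];
    unsigned int sad16 = vp9_sad16x16_c(src, 16, ref, 16);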
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 7b2d1e2f0..15b986197 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -14,20 +14,23 @@
#include "vp9/encoder/vp9_speed_features.h"
enum {
- ALL_INTRA_MODES = (1 << DC_PRED) |
+ INTRA_ALL = (1 << DC_PRED) |
(1 << V_PRED) | (1 << H_PRED) |
(1 << D45_PRED) | (1 << D135_PRED) |
(1 << D117_PRED) | (1 << D153_PRED) |
(1 << D207_PRED) | (1 << D63_PRED) |
(1 << TM_PRED),
-
- INTRA_DC_ONLY = (1 << DC_PRED),
-
- INTRA_DC_TM = (1 << TM_PRED) | (1 << DC_PRED),
-
+ INTRA_DC = (1 << DC_PRED),
+ INTRA_DC_TM = (1 << DC_PRED) | (1 << TM_PRED),
INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED),
+ INTRA_DC_TM_H_V = (1 << DC_PRED) | (1 << TM_PRED) | (1 << V_PRED) |
+ (1 << H_PRED)
+};
- INTRA_DC_TM_H_V = INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED)
+enum {
+ INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV),
+ INTER_NEAREST = (1 << NEARESTMV),
+ INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV)
};
enum {
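Note: the new INTER_* allow-masks reproduce the old deny-masks exactly:
disable == 1 << INTER_OFFSET(ZEROMV) corresponds to INTER_NEAREST_NEAR_NEW,
and disable == ~(1 << INTER_OFFSET(NEARESTMV)) corresponds to INTER_NEAREST,
as used in the speed-3 hunk below. Bit positions are now the raw mode
values, so a test reads (sketch, not part of the patch):

    if (!(sf->inter_mode_mask[bsize] & (1 << NEWMV)))  /* NEWMV not allowed */
      continue;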
@@ -80,12 +83,16 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
}
if (speed >= 2) {
- if (MIN(cm->width, cm->height) >= 720)
+ if (MIN(cm->width, cm->height) >= 720) {
+ sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
+ sf->last_partitioning_redo_frequency = 3;
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
- else
+ } else {
sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
-
+ sf->last_partitioning_redo_frequency = 2;
+ sf->lf_motion_threshold = NO_MOTION_THRESHOLD;
+ }
sf->adaptive_pred_interp_filter = 2;
sf->reference_masking = 1;
sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
@@ -97,7 +104,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
sf->adjust_partitioning_from_last_frame = 1;
- sf->last_partitioning_redo_frequency = 3;
}
if (speed >= 3) {
@@ -108,6 +114,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
else
sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT;
+ sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
+ sf->last_partitioning_redo_frequency = 3;
sf->recode_loop = ALLOW_RECODE_KFMAXBW;
sf->adaptive_rd_thresh = 3;
sf->mode_skip_start = 6;
@@ -135,20 +143,23 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
sf->search_method = HEX;
sf->disable_filter_search_var_thresh = 500;
for (i = 0; i < TX_SIZES; ++i) {
- sf->intra_y_mode_mask[i] = INTRA_DC_ONLY;
- sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY;
+ sf->intra_y_mode_mask[i] = INTRA_DC;
+ sf->intra_uv_mode_mask[i] = INTRA_DC;
}
cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
}
}
-static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf,
+static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
int speed) {
+ VP9_COMMON *const cm = &cpi->common;
+ const int frames_since_key =
+ cm->frame_type == KEY_FRAME ? 0 : cpi->rc.frames_since_key;
sf->static_segmentation = 0;
sf->adaptive_rd_thresh = 1;
sf->use_fast_coef_costing = 1;
- if (speed == 1) {
+ if (speed >= 1) {
sf->use_square_partition_only = !frame_is_intra_only(cm);
sf->less_rectangular_check = 1;
sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD
@@ -171,13 +182,9 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf,
}
if (speed >= 2) {
- sf->use_square_partition_only = !frame_is_intra_only(cm);
- sf->less_rectangular_check = 1;
- sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD
- : USE_LARGESTALL;
if (MIN(cm->width, cm->height) >= 720)
- sf->disable_split_mask = cm->show_frame ?
- DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
+ sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
+ : DISABLE_ALL_INTER_SPLIT;
else
sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
@@ -185,27 +192,18 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf,
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_INTRA_LOWVAR;
- sf->use_rd_breakout = 1;
- sf->adaptive_motion_search = 1;
sf->adaptive_pred_interp_filter = 2;
- sf->auto_mv_step_size = 1;
sf->reference_masking = 1;
-
sf->disable_filter_search_var_thresh = 50;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
-
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
+ sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
sf->adjust_partitioning_from_last_frame = 1;
sf->last_partitioning_redo_frequency = 3;
-
- sf->adaptive_rd_thresh = 2;
sf->use_lp32x32fdct = 1;
sf->mode_skip_start = 11;
- sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
- sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
- sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
}
if (speed >= 3) {
@@ -233,19 +231,19 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf,
sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX;
sf->adjust_partitioning_from_last_frame =
cm->last_frame_type != cm->frame_type || (0 ==
- (cm->current_video_frame + 1) % sf->last_partitioning_redo_frequency);
+ (frames_since_key + 1) % sf->last_partitioning_redo_frequency);
sf->subpel_force_stop = 1;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
- sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY;
+ sf->intra_uv_mode_mask[i] = INTRA_DC;
}
- sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY;
+ sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->frame_parameter_update = 0;
sf->search_method = FAST_HEX;
- sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV);
- sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV));
- sf->disable_inter_mode_mask[BLOCK_64X32] = ~(1 << INTER_OFFSET(NEARESTMV));
- sf->disable_inter_mode_mask[BLOCK_64X64] = ~(1 << INTER_OFFSET(NEARESTMV));
+ sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW;
+ sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST;
+ sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST;
+ sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST;
sf->max_intra_bsize = BLOCK_32X32;
sf->allow_skip_recode = 1;
}
@@ -254,9 +252,9 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf,
sf->max_partition_size = BLOCK_32X32;
sf->min_partition_size = BLOCK_8X8;
sf->partition_check =
- (cm->current_video_frame % sf->last_partitioning_redo_frequency == 1);
+ (frames_since_key % sf->last_partitioning_redo_frequency == 1);
sf->force_frame_boost = cm->frame_type == KEY_FRAME ||
- (cm->current_video_frame %
+ (frames_since_key %
(sf->last_partitioning_redo_frequency << 1) == 1);
sf->max_delta_qindex = (cm->frame_type == KEY_FRAME) ? 20 : 15;
sf->partition_search_type = REFERENCE_PARTITION;
@@ -276,7 +274,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf,
if (speed >= 7) {
int i;
for (i = 0; i < BLOCK_SIZES; ++i)
- sf->disable_inter_mode_mask[i] = ~(1 << INTER_OFFSET(NEARESTMV));
+ sf->inter_mode_mask[i] = INTER_NEAREST;
}
}
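Note: keying the refresh cadence off frames_since_key (computed at the top
of set_rt_speed_feature) instead of cm->current_video_frame restarts the
schedule at every key frame. For example, with
last_partitioning_redo_frequency = 3, partition_check now fires on the 1st,
4th, 7th, ... frame after each key frame rather than on absolute frame
numbers, so behavior no longer drifts with keyframe placement.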
@@ -321,8 +319,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->disable_split_var_thresh = 0;
sf->disable_filter_search_var_thresh = 0;
for (i = 0; i < TX_SIZES; i++) {
- sf->intra_y_mode_mask[i] = ALL_INTRA_MODES;
- sf->intra_uv_mode_mask[i] = ALL_INTRA_MODES;
+ sf->intra_y_mode_mask[i] = INTRA_ALL;
+ sf->intra_uv_mode_mask[i] = INTRA_ALL;
}
sf->use_rd_breakout = 0;
sf->skip_encode_sb = 0;
@@ -334,7 +332,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set
sf->use_nonrd_pick_mode = 0;
for (i = 0; i < BLOCK_SIZES; ++i)
- sf->disable_inter_mode_mask[i] = 0;
+ sf->inter_mode_mask[i] = INTER_ALL;
sf->max_intra_bsize = BLOCK_64X64;
// This setting only takes effect when partition_search_type is set
// to FIXED_PARTITION.
@@ -356,7 +354,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
set_good_speed_feature(cpi, cm, sf, oxcf->speed);
break;
case REALTIME:
- set_rt_speed_feature(cm, sf, oxcf->speed);
+ set_rt_speed_feature(cpi, sf, oxcf->speed);
break;
}
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index d8c1a8be2..3e7cd27d8 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -44,6 +44,11 @@ typedef enum {
} SUBPEL_SEARCH_METHODS;
typedef enum {
+ NO_MOTION_THRESHOLD = 0,
+ LOW_MOTION_THRESHOLD = 7
+} MOTION_THRESHOLD;
+
+typedef enum {
LAST_FRAME_PARTITION_OFF = 0,
LAST_FRAME_PARTITION_LOW_MOTION = 1,
LAST_FRAME_PARTITION_ALL = 2
@@ -200,6 +205,10 @@ typedef struct SPEED_FEATURES {
// partitioning.
LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning;
+ // This threshold determines how slow the motion must be; it applies when
+ // use_lastframe_partitioning is set to LAST_FRAME_PARTITION_LOW_MOTION.
+ MOTION_THRESHOLD lf_motion_threshold;
+
// Determine which method we use to determine transform size. We can choose
// between options like full rd, largest for prediction size, largest
// for intra and model coefs for the rest.
@@ -322,8 +331,8 @@ typedef struct SPEED_FEATURES {
int use_nonrd_pick_mode;
// A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV
- // modes are disabled in order from LSB to MSB for each BLOCK_SIZE.
- int disable_inter_mode_mask[BLOCK_SIZES];
+ // modes are allowed, in order from LSB to MSB, for each BLOCK_SIZE.
+ int inter_mode_mask[BLOCK_SIZES];
// This feature controls whether we do the expensive context update and
// calculation in the rd coefficient costing loop.
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index dd28496be..c25314b42 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -46,13 +46,12 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
lrc->key_frame_rate_correction_factor = 1.0;
if (svc->number_temporal_layers > 1) {
- lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000;
+ lc->target_bandwidth = oxcf->ts_target_bitrate[layer];
lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q;
} else {
- lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000;
- lrc->last_q[0] = oxcf->best_allowed_q;
- lrc->last_q[1] = oxcf->best_allowed_q;
- lrc->last_q[2] = oxcf->best_allowed_q;
+ lc->target_bandwidth = oxcf->ss_target_bitrate[layer];
+ lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q;
+ lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q;
}
lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level),
@@ -82,9 +81,9 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi,
RATE_CONTROL *const lrc = &lc->rc;
if (svc->number_temporal_layers > 1) {
- lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000;
+ lc->target_bandwidth = oxcf->ts_target_bitrate[layer];
} else {
- lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000;
+ lc->target_bandwidth = oxcf->ss_target_bitrate[layer];
}
bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth;
// Update buffer-related quantities.
@@ -132,8 +131,7 @@ void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) {
} else {
const double prev_layer_framerate =
oxcf->framerate / oxcf->ts_rate_decimator[layer - 1];
- const int prev_layer_target_bandwidth =
- oxcf->ts_target_bitrate[layer - 1] * 1000;
+ const int prev_layer_target_bandwidth = oxcf->ts_target_bitrate[layer - 1];
lc->avg_frame_size =
(int)((lc->target_bandwidth - prev_layer_target_bandwidth) /
(lc->framerate - prev_layer_framerate));
@@ -151,20 +149,7 @@ void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) {
oxcf->two_pass_vbrmin_section / 100);
lrc->max_frame_bandwidth = (int)(((int64_t)lrc->avg_frame_bandwidth *
oxcf->two_pass_vbrmax_section) / 100);
- lrc->max_gf_interval = 16;
-
- lrc->static_scene_max_gf_interval = cpi->oxcf.key_freq >> 1;
-
- if (oxcf->play_alternate && oxcf->lag_in_frames) {
- if (lrc->max_gf_interval > oxcf->lag_in_frames - 1)
- lrc->max_gf_interval = oxcf->lag_in_frames - 1;
-
- if (lrc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
- lrc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
- }
-
- if (lrc->max_gf_interval > lrc->static_scene_max_gf_interval)
- lrc->max_gf_interval = lrc->static_scene_max_gf_interval;
+ vp9_rc_set_gf_max_interval(oxcf, lrc);
}
void vp9_restore_layer_context(VP9_COMP *const cpi) {
diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c
index 91d8ea4dc..eb5ae2e41 100644
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -156,6 +156,18 @@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \
return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \
}
+void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ unsigned int *sse, int *sum) {
+ variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum);
+}
+
+void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ unsigned int *sse, int *sum) {
+ variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum);
+}
+
unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse) {
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index c47fe1335..4a194b72c 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -25,15 +25,13 @@ void variance(const uint8_t *a, int a_stride,
typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
- int ref_stride,
- unsigned int max_sad);
+ int ref_stride);
typedef unsigned int(*vp9_sad_avg_fn_t)(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int ref_stride,
- const uint8_t *second_pred,
- unsigned int max_sad);
+ const uint8_t *second_pred);
typedef void (*vp9_sad_multi_fn_t)(const uint8_t *src_ptr,
int source_stride,
diff --git a/vp9/encoder/x86/vp9_dct_ssse3.asm b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
index 8723a7114..8723a7114 100644
--- a/vp9/encoder/x86/vp9_dct_ssse3.asm
+++ b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
index 48ccef8cc..48ccef8cc 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
diff --git a/vp9/encoder/x86/vp9_ssim_opt.asm b/vp9/encoder/x86/vp9_ssim_opt_x86_64.asm
index 455d10d2c..455d10d2c 100644
--- a/vp9/encoder/x86/vp9_ssim_opt.asm
+++ b/vp9/encoder/x86/vp9_ssim_opt_x86_64.asm
diff --git a/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm b/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm
index 673e0b3a6..21aaa9383 100644
--- a/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm
+++ b/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm
@@ -43,9 +43,9 @@ sym(vp9_temporal_filter_apply_sse2):
mov [rsp + rbp_backup], rbp
; end prolog
- mov rdx, arg(3)
+ mov edx, arg(3)
mov [rsp + block_width], rdx
- mov rdx, arg(4)
+ mov edx, arg(4)
mov [rsp + block_height], rdx
movd xmm6, arg(5)
movdqa [rsp + strength], xmm6 ; where strength is used, all 16 bytes are read
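Note: arg(3) and arg(4) are 32-bit ints, and a 64-bit load can pick up
undefined upper bits (from the register half the ABI leaves unspecified, or
from the adjacent stack slot). Writing edx zero-extends into rdx, so the
64-bit stores of block_width and block_height that follow no longer capture
caller garbage in bits 32-63.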
diff --git a/vp9/encoder/x86/vp9_variance_avx2.c b/vp9/encoder/x86/vp9_variance_avx2.c
index 835c51957..7f81f46b8 100644
--- a/vp9/encoder/x86/vp9_variance_avx2.c
+++ b/vp9/encoder/x86/vp9_variance_avx2.c
@@ -10,7 +10,6 @@
#include "./vpx_config.h"
#include "vp9/encoder/vp9_variance.h"
-#include "vp9/common/vp9_pragmas.h"
#include "vpx_ports/mem.h"
typedef void (*get_var_avx2) (
diff --git a/vp9/encoder/x86/vp9_variance_mmx.c b/vp9/encoder/x86/vp9_variance_mmx.c
index c4d17fc0f..ae2f976af 100644
--- a/vp9/encoder/x86/vp9_variance_mmx.c
+++ b/vp9/encoder/x86/vp9_variance_mmx.c
@@ -10,7 +10,6 @@
#include "./vpx_config.h"
#include "vp9/encoder/vp9_variance.h"
-#include "vp9/common/vp9_pragmas.h"
#include "vpx_ports/mem.h"
extern unsigned int vp9_get8x8var_mmx
diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c
index 41f225922..e935a233a 100644
--- a/vp9/encoder/x86/vp9_variance_sse2.c
+++ b/vp9/encoder/x86/vp9_variance_sse2.c
@@ -11,53 +11,29 @@
#include "./vpx_config.h"
#include "vp9/encoder/vp9_variance.h"
-#include "vp9/common/vp9_pragmas.h"
#include "vpx_ports/mem.h"
-extern unsigned int vp9_get4x4var_mmx
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *SSE,
- int *Sum
-);
-
-unsigned int vp9_get16x16var_sse2
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *SSE,
- int *Sum
-);
-unsigned int vp9_get8x8var_sse2
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *SSE,
- int *Sum
-);
-
-typedef unsigned int (*get_var_sse2) (
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *SSE,
- int *Sum
-);
-
-static void variance_sse2(const unsigned char *src_ptr, int source_stride,
- const unsigned char *ref_ptr, int recon_stride,
- int w, int h, unsigned int *sse, int *sum,
- get_var_sse2 var_fn, int block_size) {
- unsigned int sse0;
- int sum0;
+typedef unsigned int (*variance_fn_t) (const unsigned char *src, int src_stride,
+ const unsigned char *ref, int ref_stride,
+ unsigned int *sse, int *sum);
+
+unsigned int vp9_get4x4var_mmx(const unsigned char *src, int src_stride,
+ const unsigned char *ref, int ref_stride,
+ unsigned int *sse, int *sum);
+
+unsigned int vp9_get8x8var_sse2(const unsigned char *src, int src_stride,
+ const unsigned char *ref, int ref_stride,
+ unsigned int *sse, int *sum);
+
+unsigned int vp9_get16x16var_sse2(const unsigned char *src, int src_stride,
+ const unsigned char *ref, int ref_stride,
+ unsigned int *sse, int *sum);
+
+static void variance_sse2(const unsigned char *src, int src_stride,
+ const unsigned char *ref, int ref_stride,
+ int w, int h, unsigned int *sse, int *sum,
+ variance_fn_t var_fn, int block_size) {
int i, j;
*sse = 0;
@@ -65,217 +41,139 @@ static void variance_sse2(const unsigned char *src_ptr, int source_stride,
for (i = 0; i < h; i += block_size) {
for (j = 0; j < w; j += block_size) {
- var_fn(src_ptr + source_stride * i + j, source_stride,
- ref_ptr + recon_stride * i + j, recon_stride, &sse0, &sum0);
+ unsigned int sse0;
+ int sum0;
+ var_fn(src + src_stride * i + j, src_stride,
+ ref + ref_stride * i + j, ref_stride, &sse0, &sum0);
*sse += sse0;
*sum += sum0;
}
}
}
-unsigned int vp9_variance4x4_sse2(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- unsigned int var;
- int avg;
-
- variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4,
- &var, &avg, vp9_get4x4var_mmx, 4);
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 4));
+unsigned int vp9_variance4x4_sse2(const unsigned char *src, int src_stride,
+ const unsigned char *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 4, 4,
+ sse, &sum, vp9_get4x4var_mmx, 4);
+ return *sse - (((unsigned int)sum * sum) >> 4);
}
-unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
+unsigned int vp9_variance8x4_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
unsigned int *sse) {
- unsigned int var;
- int avg;
-
- variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4,
- &var, &avg, vp9_get4x4var_mmx, 4);
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 5));
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 8, 4,
+ sse, &sum, vp9_get4x4var_mmx, 4);
+ return *sse - (((unsigned int)sum * sum) >> 5);
}
-unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
+unsigned int vp9_variance4x8_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
unsigned int *sse) {
- unsigned int var;
- int avg;
-
- variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8,
- &var, &avg, vp9_get4x4var_mmx, 4);
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 5));
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 4, 8,
+ sse, &sum, vp9_get4x4var_mmx, 4);
+ return *sse - (((unsigned int)sum * sum) >> 5);
}
-unsigned int vp9_variance8x8_sse2
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- unsigned int var;
- int avg;
-
- variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
- &var, &avg, vp9_get8x8var_sse2, 8);
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 6));
+unsigned int vp9_variance8x8_sse2(const unsigned char *src, int src_stride,
+ const unsigned char *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 8, 8,
+ sse, &sum, vp9_get8x8var_sse2, 8);
+ return *sse - (((unsigned int)sum * sum) >> 6);
}
-unsigned int vp9_variance16x8_sse2
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- unsigned int var;
- int avg;
-
- variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
- &var, &avg, vp9_get8x8var_sse2, 8);
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 7));
+unsigned int vp9_variance16x8_sse2(const unsigned char *src, int src_stride,
+ const unsigned char *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 16, 8,
+ sse, &sum, vp9_get8x8var_sse2, 8);
+ return *sse - (((unsigned int)sum * sum) >> 7);
}
-unsigned int vp9_variance8x16_sse2
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- unsigned int var;
- int avg;
-
- variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
- &var, &avg, vp9_get8x8var_sse2, 8);
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 7));
+unsigned int vp9_variance8x16_sse2(const unsigned char *src, int src_stride,
+ const unsigned char *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 8, 16,
+ sse, &sum, vp9_get8x8var_sse2, 8);
+ return *sse - (((unsigned int)sum * sum) >> 7);
}
-unsigned int vp9_variance16x16_sse2
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- unsigned int var;
- int avg;
-
- variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
- &var, &avg, vp9_get16x16var_sse2, 16);
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 8));
+unsigned int vp9_variance16x16_sse2(const unsigned char *src, int src_stride,
+ const unsigned char *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 16, 16,
+ sse, &sum, vp9_get16x16var_sse2, 16);
+ return *sse - (((unsigned int)sum * sum) >> 8);
}
-unsigned int vp9_mse16x16_sse2(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- unsigned int sse0;
- int sum0;
- vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0,
- &sum0);
- *sse = sse0;
- return sse0;
+unsigned int vp9_mse16x16_sse2(const unsigned char *src, int src_stride,
+ const unsigned char *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ vp9_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
+ return *sse;
}
-unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
+unsigned int vp9_variance32x32_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
unsigned int *sse) {
- unsigned int var;
- int avg;
-
- variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32,
- &var, &avg, vp9_get16x16var_sse2, 16);
- *sse = var;
- return (var - (((int64_t)avg * avg) >> 10));
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 32, 32,
+ sse, &sum, vp9_get16x16var_sse2, 16);
+ return *sse - (((int64_t)sum * sum) >> 10);
}
-unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
+unsigned int vp9_variance32x16_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
unsigned int *sse) {
- unsigned int var;
- int avg;
-
- variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16,
- &var, &avg, vp9_get16x16var_sse2, 16);
- *sse = var;
- return (var - (((int64_t)avg * avg) >> 9));
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 32, 16,
+ sse, &sum, vp9_get16x16var_sse2, 16);
+ return *sse - (((int64_t)sum * sum) >> 9);
}
-unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
+unsigned int vp9_variance16x32_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
unsigned int *sse) {
- unsigned int var;
- int avg;
-
- variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32,
- &var, &avg, vp9_get16x16var_sse2, 16);
- *sse = var;
- return (var - (((int64_t)avg * avg) >> 9));
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 16, 32,
+ sse, &sum, vp9_get16x16var_sse2, 16);
+ return *sse - (((int64_t)sum * sum) >> 9);
}
-unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
+unsigned int vp9_variance64x64_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
unsigned int *sse) {
- unsigned int var;
- int avg;
-
- variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64,
- &var, &avg, vp9_get16x16var_sse2, 16);
- *sse = var;
- return (var - (((int64_t)avg * avg) >> 12));
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 64, 64,
+ sse, &sum, vp9_get16x16var_sse2, 16);
+ return *sse - (((int64_t)sum * sum) >> 12);
}
-unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
+unsigned int vp9_variance64x32_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
unsigned int *sse) {
- unsigned int var;
- int avg;
-
- variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32,
- &var, &avg, vp9_get16x16var_sse2, 16);
- *sse = var;
- return (var - (((int64_t)avg * avg) >> 11));
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 64, 32,
+ sse, &sum, vp9_get16x16var_sse2, 16);
+ return *sse - (((int64_t)sum * sum) >> 11);
}
-unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
+unsigned int vp9_variance32x64_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
unsigned int *sse) {
- unsigned int var;
- int avg;
-
- variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64,
- &var, &avg, vp9_get16x16var_sse2, 16);
- *sse = var;
- return (var - (((int64_t)avg * avg) >> 11));
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 32, 64,
+ sse, &sum, vp9_get16x16var_sse2, 16);
+ return *sse - (((int64_t)sum * sum) >> 11);
}
#define DECL(w, opt) \
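Note: every wrapper now follows one pattern: accumulate SSE and Sum over the
block, then return Var = SSE - Sum^2 / N, with N = w * h folded into the
right shift (4x4 -> >> 4 up to 64x64 -> >> 12). The 32x32-and-larger cases
widen Sum * Sum to int64_t because |Sum| can reach 255 * 1024 = 261120 there
and its square overflows 32 bits, while at 16x16 the worst case
(255 * 256)^2 still fits in an unsigned int. A generic sketch of the
pattern, assuming the variance() helper declared in vp9_variance.h:

    static unsigned int variance_wxh(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     int w, int h, unsigned int *sse) {
      int sum;
      variance(src, src_stride, ref, ref_stride, w, h, sse, &sum);
      return *sse - (unsigned int)(((int64_t)sum * sum) / (w * h));
    }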
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 3b4d6b901..8c1f34567 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -10,7 +10,6 @@
VP9_COMMON_SRCS-yes += vp9_common.mk
VP9_COMMON_SRCS-yes += vp9_iface_common.h
-VP9_COMMON_SRCS-yes += common/vp9_pragmas.h
VP9_COMMON_SRCS-yes += common/vp9_ppflags.h
VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c
VP9_COMMON_SRCS-yes += common/vp9_blockd.c
@@ -119,9 +118,10 @@ VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_mblpf_horiz_loopfilter_d
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
-
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.h
+VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_idct_intrin_ssse3.c
ifeq ($(ARCH_X86_64), yes)
-VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_idct_ssse3.asm
+VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_idct_ssse3_x86_64.asm
endif
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_convolve_neon.c
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 5a8a4f4fe..d52424ad6 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -327,7 +327,8 @@ static vpx_codec_err_t set_encoder_config(
else if (cfg->rc_end_usage == VPX_CBR)
oxcf->rc_mode = RC_MODE_CBR;
- oxcf->target_bandwidth = cfg->rc_target_bitrate;
+ // Convert target bandwidth from Kbit/s to Bit/s
+ oxcf->target_bandwidth = 1000 * cfg->rc_target_bitrate;
oxcf->rc_max_intra_bitrate_pct = extra_cfg->rc_max_intra_bitrate_pct;
oxcf->best_allowed_q = vp9_quantizer_to_qindex(cfg->rc_min_quantizer);
@@ -387,7 +388,9 @@ static vpx_codec_err_t set_encoder_config(
oxcf->ss_number_layers = cfg->ss_number_layers;
if (oxcf->ss_number_layers > 1) {
- vp9_copy(oxcf->ss_target_bitrate, cfg->ss_target_bitrate);
+ int i;
+ for (i = 0; i < VPX_SS_MAX_LAYERS; ++i)
+ oxcf->ss_target_bitrate[i] = 1000 * cfg->ss_target_bitrate[i];
} else if (oxcf->ss_number_layers == 1) {
oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth;
}
@@ -395,8 +398,11 @@ static vpx_codec_err_t set_encoder_config(
oxcf->ts_number_layers = cfg->ts_number_layers;
if (oxcf->ts_number_layers > 1) {
- vp9_copy(oxcf->ts_target_bitrate, cfg->ts_target_bitrate);
- vp9_copy(oxcf->ts_rate_decimator, cfg->ts_rate_decimator);
+ int i;
+ for (i = 0; i < VPX_TS_MAX_LAYERS; ++i) {
+ oxcf->ts_target_bitrate[i] = 1000 * cfg->ts_target_bitrate[i];
+ oxcf->ts_rate_decimator[i] = cfg->ts_rate_decimator[i];
+ }
} else if (oxcf->ts_number_layers == 1) {
oxcf->ts_target_bitrate[0] = (int)oxcf->target_bandwidth;
oxcf->ts_rate_decimator[0] = 1;
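Note: the kbit/s-to-bit/s conversion now happens once, at configuration
time, which is why the * 1000 factors disappear from vp9_svc_layercontext.c
above. Hypothetical example: cfg.rc_target_bitrate = 256 (the API's units
are kbit/s) becomes oxcf->target_bandwidth = 256000 bit/s, and the per-layer
ss_target_bitrate / ts_target_bitrate entries are scaled the same way in the
loops above.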
@@ -462,64 +468,154 @@ static vpx_codec_err_t encoder_set_config(vpx_codec_alg_priv_t *ctx,
return res;
}
-static vpx_codec_err_t ctrl_get_param(vpx_codec_alg_priv_t *ctx, int ctrl_id,
- va_list args) {
- void *const arg = va_arg(args, void *);
-
-#define MAP(id, var) case id: *(RECAST(id, arg)) = var; break
+static vpx_codec_err_t ctrl_get_quantizer(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ int *const arg = va_arg(args, int *);
+ if (arg == NULL)
+ return VPX_CODEC_INVALID_PARAM;
+ *arg = vp9_get_quantizer(ctx->cpi);
+ return VPX_CODEC_OK;
+}
+static vpx_codec_err_t ctrl_get_quantizer64(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ int *const arg = va_arg(args, int *);
if (arg == NULL)
return VPX_CODEC_INVALID_PARAM;
+ *arg = vp9_qindex_to_quantizer(vp9_get_quantizer(ctx->cpi));
+ return VPX_CODEC_OK;
+}
- switch (ctrl_id) {
- MAP(VP8E_GET_LAST_QUANTIZER, vp9_get_quantizer(ctx->cpi));
- MAP(VP8E_GET_LAST_QUANTIZER_64,
- vp9_qindex_to_quantizer(vp9_get_quantizer(ctx->cpi)));
+static vpx_codec_err_t update_extra_cfg(vpx_codec_alg_priv_t *ctx,
+ const struct vp9_extracfg *extra_cfg) {
+ const vpx_codec_err_t res = validate_config(ctx, &ctx->cfg, extra_cfg);
+ if (res == VPX_CODEC_OK) {
+ ctx->extra_cfg = *extra_cfg;
+ set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg);
+ vp9_change_config(ctx->cpi, &ctx->oxcf);
}
+ return res;
+}
- return VPX_CODEC_OK;
-#undef MAP
+static vpx_codec_err_t ctrl_set_cpuused(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.cpu_used = CAST(VP8E_SET_CPUUSED, args);
+ return update_extra_cfg(ctx, &extra_cfg);
}
+static vpx_codec_err_t ctrl_set_enable_auto_alt_ref(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.enable_auto_alt_ref = CAST(VP8E_SET_ENABLEAUTOALTREF, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
-static vpx_codec_err_t ctrl_set_param(vpx_codec_alg_priv_t *ctx, int ctrl_id,
- va_list args) {
- vpx_codec_err_t res = VPX_CODEC_OK;
+static vpx_codec_err_t ctrl_set_noise_sensitivity(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.noise_sensitivity = CAST(VP8E_SET_NOISE_SENSITIVITY, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
-#define MAP(id, var) case id: var = CAST(id, args); break;
-
- switch (ctrl_id) {
- MAP(VP8E_SET_CPUUSED, extra_cfg.cpu_used);
- MAP(VP8E_SET_ENABLEAUTOALTREF, extra_cfg.enable_auto_alt_ref);
- MAP(VP8E_SET_NOISE_SENSITIVITY, extra_cfg.noise_sensitivity);
- MAP(VP8E_SET_SHARPNESS, extra_cfg.sharpness);
- MAP(VP8E_SET_STATIC_THRESHOLD, extra_cfg.static_thresh);
- MAP(VP9E_SET_TILE_COLUMNS, extra_cfg.tile_columns);
- MAP(VP9E_SET_TILE_ROWS, extra_cfg.tile_rows);
- MAP(VP8E_SET_ARNR_MAXFRAMES, extra_cfg.arnr_max_frames);
- MAP(VP8E_SET_ARNR_STRENGTH, extra_cfg.arnr_strength);
- MAP(VP8E_SET_ARNR_TYPE, extra_cfg.arnr_type);
- MAP(VP8E_SET_TUNING, extra_cfg.tuning);
- MAP(VP8E_SET_CQ_LEVEL, extra_cfg.cq_level);
- MAP(VP8E_SET_MAX_INTRA_BITRATE_PCT, extra_cfg.rc_max_intra_bitrate_pct);
- MAP(VP9E_SET_LOSSLESS, extra_cfg.lossless);
- MAP(VP9E_SET_FRAME_PARALLEL_DECODING,
- extra_cfg.frame_parallel_decoding_mode);
- MAP(VP9E_SET_AQ_MODE, extra_cfg.aq_mode);
- MAP(VP9E_SET_FRAME_PERIODIC_BOOST, extra_cfg.frame_periodic_boost);
- }
+static vpx_codec_err_t ctrl_set_sharpness(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.sharpness = CAST(VP8E_SET_SHARPNESS, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
- res = validate_config(ctx, &ctx->cfg, &extra_cfg);
+static vpx_codec_err_t ctrl_set_static_thresh(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.static_thresh = CAST(VP8E_SET_STATIC_THRESHOLD, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
- if (res == VPX_CODEC_OK) {
- ctx->extra_cfg = extra_cfg;
- set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg);
- vp9_change_config(ctx->cpi, &ctx->oxcf);
- }
+static vpx_codec_err_t ctrl_set_tile_columns(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.tile_columns = CAST(VP9E_SET_TILE_COLUMNS, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
- return res;
-#undef MAP
+static vpx_codec_err_t ctrl_set_tile_rows(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.tile_rows = CAST(VP9E_SET_TILE_ROWS, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
+static vpx_codec_err_t ctrl_set_arnr_max_frames(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.arnr_max_frames = CAST(VP8E_SET_ARNR_MAXFRAMES, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
+static vpx_codec_err_t ctrl_set_arnr_strength(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.arnr_strength = CAST(VP8E_SET_ARNR_STRENGTH, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
+static vpx_codec_err_t ctrl_set_arnr_type(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.arnr_type = CAST(VP8E_SET_ARNR_TYPE, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
+static vpx_codec_err_t ctrl_set_tuning(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.tuning = CAST(VP8E_SET_TUNING, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
+static vpx_codec_err_t ctrl_set_cq_level(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.cq_level = CAST(VP8E_SET_CQ_LEVEL, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
+static vpx_codec_err_t ctrl_set_rc_max_intra_bitrate_pct(
+ vpx_codec_alg_priv_t *ctx, va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.rc_max_intra_bitrate_pct =
+ CAST(VP8E_SET_MAX_INTRA_BITRATE_PCT, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
+static vpx_codec_err_t ctrl_set_lossless(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.lossless = CAST(VP9E_SET_LOSSLESS, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
+static vpx_codec_err_t ctrl_set_frame_parallel_decoding_mode(
+ vpx_codec_alg_priv_t *ctx, va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.frame_parallel_decoding_mode =
+ CAST(VP9E_SET_FRAME_PARALLEL_DECODING, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
+static vpx_codec_err_t ctrl_set_aq_mode(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.aq_mode = CAST(VP9E_SET_AQ_MODE, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
+static vpx_codec_err_t ctrl_set_frame_periodic_boost(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.frame_periodic_boost = CAST(VP9E_SET_FRAME_PERIODIC_BOOST, args);
+ return update_extra_cfg(ctx, &extra_cfg);
}
static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx,
@@ -887,9 +983,8 @@ static const vpx_codec_cx_pkt_t *encoder_get_cxdata(vpx_codec_alg_priv_t *ctx,
}
static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx,
- int ctrl_id, va_list args) {
+ va_list args) {
vpx_ref_frame_t *const frame = va_arg(args, vpx_ref_frame_t *);
- (void)ctrl_id;
if (frame != NULL) {
YV12_BUFFER_CONFIG sd;
@@ -904,9 +999,8 @@ static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx,
}
static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx,
- int ctrl_id, va_list args) {
+ va_list args) {
vpx_ref_frame_t *const frame = va_arg(args, vpx_ref_frame_t *);
- (void)ctrl_id;
if (frame != NULL) {
YV12_BUFFER_CONFIG sd;
@@ -921,9 +1015,8 @@ static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx,
}
static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx,
- int ctrl_id, va_list args) {
+ va_list args) {
vp9_ref_frame_t *const frame = va_arg(args, vp9_ref_frame_t *);
- (void)ctrl_id;
if (frame != NULL) {
YV12_BUFFER_CONFIG *fb;
@@ -937,11 +1030,9 @@ static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx,
}
static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t *ctx,
- int ctr_id, va_list args) {
+ va_list args) {
#if CONFIG_VP9_POSTPROC
vp8_postproc_cfg_t *config = va_arg(args, vp8_postproc_cfg_t *);
- (void)ctr_id;
-
if (config != NULL) {
ctx->preview_ppcfg = *config;
return VPX_CODEC_OK;
@@ -950,7 +1041,6 @@ static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t *ctx,
}
#else
(void)ctx;
- (void)ctr_id;
(void)args;
return VPX_CODEC_INCAPABLE;
#endif
@@ -977,36 +1067,32 @@ static vpx_image_t *encoder_get_preview(vpx_codec_alg_priv_t *ctx) {
}
static vpx_codec_err_t ctrl_update_entropy(vpx_codec_alg_priv_t *ctx,
- int ctrl_id, va_list args) {
+ va_list args) {
const int update = va_arg(args, int);
- (void)ctrl_id;
vp9_update_entropy(ctx->cpi, update);
return VPX_CODEC_OK;
}
static vpx_codec_err_t ctrl_update_reference(vpx_codec_alg_priv_t *ctx,
- int ctrl_id, va_list args) {
+ va_list args) {
const int ref_frame_flags = va_arg(args, int);
- (void)ctrl_id;
vp9_update_reference(ctx->cpi, ref_frame_flags);
return VPX_CODEC_OK;
}
static vpx_codec_err_t ctrl_use_reference(vpx_codec_alg_priv_t *ctx,
- int ctrl_id, va_list args) {
+ va_list args) {
const int reference_flag = va_arg(args, int);
- (void)ctrl_id;
vp9_use_as_reference(ctx->cpi, reference_flag);
return VPX_CODEC_OK;
}
static vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx,
- int ctrl_id, va_list args) {
+ va_list args) {
(void)ctx;
- (void)ctrl_id;
(void)args;
// TODO(yaowu): Need to re-implement and test for VP9.
@@ -1015,9 +1101,8 @@ static vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx,
static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx,
- int ctrl_id, va_list args) {
+ va_list args) {
vpx_active_map_t *const map = va_arg(args, vpx_active_map_t *);
- (void)ctrl_id;
if (map) {
if (!vp9_set_active_map(ctx->cpi, map->active_map,
@@ -1031,9 +1116,8 @@ static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx,
}
static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx,
- int ctrl_id, va_list args) {
+ va_list args) {
vpx_scaling_mode_t *const mode = va_arg(args, vpx_scaling_mode_t *);
- (void)ctrl_id;
if (mode) {
const int res = vp9_set_internal_size(ctx->cpi,
@@ -1045,11 +1129,9 @@ static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx,
}
}
-static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, int ctrl_id,
- va_list args) {
+static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, va_list args) {
int data = va_arg(args, int);
const vpx_codec_enc_cfg_t *cfg = &ctx->cfg;
- (void)ctrl_id;
vp9_set_svc(ctx->cpi, data);
// CBR or two pass mode for SVC with both temporal and spatial layers
@@ -1066,11 +1148,10 @@ static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, int ctrl_id,
}
static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx,
- int ctrl_id, va_list args) {
+ va_list args) {
vpx_svc_layer_id_t *const data = va_arg(args, vpx_svc_layer_id_t *);
VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi;
SVC *const svc = &cpi->svc;
- (void)ctrl_id;
svc->spatial_layer_id = data->spatial_layer_id;
svc->temporal_layer_id = data->temporal_layer_id;
@@ -1087,10 +1168,9 @@ static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx,
}
static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx,
- int ctrl_id, va_list args) {
+ va_list args) {
VP9_COMP *const cpi = ctx->cpi;
vpx_svc_parameters_t *const params = va_arg(args, vpx_svc_parameters_t *);
- (void)ctrl_id;
if (params == NULL)
return VPX_CODEC_INVALID_PARAM;
@@ -1126,30 +1206,30 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{VP8E_SET_ROI_MAP, ctrl_set_roi_map},
{VP8E_SET_ACTIVEMAP, ctrl_set_active_map},
{VP8E_SET_SCALEMODE, ctrl_set_scale_mode},
- {VP8E_SET_CPUUSED, ctrl_set_param},
- {VP8E_SET_NOISE_SENSITIVITY, ctrl_set_param},
- {VP8E_SET_ENABLEAUTOALTREF, ctrl_set_param},
- {VP8E_SET_SHARPNESS, ctrl_set_param},
- {VP8E_SET_STATIC_THRESHOLD, ctrl_set_param},
- {VP9E_SET_TILE_COLUMNS, ctrl_set_param},
- {VP9E_SET_TILE_ROWS, ctrl_set_param},
- {VP8E_SET_ARNR_MAXFRAMES, ctrl_set_param},
- {VP8E_SET_ARNR_STRENGTH, ctrl_set_param},
- {VP8E_SET_ARNR_TYPE, ctrl_set_param},
- {VP8E_SET_TUNING, ctrl_set_param},
- {VP8E_SET_CQ_LEVEL, ctrl_set_param},
- {VP8E_SET_MAX_INTRA_BITRATE_PCT, ctrl_set_param},
- {VP9E_SET_LOSSLESS, ctrl_set_param},
- {VP9E_SET_FRAME_PARALLEL_DECODING, ctrl_set_param},
- {VP9E_SET_AQ_MODE, ctrl_set_param},
- {VP9E_SET_FRAME_PERIODIC_BOOST, ctrl_set_param},
+ {VP8E_SET_CPUUSED, ctrl_set_cpuused},
+ {VP8E_SET_NOISE_SENSITIVITY, ctrl_set_noise_sensitivity},
+ {VP8E_SET_ENABLEAUTOALTREF, ctrl_set_enable_auto_alt_ref},
+ {VP8E_SET_SHARPNESS, ctrl_set_sharpness},
+ {VP8E_SET_STATIC_THRESHOLD, ctrl_set_static_thresh},
+ {VP9E_SET_TILE_COLUMNS, ctrl_set_tile_columns},
+ {VP9E_SET_TILE_ROWS, ctrl_set_tile_rows},
+ {VP8E_SET_ARNR_MAXFRAMES, ctrl_set_arnr_max_frames},
+ {VP8E_SET_ARNR_STRENGTH, ctrl_set_arnr_strength},
+ {VP8E_SET_ARNR_TYPE, ctrl_set_arnr_type},
+ {VP8E_SET_TUNING, ctrl_set_tuning},
+ {VP8E_SET_CQ_LEVEL, ctrl_set_cq_level},
+ {VP8E_SET_MAX_INTRA_BITRATE_PCT, ctrl_set_rc_max_intra_bitrate_pct},
+ {VP9E_SET_LOSSLESS, ctrl_set_lossless},
+ {VP9E_SET_FRAME_PARALLEL_DECODING, ctrl_set_frame_parallel_decoding_mode},
+ {VP9E_SET_AQ_MODE, ctrl_set_aq_mode},
+ {VP9E_SET_FRAME_PERIODIC_BOOST, ctrl_set_frame_periodic_boost},
{VP9E_SET_SVC, ctrl_set_svc},
{VP9E_SET_SVC_PARAMETERS, ctrl_set_svc_parameters},
{VP9E_SET_SVC_LAYER_ID, ctrl_set_svc_layer_id},
// Getters
- {VP8E_GET_LAST_QUANTIZER, ctrl_get_param},
- {VP8E_GET_LAST_QUANTIZER_64, ctrl_get_param},
+ {VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer},
+ {VP8E_GET_LAST_QUANTIZER_64, ctrl_get_quantizer64},
{VP9_GET_REFERENCE, ctrl_get_reference},
{ -1, NULL},
@@ -1182,7 +1262,7 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
VPX_VBR, // rc_end_usage
#if VPX_ENCODER_ABI_VERSION > (1 + VPX_CODEC_ABI_VERSION)
- {0}, // rc_twopass_stats_in
+ {NULL, 0}, // rc_twopass_stats_in
#endif
256, // rc_target_bandwidth
0, // rc_min_quantizer
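
A note on the control-map hunk above: the catch-all ctrl_set_param/ctrl_get_param dispatchers are replaced by one dedicated handler per control ID, and the now-redundant ctrl_id parameter is dropped from every handler, since the map entry that routes to the function already identifies the control. A minimal self-contained sketch of the resulting pattern (the typedef and struct names below are stand-ins modeled on, not copied from, libvpx's internal headers):

#include <stdarg.h>

typedef struct vpx_codec_alg_priv vpx_codec_alg_priv_t;
typedef int vpx_codec_err_t;                /* stands in for the real enum */
enum { VPX_CODEC_OK = 0 };

/* New-style handler: no ctrl_id argument. Each control ID gets its own
 * function, so the ID is implied by the map entry that points here. */
typedef vpx_codec_err_t (*ctrl_fn_t)(vpx_codec_alg_priv_t *ctx, va_list args);

static vpx_codec_err_t ctrl_set_example(vpx_codec_alg_priv_t *ctx,
                                        va_list args) {
  const int value = va_arg(args, int);      /* this control's payload */
  (void)ctx;
  (void)value;                              /* a real handler applies it */
  return VPX_CODEC_OK;
}

typedef struct { int ctrl_id; ctrl_fn_t fn; } ctrl_fn_map_t;

static const ctrl_fn_map_t example_ctrl_maps[] = {
  { 12345, ctrl_set_example },              /* hypothetical control ID */
  { -1, NULL },
};
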
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 328b98fee..48110b414 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -38,7 +38,6 @@ struct vpx_codec_alg_priv {
vpx_decrypt_cb decrypt_cb;
void *decrypt_state;
vpx_image_t img;
- int img_avail;
int invert_tile_order;
// External frame buffer info to save for VP9 common.
@@ -48,10 +47,12 @@ struct vpx_codec_alg_priv {
};
static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx,
- vpx_codec_priv_enc_mr_cfg_t *data) {
+ vpx_codec_priv_enc_mr_cfg_t *data) {
// This function only allocates space for the vpx_codec_alg_priv_t
// structure. More memory may be required at the time the stream
// information becomes known.
+ (void)data;
+
if (!ctx->priv) {
vpx_codec_alg_priv_t *alg_priv = vpx_memalign(32, sizeof(*alg_priv));
if (alg_priv == NULL)
@@ -243,14 +244,11 @@ static void init_decoder(vpx_codec_alg_priv_t *ctx) {
static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
const uint8_t **data, unsigned int data_sz,
void *user_priv, int64_t deadline) {
- YV12_BUFFER_CONFIG sd = { 0 };
vp9_ppflags_t flags = {0};
VP9_COMMON *cm = NULL;
(void)deadline;
- ctx->img_avail = 0;
-
// Determine the stream parameters. Note that we rely on peek_si to
// validate that we have a buffer that does not wrap around the top
// of the heap.
@@ -260,6 +258,9 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
ctx->decrypt_state);
if (res != VPX_CODEC_OK)
return res;
+
+ if (!ctx->si.is_kf)
+ return VPX_CODEC_ERROR;
}
// Initialize the decoder instance on the first frame
@@ -282,13 +283,6 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
set_ppflags(ctx, &flags);
- if (vp9_get_raw_frame(ctx->pbi, &sd, &flags))
- return update_error_state(ctx, &cm->error);
-
- yuvconfig2image(&ctx->img, &sd, user_priv);
- ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv;
- ctx->img_avail = 1;
-
return VPX_CODEC_OK;
}
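
The guard added to decode_one above makes the decoder fail fast when the stream parameters are not yet known and the first parsed frame is not a keyframe. From the caller's side this simply surfaces as an error from the first decode call; a small sketch against the public decoder API (the buffer arguments are placeholders):

#include "vpx/vpx_decoder.h"

/* buf/buf_sz are assumed to hold one compressed VP9 frame. */
static vpx_codec_err_t feed_frame(vpx_codec_ctx_t *codec,
                                  const uint8_t *buf, unsigned int buf_sz) {
  /* With the guard above, a stream that does not start with a keyframe
   * now fails here with VPX_CODEC_ERROR instead of failing later. */
  return vpx_codec_decode(codec, buf, buf_sz, NULL, 0);
}
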
@@ -417,15 +411,20 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx,
vpx_codec_iter_t *iter) {
vpx_image_t *img = NULL;
- if (ctx->img_avail) {
- // iter acts as a flip flop, so an image is only returned on the first
- // call to get_frame.
- if (!(*iter)) {
+ // iter acts as a flip-flop, so an image is only returned on the first
+ // call to get_frame.
+ if (*iter == NULL && ctx->pbi != NULL) {
+ YV12_BUFFER_CONFIG sd;
+ vp9_ppflags_t flags = {0, 0, 0};
+
+ if (vp9_get_raw_frame(ctx->pbi, &sd, &flags) == 0) {
+ VP9_COMMON *cm = &ctx->pbi->common;
+ yuvconfig2image(&ctx->img, &sd, NULL);
+ ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv;
img = &ctx->img;
*iter = img;
}
}
- ctx->img_avail = 0;
return img;
}
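
With img_avail removed, decode_one no longer materializes the output image; decoder_get_frame now calls vp9_get_raw_frame itself and relies solely on the iterator as the return-once latch. The public usage pattern is unchanged; a sketch, assuming codec is an initialized decoder context that has just decoded a frame:

#include "vpx/vpx_decoder.h"

static void drain_frames(vpx_codec_ctx_t *codec) {
  vpx_codec_iter_t iter = NULL;   /* NULL = first get_frame after decode */
  vpx_image_t *img;
  /* VP9 yields at most one image per decode call; the iterator flips to
   * non-NULL after the first hit, so the loop body runs at most once. */
  while ((img = vpx_codec_get_frame(codec, &iter)) != NULL) {
    /* consume img->planes[] / img->stride[] here */
  }
}
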
@@ -449,7 +448,7 @@ static vpx_codec_err_t decoder_set_fb_fn(
}
static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx,
- int ctr_id, va_list args) {
+ va_list args) {
vpx_ref_frame_t *const data = va_arg(args, vpx_ref_frame_t *);
if (data) {
@@ -465,7 +464,7 @@ static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx,
}
static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx,
- int ctr_id, va_list args) {
+ va_list args) {
vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *);
if (data) {
@@ -482,7 +481,7 @@ static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx,
}
static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx,
- int ctr_id, va_list args) {
+ va_list args) {
vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *);
if (data) {
@@ -497,7 +496,7 @@ static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx,
}
static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx,
- int ctr_id, va_list args) {
+ va_list args) {
#if CONFIG_VP9_POSTPROC
vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *);
@@ -509,17 +508,21 @@ static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx,
return VPX_CODEC_INVALID_PARAM;
}
#else
+ (void)ctx;
+ (void)args;
return VPX_CODEC_INCAPABLE;
#endif
}
static vpx_codec_err_t ctrl_set_dbg_options(vpx_codec_alg_priv_t *ctx,
- int ctrl_id, va_list args) {
+ va_list args) {
+ (void)ctx;
+ (void)args;
return VPX_CODEC_INCAPABLE;
}
static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx,
- int ctrl_id, va_list args) {
+ va_list args) {
int *const update_info = va_arg(args, int *);
if (update_info) {
@@ -535,7 +538,7 @@ static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx,
static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx,
- int ctrl_id, va_list args) {
+ va_list args) {
int *corrupted = va_arg(args, int *);
if (corrupted) {
@@ -550,7 +553,7 @@ static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx,
}
static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx,
- int ctrl_id, va_list args) {
+ va_list args) {
int *const display_size = va_arg(args, int *);
if (display_size) {
@@ -568,13 +571,12 @@ static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx,
}
static vpx_codec_err_t ctrl_set_invert_tile_order(vpx_codec_alg_priv_t *ctx,
- int ctr_id, va_list args) {
+ va_list args) {
ctx->invert_tile_order = va_arg(args, int);
return VPX_CODEC_OK;
}
static vpx_codec_err_t ctrl_set_decryptor(vpx_codec_alg_priv_t *ctx,
- int ctrl_id,
va_list args) {
vpx_decrypt_init *init = va_arg(args, vpx_decrypt_init *);
ctx->decrypt_cb = init ? init->decrypt_cb : NULL;
@@ -625,11 +627,12 @@ CODEC_INTERFACE(vpx_codec_vp9_dx) = {
decoder_set_fb_fn, // vpx_codec_set_fb_fn_t
},
{ // NOLINT
- NOT_IMPLEMENTED,
- NOT_IMPLEMENTED,
- NOT_IMPLEMENTED,
- NOT_IMPLEMENTED,
- NOT_IMPLEMENTED,
- NOT_IMPLEMENTED
+ NOT_IMPLEMENTED, // vpx_codec_enc_cfg_map_t
+ NOT_IMPLEMENTED, // vpx_codec_encode_fn_t
+ NOT_IMPLEMENTED, // vpx_codec_get_cx_data_fn_t
+ NOT_IMPLEMENTED, // vpx_codec_enc_config_set_fn_t
+ NOT_IMPLEMENTED, // vpx_codec_get_global_headers_fn_t
+ NOT_IMPLEMENTED, // vpx_codec_get_preview_frame_fn_t
+ NOT_IMPLEMENTED // vpx_codec_enc_mr_get_mem_loc_fn_t
}
};
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 6e5c521ab..a44ffc1ca 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -112,12 +112,12 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance.asm
endif
ifeq ($(ARCH_X86_64),yes)
-VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3.asm
-VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.asm
+VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3_x86_64.asm
+VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3_x86_64.asm
endif
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_sad_ssse3.asm
VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_sad_sse4.asm
-VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt.asm
+VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt_x86_64.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct32x32_sse2.c