diff options
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/common/vp9_rtcd_defs.pl | 60 | ||||
-rw-r--r-- | vp9/common/x86/vp9_idct_intrin_ssse3.c | 5 | ||||
-rw-r--r-- | vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c | 32 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 28 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 189 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.h | 14 | ||||
-rw-r--r-- | vp9/encoder/vp9_mbgraph.c | 11 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 64 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 5 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 19 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.h | 7 | ||||
-rw-r--r-- | vp9/encoder/vp9_sad.c | 12 | ||||
-rw-r--r-- | vp9/encoder/vp9_svc_layercontext.c | 11 | ||||
-rw-r--r-- | vp9/encoder/vp9_variance.c | 13 | ||||
-rw-r--r-- | vp9/encoder/vp9_variance.h | 6 | ||||
-rw-r--r-- | vp9/vp9_cx_iface.c | 14 |
17 files changed, 263 insertions, 230 deletions
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index c348424da..c300cde62 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -422,6 +422,10 @@ specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc"; add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc"; +add_proto qw/void vp9_get_sse_sum_16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; +specialize qw/vp9_get_sse_sum_16x16 sse2/; +$vp9_get_sse_sum_16x16_sse2=vp9_get16x16var_sse2; + add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc"; @@ -431,6 +435,10 @@ specialize qw/vp9_variance8x16 mmx/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_variance8x8 mmx/, "$sse2_x86inc"; +add_proto qw/void vp9_get_sse_sum_8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; +specialize qw/vp9_get_sse_sum_8x8 sse2/; +$vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2; + add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_variance8x4/, "$sse2_x86inc"; @@ -520,82 +528,82 @@ specialize qw/vp9_sub_pixel_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; -add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad64x64/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad32x64/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad64x32/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad32x16/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad16x32/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad32x32/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad16x16 mmx/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad16x8 mmx/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad8x16 mmx/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad8x8 mmx/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad8x4/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad4x8/, "$sse_x86inc"; -add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad4x4 mmx/, "$sse_x86inc"; -add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad64x64_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad32x64_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad64x32_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad32x16_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad32x32_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad16x8_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad8x16_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad8x8_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad8x4_avg/, "$sse2_x86inc"; -add_proto qw/unsigned int vp9_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad4x8_avg/, "$sse_x86inc"; -add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad4x4_avg/, "$sse_x86inc"; add_proto qw/void vp9_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; diff --git a/vp9/common/x86/vp9_idct_intrin_ssse3.c b/vp9/common/x86/vp9_idct_intrin_ssse3.c index 9a6980e0f..e5d3cb5f4 100644 --- a/vp9/common/x86/vp9_idct_intrin_ssse3.c +++ b/vp9/common/x86/vp9_idct_intrin_ssse3.c @@ -8,6 +8,11 @@ * be found in the AUTHORS file in the root of the source tree. */ +#if defined(_MSC_VER) && _MSC_VER <= 1500 +// Need to include math.h before calling tmmintrin.h/intrin.h +// in certain versions of MSVS. +#include <math.h> +#endif #include <tmmintrin.h> // SSSE3 #include "vp9/common/x86/vp9_idct_intrin_sse2.h" diff --git a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c index b84db970e..d109e136a 100644 --- a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c +++ b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c @@ -111,21 +111,21 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, // filter the source buffer srcRegFilt32b1_1= _mm256_shuffle_epi8(srcReg32b1, filt1Reg); - srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt2Reg); + srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt4Reg); // multiply 2 adjacent elements with the filter and add the result srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters); - srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, secondFilters); + srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters); // add and saturate the results together srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, srcRegFilt32b2); // filter the source buffer - srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b1, filt4Reg); + srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b1, filt2Reg); srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt3Reg); // multiply 2 adjacent elements with the filter and add the result - srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, forthFilters); + srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters); srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters); // add and saturate the results together @@ -146,21 +146,21 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, // filter the source buffer srcRegFilt32b2_1 = _mm256_shuffle_epi8(srcReg32b2, filt1Reg); - srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt2Reg); + srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt4Reg); // multiply 2 adjacent elements with the filter and add the result srcRegFilt32b2_1 = _mm256_maddubs_epi16(srcRegFilt32b2_1, firstFilters); - srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, secondFilters); + srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters); // add and saturate the results together srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, srcRegFilt32b2); // filter the source buffer - srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b2, filt4Reg); + srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b2, filt2Reg); srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b2, filt3Reg); // multiply 2 adjacent elements with the filter and add the result - srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, forthFilters); + srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters); srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters); // add and saturate the results together @@ -208,26 +208,26 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt1Reg)); srcRegFilt2 = _mm_shuffle_epi8(srcReg1, - _mm256_castsi256_si128(filt2Reg)); + _mm256_castsi256_si128(filt4Reg)); // multiply 2 adjacent elements with the filter and add the result srcRegFilt1_1 = _mm_maddubs_epi16(srcRegFilt1_1, _mm256_castsi256_si128(firstFilters)); srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, - _mm256_castsi256_si128(secondFilters)); + _mm256_castsi256_si128(forthFilters)); // add and saturate the results together srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2); // filter the source buffer srcRegFilt3= _mm_shuffle_epi8(srcReg1, - _mm256_castsi256_si128(filt4Reg)); + _mm256_castsi256_si128(filt2Reg)); srcRegFilt2= _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt3Reg)); // multiply 2 adjacent elements with the filter and add the result srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, - _mm256_castsi256_si128(forthFilters)); + _mm256_castsi256_si128(secondFilters)); srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, _mm256_castsi256_si128(thirdFilters)); @@ -247,26 +247,26 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, srcRegFilt2_1 = _mm_shuffle_epi8(srcReg2, _mm256_castsi256_si128(filt1Reg)); srcRegFilt2 = _mm_shuffle_epi8(srcReg2, - _mm256_castsi256_si128(filt2Reg)); + _mm256_castsi256_si128(filt4Reg)); // multiply 2 adjacent elements with the filter and add the result srcRegFilt2_1 = _mm_maddubs_epi16(srcRegFilt2_1, _mm256_castsi256_si128(firstFilters)); srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, - _mm256_castsi256_si128(secondFilters)); + _mm256_castsi256_si128(forthFilters)); // add and saturate the results together srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, srcRegFilt2); // filter the source buffer srcRegFilt3 = _mm_shuffle_epi8(srcReg2, - _mm256_castsi256_si128(filt4Reg)); + _mm256_castsi256_si128(filt2Reg)); srcRegFilt2 = _mm_shuffle_epi8(srcReg2, _mm256_castsi256_si128(filt3Reg)); // multiply 2 adjacent elements with the filter and add the result srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, - _mm256_castsi256_si128(forthFilters)); + _mm256_castsi256_si128(secondFilters)); srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, _mm256_castsi256_si128(thirdFilters)); diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index ef0871873..6cbc38d79 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -76,18 +76,6 @@ typedef struct { unsigned int var; } diff; -static void get_sse_sum_8x8(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum) { - variance(src, src_stride, ref, ref_stride, 8, 8, sse, sum); -} - -static void get_sse_sum_16x16(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum) { - variance(src, src_stride, ref, ref_stride, 16, 16, sse, sum); -} - static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs) { @@ -490,8 +478,8 @@ static void choose_partitioning(VP9_COMP *cpi, unsigned int sse = 0; int sum = 0; if (x_idx < pixels_wide && y_idx < pixels_high) - get_sse_sum_8x8(s + y_idx * sp + x_idx, sp, - d + y_idx * dp + x_idx, dp, &sse, &sum); + vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp, + d + y_idx * dp + x_idx, dp, &sse, &sum); fill_variance(sse, sum, 64, &vst->split[k].part_variances.none); } } @@ -1226,9 +1214,9 @@ static void set_source_var_based_partition(VP9_COMP *cpi, int b_offset = b_mi_row * MI_SIZE * src_stride + b_mi_col * MI_SIZE; - get_sse_sum_16x16(src + b_offset, src_stride, - pre_src + b_offset, pre_stride, - &d16[j].sse, &d16[j].sum); + vp9_get_sse_sum_16x16(src + b_offset, src_stride, + pre_src + b_offset, pre_stride, + &d16[j].sse, &d16[j].sum); d16[j].var = d16[j].sse - (((uint32_t)d16[j].sum * d16[j].sum) >> 8); @@ -1303,14 +1291,14 @@ static int is_background(VP9_COMP *cpi, const TileInfo *const tile, if (row8x8_remaining >= MI_BLOCK_SIZE && col8x8_remaining >= MI_BLOCK_SIZE) { this_sad = cpi->fn_ptr[BLOCK_64X64].sdf(src, src_stride, - pre, pre_stride, 0x7fffffff); + pre, pre_stride); threshold = (1 << 12); } else { int r, c; for (r = 0; r < row8x8_remaining; r += 2) for (c = 0; c < col8x8_remaining; c += 2) - this_sad += cpi->fn_ptr[BLOCK_16X16].sdf(src, src_stride, pre, - pre_stride, 0x7fffffff); + this_sad += cpi->fn_ptr[BLOCK_16X16].sdf(src, src_stride, + pre, pre_stride); threshold = (row8x8_remaining * col8x8_remaining) << 6; } diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 1f68f03c4..03bf4b0b6 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -632,9 +632,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { cpi->oxcf.maximum_buffer_size = 240000; } - // Convert target bandwidth from Kbit/s to Bit/s - cpi->oxcf.target_bandwidth *= 1000; - cpi->oxcf.starting_buffer_level = vp9_rescale(cpi->oxcf.starting_buffer_level, cpi->oxcf.target_bandwidth, 1000); diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 247e5eeb5..0b9a5ac7a 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -64,8 +64,7 @@ #define MIN_GF_INTERVAL 4 #endif - -// #define LONG_TERM_VBR_CORRECTION +#define LONG_TERM_VBR_CORRECTION static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) { YV12_BUFFER_CONFIG temp = *a; @@ -1455,6 +1454,66 @@ static int calculate_boost_bits(int frame_count, return MAX((int)(((int64_t)boost * total_group_bits) / allocation_chunks), 0); } +static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, + double group_error, int gf_arf_bits) { + RATE_CONTROL *const rc = &cpi->rc; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; + TWO_PASS *twopass = &cpi->twopass; + FIRSTPASS_STATS frame_stats; + int i; + int group_frame_index = 1; + int target_frame_size; + int key_frame; + const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf); + int64_t total_group_bits = gf_group_bits; + double modified_err = 0.0; + double err_fraction; + + key_frame = cpi->common.frame_type == KEY_FRAME || + vp9_is_upper_layer_key_frame(cpi); + + // For key frames the frame target rate is already set and it + // is also the golden frame. + // NOTE: We dont bother to check for the special case of ARF overlay + // frames here, as there is clamping code for this in the function + // vp9_rc_clamp_pframe_target_size(), which applies to one and two pass + // encodes. + if (!key_frame) { + twopass->gf_group_bit_allocation[0] = gf_arf_bits; + + // Step over the golden frame / overlay frame + if (EOF == input_stats(twopass, &frame_stats)) + return; + } + + // Store the bits to spend on the ARF if there is one. + if (rc->source_alt_ref_pending) { + twopass->gf_group_bit_allocation[group_frame_index++] = gf_arf_bits; + } + + // Deduct the boost bits for arf or gf if it is not a key frame. + if (rc->source_alt_ref_pending || !key_frame) + total_group_bits -= gf_arf_bits; + + // Allocate bits to the other frames in the group. + for (i = 0; i < rc->baseline_gf_interval - 1; ++i) { + if (EOF == input_stats(twopass, &frame_stats)) + break; + + modified_err = calculate_modified_err(twopass, oxcf, &frame_stats); + + if (group_error > 0) + err_fraction = modified_err / DOUBLE_DIVIDE_CHECK(group_error); + else + err_fraction = 0.0; + + target_frame_size = (int)((double)total_group_bits * err_fraction); + target_frame_size = clamp(target_frame_size, 0, + MIN(max_bits, (int)total_group_bits)); + + twopass->gf_group_bit_allocation[group_frame_index++] = target_frame_size; + } +} // Analyse and define a gf/arf group. static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { @@ -1487,11 +1546,21 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int b_boost = 0; int flash_detected; int active_max_gf_interval; + int64_t gf_group_bits; + double gf_group_error_left; + int gf_arf_bits; + + // Reset the GF group data structures unless this is a key + // frame in which case it will already have been done. + if (cpi->common.frame_type != KEY_FRAME) { + twopass->gf_group_index = 0; + vp9_zero(twopass->gf_group_bit_allocation); + } vp9_clear_system_state(); vp9_zero(next_frame); - twopass->gf_group_bits = 0; + gf_group_bits = 0; // Load stats for the current frame. mod_frame_err = calculate_modified_err(twopass, oxcf, this_frame); @@ -1697,7 +1766,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { reset_fpf_position(twopass, start_pos); // Calculate the bits to be allocated to the gf/arf group as a whole - twopass->gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err); + gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err); // Calculate the extra bits to be used for boosted frame(s) { @@ -1708,19 +1777,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { boost = clamp(boost, 125, (rc->baseline_gf_interval + 1) * 200); // Calculate the extra bits to be used for boosted frame(s) - twopass->gf_bits = calculate_boost_bits(rc->baseline_gf_interval, - boost, twopass->gf_group_bits); - - - // For key frames the frame target rate is set already. - // NOTE: We dont bother to check for the special case of ARF overlay - // frames here, as there is clamping code for this in the function - // vp9_rc_clamp_pframe_target_size(), which applies to one and two pass - // encodes. - if (cpi->common.frame_type != KEY_FRAME && - !vp9_is_upper_layer_key_frame(cpi)) { - vp9_rc_set_frame_target(cpi, twopass->gf_bits); - } + gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval, + boost, gf_group_bits); } // Adjust KF group bits and error remaining. @@ -1733,14 +1791,19 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // For normal GFs remove the score for the GF itself unless this is // also a key frame in which case it has already been accounted for. if (rc->source_alt_ref_pending) { - twopass->gf_group_error_left = (int64_t)(gf_group_err - mod_frame_err); + gf_group_error_left = gf_group_err - mod_frame_err; } else if (cpi->common.frame_type != KEY_FRAME) { - twopass->gf_group_error_left = (int64_t)(gf_group_err - - gf_first_frame_err); + gf_group_error_left = gf_group_err - gf_first_frame_err; } else { - twopass->gf_group_error_left = (int64_t)gf_group_err; + gf_group_error_left = gf_group_err; } + // Allocate bits to each of the frames in the GF group. + allocate_gf_group_bits(cpi, gf_group_bits, gf_group_error_left, gf_arf_bits); + + // Reset the file position. + reset_fpf_position(twopass, start_pos); + // Calculate a section intra ratio used in setting max loop filter. if (cpi->common.frame_type != KEY_FRAME) { twopass->section_intra_rating = @@ -1749,29 +1812,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } } -// Allocate bits to a normal frame that is neither a gf an arf or a key frame. -static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { - TWO_PASS *const twopass = &cpi->twopass; - const VP9EncoderConfig *const oxcf = &cpi->oxcf; - // For a single frame. - const int max_bits = frame_max_bits(&cpi->rc, oxcf); - // Calculate modified prediction error used in bit allocation. - const double modified_err = calculate_modified_err(twopass, oxcf, this_frame); - // What portion of the remaining GF group error is used by this frame. - const double err_fraction = twopass->gf_group_error_left > 0 ? - modified_err / twopass->gf_group_error_left : 0.0; - // How many of those bits available for allocation should we give it? Clip - // target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at the top end. - const int target_frame_size = - clamp((int)(twopass->gf_group_bits * err_fraction), - 0, MIN(max_bits, (int)twopass->gf_group_bits)); - // Adjust error and bits remaining. - twopass->gf_group_error_left -= (int64_t)modified_err; - - // Per frame bit target for this frame. - vp9_rc_set_frame_target(cpi, target_frame_size); -} - static int test_candidate_kf(TWO_PASS *twopass, const FIRSTPASS_STATS *last_frame, const FIRSTPASS_STATS *this_frame, @@ -1858,6 +1898,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { const FIRSTPASS_STATS *const start_position = twopass->stats_in; FIRSTPASS_STATS next_frame; FIRSTPASS_STATS last_frame; + int kf_bits = 0; double decay_accumulator = 1.0; double zero_motion_accumulator = 1.0; double boost_score = 0.0; @@ -1869,6 +1910,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { cpi->common.frame_type = KEY_FRAME; + // Reset the GF group data structures. + twopass->gf_group_index = 0; + vp9_zero(twopass->gf_group_bit_allocation); + // Is this a forced key frame by interval. rc->this_key_frame_forced = rc->next_key_frame_forced; @@ -2052,13 +2097,13 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (rc->kf_boost < MIN_KF_BOOST) rc->kf_boost = MIN_KF_BOOST; - twopass->kf_bits = calculate_boost_bits((rc->frames_to_key - 1), - rc->kf_boost, twopass->kf_group_bits); + kf_bits = calculate_boost_bits((rc->frames_to_key - 1), + rc->kf_boost, twopass->kf_group_bits); - twopass->kf_group_bits -= twopass->kf_bits; + twopass->kf_group_bits -= kf_bits; - // Per frame bit target for this frame. - vp9_rc_set_frame_target(cpi, twopass->kf_bits); + // Save the bits to spend on the key frame. + twopass->gf_group_bit_allocation[0] = kf_bits; // Note the total error score of the kf group minus the key frame itself. twopass->kf_group_error_left = (int)(kf_group_err - kf_mod_err); @@ -2107,7 +2152,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { FIRSTPASS_STATS this_frame; FIRSTPASS_STATS this_frame_copy; - int target; + int target_rate; LAYER_CONTEXT *lc = NULL; const int is_spatial_svc = (cpi->use_svc && cpi->svc.number_temporal_layers == 1); @@ -2123,16 +2168,23 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { if (!twopass->stats_in) return; + // Increment the gf group index. + ++twopass->gf_group_index; + + // If this is an arf frame then we dont want to read the stats file or + // advance the input pointer as we already have what we need. if (cpi->refresh_alt_ref_frame) { - int modified_target = twopass->gf_bits; - rc->base_frame_target = twopass->gf_bits; - cm->frame_type = INTER_FRAME; + int target_rate; + target_rate = twopass->gf_group_bit_allocation[twopass->gf_group_index]; + target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate); + rc->base_frame_target = target_rate; #ifdef LONG_TERM_VBR_CORRECTION // Correction to rate target based on prior over or under shoot. if (cpi->oxcf.rc_mode == RC_MODE_VBR) - vbr_rate_correction(&modified_target, rc->vbr_bits_off_target); + vbr_rate_correction(&target_rate, rc->vbr_bits_off_target); #endif - vp9_rc_set_frame_target(cpi, modified_target); + vp9_rc_set_frame_target(cpi, target_rate); + cm->frame_type = INTER_FRAME; return; } @@ -2160,11 +2212,13 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { if (EOF == input_stats(twopass, &this_frame)) return; + // Local copy of the current frame's first pass stats. + this_frame_copy = this_frame; + // Keyframe and section processing. if (rc->frames_to_key == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY)) { // Define next KF group and assign bits to it. - this_frame_copy = this_frame; find_next_key_frame(cpi, &this_frame_copy); } else { cm->frame_type = INTER_FRAME; @@ -2183,11 +2237,8 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { } } - // Is this frame a GF / ARF? (Note: a key frame is always also a GF). + // Define a new GF/ARF group. (Should always enter here for key frames). if (rc->frames_till_gf_update_due == 0) { - // Define next gf group and assign bits to it. - this_frame_copy = this_frame; - #if CONFIG_MULTIPLE_ARF if (cpi->multi_arf_enabled) { define_fixed_arf_period(cpi); @@ -2210,11 +2261,6 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { rc->frames_till_gf_update_due = rc->baseline_gf_interval; cpi->refresh_golden_frame = 1; - } else { - // Otherwise this is an ordinary frame. - // Assign bits from those allocated to the GF group. - this_frame_copy = this_frame; - assign_std_frame_bits(cpi, &this_frame_copy); } { @@ -2225,18 +2271,19 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { } } + target_rate = twopass->gf_group_bit_allocation[twopass->gf_group_index]; if (cpi->common.frame_type == KEY_FRAME) - target = vp9_rc_clamp_iframe_target_size(cpi, rc->this_frame_target); + target_rate = vp9_rc_clamp_iframe_target_size(cpi, target_rate); else - target = vp9_rc_clamp_pframe_target_size(cpi, rc->this_frame_target); + target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate); - rc->base_frame_target = target; + rc->base_frame_target = target_rate; #ifdef LONG_TERM_VBR_CORRECTION // Correction to rate target based on prior over or under shoot. if (cpi->oxcf.rc_mode == RC_MODE_VBR) - vbr_rate_correction(&target, rc->vbr_bits_off_target); + vbr_rate_correction(&target_rate, rc->vbr_bits_off_target); #endif - vp9_rc_set_frame_target(cpi, target); + vp9_rc_set_frame_target(cpi, target_rate); // Update the total stats remaining structure. subtract_stats(&twopass->total_left_stats, &this_frame); @@ -2285,8 +2332,6 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { } else { #endif twopass->kf_group_bits -= bits_used; - twopass->gf_group_bits -= bits_used; - twopass->gf_group_bits = MAX(twopass->gf_group_bits, 0); } twopass->kf_group_bits = MAX(twopass->kf_group_bits, 0); } diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h index 1a61132c6..309638c1e 100644 --- a/vp9/encoder/vp9_firstpass.h +++ b/vp9/encoder/vp9_firstpass.h @@ -11,6 +11,8 @@ #ifndef VP9_ENCODER_VP9_FIRSTPASS_H_ #define VP9_ENCODER_VP9_FIRSTPASS_H_ +#include "vp9/encoder/vp9_lookahead.h" + #ifdef __cplusplus extern "C" { #endif @@ -54,27 +56,21 @@ typedef struct { double modified_error_left; double kf_intra_err_min; double gf_intra_err_min; - int kf_bits; - // Remaining error from uncoded frames in a gf group. Two pass use only - int64_t gf_group_error_left; // Projected total bits available for a key frame group of frames int64_t kf_group_bits; // Error score of frames still to be coded in kf group int64_t kf_group_error_left; - - // Projected Bits available for a group of frames including 1 GF or ARF - int64_t gf_group_bits; - // Bits for the golden frame or ARF - 2 pass only - int gf_bits; - int sr_update_lag; int kf_zeromotion_pct; int gf_zeromotion_pct; int active_worst_quality; + + int gf_group_index; + int gf_group_bit_allocation[MAX_LAG_BUFFERS * 2]; } TWO_PASS; struct VP9_COMP; diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index 041e583fd..842bc5b9d 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -72,8 +72,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, x->mv_row_max = tmp_row_max; return vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].dst.buf, xd->plane[0].dst.stride, - INT_MAX); + xd->plane[0].dst.buf, xd->plane[0].dst.stride); } static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv, @@ -86,8 +85,7 @@ static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv, // Try zero MV first // FIXME should really use something like near/nearest MV and/or MV prediction err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride, - INT_MAX); + xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); dst_mv->as_int = 0; // Test last reference frame using the previous best mv as the @@ -123,8 +121,7 @@ static int do_16x16_zerozero_search(VP9_COMP *cpi, int_mv *dst_mv) { // Try zero MV first // FIXME should really use something like near/nearest MV and/or MV prediction err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride, - INT_MAX); + xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); dst_mv->as_int = 0; @@ -147,7 +144,7 @@ static int find_best_16x16_intra(VP9_COMP *cpi, PREDICTION_MODE *pbest_mode) { xd->plane[0].dst.buf, xd->plane[0].dst.stride, 0, 0, 0); err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].dst.buf, xd->plane[0].dst.stride, best_err); + xd->plane[0].dst.buf, xd->plane[0].dst.stride); // find best if (err < best_err) { diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 4f7d6f17c..9d2b2a497 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -524,9 +524,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, // Work out the start point for the search bestsad = vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride, - 0x7fffffff) + mvsad_err_cost(x, ref_mv, &fcenter_mv, - sad_per_bit); + get_buf_from_mv(in_what, ref_mv), in_what->stride); // Search all possible scales upto the search param around the center point // pick the scale of the point that is best as the starting scale of @@ -542,7 +540,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, bc + candidates[t][i].col}; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } else { @@ -553,7 +551,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, continue; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } @@ -585,7 +583,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, bc + candidates[s][i].col}; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } else { @@ -596,7 +594,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, continue; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } @@ -623,7 +621,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, bc + candidates[s][next_chkpts_indices[i]].col}; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } else { @@ -634,7 +632,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, continue; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } @@ -661,7 +659,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, bc + neighbors[i].col}; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } else { @@ -672,7 +670,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, continue; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } @@ -894,8 +892,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, *best_mv = *ref_mv; *num00 = 11; best_sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride, - 0x7fffffff) + + get_buf_from_mv(in_what, ref_mv), in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); start_row = MAX(-range, x->mv_row_min - ref_mv->row); start_col = MAX(-range, x->mv_col_min - ref_mv->col); @@ -929,7 +926,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, for (i = 0; i < end_col - c; ++i) { const MV mv = {ref_mv->row + r, ref_mv->col + c + i}; unsigned int sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); + get_buf_from_mv(in_what, &mv), in_what->stride); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { @@ -975,7 +972,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, // Check the starting position best_sad = fn_ptr->sdf(what->buf, what->stride, - best_address, in_what->stride, 0x7fffffff) + + best_address, in_what->stride) + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); i = 1; @@ -986,8 +983,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, best_mv->col + ss[i].mv.col}; if (is_mv_in(x, &mv)) { int sad = fn_ptr->sdf(what->buf, what->stride, - best_address + ss[i].offset, in_what->stride, - best_sad); + best_address + ss[i].offset, in_what->stride); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { @@ -1012,7 +1008,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, if (is_mv_in(x, &this_mv)) { int sad = fn_ptr->sdf(what->buf, what->stride, best_address + ss[best_site].offset, - in_what->stride, best_sad); + in_what->stride); if (sad < best_sad) { sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { @@ -1077,7 +1073,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, best_address = in_what; // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); i = 1; @@ -1129,7 +1125,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, if (is_mv_in(x, &this_mv)) { const uint8_t *const check_here = ss[i].offset + best_address; unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, - in_what_stride, bestsad); + in_what_stride); if (thissad < bestsad) { thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); @@ -1154,7 +1150,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, if (is_mv_in(x, &this_mv)) { const uint8_t *const check_here = ss[best_site].offset + best_address; unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, - in_what_stride, bestsad); + in_what_stride); if (thissad < bestsad) { thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); @@ -1253,7 +1249,7 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; int best_sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + + get_buf_from_mv(in_what, ref_mv), in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); *best_mv = *ref_mv; @@ -1261,7 +1257,7 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, for (c = col_min; c < col_max; ++c) { const MV mv = {r, c}; const int sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) + + get_buf_from_mv(in_what, &mv), in_what->stride) + mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { best_sad = sad; @@ -1286,7 +1282,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + + get_buf_from_mv(in_what, ref_mv), in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); *best_mv = *ref_mv; @@ -1320,7 +1316,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, while (c < col_max) { unsigned int sad = fn_ptr->sdf(what->buf, what->stride, - check_here, in_what->stride, best_sad); + check_here, in_what->stride); if (sad < best_sad) { const MV mv = {r, c}; sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); @@ -1351,7 +1347,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + + get_buf_from_mv(in_what, ref_mv), in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); *best_mv = *ref_mv; @@ -1409,7 +1405,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, while (c < col_max) { unsigned int sad = fn_ptr->sdf(what->buf, what->stride, - check_here, in_what->stride, best_sad); + check_here, in_what->stride); if (sad < best_sad) { const MV mv = {r, c}; sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); @@ -1438,7 +1434,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), - in_what->stride, 0x7fffffff) + + in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); int i, j; @@ -1450,7 +1446,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, ref_mv->col + neighbors[j].col}; if (is_mv_in(x, &mv)) { unsigned int sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); + get_buf_from_mv(in_what, &mv), in_what->stride); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { @@ -1483,7 +1479,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address, - in_what->stride, 0x7fffffff) + + in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); int i, j; @@ -1524,7 +1520,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, if (is_mv_in(x, &mv)) { unsigned int sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), - in_what->stride, best_sad); + in_what->stride); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { @@ -1563,8 +1559,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride, - second_pred, 0x7fffffff) + + get_buf_from_mv(in_what, ref_mv), in_what->stride, second_pred) + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); int i, j; @@ -1577,8 +1572,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, if (is_mv_in(x, &mv)) { unsigned int sad = fn_ptr->sdaf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride, - second_pred, best_sad); + get_buf_from_mv(in_what, &mv), in_what->stride, second_pred); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 437b68078..11633a73d 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -284,9 +284,8 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, (1 << INTER_OFFSET(this_mode))) continue; - if (best_rd < ((int64_t)rd_threshes[mode_idx[this_mode]] * - rd_thresh_freq_fact[this_mode] >> 5) || - rd_threshes[mode_idx[this_mode]] == INT_MAX) + if (rd_less_than_thresh(best_rd, rd_threshes[mode_idx[this_mode]], + rd_thresh_freq_fact[this_mode])) continue; if (this_mode == NEWMV) { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 3a2b1be5d..56fcf0fa6 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -9,7 +9,6 @@ */ #include <assert.h> -#include <limits.h> #include <math.h> #include <stdio.h> @@ -2130,8 +2129,7 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, // Find sad for current vector. this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, - ref_y_ptr, ref_y_stride, - 0x7fffffff); + ref_y_ptr, ref_y_stride); // Note if it is the best so far. if (this_sad < best_sad) { @@ -2209,8 +2207,6 @@ static void estimate_ref_frame_costs(const VP9_COMMON *cm, static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index, - int_mv *ref_mv, - int_mv *second_ref_mv, int64_t comp_pred_diff[REFERENCE_MODES], const int64_t tx_size_diff[TX_MODES], int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) { @@ -2990,11 +2986,6 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, ctx->mic = *xd->mi[0]; } -static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, - int thresh_fact) { - return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX; -} - // Updating rd_thresh_freq_fact[] here means that the different // partition/block sizes are handled independently based on the best // choice for the current partition. It may well be better to keep a scaled @@ -3227,7 +3218,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Test best rd so far against threshold for trying this mode. if (rd_less_than_thresh(best_rd, rd_threshes[mode_index], - rd_thresh_freq_fact[mode_index])) + rd_thresh_freq_fact[mode_index])) continue; this_mode = vp9_mode_order[mode_index].mode; @@ -3620,9 +3611,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); store_coding_context(x, ctx, best_mode_index, - &mbmi->ref_mvs[mbmi->ref_frame[0]][0], - &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 : - mbmi->ref_frame[1]][0], best_pred_diff, best_tx_diff, best_filter_diff); return best_rd; @@ -4253,9 +4241,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); store_coding_context(x, ctx, best_ref_index, - &mbmi->ref_mvs[mbmi->ref_frame[0]][0], - &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 : - mbmi->ref_frame[1]][0], best_pred_diff, best_tx_diff, best_filter_diff); return best_rd; diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index b6b51e553..5ea09a8a7 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -11,6 +11,8 @@ #ifndef VP9_ENCODER_VP9_RDOPT_H_ #define VP9_ENCODER_VP9_RDOPT_H_ +#include <limits.h> + #include "vp9/encoder/vp9_encoder.h" #ifdef __cplusplus @@ -87,6 +89,11 @@ void vp9_set_rd_speed_thresholds(VP9_COMP *cpi); void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi); +static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, + int thresh_fact) { + return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX; +} + static INLINE int full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *mvp_full, int step_param, int error_per_bit, diff --git a/vp9/encoder/vp9_sad.c b/vp9/encoder/vp9_sad.c index 892e90551..d06263676 100644 --- a/vp9/encoder/vp9_sad.c +++ b/vp9/encoder/vp9_sad.c @@ -35,14 +35,12 @@ static INLINE unsigned int sad(const uint8_t *a, int a_stride, #define sadMxN(m, n) \ unsigned int vp9_sad##m##x##n##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int max_sad) { \ + const uint8_t *ref, int ref_stride) { \ return sad(src, src_stride, ref, ref_stride, m, n); \ } \ unsigned int vp9_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \ const uint8_t *ref, int ref_stride, \ - const uint8_t *second_pred, \ - unsigned int max_sad) { \ + const uint8_t *second_pred) { \ uint8_t comp_pred[m * n]; \ vp9_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \ return sad(src, src_stride, comp_pred, m, m, n); \ @@ -54,8 +52,7 @@ void vp9_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \ unsigned int *sads) { \ int i; \ for (i = 0; i < k; ++i) \ - sads[i] = vp9_sad##m##x##n##_c(src, src_stride, &ref[i], ref_stride, \ - 0x7fffffff); \ + sads[i] = vp9_sad##m##x##n##_c(src, src_stride, &ref[i], ref_stride); \ } #define sadMxNx4D(m, n) \ @@ -64,8 +61,7 @@ void vp9_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \ unsigned int *sads) { \ int i; \ for (i = 0; i < 4; ++i) \ - sads[i] = vp9_sad##m##x##n##_c(src, src_stride, refs[i], ref_stride, \ - 0x7fffffff); \ + sads[i] = vp9_sad##m##x##n##_c(src, src_stride, refs[i], ref_stride); \ } // 64x64 diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index dd28496be..48aa64c13 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -46,10 +46,10 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { lrc->key_frame_rate_correction_factor = 1.0; if (svc->number_temporal_layers > 1) { - lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000; + lc->target_bandwidth = oxcf->ts_target_bitrate[layer]; lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; } else { - lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000; + lc->target_bandwidth = oxcf->ss_target_bitrate[layer]; lrc->last_q[0] = oxcf->best_allowed_q; lrc->last_q[1] = oxcf->best_allowed_q; lrc->last_q[2] = oxcf->best_allowed_q; @@ -82,9 +82,9 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, RATE_CONTROL *const lrc = &lc->rc; if (svc->number_temporal_layers > 1) { - lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000; + lc->target_bandwidth = oxcf->ts_target_bitrate[layer]; } else { - lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000; + lc->target_bandwidth = oxcf->ss_target_bitrate[layer]; } bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; // Update buffer-related quantities. @@ -132,8 +132,7 @@ void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { } else { const double prev_layer_framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer - 1]; - const int prev_layer_target_bandwidth = - oxcf->ts_target_bitrate[layer - 1] * 1000; + const int prev_layer_target_bandwidth = oxcf->ts_target_bitrate[layer - 1]; lc->avg_frame_size = (int)((lc->target_bandwidth - prev_layer_target_bandwidth) / (lc->framerate - prev_layer_framerate)); diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c index 91d8ea4dc..02bed8988 100644 --- a/vp9/encoder/vp9_variance.c +++ b/vp9/encoder/vp9_variance.c @@ -156,6 +156,19 @@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \ return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \ } + +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { + variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum); +} + +void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { + variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum); +} + unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, unsigned int *sse) { diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h index c47fe1335..4a194b72c 100644 --- a/vp9/encoder/vp9_variance.h +++ b/vp9/encoder/vp9_variance.h @@ -25,15 +25,13 @@ void variance(const uint8_t *a, int a_stride, typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, - int ref_stride, - unsigned int max_sad); + int ref_stride); typedef unsigned int(*vp9_sad_avg_fn_t)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, - const uint8_t *second_pred, - unsigned int max_sad); + const uint8_t *second_pred); typedef void (*vp9_sad_multi_fn_t)(const uint8_t *src_ptr, int source_stride, diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index e07cbf046..6b874f023 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -327,7 +327,8 @@ static vpx_codec_err_t set_encoder_config( else if (cfg->rc_end_usage == VPX_CBR) oxcf->rc_mode = RC_MODE_CBR; - oxcf->target_bandwidth = cfg->rc_target_bitrate; + // Convert target bandwidth from Kbit/s to Bit/s + oxcf->target_bandwidth = 1000 * cfg->rc_target_bitrate; oxcf->rc_max_intra_bitrate_pct = extra_cfg->rc_max_intra_bitrate_pct; oxcf->best_allowed_q = vp9_quantizer_to_qindex(cfg->rc_min_quantizer); @@ -387,7 +388,9 @@ static vpx_codec_err_t set_encoder_config( oxcf->ss_number_layers = cfg->ss_number_layers; if (oxcf->ss_number_layers > 1) { - vp9_copy(oxcf->ss_target_bitrate, cfg->ss_target_bitrate); + int i; + for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) + oxcf->ss_target_bitrate[i] = 1000 * cfg->ss_target_bitrate[i]; } else if (oxcf->ss_number_layers == 1) { oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth; } @@ -395,8 +398,11 @@ static vpx_codec_err_t set_encoder_config( oxcf->ts_number_layers = cfg->ts_number_layers; if (oxcf->ts_number_layers > 1) { - vp9_copy(oxcf->ts_target_bitrate, cfg->ts_target_bitrate); - vp9_copy(oxcf->ts_rate_decimator, cfg->ts_rate_decimator); + int i; + for (i = 0; i < VPX_TS_MAX_LAYERS; ++i) { + oxcf->ts_target_bitrate[i] = 1000 * cfg->ts_target_bitrate[i]; + oxcf->ts_rate_decimator[i] = cfg->ts_rate_decimator[i]; + } } else if (oxcf->ts_number_layers == 1) { oxcf->ts_target_bitrate[0] = (int)oxcf->target_bandwidth; oxcf->ts_rate_decimator[0] = 1; |