Diffstat (limited to 'vp9')
 vp9/common/arm/neon/vp9_idct32x32_add_neon.asm | 10
 vp9/common/arm/neon/vp9_mb_lpf_neon.asm | 3
 vp9/common/arm/neon/vp9_reconintra_neon.asm | 138
 vp9/common/generic/vp9_systemdependent.c | 19
 vp9/common/mips/dspr2/vp9_common_dspr2.h | 4
 vp9/common/mips/dspr2/vp9_itrans16_dspr2.c | 34
 vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c | 4
 vp9/common/mips/dspr2/vp9_itrans32_dspr2.c | 12
 vp9/common/mips/dspr2/vp9_itrans4_dspr2.c | 26
 vp9/common/mips/dspr2/vp9_itrans8_dspr2.c | 34
 vp9/common/vp9_alloccommon.c | 14
 vp9/common/vp9_blockd.c | 10
 vp9/common/vp9_blockd.h | 10
 vp9/common/vp9_convolve.c | 2
 vp9/common/vp9_debugmodes.c | 2
 vp9/common/vp9_entropy.c | 6
 vp9/common/vp9_entropy.h | 16
 vp9/common/vp9_entropymode.c | 8
 vp9/common/vp9_entropymv.c | 4
 vp9/common/vp9_filter.c | 14
 vp9/common/vp9_filter.h | 14
 vp9/common/vp9_frame_buffers.c | 80
 vp9/common/vp9_frame_buffers.h | 53
 vp9/common/vp9_loopfilter.c | 23
 vp9/common/vp9_mv.h | 4
 vp9/common/vp9_mvref_common.c | 30
 vp9/common/vp9_mvref_common.h | 43
 vp9/common/vp9_onyx.h | 11
 vp9/common/vp9_onyxc_int.h | 32
 vp9/common/vp9_postproc.c | 2
 vp9/common/vp9_pred_common.c | 70
 vp9/common/vp9_pred_common.h | 19
 vp9/common/vp9_prob.c | 33
 vp9/common/vp9_prob.h | 33
 vp9/common/vp9_quant_common.c | 9
 vp9/common/vp9_quant_common.h | 3
 vp9/common/vp9_reconinter.c | 14
 vp9/common/vp9_reconinter.h | 14
 vp9/common/vp9_reconintra.c | 14
 vp9/common/vp9_rtcd_defs.sh | 30
 vp9/common/vp9_scale.h | 4
 vp9/common/vp9_systemdependent.h | 24
 vp9/common/x86/vp9_asm_stubs.c | 168
 vp9/common/x86/vp9_loopfilter_mmx.asm | 2
 vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c | 545
 vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c | 490
 vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm | 422
 vp9/decoder/vp9_decodeframe.c | 86
 vp9/decoder/vp9_decodemv.c | 49
 vp9/decoder/vp9_dthread.c | 29
 vp9/decoder/vp9_onyxd_if.c | 11
 vp9/encoder/vp9_bitstream.c | 91
 vp9/encoder/vp9_block.h | 6
 vp9/encoder/vp9_dct.c | 183
 vp9/encoder/vp9_dct.h | 32
 vp9/encoder/vp9_encodeframe.c | 877
 vp9/encoder/vp9_encodemb.c | 288
 vp9/encoder/vp9_encodemb.h | 29
 vp9/encoder/vp9_encodemv.c | 17
 vp9/encoder/vp9_encodemv.h | 8
 vp9/encoder/vp9_firstpass.c | 814
 vp9/encoder/vp9_firstpass.h | 89
 vp9/encoder/vp9_lookahead.c | 3
 vp9/encoder/vp9_mbgraph.c | 26
 vp9/encoder/vp9_mbgraph.h | 18
 vp9/encoder/vp9_mcomp.c | 439
 vp9/encoder/vp9_mcomp.h | 26
 vp9/encoder/vp9_onyx_if.c | 754
 vp9/encoder/vp9_onyx_int.h | 215
 vp9/encoder/vp9_pickmode.c | 130
 vp9/encoder/vp9_psnr.c | 29
 vp9/encoder/vp9_psnr.h | 25
 vp9/encoder/vp9_quantize.c | 106
 vp9/encoder/vp9_ratectrl.c | 891
 vp9/encoder/vp9_ratectrl.h | 111
 vp9/encoder/vp9_rdopt.c | 808
 vp9/encoder/vp9_rdopt.h | 8
 vp9/encoder/vp9_resize.c | 4
 vp9/encoder/{vp9_sad_c.c => vp9_sad.c} | 0
 vp9/encoder/vp9_temporal_filter.c | 37
 vp9/encoder/vp9_tokenize.c | 63
 vp9/encoder/vp9_tokenize.h | 2
 vp9/encoder/vp9_vaq.c | 65
 vp9/encoder/{vp9_variance_c.c => vp9_variance.c} | 0
 vp9/encoder/vp9_write_bit_buffer.h | 2
 vp9/encoder/vp9_writer.c | 5
 vp9/encoder/vp9_writer.h | 11
 vp9/encoder/x86/vp9_dct_avx2.c | 97
 vp9/encoder/x86/vp9_dct_sse2.c | 103
 vp9/encoder/x86/vp9_quantize_ssse3.asm | 5
 vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c | 641
 vp9/encoder/x86/vp9_variance_avx2.c | 52
 vp9/vp9_common.mk | 8
 vp9/vp9_cx_iface.c | 221
 vp9/vp9_dx_iface.c | 69
 vp9/vp9cx.mk | 10
 96 files changed, 6473 insertions(+), 3676 deletions(-)
diff --git a/vp9/common/arm/neon/vp9_idct32x32_add_neon.asm b/vp9/common/arm/neon/vp9_idct32x32_add_neon.asm
index 388a7d719..72e933eee 100644
--- a/vp9/common/arm/neon/vp9_idct32x32_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_idct32x32_add_neon.asm
@@ -72,7 +72,7 @@ cospi_31_64 EQU 804
; reg1 = output[first_offset]
; reg2 = output[second_offset]
; for proper address calculation, the last offset used when manipulating
- ; output, wethere reading or storing) must be passed in. use 0 for first
+ ; output, whether reading or storing) must be passed in. use 0 for first
; use.
MACRO
LOAD_FROM_OUTPUT $prev_offset, $first_offset, $second_offset, $reg1, $reg2
@@ -88,7 +88,7 @@ cospi_31_64 EQU 804
; output[first_offset] = reg1
; output[second_offset] = reg2
; for proper address calculation, the last offset used when manipulating
- ; output, wethere reading or storing) must be passed in. use 0 for first
+ ; output, whether reading or storing) must be passed in. use 0 for first
; use.
MACRO
STORE_IN_OUTPUT $prev_offset, $first_offset, $second_offset, $reg1, $reg2
@@ -242,7 +242,7 @@ cospi_31_64 EQU 804
; TODO(cd): have special case to re-use constants when they are similar for
; consecutive butterflies
; TODO(cd): have special case when both constants are the same, do the
- ; additions/substractions before the multiplies.
+ ; additions/subtractions before the multiplies.
; generate the constants
; generate scalar constants
mov r8, #$first_constant & 0xFF00
@@ -260,7 +260,7 @@ cospi_31_64 EQU 804
vmull.s16 q11, $regB, d31
vmull.s16 q12, $regC, d31
; (used) five for intermediate (q8-q12), one for constants (q15)
- ; do some addition/substractions (to get back two register)
+ ; do some addition/subtractions (to get back two register)
vsub.s32 q8, q8, q10
vsub.s32 q9, q9, q11
; do more multiplications (ordered for maximum latency hiding)
@@ -268,7 +268,7 @@ cospi_31_64 EQU 804
vmull.s16 q11, $regA, d30
vmull.s16 q15, $regB, d30
; (used) six for intermediate (q8-q12, q15)
- ; do more addition/substractions
+ ; do more addition/subtractions
vadd.s32 q11, q12, q11
vadd.s32 q10, q10, q15
; (used) four for intermediate (q8-q11)
diff --git a/vp9/common/arm/neon/vp9_mb_lpf_neon.asm b/vp9/common/arm/neon/vp9_mb_lpf_neon.asm
index 8cb913cb8..5fe2bba46 100644
--- a/vp9/common/arm/neon/vp9_mb_lpf_neon.asm
+++ b/vp9/common/arm/neon/vp9_mb_lpf_neon.asm
@@ -439,6 +439,9 @@ v_end
tst r7, #1
bxne lr
+ orrs r5, r5, r6 ; Check for 0
+ orreq r7, r7, #2 ; Only do mbfilter branch
+
; mbfilter flat && mask branch
; TODO(fgalligan): Can I decrease the cycles shifting to consective d's
; and using vibt on the q's?
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.asm b/vp9/common/arm/neon/vp9_reconintra_neon.asm
index 279f678b1..dc9856fa8 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon.asm
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.asm
@@ -315,8 +315,8 @@ loop_h
vdup.u16 q2, r2
vadd.s16 q1, q1, q3
vadd.s16 q2, q2, q3
- vqshrun.s16 d0, q1, #0
- vqshrun.s16 d1, q2, #0
+ vqmovun.s16 d0, q1
+ vqmovun.s16 d1, q2
vst1.32 {d0[0]}, [r0], r1
vst1.32 {d1[0]}, [r0], r1
@@ -327,8 +327,8 @@ loop_h
vdup.u16 q2, r2
vadd.s16 q1, q1, q3
vadd.s16 q2, q2, q3
- vqshrun.s16 d0, q1, #0
- vqshrun.s16 d1, q2, #0
+ vqmovun.s16 d0, q1
+ vqmovun.s16 d1, q2
vst1.32 {d0[0]}, [r0], r1
vst1.32 {d1[0]}, [r0], r1
bx lr
@@ -349,7 +349,7 @@ loop_h
vdup.u8 d0, r12
; preload 8 left
- vld1.8 d30, [r3]
+ vld1.8 {d30}, [r3]
; Load above 8 pixels
vld1.64 {d2}, [r2]
@@ -372,10 +372,10 @@ loop_h
vadd.s16 q8, q3, q8
vadd.s16 q9, q3, q9
- vqshrun.s16 d0, q0, #0
- vqshrun.s16 d1, q1, #0
- vqshrun.s16 d2, q8, #0
- vqshrun.s16 d3, q9, #0
+ vqmovun.s16 d0, q0
+ vqmovun.s16 d1, q1
+ vqmovun.s16 d2, q8
+ vqmovun.s16 d3, q9
vst1.64 {d0}, [r0], r1
vst1.64 {d1}, [r0], r1
@@ -394,10 +394,10 @@ loop_h
vadd.s16 q8, q3, q8
vadd.s16 q9, q3, q9
- vqshrun.s16 d0, q0, #0
- vqshrun.s16 d1, q1, #0
- vqshrun.s16 d2, q8, #0
- vqshrun.s16 d3, q9, #0
+ vqmovun.s16 d0, q0
+ vqmovun.s16 d1, q1
+ vqmovun.s16 d2, q8
+ vqmovun.s16 d3, q9
vst1.64 {d0}, [r0], r1
vst1.64 {d1}, [r0], r1
@@ -422,10 +422,10 @@ loop_h
vdup.u8 q0, r12
; Load above 8 pixels
- vld1.8 q1, [r2]
+ vld1.8 {q1}, [r2]
; preload 8 left into r12
- vld1.8 d18, [r3]!
+ vld1.8 {d18}, [r3]!
; Compute above - ytop_left
vsubl.u8 q2, d2, d0
@@ -445,10 +445,10 @@ loop_16x16_neon
vadd.s16 q0, q0, q3
vadd.s16 q11, q8, q2
vadd.s16 q8, q8, q3
- vqshrun.s16 d2, q1, #0
- vqshrun.s16 d3, q0, #0
- vqshrun.s16 d22, q11, #0
- vqshrun.s16 d23, q8, #0
+ vqmovun.s16 d2, q1
+ vqmovun.s16 d3, q0
+ vqmovun.s16 d22, q11
+ vqmovun.s16 d23, q8
vdup.16 q0, d20[2] ; proload next 2 rows data
vdup.16 q8, d20[3]
vst1.64 {d2,d3}, [r0], r1
@@ -459,10 +459,10 @@ loop_16x16_neon
vadd.s16 q0, q0, q3
vadd.s16 q11, q8, q2
vadd.s16 q8, q8, q3
- vqshrun.s16 d2, q1, #0
- vqshrun.s16 d3, q0, #0
- vqshrun.s16 d22, q11, #0
- vqshrun.s16 d23, q8, #0
+ vqmovun.s16 d2, q1
+ vqmovun.s16 d3, q0
+ vqmovun.s16 d22, q11
+ vqmovun.s16 d23, q8
vdup.16 q0, d21[0] ; proload next 2 rows data
vdup.16 q8, d21[1]
vst1.64 {d2,d3}, [r0], r1
@@ -472,10 +472,10 @@ loop_16x16_neon
vadd.s16 q0, q0, q3
vadd.s16 q11, q8, q2
vadd.s16 q8, q8, q3
- vqshrun.s16 d2, q1, #0
- vqshrun.s16 d3, q0, #0
- vqshrun.s16 d22, q11, #0
- vqshrun.s16 d23, q8, #0
+ vqmovun.s16 d2, q1
+ vqmovun.s16 d3, q0
+ vqmovun.s16 d22, q11
+ vqmovun.s16 d23, q8
vdup.16 q0, d21[2] ; proload next 2 rows data
vdup.16 q8, d21[3]
vst1.64 {d2,d3}, [r0], r1
@@ -486,13 +486,11 @@ loop_16x16_neon
vadd.s16 q0, q0, q3
vadd.s16 q11, q8, q2
vadd.s16 q8, q8, q3
- vqshrun.s16 d2, q1, #0
- vqshrun.s16 d3, q0, #0
- vqshrun.s16 d22, q11, #0
- vqshrun.s16 d23, q8, #0
- vdup.16 q0, d20[2]
- vdup.16 q8, d20[3]
- vld1.8 d18, [r3]! ; preload 8 left into r12
+ vqmovun.s16 d2, q1
+ vqmovun.s16 d3, q0
+ vqmovun.s16 d22, q11
+ vqmovun.s16 d23, q8
+ vld1.8 {d18}, [r3]! ; preload 8 left into r12
vmovl.u8 q10, d18
vst1.64 {d2,d3}, [r0], r1
vst1.64 {d22,d23}, [r0], r1
@@ -518,11 +516,11 @@ loop_16x16_neon
vdup.u8 q0, r12
; Load above 32 pixels
- vld1.8 q1, [r2]!
- vld1.8 q2, [r2]
+ vld1.8 {q1}, [r2]!
+ vld1.8 {q2}, [r2]
; preload 8 left pixels
- vld1.8 d26, [r3]!
+ vld1.8 {d26}, [r3]!
; Compute above - ytop_left
vsubl.u8 q8, d2, d0
@@ -544,19 +542,19 @@ loop_32x32_neon
vadd.s16 q13, q0, q9
vadd.s16 q14, q0, q10
vadd.s16 q15, q0, q11
- vqshrun.s16 d0, q12, #0
- vqshrun.s16 d1, q13, #0
+ vqmovun.s16 d0, q12
+ vqmovun.s16 d1, q13
vadd.s16 q12, q2, q8
vadd.s16 q13, q2, q9
- vqshrun.s16 d2, q14, #0
- vqshrun.s16 d3, q15, #0
+ vqmovun.s16 d2, q14
+ vqmovun.s16 d3, q15
vadd.s16 q14, q2, q10
vadd.s16 q15, q2, q11
vst1.64 {d0-d3}, [r0], r1
- vqshrun.s16 d24, q12, #0
- vqshrun.s16 d25, q13, #0
- vqshrun.s16 d26, q14, #0
- vqshrun.s16 d27, q15, #0
+ vqmovun.s16 d24, q12
+ vqmovun.s16 d25, q13
+ vqmovun.s16 d26, q14
+ vqmovun.s16 d27, q15
vdup.16 q1, d6[2]
vdup.16 q2, d6[3]
vst1.64 {d24-d27}, [r0], r1
@@ -566,19 +564,19 @@ loop_32x32_neon
vadd.s16 q13, q1, q9
vadd.s16 q14, q1, q10
vadd.s16 q15, q1, q11
- vqshrun.s16 d0, q12, #0
- vqshrun.s16 d1, q13, #0
+ vqmovun.s16 d0, q12
+ vqmovun.s16 d1, q13
vadd.s16 q12, q2, q8
vadd.s16 q13, q2, q9
- vqshrun.s16 d2, q14, #0
- vqshrun.s16 d3, q15, #0
+ vqmovun.s16 d2, q14
+ vqmovun.s16 d3, q15
vadd.s16 q14, q2, q10
vadd.s16 q15, q2, q11
vst1.64 {d0-d3}, [r0], r1
- vqshrun.s16 d24, q12, #0
- vqshrun.s16 d25, q13, #0
- vqshrun.s16 d26, q14, #0
- vqshrun.s16 d27, q15, #0
+ vqmovun.s16 d24, q12
+ vqmovun.s16 d25, q13
+ vqmovun.s16 d26, q14
+ vqmovun.s16 d27, q15
vdup.16 q0, d7[0]
vdup.16 q2, d7[1]
vst1.64 {d24-d27}, [r0], r1
@@ -588,19 +586,19 @@ loop_32x32_neon
vadd.s16 q13, q0, q9
vadd.s16 q14, q0, q10
vadd.s16 q15, q0, q11
- vqshrun.s16 d0, q12, #0
- vqshrun.s16 d1, q13, #0
+ vqmovun.s16 d0, q12
+ vqmovun.s16 d1, q13
vadd.s16 q12, q2, q8
vadd.s16 q13, q2, q9
- vqshrun.s16 d2, q14, #0
- vqshrun.s16 d3, q15, #0
+ vqmovun.s16 d2, q14
+ vqmovun.s16 d3, q15
vadd.s16 q14, q2, q10
vadd.s16 q15, q2, q11
vst1.64 {d0-d3}, [r0], r1
- vqshrun.s16 d24, q12, #0
- vqshrun.s16 d25, q13, #0
- vqshrun.s16 d26, q14, #0
- vqshrun.s16 d27, q15, #0
+ vqmovun.s16 d24, q12
+ vqmovun.s16 d25, q13
+ vqmovun.s16 d26, q14
+ vqmovun.s16 d27, q15
vdup.16 q0, d7[2]
vdup.16 q2, d7[3]
vst1.64 {d24-d27}, [r0], r1
@@ -610,20 +608,20 @@ loop_32x32_neon
vadd.s16 q13, q0, q9
vadd.s16 q14, q0, q10
vadd.s16 q15, q0, q11
- vqshrun.s16 d0, q12, #0
- vqshrun.s16 d1, q13, #0
+ vqmovun.s16 d0, q12
+ vqmovun.s16 d1, q13
vadd.s16 q12, q2, q8
vadd.s16 q13, q2, q9
- vqshrun.s16 d2, q14, #0
- vqshrun.s16 d3, q15, #0
+ vqmovun.s16 d2, q14
+ vqmovun.s16 d3, q15
vadd.s16 q14, q2, q10
vadd.s16 q15, q2, q11
vst1.64 {d0-d3}, [r0], r1
- vqshrun.s16 d24, q12, #0
- vqshrun.s16 d25, q13, #0
- vld1.8 d0, [r3]! ; preload 8 left pixels
- vqshrun.s16 d26, q14, #0
- vqshrun.s16 d27, q15, #0
+ vqmovun.s16 d24, q12
+ vqmovun.s16 d25, q13
+ vld1.8 {d0}, [r3]! ; preload 8 left pixels
+ vqmovun.s16 d26, q14
+ vqmovun.s16 d27, q15
vmovl.u8 q3, d0
vst1.64 {d24-d27}, [r0], r1
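The vp9_reconintra_neon.asm hunks above all make the same substitution: vqshrun.s16 with an immediate of #0 becomes vqmovun.s16. VQSHRUN is the saturating shift-right-unsigned-narrow instruction, and its immediate range for a .s16 source is 1-8, so a shift of #0 is not a valid encoding; VQMOVUN is the plain unsigned-saturating narrow that the TM predictor actually needs after adding the widened (above - top_left) difference to each left pixel. A minimal intrinsics sketch of that narrow (illustrative names, not code from the patch):

    #include <arm_neon.h>
    #include <stdint.h>

    /* clip_pixel(left + (above[x] - top_left)) for 8 pixels: widen the
     * difference, add, then saturate back to [0, 255] with vqmovun_s16,
     * the intrinsic form of vqmovun.s16. */
    static void tm_predict_row8(uint8_t *dst, const uint8_t *above,
                                uint8_t top_left, uint8_t left) {
      const int16x8_t diff = vreinterpretq_s16_u16(
          vsubl_u8(vld1_u8(above), vdup_n_u8(top_left)));
      const int16x8_t sum = vaddq_s16(vdupq_n_s16(left), diff);
      vst1_u8(dst, vqmovun_s16(sum));
    }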
diff --git a/vp9/common/generic/vp9_systemdependent.c b/vp9/common/generic/vp9_systemdependent.c
deleted file mode 100644
index 536febb65..000000000
--- a/vp9/common/generic/vp9_systemdependent.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_onyxc_int.h"
-
-void vp9_machine_specific_config(VP9_COMMON *cm) {
- (void)cm;
- vp9_rtcd();
-}
diff --git a/vp9/common/mips/dspr2/vp9_common_dspr2.h b/vp9/common/mips/dspr2/vp9_common_dspr2.h
index 991d3c2b3..6ebea9f2f 100644
--- a/vp9/common/mips/dspr2/vp9_common_dspr2.h
+++ b/vp9/common/mips/dspr2/vp9_common_dspr2.h
@@ -85,8 +85,8 @@ static INLINE void vp9_prefetch_store_streamed(unsigned char *dst) {
);
}
-void vp9_idct32_1d_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
- int dest_stride);
+void vp9_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
+ int dest_stride);
void vp9_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
diff --git a/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
index 1b2f5506a..19c582fd1 100644
--- a/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
@@ -19,8 +19,8 @@
#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
#if HAVE_DSPR2
-static void idct16_1d_rows_dspr2(const int16_t *input, int16_t *output,
- uint32_t no_rows) {
+static void idct16_rows_dspr2(const int16_t *input, int16_t *output,
+ uint32_t no_rows) {
int i;
int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
int step1_10, step1_11, step1_12, step1_13;
@@ -404,8 +404,8 @@ static void idct16_1d_rows_dspr2(const int16_t *input, int16_t *output,
}
}
-static void idct16_1d_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
- int dest_stride) {
+static void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
+ int dest_stride) {
int i;
int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
int step1_8, step1_9, step1_10, step1_11;
@@ -905,13 +905,13 @@ void vp9_idct16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
);
// First transform rows
- idct16_1d_rows_dspr2(input, out, 16);
+ idct16_rows_dspr2(input, out, 16);
// Then transform columns and add to dest
- idct16_1d_cols_add_blk_dspr2(out, dest, dest_stride);
+ idct16_cols_add_blk_dspr2(out, dest, dest_stride);
}
-static void iadst16_1d(const int16_t *input, int16_t *output) {
+static void iadst16(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
int x0 = input[15];
@@ -1099,16 +1099,16 @@ void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
switch (tx_type) {
case DCT_DCT: // DCT in both horizontal and vertical
- idct16_1d_rows_dspr2(input, outptr, 16);
- idct16_1d_cols_add_blk_dspr2(out, dest, pitch);
+ idct16_rows_dspr2(input, outptr, 16);
+ idct16_cols_add_blk_dspr2(out, dest, pitch);
break;
case ADST_DCT: // ADST in vertical, DCT in horizontal
- idct16_1d_rows_dspr2(input, outptr, 16);
+ idct16_rows_dspr2(input, outptr, 16);
outptr = out;
for (i = 0; i < 16; ++i) {
- iadst16_1d(outptr, temp_out);
+ iadst16(outptr, temp_out);
for (j = 0; j < 16; ++j)
dest[j * pitch + i] =
@@ -1125,7 +1125,7 @@ void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
/* prefetch row */
vp9_prefetch_load((const uint8_t *)(input + 16));
- iadst16_1d(input, outptr);
+ iadst16(input, outptr);
input += 16;
outptr += 16;
}
@@ -1134,7 +1134,7 @@ void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
for (j = 0; j < 16; ++j)
temp_in[j * 16 + i] = out[i * 16 + j];
- idct16_1d_cols_add_blk_dspr2(temp_in, dest, pitch);
+ idct16_cols_add_blk_dspr2(temp_in, dest, pitch);
}
break;
case ADST_ADST: // ADST in both directions
@@ -1145,7 +1145,7 @@ void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
/* prefetch row */
vp9_prefetch_load((const uint8_t *)(input + 16));
- iadst16_1d(input, outptr);
+ iadst16(input, outptr);
input += 16;
outptr += 16;
}
@@ -1153,7 +1153,7 @@ void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = out[j * 16 + i];
- iadst16_1d(temp_in, temp_out);
+ iadst16(temp_in, temp_out);
for (j = 0; j < 16; ++j)
dest[j * pitch + i] =
clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
@@ -1183,7 +1183,7 @@ void vp9_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest,
// First transform rows. Since all non-zero dct coefficients are in
// upper-left 4x4 area, we only need to calculate first 4 rows here.
- idct16_1d_rows_dspr2(input, outptr, 4);
+ idct16_rows_dspr2(input, outptr, 4);
outptr += 4;
for (i = 0; i < 6; ++i) {
@@ -1213,7 +1213,7 @@ void vp9_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest,
}
// Then transform columns
- idct16_1d_cols_add_blk_dspr2(out, dest, dest_stride);
+ idct16_cols_add_blk_dspr2(out, dest, dest_stride);
}
void vp9_idct16x16_1_add_dspr2(const int16_t *input, uint8_t *dest,
diff --git a/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
index 5e92db3d2..132d88ce5 100644
--- a/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
@@ -18,8 +18,8 @@
#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
#if HAVE_DSPR2
-void vp9_idct32_1d_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
- int dest_stride) {
+void vp9_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
+ int dest_stride) {
int16_t step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6;
int16_t step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13;
int16_t step1_14, step1_15, step1_16, step1_17, step1_18, step1_19;
diff --git a/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
index bc6759400..74a90b02c 100644
--- a/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
@@ -19,8 +19,8 @@
#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
#if HAVE_DSPR2
-static void idct32_1d_rows_dspr2(const int16_t *input, int16_t *output,
- uint32_t no_rows) {
+static void idct32_rows_dspr2(const int16_t *input, int16_t *output,
+ uint32_t no_rows) {
int16_t step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6;
int16_t step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13;
int16_t step1_14, step1_15, step1_16, step1_17, step1_18, step1_19, step1_20;
@@ -882,10 +882,10 @@ void vp9_idct32x32_1024_add_dspr2(const int16_t *input, uint8_t *dest,
);
// Rows
- idct32_1d_rows_dspr2(input, outptr, 32);
+ idct32_rows_dspr2(input, outptr, 32);
// Columns
- vp9_idct32_1d_cols_add_blk_dspr2(out, dest, dest_stride);
+ vp9_idct32_cols_add_blk_dspr2(out, dest, dest_stride);
}
void vp9_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest,
@@ -903,7 +903,7 @@ void vp9_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest,
);
// Rows
- idct32_1d_rows_dspr2(input, outptr, 8);
+ idct32_rows_dspr2(input, outptr, 8);
outptr += 8;
__asm__ __volatile__ (
@@ -947,7 +947,7 @@ void vp9_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest,
}
// Columns
- vp9_idct32_1d_cols_add_blk_dspr2(out, dest, stride);
+ vp9_idct32_cols_add_blk_dspr2(out, dest, stride);
}
void vp9_idct32x32_1_add_dspr2(const int16_t *input, uint8_t *dest,
diff --git a/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
index 5b7aa5e71..1990348b8 100644
--- a/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
@@ -19,7 +19,7 @@
#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
#if HAVE_DSPR2
-static void vp9_idct4_1d_rows_dspr2(const int16_t *input, int16_t *output) {
+static void vp9_idct4_rows_dspr2(const int16_t *input, int16_t *output) {
int16_t step_0, step_1, step_2, step_3;
int Temp0, Temp1, Temp2, Temp3;
const int const_2_power_13 = 8192;
@@ -104,7 +104,7 @@ static void vp9_idct4_1d_rows_dspr2(const int16_t *input, int16_t *output) {
}
}
-static void vp9_idct4_1d_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
+static void vp9_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
int dest_stride) {
int16_t step_0, step_1, step_2, step_3;
int Temp0, Temp1, Temp2, Temp3;
@@ -240,10 +240,10 @@ void vp9_idct4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,
);
// Rows
- vp9_idct4_1d_rows_dspr2(input, outptr);
+ vp9_idct4_rows_dspr2(input, outptr);
// Columns
- vp9_idct4_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);
}
void vp9_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest,
@@ -319,7 +319,7 @@ void vp9_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest,
}
}
-static void iadst4_1d_dspr2(const int16_t *input, int16_t *output) {
+static void iadst4_dspr2(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
int x0, x1, x2, x3;
@@ -379,16 +379,16 @@ void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,
switch (tx_type) {
case DCT_DCT: // DCT in both horizontal and vertical
- vp9_idct4_1d_rows_dspr2(input, outptr);
- vp9_idct4_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ vp9_idct4_rows_dspr2(input, outptr);
+ vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);
break;
case ADST_DCT: // ADST in vertical, DCT in horizontal
- vp9_idct4_1d_rows_dspr2(input, outptr);
+ vp9_idct4_rows_dspr2(input, outptr);
outptr = out;
for (i = 0; i < 4; ++i) {
- iadst4_1d_dspr2(outptr, temp_out);
+ iadst4_dspr2(outptr, temp_out);
for (j = 0; j < 4; ++j)
dest[j * dest_stride + i] =
@@ -400,7 +400,7 @@ void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,
break;
case DCT_ADST: // DCT in vertical, ADST in horizontal
for (i = 0; i < 4; ++i) {
- iadst4_1d_dspr2(input, outptr);
+ iadst4_dspr2(input, outptr);
input += 4;
outptr += 4;
}
@@ -410,11 +410,11 @@ void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,
temp_in[i * 4 + j] = out[j * 4 + i];
}
}
- vp9_idct4_1d_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
+ vp9_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
break;
case ADST_ADST: // ADST in both directions
for (i = 0; i < 4; ++i) {
- iadst4_1d_dspr2(input, outptr);
+ iadst4_dspr2(input, outptr);
input += 4;
outptr += 4;
}
@@ -422,7 +422,7 @@ void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
temp_in[j] = out[j * 4 + i];
- iadst4_1d_dspr2(temp_in, temp_out);
+ iadst4_dspr2(temp_in, temp_out);
for (j = 0; j < 4; ++j)
dest[j * dest_stride + i] =
diff --git a/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
index 93a08401d..acccaea6d 100644
--- a/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
@@ -19,8 +19,8 @@
#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
#if HAVE_DSPR2
-static void idct8_1d_rows_dspr2(const int16_t *input, int16_t *output,
- uint32_t no_rows) {
+static void idct8_rows_dspr2(const int16_t *input, int16_t *output,
+ uint32_t no_rows) {
int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
const int const_2_power_13 = 8192;
int Temp0, Temp1, Temp2, Temp3, Temp4;
@@ -200,8 +200,8 @@ static void idct8_1d_rows_dspr2(const int16_t *input, int16_t *output,
}
}
-static void idct8_1d_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
- int dest_stride) {
+static void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
+ int dest_stride) {
int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
int Temp0, Temp1, Temp2, Temp3;
int i;
@@ -462,13 +462,13 @@ void vp9_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
);
// First transform rows
- idct8_1d_rows_dspr2(input, outptr, 8);
+ idct8_rows_dspr2(input, outptr, 8);
// Then transform columns and add to dest
- idct8_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
}
-static void iadst8_1d_dspr2(const int16_t *input, int16_t *output) {
+static void iadst8_dspr2(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
int x0, x1, x2, x3, x4, x5, x6, x7;
@@ -563,14 +563,14 @@ void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
switch (tx_type) {
case DCT_DCT: // DCT in both horizontal and vertical
- idct8_1d_rows_dspr2(input, outptr, 8);
- idct8_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ idct8_rows_dspr2(input, outptr, 8);
+ idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
break;
case ADST_DCT: // ADST in vertical, DCT in horizontal
- idct8_1d_rows_dspr2(input, outptr, 8);
+ idct8_rows_dspr2(input, outptr, 8);
for (i = 0; i < 8; ++i) {
- iadst8_1d_dspr2(&out[i * 8], temp_out);
+ iadst8_dspr2(&out[i * 8], temp_out);
for (j = 0; j < 8; ++j)
dest[j * dest_stride + i] =
@@ -580,7 +580,7 @@ void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
break;
case DCT_ADST: // DCT in vertical, ADST in horizontal
for (i = 0; i < 8; ++i) {
- iadst8_1d_dspr2(input, outptr);
+ iadst8_dspr2(input, outptr);
input += 8;
outptr += 8;
}
@@ -590,11 +590,11 @@ void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
temp_in[i * 8 + j] = out[j * 8 + i];
}
}
- idct8_1d_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
+ idct8_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
break;
case ADST_ADST: // ADST in both directions
for (i = 0; i < 8; ++i) {
- iadst8_1d_dspr2(input, outptr);
+ iadst8_dspr2(input, outptr);
input += 8;
outptr += 8;
}
@@ -603,7 +603,7 @@ void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
for (j = 0; j < 8; ++j)
temp_in[j] = out[j * 8 + i];
- iadst8_1d_dspr2(temp_in, temp_out);
+ iadst8_dspr2(temp_in, temp_out);
for (j = 0; j < 8; ++j)
dest[j * dest_stride + i] =
@@ -631,7 +631,7 @@ void vp9_idct8x8_10_add_dspr2(const int16_t *input, uint8_t *dest,
);
// First transform rows
- idct8_1d_rows_dspr2(input, outptr, 4);
+ idct8_rows_dspr2(input, outptr, 4);
outptr += 4;
@@ -659,7 +659,7 @@ void vp9_idct8x8_10_add_dspr2(const int16_t *input, uint8_t *dest,
// Then transform columns and add to dest
- idct8_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
}
void vp9_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest,
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index e033fbb99..ff4b7c1f9 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -33,9 +33,16 @@ void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi) {
void vp9_free_frame_buffers(VP9_COMMON *cm) {
int i;
- for (i = 0; i < FRAME_BUFFERS; i++)
+ for (i = 0; i < FRAME_BUFFERS; i++) {
vp9_free_frame_buffer(&cm->frame_bufs[i].buf);
+ if (cm->frame_bufs[i].ref_count > 0 &&
+ cm->frame_bufs[i].raw_frame_buffer.data != NULL) {
+ cm->release_fb_cb(cm->cb_priv, &cm->frame_bufs[i].raw_frame_buffer);
+ cm->frame_bufs[i].ref_count = 0;
+ }
+ }
+
vp9_free_frame_buffer(&cm->post_proc_buffer);
vpx_free(cm->mip);
@@ -85,7 +92,7 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) {
int mi_size;
if (vp9_realloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
- VP9_DEC_BORDER_IN_PIXELS) < 0)
+ VP9_DEC_BORDER_IN_PIXELS, NULL, NULL, NULL) < 0)
goto fail;
set_mb_mi(cm, aligned_width, aligned_height);
@@ -194,11 +201,12 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
}
void vp9_create_common(VP9_COMMON *cm) {
- vp9_machine_specific_config(cm);
+ vp9_rtcd();
}
void vp9_remove_common(VP9_COMMON *cm) {
vp9_free_frame_buffers(cm);
+ vp9_free_internal_frame_buffers(&cm->int_frame_buffers);
}
void vp9_initialize_common() {
diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c
index 8cc657114..d918bedc6 100644
--- a/vp9/common/vp9_blockd.c
+++ b/vp9/common/vp9_blockd.c
@@ -98,16 +98,6 @@ void vp9_foreach_transformed_block(const MACROBLOCKD* const xd,
vp9_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
}
-void vp9_foreach_transformed_block_uv(const MACROBLOCKD* const xd,
- BLOCK_SIZE bsize,
- foreach_transformed_block_visitor visit,
- void *arg) {
- int plane;
-
- for (plane = 1; plane < MAX_MB_PLANE; plane++)
- vp9_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
-}
-
void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob,
int aoff, int loff) {
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 70b8ffa4e..6086323f6 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -89,7 +89,6 @@ static INLINE int is_inter_mode(MB_PREDICTION_MODE mode) {
#define INTER_OFFSET(mode) ((mode) - NEARESTMV)
-
/* For keyframes, intra block modes are predicted by the (already decoded)
modes for the Y blocks to the left and above us; for interframes, there
is a single probability table. */
@@ -129,7 +128,7 @@ typedef struct {
uint8_t mode_context[MAX_REF_FRAMES];
- unsigned char skip_coeff; // 0=need to decode coeffs, 1=no coefficients
+ unsigned char skip; // 0=need to decode coeffs, 1=no coefficients
unsigned char segment_id; // Segment id for this block.
// Flags used for prediction status of various bit-stream signals
@@ -182,7 +181,7 @@ struct macroblockd_plane {
int subsampling_y;
struct buf_2d dst;
struct buf_2d pre[2];
- int16_t *dequant;
+ const int16_t *dequant;
ENTROPY_CONTEXT *above_context;
ENTROPY_CONTEXT *left_context;
};
@@ -314,11 +313,6 @@ void vp9_foreach_transformed_block(
const MACROBLOCKD* const xd, BLOCK_SIZE bsize,
foreach_transformed_block_visitor visit, void *arg);
-
-void vp9_foreach_transformed_block_uv(
- const MACROBLOCKD* const xd, BLOCK_SIZE bsize,
- foreach_transformed_block_visitor visit, void *arg);
-
static INLINE void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, int block,
int *x, int *y) {
diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c
index 3807ccc87..d30e0b488 100644
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -145,7 +145,7 @@ static const InterpKernel *get_filter_base(const int16_t *filter) {
}
static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
- return (const InterpKernel *)(intptr_t)f - base;
+ return (int)((const InterpKernel *)(intptr_t)f - base);
}
void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
diff --git a/vp9/common/vp9_debugmodes.c b/vp9/common/vp9_debugmodes.c
index 355ac1a49..24c785f2a 100644
--- a/vp9/common/vp9_debugmodes.c
+++ b/vp9/common/vp9_debugmodes.c
@@ -58,7 +58,7 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, char *file) {
print_mi_data(cm, mvs, "Partitions:", offsetof(MB_MODE_INFO, sb_type));
print_mi_data(cm, mvs, "Modes:", offsetof(MB_MODE_INFO, mode));
- print_mi_data(cm, mvs, "Skips:", offsetof(MB_MODE_INFO, skip_coeff));
+ print_mi_data(cm, mvs, "Skips:", offsetof(MB_MODE_INFO, skip));
print_mi_data(cm, mvs, "Ref frame:", offsetof(MB_MODE_INFO, ref_frame[0]));
print_mi_data(cm, mvs, "Transform:", offsetof(MB_MODE_INFO, tx_size));
print_mi_data(cm, mvs, "UV Modes:", offsetof(MB_MODE_INFO, uv_mode));
diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c
index 13e954efe..bc12f9aa2 100644
--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -16,7 +16,7 @@
#include "vpx/vpx_integer.h"
-DECLARE_ALIGNED(16, const uint8_t, vp9_coefband_trans_8x8plus[1024]) = {
+const uint8_t vp9_coefband_trans_8x8plus[1024] = {
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 5,
// beyond MAXBAND_INDEX+1 all values are filled as 5
@@ -85,11 +85,11 @@ DECLARE_ALIGNED(16, const uint8_t, vp9_coefband_trans_8x8plus[1024]) = {
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
};
-DECLARE_ALIGNED(16, const uint8_t, vp9_coefband_trans_4x4[16]) = {
+const uint8_t vp9_coefband_trans_4x4[16] = {
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5,
};
-DECLARE_ALIGNED(16, const uint8_t, vp9_pt_energy_class[ENTROPY_TOKENS]) = {
+const uint8_t vp9_pt_energy_class[ENTROPY_TOKENS] = {
0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5
};
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index e030d92ec..aab8b5388 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -42,7 +42,7 @@ extern "C" {
#define ENTROPY_NODES 11
-extern DECLARE_ALIGNED(16, const uint8_t, vp9_pt_energy_class[ENTROPY_TOKENS]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp9_pt_energy_class[ENTROPY_TOKENS]);
#define EOB_MODEL_TOKEN 3
extern const vp9_tree_index vp9_coefmodel_tree[];
@@ -116,10 +116,10 @@ static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) {
// This macro is currently unused but may be used by certain implementations
#define MAXBAND_INDEX 21
-extern DECLARE_ALIGNED(16, const uint8_t, vp9_coefband_trans_8x8plus[1024]);
-extern DECLARE_ALIGNED(16, const uint8_t, vp9_coefband_trans_4x4[16]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp9_coefband_trans_8x8plus[1024]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp9_coefband_trans_4x4[16]);
-static const uint8_t *get_band_translate(TX_SIZE tx_size) {
+static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
return tx_size == TX_4X4 ? vp9_coefband_trans_4x4
: vp9_coefband_trans_8x8plus;
}
@@ -146,8 +146,8 @@ typedef unsigned int vp9_coeff_count_model[REF_TYPES][COEF_BANDS]
void vp9_model_to_full_probs(const vp9_prob *model, vp9_prob *full);
-static int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
- const ENTROPY_CONTEXT *l) {
+static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
+ const ENTROPY_CONTEXT *l) {
ENTROPY_CONTEXT above_ec = 0, left_ec = 0;
switch (tx_size) {
@@ -174,8 +174,8 @@ static int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
return combine_entropy_contexts(above_ec, left_ec);
}
-static const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
- PLANE_TYPE type, int block_idx) {
+static const INLINE scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
+ PLANE_TYPE type, int block_idx) {
const MODE_INFO *const mi = xd->mi_8x8[0];
const MB_MODE_INFO *const mbmi = &mi->mbmi;
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index 6def3c869..892153936 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -345,7 +345,7 @@ static int adapt_prob(vp9_prob pre_prob, const unsigned int ct[2]) {
static void adapt_probs(const vp9_tree_index *tree,
const vp9_prob *pre_probs, const unsigned int *counts,
vp9_prob *probs) {
- tree_merge_probs(tree, pre_probs, counts, COUNT_SAT, MAX_UPDATE_FACTOR,
+ vp9_tree_merge_probs(tree, pre_probs, counts, COUNT_SAT, MAX_UPDATE_FACTOR,
probs);
}
@@ -465,8 +465,10 @@ void vp9_setup_past_independence(VP9_COMMON *cm) {
cm->frame_contexts[cm->frame_context_idx] = cm->fc;
}
- vpx_memset(cm->prev_mip, 0,
- cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO));
+ if (frame_is_intra_only(cm))
+ vpx_memset(cm->prev_mip, 0,
+ cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO));
+
vpx_memset(cm->mip, 0,
cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO));
diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c
index 60ae79fdc..e1f5ef7b4 100644
--- a/vp9/common/vp9_entropymv.c
+++ b/vp9/common/vp9_entropymv.c
@@ -192,8 +192,8 @@ static vp9_prob adapt_prob(vp9_prob prep, const unsigned int ct[2]) {
static void adapt_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs,
const unsigned int *counts, vp9_prob *probs) {
- tree_merge_probs(tree, pre_probs, counts, MV_COUNT_SAT, MV_MAX_UPDATE_FACTOR,
- probs);
+ vp9_tree_merge_probs(tree, pre_probs, counts, MV_COUNT_SAT,
+ MV_MAX_UPDATE_FACTOR, probs);
}
void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) {
diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c
index 546f603b6..7474a88bc 100644
--- a/vp9/common/vp9_filter.c
+++ b/vp9/common/vp9_filter.c
@@ -10,12 +10,9 @@
#include <assert.h>
-#include "vpx_ports/mem.h"
-
#include "vp9/common/vp9_filter.h"
-DECLARE_ALIGNED(256, const InterpKernel,
- vp9_bilinear_filters[SUBPEL_SHIFTS]) = {
+const InterpKernel vp9_bilinear_filters[SUBPEL_SHIFTS] = {
{ 0, 0, 0, 128, 0, 0, 0, 0 },
{ 0, 0, 0, 120, 8, 0, 0, 0 },
{ 0, 0, 0, 112, 16, 0, 0, 0 },
@@ -35,8 +32,7 @@ DECLARE_ALIGNED(256, const InterpKernel,
};
// Lagrangian interpolation filter
-DECLARE_ALIGNED(256, const InterpKernel,
- vp9_sub_pel_filters_8[SUBPEL_SHIFTS]) = {
+const InterpKernel vp9_sub_pel_filters_8[SUBPEL_SHIFTS] = {
{ 0, 0, 0, 128, 0, 0, 0, 0},
{ 0, 1, -5, 126, 8, -3, 1, 0},
{ -1, 3, -10, 122, 18, -6, 2, 0},
@@ -56,8 +52,7 @@ DECLARE_ALIGNED(256, const InterpKernel,
};
// DCT based filter
-DECLARE_ALIGNED(256, const InterpKernel,
- vp9_sub_pel_filters_8s[SUBPEL_SHIFTS]) = {
+const InterpKernel vp9_sub_pel_filters_8s[SUBPEL_SHIFTS] = {
{0, 0, 0, 128, 0, 0, 0, 0},
{-1, 3, -7, 127, 8, -3, 1, 0},
{-2, 5, -13, 125, 17, -6, 3, -1},
@@ -77,8 +72,7 @@ DECLARE_ALIGNED(256, const InterpKernel,
};
// freqmultiplier = 0.5
-DECLARE_ALIGNED(256, const InterpKernel,
- vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS]) = {
+const InterpKernel vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS] = {
{ 0, 0, 0, 128, 0, 0, 0, 0},
{-3, -1, 32, 64, 38, 1, -3, 0},
{-2, -2, 29, 63, 41, 2, -3, 0},
diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h
index 15610d781..29d3867c9 100644
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -13,6 +13,8 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+
#ifdef __cplusplus
extern "C" {
@@ -37,10 +39,14 @@ typedef int16_t InterpKernel[SUBPEL_TAPS];
const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter);
-extern const InterpKernel vp9_bilinear_filters[SUBPEL_SHIFTS];
-extern const InterpKernel vp9_sub_pel_filters_8[SUBPEL_SHIFTS];
-extern const InterpKernel vp9_sub_pel_filters_8s[SUBPEL_SHIFTS];
-extern const InterpKernel vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS];
+DECLARE_ALIGNED(256, extern const InterpKernel,
+ vp9_bilinear_filters[SUBPEL_SHIFTS]);
+DECLARE_ALIGNED(256, extern const InterpKernel,
+ vp9_sub_pel_filters_8[SUBPEL_SHIFTS]);
+DECLARE_ALIGNED(256, extern const InterpKernel,
+ vp9_sub_pel_filters_8s[SUBPEL_SHIFTS]);
+DECLARE_ALIGNED(256, extern const InterpKernel,
+ vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS]);
// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
// filter kernel as a 2 tap filter.
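The vp9_filter.c/vp9_filter.h pair (and the matching vp9_entropy.h change above) move DECLARE_ALIGNED from the table definitions onto the extern declarations in the header, so the 256-byte alignment is visible to every translation unit that indexes the tables, not only the one that defines them. With GCC-style attributes, alignment stated on an earlier declaration carries over to the later definition. A hypothetical reduction of the pattern, assuming the __attribute__ form of DECLARE_ALIGNED from vpx_ports/mem.h:

    #define DECLARE_ALIGNED(n, typ, val) typ __attribute__((aligned(n))) val

    /* header: every user of the table sees the alignment */
    DECLARE_ALIGNED(256, extern const short, kernel[8]);

    /* source file: the definition inherits the declared alignment */
    const short kernel[8] = { 0, 1, -5, 126, 8, -3, 1, 0 };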
diff --git a/vp9/common/vp9_frame_buffers.c b/vp9/common/vp9_frame_buffers.c
new file mode 100644
index 000000000..dffeb8a22
--- /dev/null
+++ b/vp9/common/vp9_frame_buffers.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "vp9/common/vp9_frame_buffers.h"
+#include "vpx_mem/vpx_mem.h"
+
+int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list) {
+ assert(list != NULL);
+ vp9_free_internal_frame_buffers(list);
+
+ list->num_internal_frame_buffers =
+ VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
+ list->int_fb = vpx_calloc(list->num_internal_frame_buffers,
+ sizeof(*list->int_fb));
+ return (list->int_fb == NULL);
+}
+
+void vp9_free_internal_frame_buffers(InternalFrameBufferList *list) {
+ int i;
+
+ assert(list != NULL);
+
+ for (i = 0; i < list->num_internal_frame_buffers; ++i) {
+ vpx_free(list->int_fb[i].data);
+ list->int_fb[i].data = NULL;
+ }
+ vpx_free(list->int_fb);
+ list->int_fb = NULL;
+}
+
+int vp9_get_frame_buffer(void *cb_priv, size_t min_size,
+ vpx_codec_frame_buffer_t *fb) {
+ int i;
+ InternalFrameBufferList *const int_fb_list =
+ (InternalFrameBufferList *)cb_priv;
+ if (int_fb_list == NULL)
+ return -1;
+
+ // Find a free frame buffer.
+ for (i = 0; i < int_fb_list->num_internal_frame_buffers; ++i) {
+ if (!int_fb_list->int_fb[i].in_use)
+ break;
+ }
+
+ if (i == int_fb_list->num_internal_frame_buffers)
+ return -1;
+
+ if (int_fb_list->int_fb[i].size < min_size) {
+ int_fb_list->int_fb[i].data =
+ (uint8_t *)vpx_realloc(int_fb_list->int_fb[i].data, min_size);
+ if (!int_fb_list->int_fb[i].data)
+ return -1;
+
+ int_fb_list->int_fb[i].size = min_size;
+ }
+
+ fb->data = int_fb_list->int_fb[i].data;
+ fb->size = int_fb_list->int_fb[i].size;
+ int_fb_list->int_fb[i].in_use = 1;
+
+ // Set the frame buffer's private data to point at the internal frame buffer.
+ fb->priv = &int_fb_list->int_fb[i];
+ return 0;
+}
+
+int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb) {
+ InternalFrameBuffer *const int_fb = (InternalFrameBuffer *)fb->priv;
+ (void)cb_priv;
+ int_fb->in_use = 0;
+ return 0;
+}
diff --git a/vp9/common/vp9_frame_buffers.h b/vp9/common/vp9_frame_buffers.h
new file mode 100644
index 000000000..e2cfe61b6
--- /dev/null
+++ b/vp9/common/vp9_frame_buffers.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_FRAME_BUFFERS_H_
+#define VP9_COMMON_VP9_FRAME_BUFFERS_H_
+
+#include "vpx/vpx_frame_buffer.h"
+#include "vpx/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct InternalFrameBuffer {
+ uint8_t *data;
+ size_t size;
+ int in_use;
+} InternalFrameBuffer;
+
+typedef struct InternalFrameBufferList {
+ int num_internal_frame_buffers;
+ InternalFrameBuffer *int_fb;
+} InternalFrameBufferList;
+
+// Initializes |list|. Returns 0 on success.
+int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list);
+
+// Free any data allocated to the frame buffers.
+void vp9_free_internal_frame_buffers(InternalFrameBufferList *list);
+
+// Callback used by libvpx to request an external frame buffer. |cb_priv|
+// Callback private data, which points to an InternalFrameBufferList.
+// |min_size| is the minimum size in bytes needed to decode the next frame.
+// |fb| pointer to the frame buffer.
+int vp9_get_frame_buffer(void *cb_priv, size_t min_size,
+ vpx_codec_frame_buffer_t *fb);
+
+// Callback used by libvpx when there are no references to the frame buffer.
+// |cb_priv| is not used. |fb| pointer to the frame buffer.
+int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP9_COMMON_VP9_FRAME_BUFFERS_H_
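vp9_get_frame_buffer() and vp9_release_frame_buffer() are the codec's built-in implementations of the external frame-buffer callbacks declared in vpx/vpx_frame_buffer.h (see the get_fb_cb/release_fb_cb fields added to VP9_COMMON below); an application can substitute its own pair. A sketch of wiring the defaults in by hand, assuming the vpx_codec_set_frame_buffer_functions() decoder entry point that accompanies this interface (error handling elided):

    #include "vpx/vpx_decoder.h"
    #include "vp9/common/vp9_frame_buffers.h"

    /* Route every frame-buffer request through a list we own. */
    static InternalFrameBufferList fb_list;

    static int use_internal_frame_buffers(vpx_codec_ctx_t *decoder) {
      if (vp9_alloc_internal_frame_buffers(&fb_list))
        return -1;  /* could not allocate the buffer list itself */
      return vpx_codec_set_frame_buffer_functions(
                 decoder, vp9_get_frame_buffer, vp9_release_frame_buffer,
                 &fb_list) != VPX_CODEC_OK;
    }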
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 07d7a92f6..868a66ae4 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -262,9 +262,9 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
int lvl_seg = default_filt_lvl;
if (vp9_segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
- lvl_seg = seg->abs_delta == SEGMENT_ABSDATA
- ? data
- : clamp(default_filt_lvl + data, 0, MAX_LOOP_FILTER);
+ lvl_seg = clamp(seg->abs_delta == SEGMENT_ABSDATA ?
+ data : default_filt_lvl + data,
+ 0, MAX_LOOP_FILTER);
}
if (!lf->mode_ref_delta_enabled) {
@@ -496,7 +496,7 @@ static void build_masks(const loop_filter_info_n *const lfi_n,
const BLOCK_SIZE block_size = mi->mbmi.sb_type;
const TX_SIZE tx_size_y = mi->mbmi.tx_size;
const TX_SIZE tx_size_uv = get_uv_tx_size(&mi->mbmi);
- const int skip = mi->mbmi.skip_coeff;
+ const int skip = mi->mbmi.skip;
const int seg = mi->mbmi.segment_id;
const int ref = mi->mbmi.ref_frame[0];
const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]];
@@ -577,7 +577,7 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n,
LOOP_FILTER_MASK *lfm) {
const BLOCK_SIZE block_size = mi->mbmi.sb_type;
const TX_SIZE tx_size_y = mi->mbmi.tx_size;
- const int skip = mi->mbmi.skip_coeff;
+ const int skip = mi->mbmi.skip;
const int seg = mi->mbmi.segment_id;
const int ref = mi->mbmi.ref_frame[0];
const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]];
@@ -868,7 +868,6 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16]));
}
-#if CONFIG_NON420
static uint8_t build_lfi(const loop_filter_info_n *lfi_n,
const MB_MODE_INFO *mbmi) {
const int seg = mbmi->segment_id;
@@ -937,8 +936,7 @@ static void filter_block_plane_non420(VP9_COMMON *cm,
for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
const MODE_INFO *mi = mi_8x8[c];
const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type;
- const int skip_this = mi[0].mbmi.skip_coeff
- && is_inter_block(&mi[0].mbmi);
+ const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi);
// left edge of current unit is block/partition edge -> no skip
const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ?
!(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1;
@@ -1047,7 +1045,6 @@ static void filter_block_plane_non420(VP9_COMMON *cm,
dst->buf += 8 * dst->stride;
}
}
-#endif
void vp9_filter_block_plane(VP9_COMMON *const cm,
struct macroblockd_plane *const plane,
@@ -1207,10 +1204,8 @@ void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
int mi_row, mi_col;
LOOP_FILTER_MASK lfm;
-#if CONFIG_NON420
int use_420 = y_only || (xd->plane[1].subsampling_y == 1 &&
xd->plane[1].subsampling_x == 1);
-#endif
for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mode_info_stride;
@@ -1221,22 +1216,16 @@ void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
setup_dst_planes(xd, frame_buffer, mi_row, mi_col);
// TODO(JBB): Make setup_mask work for non 420.
-#if CONFIG_NON420
if (use_420)
-#endif
vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col,
cm->mode_info_stride, &lfm);
for (plane = 0; plane < num_planes; ++plane) {
-#if CONFIG_NON420
if (use_420)
-#endif
vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
-#if CONFIG_NON420
else
filter_block_plane_non420(cm, &xd->plane[plane], mi_8x8 + mi_col,
mi_row, mi_col);
-#endif
}
}
}
diff --git a/vp9/common/vp9_mv.h b/vp9/common/vp9_mv.h
index 98fd1d82f..3eb7f9d61 100644
--- a/vp9/common/vp9_mv.h
+++ b/vp9/common/vp9_mv.h
@@ -34,8 +34,8 @@ typedef struct mv32 {
int32_t col;
} MV32;
-static void clamp_mv(MV *mv, int min_col, int max_col,
- int min_row, int max_row) {
+static INLINE void clamp_mv(MV *mv, int min_col, int max_col,
+ int min_row, int max_row) {
mv->col = clamp(mv->col, min_col, max_col);
mv->row = clamp(mv->row, min_row, max_row);
}
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index ff0262210..e5f3fed45 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -186,17 +186,17 @@ static INLINE int is_inside(const TileInfo *const tile,
// This function searches the neighbourhood of a given MB/SB
// to try and find candidate reference vectors.
-void vp9_find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
- const TileInfo *const tile,
- MODE_INFO *mi, const MODE_INFO *prev_mi,
- MV_REFERENCE_FRAME ref_frame,
- int_mv *mv_ref_list,
- int block_idx,
- int mi_row, int mi_col) {
+static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+ const TileInfo *const tile,
+ MODE_INFO *mi, const MODE_INFO *prev_mi,
+ MV_REFERENCE_FRAME ref_frame,
+ int_mv *mv_ref_list,
+ int block_idx, int mi_row, int mi_col) {
const int *ref_sign_bias = cm->ref_frame_sign_bias;
int i, refmv_count = 0;
const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
- const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL;
+ const MB_MODE_INFO *const prev_mbmi = cm->coding_use_prev_mi && prev_mi ?
+ &prev_mi->mbmi : NULL;
int different_ref_found = 0;
int context_counter = 0;
@@ -290,6 +290,16 @@ void vp9_find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
clamp_mv_ref(&mv_ref_list[i].as_mv, xd);
}
+void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+ const TileInfo *const tile,
+ MODE_INFO *mi, const MODE_INFO *prev_mi,
+ MV_REFERENCE_FRAME ref_frame,
+ int_mv *mv_ref_list,
+ int mi_row, int mi_col) {
+ find_mv_refs_idx(cm, xd, tile, mi, prev_mi, ref_frame, mv_ref_list, -1,
+ mi_row, mi_col);
+}
+
static void lower_mv_precision(MV *mv, int allow_hp) {
const int use_hp = allow_hp && vp9_use_mv_hp(mv);
if (!use_hp) {
@@ -324,8 +334,8 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
assert(MAX_MV_REF_CANDIDATES == 2);
- vp9_find_mv_refs_idx(cm, xd, tile, mi, xd->last_mi, mi->mbmi.ref_frame[ref],
- mv_list, block, mi_row, mi_col);
+ find_mv_refs_idx(cm, xd, tile, mi, xd->last_mi, mi->mbmi.ref_frame[ref],
+ mv_list, block, mi_row, mi_col);
near->as_int = 0;
switch (block) {
diff --git a/vp9/common/vp9_mvref_common.h b/vp9/common/vp9_mvref_common.h
index 0936abfcd..04cb000ef 100644
--- a/vp9/common/vp9_mvref_common.h
+++ b/vp9/common/vp9_mvref_common.h
@@ -17,29 +17,24 @@
extern "C" {
#endif
+#define LEFT_TOP_MARGIN ((VP9_ENC_BORDER_IN_PIXELS - VP9_INTERP_EXTEND) << 3)
+#define RIGHT_BOTTOM_MARGIN ((VP9_ENC_BORDER_IN_PIXELS -\
+ VP9_INTERP_EXTEND) << 3)
-void vp9_find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
- const TileInfo *const tile,
- MODE_INFO *mi, const MODE_INFO *prev_mi,
- MV_REFERENCE_FRAME ref_frame,
- int_mv *mv_ref_list,
- int block_idx,
- int mi_row, int mi_col);
-
-static INLINE void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
- const TileInfo *const tile,
- MODE_INFO *mi, const MODE_INFO *prev_mi,
- MV_REFERENCE_FRAME ref_frame,
- int_mv *mv_ref_list,
- int mi_row, int mi_col) {
- vp9_find_mv_refs_idx(cm, xd, tile, mi, prev_mi, ref_frame,
- mv_ref_list, -1, mi_row, mi_col);
+// TODO(jingning): this mv clamping function should be block size dependent.
+static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
+ clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
+ xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
+ xd->mb_to_top_edge - LEFT_TOP_MARGIN,
+ xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
}
-#define LEFT_TOP_MARGIN ((VP9_ENC_BORDER_IN_PIXELS \
- - VP9_INTERP_EXTEND) << 3)
-#define RIGHT_BOTTOM_MARGIN ((VP9_ENC_BORDER_IN_PIXELS \
- - VP9_INTERP_EXTEND) << 3)
+void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+ const TileInfo *const tile,
+ MODE_INFO *mi, const MODE_INFO *prev_mi,
+ MV_REFERENCE_FRAME ref_frame,
+ int_mv *mv_ref_list,
+ int mi_row, int mi_col);
// check a list of motion vectors by sad score using a number rows of pixels
// above and a number cols of pixels in the left to select the one with best
@@ -47,14 +42,6 @@ static INLINE void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp,
int_mv *mvlist, int_mv *nearest, int_mv *near);
-// TODO(jingning): this mv clamping function should be block size dependent.
-static void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
- clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
- xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
- xd->mb_to_top_edge - LEFT_TOP_MARGIN,
- xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
-}
-
void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
const TileInfo *const tile,
int block, int ref, int mi_row, int mi_col,
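Both margin macros above expand to the same value; the << 3 converts a pixel count into the 1/8-pel units in which motion vectors are stored. Assuming the usual values of the two constants, which are not shown in this diff (VP9_ENC_BORDER_IN_PIXELS = 160 and VP9_INTERP_EXTEND = 4), clamp_mv2() keeps each component within (160 - 4) << 3 = 1248 eighth-pel units, i.e. 156 whole pixels, of the frame edge, leaving room inside the border for the 8-tap interpolation filter.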
diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h
index 564e4195f..222086886 100644
--- a/vp9/common/vp9_onyx.h
+++ b/vp9/common/vp9_onyx.h
@@ -112,7 +112,6 @@ extern "C" {
int auto_key; // autodetect cut scenes and set the keyframes
int key_freq; // maximum distance to key frame.
- int allow_lag; // allow lagged compression (if 0 lagin frames is ignored)
int lag_in_frames; // how many frames lag before we start encoding
// ----------------------------------------------------------------
@@ -147,8 +146,14 @@ extern "C" {
// END DATARATE CONTROL OPTIONS
// ----------------------------------------------------------------
- // Spatial scalability
- int ss_number_layers;
+ // Spatial and temporal scalability.
+ int ss_number_layers; // Number of spatial layers.
+ int ts_number_layers; // Number of temporal layers.
+ // Bitrate allocation for spatial layers.
+ int ss_target_bitrate[VPX_SS_MAX_LAYERS];
+ // Bitrate allocation (CBR mode) and framerate factor, for temporal layers.
+ int ts_target_bitrate[VPX_TS_MAX_LAYERS];
+ int ts_rate_decimator[VPX_TS_MAX_LAYERS];
// these parameters aren't to be used in final build don't use!!!
int play_alternate;
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index d92a25b12..e6d6ea7f0 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -18,6 +18,7 @@
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
+#include "vp9/common/vp9_frame_buffers.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_tile_common.h"
@@ -94,6 +95,7 @@ typedef enum {
typedef struct {
int ref_count;
+ vpx_codec_frame_buffer_t raw_frame_buffer;
YV12_BUFFER_CONFIG buf;
} RefCntBuffer;
@@ -222,14 +224,27 @@ typedef struct VP9Common {
int error_resilient_mode;
int frame_parallel_decoding_mode;
+ // Flag indicates if prev_mi can be used in coding:
+ // 0: encoder assumes decoder does not have prev_mi
+ // 1: encoder assumes decoder has and uses prev_mi
+ unsigned int coding_use_prev_mi;
+
int log2_tile_cols, log2_tile_rows;
+
+ // Private data associated with the frame buffer callbacks.
+ void *cb_priv;
+ vpx_get_frame_buffer_cb_fn_t get_fb_cb;
+ vpx_release_frame_buffer_cb_fn_t release_fb_cb;
+
+ // Handles memory for the codec.
+ InternalFrameBufferList int_frame_buffers;
} VP9_COMMON;
-static YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) {
+static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) {
return &cm->frame_bufs[cm->new_fb_idx].buf;
}
-static int get_free_fb(VP9_COMMON *cm) {
+static INLINE int get_free_fb(VP9_COMMON *cm) {
int i;
for (i = 0; i < FRAME_BUFFERS; i++)
if (cm->frame_bufs[i].ref_count == 0)
@@ -240,7 +255,7 @@ static int get_free_fb(VP9_COMMON *cm) {
return i;
}
-static void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
+static INLINE void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
const int ref_index = *idx;
if (ref_index >= 0 && bufs[ref_index].ref_count > 0)
@@ -251,7 +266,7 @@ static void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
bufs[new_idx].ref_count++;
}
-static int mi_cols_aligned_to_sb(int n_mis) {
+static INLINE int mi_cols_aligned_to_sb(int n_mis) {
return ALIGN_POWER_OF_TWO(n_mis, MI_BLOCK_SIZE_LOG2);
}
@@ -275,10 +290,10 @@ static INLINE void set_skip_context(
}
}
-static void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
- int mi_row, int bh,
- int mi_col, int bw,
- int mi_rows, int mi_cols) {
+static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
+ int mi_row, int bh,
+ int mi_col, int bw,
+ int mi_rows, int mi_cols) {
xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
xd->mb_to_bottom_edge = ((mi_rows - bh - mi_row) * MI_SIZE) * 8;
xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
@@ -292,7 +307,6 @@ static void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
static void set_prev_mi(VP9_COMMON *cm) {
const int use_prev_in_find_mv_refs = cm->width == cm->last_width &&
cm->height == cm->last_height &&
- !cm->error_resilient_mode &&
!cm->intra_only &&
cm->last_show_frame;
// Special case: set prev_mi to NULL when the previous mode info
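get_free_fb() and ref_cnt_fb() above implement the codec's frame-buffer reference counting. A minimal usage sketch of the intended flow; the ref_frame_map index is illustrative, not a quote from the encoder:

    /* Claim a slot whose ref_count is 0 for the frame being coded, then
       retarget reference slot 0 at it. ref_cnt_fb() drops one count on
       the old buffer and adds one on the new buffer. */
    cm->new_fb_idx = get_free_fb(cm);
    ref_cnt_fb(cm->frame_bufs, &cm->ref_frame_map[0], cm->new_fb_idx);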
diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c
index a172ba6a2..7baa9ee33 100644
--- a/vp9/common/vp9_postproc.c
+++ b/vp9/common/vp9_postproc.c
@@ -700,7 +700,7 @@ int vp9_post_proc_frame(struct VP9Common *cm,
char zz[4];
int dc_diff = !(mi[mb_index].mbmi.mode != I4X4_PRED &&
mi[mb_index].mbmi.mode != SPLITMV &&
- mi[mb_index].mbmi.skip_coeff);
+ mi[mb_index].mbmi.skip);
if (cm->frame_type == KEY_FRAME)
snprintf(zz, sizeof(zz) - 1, "a");
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index 11b6d93c1..197bcb643 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -218,27 +218,25 @@ int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
} else { // inter/inter
const int above_has_second = has_second_ref(above_mbmi);
const int left_has_second = has_second_ref(left_mbmi);
+ const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
if (above_has_second && left_has_second) {
- pred_context = 1 + (above_mbmi->ref_frame[0] == LAST_FRAME ||
- above_mbmi->ref_frame[1] == LAST_FRAME ||
- left_mbmi->ref_frame[0] == LAST_FRAME ||
- left_mbmi->ref_frame[1] == LAST_FRAME);
+ pred_context = 1 + (above0 == LAST_FRAME || above1 == LAST_FRAME ||
+ left0 == LAST_FRAME || left1 == LAST_FRAME);
} else if (above_has_second || left_has_second) {
- const MV_REFERENCE_FRAME rfs = !above_has_second ?
- above_mbmi->ref_frame[0] : left_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME crf1 = above_has_second ?
- above_mbmi->ref_frame[0] : left_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME crf2 = above_has_second ?
- above_mbmi->ref_frame[1] : left_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
if (rfs == LAST_FRAME)
pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
else
pred_context = (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
} else {
- pred_context = 2 * (above_mbmi->ref_frame[0] == LAST_FRAME) +
- 2 * (left_mbmi->ref_frame[0] == LAST_FRAME);
+ pred_context = 2 * (above0 == LAST_FRAME) + 2 * (left0 == LAST_FRAME);
}
}
} else if (has_above || has_left) { // one edge available
@@ -291,23 +289,23 @@ int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
} else { // inter/inter
const int above_has_second = has_second_ref(above_mbmi);
const int left_has_second = has_second_ref(left_mbmi);
+ const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
if (above_has_second && left_has_second) {
- if (above_mbmi->ref_frame[0] == left_mbmi->ref_frame[0] &&
- above_mbmi->ref_frame[1] == left_mbmi->ref_frame[1])
- pred_context = 3 * (above_mbmi->ref_frame[0] == GOLDEN_FRAME ||
- above_mbmi->ref_frame[1] == GOLDEN_FRAME ||
- left_mbmi->ref_frame[0] == GOLDEN_FRAME ||
- left_mbmi->ref_frame[1] == GOLDEN_FRAME);
+ if (above0 == left0 && above1 == left1)
+ pred_context = 3 * (above0 == GOLDEN_FRAME ||
+ above1 == GOLDEN_FRAME ||
+ left0 == GOLDEN_FRAME ||
+ left1 == GOLDEN_FRAME);
else
pred_context = 2;
} else if (above_has_second || left_has_second) {
- const MV_REFERENCE_FRAME rfs = !above_has_second ?
- above_mbmi->ref_frame[0] : left_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME crf1 = above_has_second ?
- above_mbmi->ref_frame[0] : left_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME crf2 = above_has_second ?
- above_mbmi->ref_frame[1] : left_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
if (rfs == GOLDEN_FRAME)
pred_context = 3 + (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
@@ -316,17 +314,15 @@ int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
else
pred_context = 1 + 2 * (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
} else {
- if (above_mbmi->ref_frame[0] == LAST_FRAME &&
- left_mbmi->ref_frame[0] == LAST_FRAME) {
+ if (above0 == LAST_FRAME && left0 == LAST_FRAME) {
pred_context = 3;
- } else if (above_mbmi->ref_frame[0] == LAST_FRAME ||
- left_mbmi->ref_frame[0] == LAST_FRAME) {
- const MB_MODE_INFO *edge_mbmi =
- above_mbmi->ref_frame[0] == LAST_FRAME ? left_mbmi : above_mbmi;
- pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME);
+ } else if (above0 == LAST_FRAME || left0 == LAST_FRAME) {
+ const MV_REFERENCE_FRAME edge0 = (above0 == LAST_FRAME) ? left0
+ : above0;
+ pred_context = 4 * (edge0 == GOLDEN_FRAME);
} else {
- pred_context = 2 * (above_mbmi->ref_frame[0] == GOLDEN_FRAME) +
- 2 * (left_mbmi->ref_frame[0] == GOLDEN_FRAME);
+ pred_context = 2 * (above0 == GOLDEN_FRAME) +
+ 2 * (left0 == GOLDEN_FRAME);
}
}
}
@@ -357,10 +353,10 @@ int vp9_get_tx_size_context(const MACROBLOCKD *xd) {
const MB_MODE_INFO *const left_mbmi = get_mbmi(get_left_mi(xd));
const int has_above = above_mbmi != NULL;
const int has_left = left_mbmi != NULL;
- int above_ctx = (has_above && !above_mbmi->skip_coeff) ? above_mbmi->tx_size
- : max_tx_size;
- int left_ctx = (has_left && !left_mbmi->skip_coeff) ? left_mbmi->tx_size
- : max_tx_size;
+ int above_ctx = (has_above && !above_mbmi->skip) ? above_mbmi->tx_size
+ : max_tx_size;
+ int left_ctx = (has_left && !left_mbmi->skip) ? left_mbmi->tx_size
+ : max_tx_size;
if (!has_left)
left_ctx = above_ctx;
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
index 0acee32f8..6c7a0d383 100644
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -39,7 +39,7 @@ static INLINE int vp9_get_pred_context_seg_id(const MACROBLOCKD *xd) {
return above_sip + left_sip;
}
-static INLINE vp9_prob vp9_get_pred_prob_seg_id(struct segmentation *seg,
+static INLINE vp9_prob vp9_get_pred_prob_seg_id(const struct segmentation *seg,
const MACROBLOCKD *xd) {
return seg->pred_probs[vp9_get_pred_context_seg_id(xd)];
}
@@ -47,8 +47,8 @@ static INLINE vp9_prob vp9_get_pred_prob_seg_id(struct segmentation *seg,
static INLINE int vp9_get_skip_context(const MACROBLOCKD *xd) {
const MODE_INFO *const above_mi = get_above_mi(xd);
const MODE_INFO *const left_mi = get_left_mi(xd);
- const int above_skip = (above_mi != NULL) ? above_mi->mbmi.skip_coeff : 0;
- const int left_skip = (left_mi != NULL) ? left_mi->mbmi.skip_coeff : 0;
+ const int above_skip = (above_mi != NULL) ? above_mi->mbmi.skip : 0;
+ const int left_skip = (left_mi != NULL) ? left_mi->mbmi.skip : 0;
return above_skip + left_skip;
}
@@ -98,8 +98,8 @@ static INLINE vp9_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm,
int vp9_get_tx_size_context(const MACROBLOCKD *xd);
-static const vp9_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx,
- const struct tx_probs *tx_probs) {
+static INLINE const vp9_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx,
+ const struct tx_probs *tx_probs) {
switch (max_tx_size) {
case TX_8X8:
return tx_probs->p8x8[ctx];
@@ -113,13 +113,14 @@ static const vp9_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx,
}
}
-static const vp9_prob *get_tx_probs2(TX_SIZE max_tx_size, const MACROBLOCKD *xd,
- const struct tx_probs *tx_probs) {
+static INLINE const vp9_prob *get_tx_probs2(TX_SIZE max_tx_size,
+ const MACROBLOCKD *xd,
+ const struct tx_probs *tx_probs) {
return get_tx_probs(max_tx_size, vp9_get_tx_size_context(xd), tx_probs);
}
-static unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx,
- struct tx_counts *tx_counts) {
+static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx,
+ struct tx_counts *tx_counts) {
switch (max_tx_size) {
case TX_8X8:
return tx_counts->p8x8[ctx];
diff --git a/vp9/common/vp9_prob.c b/vp9/common/vp9_prob.c
index 884884e0b..a1befc63e 100644
--- a/vp9/common/vp9_prob.c
+++ b/vp9/common/vp9_prob.c
@@ -10,7 +10,7 @@
#include "vp9/common/vp9_prob.h"
-DECLARE_ALIGNED(16, const uint8_t, vp9_norm[256]) = {
+const uint8_t vp9_norm[256] = {
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -28,3 +28,34 @@ DECLARE_ALIGNED(16, const uint8_t, vp9_norm[256]) = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
+
+
+static unsigned int tree_merge_probs_impl(unsigned int i,
+ const vp9_tree_index *tree,
+ const vp9_prob *pre_probs,
+ const unsigned int *counts,
+ unsigned int count_sat,
+ unsigned int max_update,
+ vp9_prob *probs) {
+ const int l = tree[i];
+ const unsigned int left_count = (l <= 0)
+ ? counts[-l]
+ : tree_merge_probs_impl(l, tree, pre_probs, counts,
+ count_sat, max_update, probs);
+ const int r = tree[i + 1];
+ const unsigned int right_count = (r <= 0)
+ ? counts[-r]
+ : tree_merge_probs_impl(r, tree, pre_probs, counts,
+ count_sat, max_update, probs);
+ const unsigned int ct[2] = { left_count, right_count };
+ probs[i >> 1] = merge_probs(pre_probs[i >> 1], ct,
+ count_sat, max_update);
+ return left_count + right_count;
+}
+
+void vp9_tree_merge_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs,
+ const unsigned int *counts, unsigned int count_sat,
+ unsigned int max_update_factor, vp9_prob *probs) {
+ tree_merge_probs_impl(0, tree, pre_probs, counts, count_sat,
+ max_update_factor, probs);
+}
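To see what the recursion computes, here is a minimal sketch with a single-node tree; leaves are encoded as non-positive indices, so counts[-l] indexes the token counts (all values below are illustrative):

    /* One internal node whose children are tokens 0 and 1. */
    static const vp9_tree_index tree[2] = { 0, -1 };
    const vp9_prob pre_probs[1] = { 128 };
    const unsigned int counts[2] = { 90, 10 };
    vp9_prob probs[1];

    /* probs[0] blends pre_probs[0] toward the observed probability of the
       left branch (roughly 255 * 90 / 100), weighted by how close the
       total count of 100 comes to the saturation value. */
    vp9_tree_merge_probs(tree, pre_probs, counts, 20, 128, probs);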
diff --git a/vp9/common/vp9_prob.h b/vp9/common/vp9_prob.h
index cc8d8ab38..f36148035 100644
--- a/vp9/common/vp9_prob.h
+++ b/vp9/common/vp9_prob.h
@@ -79,37 +79,10 @@ static INLINE vp9_prob merge_probs(vp9_prob pre_prob,
return weighted_prob(pre_prob, prob, factor);
}
-static unsigned int tree_merge_probs_impl(unsigned int i,
- const vp9_tree_index *tree,
- const vp9_prob *pre_probs,
- const unsigned int *counts,
- unsigned int count_sat,
- unsigned int max_update_factor,
- vp9_prob *probs) {
- const int l = tree[i];
- const unsigned int left_count = (l <= 0)
- ? counts[-l]
- : tree_merge_probs_impl(l, tree, pre_probs, counts,
- count_sat, max_update_factor, probs);
- const int r = tree[i + 1];
- const unsigned int right_count = (r <= 0)
- ? counts[-r]
- : tree_merge_probs_impl(r, tree, pre_probs, counts,
- count_sat, max_update_factor, probs);
- const unsigned int ct[2] = { left_count, right_count };
- probs[i >> 1] = merge_probs(pre_probs[i >> 1], ct,
- count_sat, max_update_factor);
- return left_count + right_count;
-}
+void vp9_tree_merge_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs,
+ const unsigned int *counts, unsigned int count_sat,
+ unsigned int max_update_factor, vp9_prob *probs);
-static void tree_merge_probs(const vp9_tree_index *tree,
- const vp9_prob *pre_probs,
- const unsigned int *counts,
- unsigned int count_sat,
- unsigned int max_update_factor, vp9_prob *probs) {
- tree_merge_probs_impl(0, tree, pre_probs, counts,
- count_sat, max_update_factor, probs);
-}
DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]);
diff --git a/vp9/common/vp9_quant_common.c b/vp9/common/vp9_quant_common.c
index 6dbdb4216..def12554d 100644
--- a/vp9/common/vp9_quant_common.c
+++ b/vp9/common/vp9_quant_common.c
@@ -130,12 +130,13 @@ int16_t vp9_ac_quant(int qindex, int delta) {
}
-int vp9_get_qindex(struct segmentation *seg, int segment_id, int base_qindex) {
+int vp9_get_qindex(const struct segmentation *seg, int segment_id,
+ int base_qindex) {
if (vp9_segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) {
const int data = vp9_get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
- return seg->abs_delta == SEGMENT_ABSDATA ?
- data : // Abs value
- clamp(base_qindex + data, 0, MAXQ); // Delta value
+ const int seg_qindex = seg->abs_delta == SEGMENT_ABSDATA ?
+ data : base_qindex + data;
+ return clamp(seg_qindex, 0, MAXQ);
} else {
return base_qindex;
}
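Beyond the const-correctness change, the rewrite also clamps absolute segment data, which the old code returned unmodified. A sketch of the difference, assuming MAXQ is 255:

    /* absolute mode: data = 300        -> old: 300 (out of range), new: 255 */
    /* delta mode: base = 250, data = 20 -> old and new: clamp(270) == 255   */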
diff --git a/vp9/common/vp9_quant_common.h b/vp9/common/vp9_quant_common.h
index af50e23cd..581104006 100644
--- a/vp9/common/vp9_quant_common.h
+++ b/vp9/common/vp9_quant_common.h
@@ -27,7 +27,8 @@ void vp9_init_quant_tables();
int16_t vp9_dc_quant(int qindex, int delta);
int16_t vp9_ac_quant(int qindex, int delta);
-int vp9_get_qindex(struct segmentation *seg, int segment_id, int base_qindex);
+int vp9_get_qindex(const struct segmentation *seg, int segment_id,
+ int base_qindex);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index db20f19d9..df603ad70 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -139,9 +139,6 @@ MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv,
return clamped_mv;
}
-// TODO(jkoleszar): In principle, pred_w, pred_h are unnecessary, as we could
-// calculate the subsampled BLOCK_SIZE, but that type isn't defined for
-// sizes smaller than 16x16 yet.
static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
int bw, int bh,
int x, int y, int w, int h,
@@ -270,8 +267,8 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
: mi_mv_pred_q4(mi, ref))
: mi->mbmi.mv[ref].as_mv;
MV32 scaled_mv;
- int xs, ys, x0, y0, x0_16, y0_16, x1, y1, frame_width,
- frame_height, subpel_x, subpel_y, buf_stride;
+ int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height, buf_stride,
+ subpel_x, subpel_y;
uint8_t *ref_frame, *buf_ptr;
const YV12_BUFFER_CONFIG *ref_buf = xd->block_refs[ref]->buf;
const MV mv_q4 = {
@@ -321,10 +318,6 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
x0_16 += scaled_mv.col;
y0_16 += scaled_mv.row;
- // Get reference block bottom right coordinate.
- x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1;
- y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1;
-
// Get reference block pointer.
buf_ptr = ref_frame + y0 * pre_buf->stride + x0;
buf_stride = pre_buf->stride;
@@ -333,6 +326,9 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
// width/height is not a multiple of 8 pixels.
if (scaled_mv.col || scaled_mv.row ||
(frame_width & 0x7) || (frame_height & 0x7)) {
+ // Get reference block bottom right coordinate.
+ int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1;
+ int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1;
int x_pad = 0, y_pad = 0;
if (subpel_x || (sf->x_step_q4 & SUBPEL_MASK)) {
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h
index bf738c28b..dccd60938 100644
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -39,18 +39,18 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
enum mv_precision precision,
int x, int y);
-static int scaled_buffer_offset(int x_offset, int y_offset, int stride,
- const struct scale_factors *sf) {
+static INLINE int scaled_buffer_offset(int x_offset, int y_offset, int stride,
+ const struct scale_factors *sf) {
const int x = sf ? sf->scale_value_x(x_offset, sf) : x_offset;
const int y = sf ? sf->scale_value_y(y_offset, sf) : y_offset;
return y * stride + x;
}
-static void setup_pred_plane(struct buf_2d *dst,
- uint8_t *src, int stride,
- int mi_row, int mi_col,
- const struct scale_factors *scale,
- int subsampling_x, int subsampling_y) {
+static INLINE void setup_pred_plane(struct buf_2d *dst,
+ uint8_t *src, int stride,
+ int mi_row, int mi_col,
+ const struct scale_factors *scale,
+ int subsampling_x, int subsampling_y) {
const int x = (MI_SIZE * mi_col) >> subsampling_x;
const int y = (MI_SIZE * mi_row) >> subsampling_y;
dst->buf = src + scaled_buffer_offset(x, y, stride, scale);
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index 96ba3e464..71a41a9de 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -382,34 +382,34 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
/* slower path if the block needs border extension */
if (x0 + 2 * bs <= frame_width) {
if (right_available && bs == 4) {
- vpx_memcpy(above_row - 1, above_ref - 1, 2 * bs + 1);
+ vpx_memcpy(above_row, above_ref, 2 * bs);
} else {
- vpx_memcpy(above_row - 1, above_ref - 1, bs + 1);
+ vpx_memcpy(above_row, above_ref, bs);
vpx_memset(above_row + bs, above_row[bs - 1], bs);
}
} else if (x0 + bs <= frame_width) {
const int r = frame_width - x0;
if (right_available && bs == 4) {
- vpx_memcpy(above_row - 1, above_ref - 1, r + 1);
+ vpx_memcpy(above_row, above_ref, r);
vpx_memset(above_row + r, above_row[r - 1],
x0 + 2 * bs - frame_width);
} else {
- vpx_memcpy(above_row - 1, above_ref - 1, bs + 1);
+ vpx_memcpy(above_row, above_ref, bs);
vpx_memset(above_row + bs, above_row[bs - 1], bs);
}
} else if (x0 <= frame_width) {
const int r = frame_width - x0;
if (right_available && bs == 4) {
- vpx_memcpy(above_row - 1, above_ref - 1, r + 1);
+ vpx_memcpy(above_row, above_ref, r);
vpx_memset(above_row + r, above_row[r - 1],
x0 + 2 * bs - frame_width);
} else {
- vpx_memcpy(above_row - 1, above_ref - 1, r + 1);
+ vpx_memcpy(above_row, above_ref, r);
vpx_memset(above_row + r, above_row[r - 1],
x0 + 2 * bs - frame_width);
}
- above_row[-1] = left_available ? above_ref[-1] : 129;
}
+ above_row[-1] = left_available ? above_ref[-1] : 129;
} else {
/* faster path if the block does not need extension */
if (bs == 4 && right_available && left_available) {
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 04a40bd58..4031bda55 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -264,13 +264,13 @@ prototype void vp9_convolve_avg "const uint8_t *src, ptrdiff_t src_stride, uint8
specialize vp9_convolve_avg $sse2_x86inc neon dspr2
prototype void vp9_convolve8 "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8 sse2 ssse3 neon dspr2
+specialize vp9_convolve8 sse2 ssse3 avx2 neon dspr2
prototype void vp9_convolve8_horiz "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_horiz sse2 ssse3 neon dspr2
+specialize vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2
prototype void vp9_convolve8_vert "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_vert sse2 ssse3 neon dspr2
+specialize vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2
prototype void vp9_convolve8_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
specialize vp9_convolve8_avg sse2 ssse3 neon dspr2
@@ -386,7 +386,7 @@ prototype unsigned int vp9_variance4x4 "const uint8_t *src_ptr, int source_strid
specialize vp9_variance4x4 mmx $sse2_x86inc
prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance64x64 $sse2_x86inc $ssse3_x86inc
+specialize vp9_sub_pixel_variance64x64 $sse2_x86inc $ssse3_x86inc avx2
prototype unsigned int vp9_sub_pixel_avg_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance64x64 $sse2_x86inc $ssse3_x86inc
@@ -416,7 +416,7 @@ prototype unsigned int vp9_sub_pixel_avg_variance16x32 "const uint8_t *src_ptr,
specialize vp9_sub_pixel_avg_variance16x32 $sse2_x86inc $ssse3_x86inc
prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance32x32 $sse2_x86inc $ssse3_x86inc
+specialize vp9_sub_pixel_variance32x32 $sse2_x86inc $ssse3_x86inc avx2
prototype unsigned int vp9_sub_pixel_avg_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance32x32 $sse2_x86inc $ssse3_x86inc
@@ -707,14 +707,14 @@ if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then
fi
# fdct functions
-prototype void vp9_short_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type"
-specialize vp9_short_fht4x4 sse2 avx2
+prototype void vp9_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type"
+specialize vp9_fht4x4 sse2 avx2
-prototype void vp9_short_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type"
-specialize vp9_short_fht8x8 sse2 avx2
+prototype void vp9_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type"
+specialize vp9_fht8x8 sse2 avx2
-prototype void vp9_short_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type"
-specialize vp9_short_fht16x16 sse2 avx2
+prototype void vp9_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type"
+specialize vp9_fht16x16 sse2 avx2
prototype void vp9_fwht4x4 "const int16_t *input, int16_t *output, int stride"
specialize vp9_fwht4x4
@@ -737,20 +737,20 @@ specialize vp9_fdct32x32_rd sse2 avx2
#
# Motion search
#
-prototype int vp9_full_search_sad "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, int n"
+prototype int vp9_full_search_sad "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv"
specialize vp9_full_search_sad sse3 sse4_1
vp9_full_search_sad_sse3=vp9_full_search_sadx3
vp9_full_search_sad_sse4_1=vp9_full_search_sadx8
-prototype int vp9_refining_search_sad "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
+prototype int vp9_refining_search_sad "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
specialize vp9_refining_search_sad sse3
vp9_refining_search_sad_sse3=vp9_refining_search_sadx4
-prototype int vp9_diamond_search_sad "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
+prototype int vp9_diamond_search_sad "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
specialize vp9_diamond_search_sad sse3
vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4
-prototype int vp9_full_range_search "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
+prototype int vp9_full_range_search "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
specialize vp9_full_range_search
prototype void vp9_temporal_filter_apply "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"
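# For context, each prototype/specialize pair in this script feeds the RTCD
# generator; adding the avx2 token means the runtime dispatcher may now select
# the AVX2 kernel. A sketch of the naming convention this implies:
#   specialize vp9_convolve8 sse2 ssse3 avx2 neon dspr2
#     -> vp9_convolve8 resolves at runtime to vp9_convolve8_avx2,
#        vp9_convolve8_ssse3, vp9_convolve8_sse2, ... or vp9_convolve8_c,
#        depending on the best instruction set the CPU reports.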
diff --git a/vp9/common/vp9_scale.h b/vp9/common/vp9_scale.h
index 90b0d0bf9..a9dda1889 100644
--- a/vp9/common/vp9_scale.h
+++ b/vp9/common/vp9_scale.h
@@ -40,12 +40,12 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *sf,
int other_w, int other_h,
int this_w, int this_h);
-static int vp9_is_valid_scale(const struct scale_factors *sf) {
+static INLINE int vp9_is_valid_scale(const struct scale_factors *sf) {
return sf->x_scale_fp != REF_INVALID_SCALE &&
sf->y_scale_fp != REF_INVALID_SCALE;
}
-static int vp9_is_scaled(const struct scale_factors *sf) {
+static INLINE int vp9_is_scaled(const struct scale_factors *sf) {
return sf->x_scale_fp != REF_NO_SCALE ||
sf->y_scale_fp != REF_NO_SCALE;
}
diff --git a/vp9/common/vp9_systemdependent.h b/vp9/common/vp9_systemdependent.h
index ee9a4823b..72edbca55 100644
--- a/vp9/common/vp9_systemdependent.h
+++ b/vp9/common/vp9_systemdependent.h
@@ -11,13 +11,17 @@
#ifndef VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
#define VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
-#ifdef __cplusplus
-extern "C" {
+#ifdef _MSC_VER
+# if _MSC_VER > 1310 && (defined(_M_X64) || defined(_M_IX86))
+# include <intrin.h>
+# define USE_MSC_INTRIN
+# endif
+# include <math.h>
+# define snprintf _snprintf
#endif
-#ifdef _MSC_VER
-#include <math.h>
-#define snprintf _snprintf
+#ifdef __cplusplus
+extern "C" {
#endif
#include "./vpx_config.h"
@@ -30,7 +34,7 @@ void vpx_reset_mmx_state(void);
#if defined(_MSC_VER) && _MSC_VER < 1800
// round is not defined in MSVC before VS2013.
-static int round(double x) {
+static INLINE int round(double x) {
if (x < 0)
return (int)ceil(x - 0.5);
else
@@ -44,9 +48,7 @@ static int round(double x) {
static INLINE int get_msb(unsigned int n) {
return 31 ^ __builtin_clz(n);
}
-#elif defined(_MSC_VER) && _MSC_VER > 1310 && \
- (defined(_M_X64) || defined(_M_IX86))
-#include <intrin.h>
+#elif defined(USE_MSC_INTRIN)
#pragma intrinsic(_BitScanReverse)
static INLINE int get_msb(unsigned int n) {
@@ -54,6 +56,7 @@ static INLINE int get_msb(unsigned int n) {
_BitScanReverse(&first_set_bit, n);
return first_set_bit;
}
+#undef USE_MSC_INTRIN
#else
// Returns (int)floor(log2(n)). n must be > 0.
static INLINE int get_msb(unsigned int n) {
@@ -73,9 +76,6 @@ static INLINE int get_msb(unsigned int n) {
}
#endif
-struct VP9Common;
-void vp9_machine_specific_config(struct VP9Common *cm);
-
#ifdef __cplusplus
} // extern "C"
#endif
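All three get_msb() variants compute (int)floor(log2(n)) for n > 0. A quick sanity sketch of the __builtin_clz form (assert.h assumed, values illustrative):

    #include <assert.h>
    /* n = 20 is binary 10100: __builtin_clz(20) == 27 for a 32-bit int,
       and 31 ^ 27 == 4 == (int)floor(log2(20)). */
    assert(get_msb(1) == 0);
    assert(get_msb(20) == 4);
    assert(get_msb(0x80000000u) == 31);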
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
index 8ab5fb1bc..1b4904c39 100644
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -16,15 +16,15 @@
typedef void filter8_1dfunction (
const unsigned char *src_ptr,
- const unsigned int src_pitch,
+ const ptrdiff_t src_pitch,
unsigned char *output_ptr,
- unsigned int out_pitch,
+ ptrdiff_t out_pitch,
unsigned int output_height,
const short *filter
);
-#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt1, opt2) \
-void vp9_convolve8_##name##_##opt1(const uint8_t *src, ptrdiff_t src_stride, \
+#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
+ void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
uint8_t *dst, ptrdiff_t dst_stride, \
const int16_t *filter_x, int x_step_q4, \
const int16_t *filter_y, int y_step_q4, \
@@ -32,50 +32,68 @@ void vp9_convolve8_##name##_##opt1(const uint8_t *src, ptrdiff_t src_stride, \
if (step_q4 == 16 && filter[3] != 128) { \
if (filter[0] || filter[1] || filter[2]) { \
while (w >= 16) { \
- vp9_filter_block1d16_##dir##8_##avg##opt1(src_start, src_stride, \
- dst, dst_stride, \
- h, filter); \
+ vp9_filter_block1d16_##dir##8_##avg##opt(src_start, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter); \
src += 16; \
dst += 16; \
w -= 16; \
} \
while (w >= 8) { \
- vp9_filter_block1d8_##dir##8_##avg##opt1(src_start, src_stride, \
- dst, dst_stride, \
- h, filter); \
+ vp9_filter_block1d8_##dir##8_##avg##opt(src_start, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter); \
src += 8; \
dst += 8; \
w -= 8; \
} \
while (w >= 4) { \
- vp9_filter_block1d4_##dir##8_##avg##opt1(src_start, src_stride, \
- dst, dst_stride, \
- h, filter); \
+ vp9_filter_block1d4_##dir##8_##avg##opt(src_start, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter); \
src += 4; \
dst += 4; \
w -= 4; \
} \
} else { \
while (w >= 16) { \
- vp9_filter_block1d16_##dir##2_##avg##opt2(src, src_stride, \
- dst, dst_stride, \
- h, filter); \
+ vp9_filter_block1d16_##dir##2_##avg##opt(src, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter); \
src += 16; \
dst += 16; \
w -= 16; \
} \
while (w >= 8) { \
- vp9_filter_block1d8_##dir##2_##avg##opt2(src, src_stride, \
- dst, dst_stride, \
- h, filter); \
+ vp9_filter_block1d8_##dir##2_##avg##opt(src, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter); \
src += 8; \
dst += 8; \
w -= 8; \
} \
while (w >= 4) { \
- vp9_filter_block1d4_##dir##2_##avg##opt2(src, src_stride, \
- dst, dst_stride, \
- h, filter); \
+ vp9_filter_block1d4_##dir##2_##avg##opt(src, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter); \
src += 4; \
dst += 4; \
w -= 4; \
@@ -121,14 +139,79 @@ void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
} \
}
+#if HAVE_AVX2
+filter8_1dfunction vp9_filter_block1d16_v8_avx2;
+filter8_1dfunction vp9_filter_block1d16_h8_avx2;
+filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
+#if (ARCH_X86_64)
+filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
+#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3
+#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3
+#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3
+#else
+filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
+#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3
+#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3
+#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3
+#endif
+filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
+filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
+filter8_1dfunction vp9_filter_block1d4_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
+#define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3
+#define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3
+#define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3
+#define vp9_filter_block1d8_v2_avx2 vp9_filter_block1d8_v2_ssse3
+#define vp9_filter_block1d8_h2_avx2 vp9_filter_block1d8_h2_ssse3
+#define vp9_filter_block1d4_v2_avx2 vp9_filter_block1d4_v2_ssse3
+#define vp9_filter_block1d4_h2_avx2 vp9_filter_block1d4_h2_ssse3
+// void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2);
+FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2);
+// void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+FUN_CONV_2D(, avx2);
+#endif
#if HAVE_SSSE3
+#if (ARCH_X86_64)
+filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3;
+filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3;
+filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3;
+filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
+#define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3
+#define vp9_filter_block1d16_h8_ssse3 vp9_filter_block1d16_h8_intrin_ssse3
+#define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3
+#define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3
+#define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3
+#else
filter8_1dfunction vp9_filter_block1d16_v8_ssse3;
filter8_1dfunction vp9_filter_block1d16_h8_ssse3;
filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
+#endif
filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3;
@@ -136,18 +219,18 @@ filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d16_v2_sse2;
-filter8_1dfunction vp9_filter_block1d16_h2_sse2;
-filter8_1dfunction vp9_filter_block1d8_v2_sse2;
-filter8_1dfunction vp9_filter_block1d8_h2_sse2;
-filter8_1dfunction vp9_filter_block1d4_v2_sse2;
-filter8_1dfunction vp9_filter_block1d4_h2_sse2;
-filter8_1dfunction vp9_filter_block1d16_v2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d16_h2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d8_v2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d8_h2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d4_v2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d4_h2_avg_sse2;
+filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
+filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
+filter8_1dfunction vp9_filter_block1d4_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
+filter8_1dfunction vp9_filter_block1d16_v2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d16_h2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d8_v2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d4_v2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h2_avg_ssse3;
// void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
@@ -169,11 +252,11 @@ filter8_1dfunction vp9_filter_block1d4_h2_avg_sse2;
// const int16_t *filter_x, int x_step_q4,
// const int16_t *filter_y, int y_step_q4,
// int w, int h);
-FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3, sse2);
-FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3, sse2);
-FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3, sse2);
+FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3);
+FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3);
+FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3);
FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
- ssse3, sse2);
+ ssse3);
// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
@@ -236,11 +319,10 @@ filter8_1dfunction vp9_filter_block1d4_h2_avg_sse2;
// const int16_t *filter_x, int x_step_q4,
// const int16_t *filter_y, int y_step_q4,
// int w, int h);
-FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2, sse2);
-FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2, sse2);
-FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2, sse2);
-FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2,
- sse2);
+FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
+FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
+FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
+FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2);
// void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
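With the second opt parameter gone, one suffix now names both the 8-tap and the 2-tap (bilinear) kernels, and the AVX2 build reuses SSSE3 bilinear kernels through the #define aliases above. Roughly what one instantiation expands to (heavily abbreviated sketch, not the literal expansion):

    void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
                                   uint8_t *dst, ptrdiff_t dst_stride,
                                   const int16_t *filter_x, int x_step_q4,
                                   const int16_t *filter_y, int y_step_q4,
                                   int w, int h) {
      if (x_step_q4 == 16 && filter_x[3] != 128) {
        if (filter_x[0] || filter_x[1] || filter_x[2]) {
          /* 8-tap: vp9_filter_block1d{16,8,4}_h8_ssse3 per column strip */
        } else {
          /* bilinear: vp9_filter_block1d{16,8,4}_h2_ssse3 per column strip */
        }
      }
      if (w)  /* leftover widths or scaled steps */
        vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                              filter_x, x_step_q4, filter_y, y_step_q4, w, h);
    }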
diff --git a/vp9/common/x86/vp9_loopfilter_mmx.asm b/vp9/common/x86/vp9_loopfilter_mmx.asm
index a7f69307d..91055b9f9 100644
--- a/vp9/common/x86/vp9_loopfilter_mmx.asm
+++ b/vp9/common/x86/vp9_loopfilter_mmx.asm
@@ -527,7 +527,7 @@ sym(vp9_lpf_vertical_4_mmx):
pxor mm7, [GLOBAL(t80)] ; unoffset
; mm7 = q1
- ; tranpose and write back
+ ; transpose and write back
; mm1 = 72 62 52 42 32 22 12 02
; mm6 = 73 63 53 43 33 23 13 03
; mm3 = 74 64 54 44 34 24 14 04
diff --git a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
new file mode 100644
index 000000000..efa960c66
--- /dev/null
+++ b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
@@ -0,0 +1,545 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <immintrin.h>
+#include "vpx_ports/mem.h"
+
+// filters for 16_h8 and 16_v8
+DECLARE_ALIGNED(32, static const uint8_t, filt1_global_avx2[32]) = {
+ 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
+ 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
+};
+
+DECLARE_ALIGNED(32, static const uint8_t, filt2_global_avx2[32]) = {
+ 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
+ 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10
+};
+
+DECLARE_ALIGNED(32, static const uint8_t, filt3_global_avx2[32]) = {
+ 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12,
+ 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12
+};
+
+DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = {
+ 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14,
+ 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14
+};
+
+void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
+ unsigned int output_pitch,
+ unsigned int output_height,
+ int16_t *filter) {
+ __m128i filtersReg;
+ __m256i addFilterReg64, filt1Reg, filt2Reg, filt3Reg, filt4Reg;
+ __m256i firstFilters, secondFilters, thirdFilters, forthFilters;
+ __m256i srcRegFilt32b1_1, srcRegFilt32b2_1, srcRegFilt32b2, srcRegFilt32b3;
+ __m256i srcReg32b1, srcReg32b2, filtersReg32;
+ unsigned int i;
+ unsigned int src_stride, dst_stride;
+
+ // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
+ addFilterReg64 = _mm256_set1_epi32((int)0x0400040u);
+ filtersReg = _mm_loadu_si128((__m128i *)filter);
+ // convert the 16-bit (short) values to 8-bit (byte) values and duplicate
+ // the same data in both lanes of the 128-bit register.
+ filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
+ // have the same data in both lanes of a 256 bit register
+#if defined (__GNUC__)
+#if ( __GNUC__ < 4 || (__GNUC__ == 4 && \
+(__GNUC_MINOR__ < 6 || (__GNUC_MINOR__ == 6 && __GNUC_PATCHLEVEL__ > 0))))
+ filtersReg32 = _mm_broadcastsi128_si256((__m128i const *)&filtersReg);
+#elif(__GNUC__ == 4 && (__GNUC_MINOR__ == 7 && __GNUC_PATCHLEVEL__ > 0))
+ filtersReg32 = _mm_broadcastsi128_si256(filtersReg);
+#else
+ filtersReg32 = _mm256_broadcastsi128_si256(filtersReg);
+#endif
+#else
+ filtersReg32 = _mm256_broadcastsi128_si256(filtersReg);
+#endif
+
+ // duplicate only the first 16 bits (first and second byte)
+ // across 256 bit register
+ firstFilters = _mm256_shuffle_epi8(filtersReg32,
+ _mm256_set1_epi16(0x100u));
+ // duplicate only the second 16 bits (third and fourth bytes)
+ // across 256 bit register
+ secondFilters = _mm256_shuffle_epi8(filtersReg32,
+ _mm256_set1_epi16(0x302u));
+ // duplicate only the third 16 bits (fifth and sixth byte)
+ // across 256 bit register
+ thirdFilters = _mm256_shuffle_epi8(filtersReg32,
+ _mm256_set1_epi16(0x504u));
+ // duplicate only the fourth 16 bits (seventh and eighth bytes)
+ // across 256 bit register
+ forthFilters = _mm256_shuffle_epi8(filtersReg32,
+ _mm256_set1_epi16(0x706u));
+
+ filt1Reg = _mm256_load_si256((__m256i const *)filt1_global_avx2);
+ filt2Reg = _mm256_load_si256((__m256i const *)filt2_global_avx2);
+ filt3Reg = _mm256_load_si256((__m256i const *)filt3_global_avx2);
+ filt4Reg = _mm256_load_si256((__m256i const *)filt4_global_avx2);
+
+ // multiply the source and destination strides by two
+ src_stride = src_pixels_per_line << 1;
+ dst_stride = output_pitch << 1;
+ for (i = output_height; i > 1; i-=2) {
+ // load the 2 strides of source
+ srcReg32b1 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(src_ptr-3)));
+ srcReg32b1 = _mm256_inserti128_si256(srcReg32b1,
+ _mm_loadu_si128((__m128i *)
+ (src_ptr+src_pixels_per_line-3)), 1);
+
+ // filter the source buffer
+ srcRegFilt32b1_1= _mm256_shuffle_epi8(srcReg32b1, filt1Reg);
+ srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt2Reg);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters);
+ srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, secondFilters);
+
+ // add and saturate the results together
+ srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, srcRegFilt32b2);
+
+ // filter the source buffer
+ srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b1, filt4Reg);
+ srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt3Reg);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, forthFilters);
+ srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
+
+ // add and saturate the results together
+ srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1,
+ _mm256_min_epi16(srcRegFilt32b3, srcRegFilt32b2));
+
+ // load the next 16 bytes from both rows
+ // (part of this data was already read by the earlier load)
+ srcReg32b2 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(src_ptr+5)));
+ srcReg32b2 = _mm256_inserti128_si256(srcReg32b2,
+ _mm_loadu_si128((__m128i *)
+ (src_ptr+src_pixels_per_line+5)), 1);
+
+ // add and saturate the results together
+ srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1,
+ _mm256_max_epi16(srcRegFilt32b3, srcRegFilt32b2));
+
+ // filter the source buffer
+ srcRegFilt32b2_1 = _mm256_shuffle_epi8(srcReg32b2, filt1Reg);
+ srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt2Reg);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt32b2_1 = _mm256_maddubs_epi16(srcRegFilt32b2_1, firstFilters);
+ srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, secondFilters);
+
+ // add and saturate the results together
+ srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, srcRegFilt32b2);
+
+ // filter the source buffer
+ srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b2, filt4Reg);
+ srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b2, filt3Reg);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, forthFilters);
+ srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
+
+ // add and saturate the results together
+ srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1,
+ _mm256_min_epi16(srcRegFilt32b3, srcRegFilt32b2));
+ srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1,
+ _mm256_max_epi16(srcRegFilt32b3, srcRegFilt32b2));
+
+
+ srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg64);
+
+ srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, addFilterReg64);
+
+ // shift each 16-bit value right by 7 bits
+ srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 7);
+ srcRegFilt32b2_1 = _mm256_srai_epi16(srcRegFilt32b2_1, 7);
+
+ // pack each 16-bit value down to 8 bits; the first 128-bit lane contains
+ // the first convolve result and the second lane contains the second
+ srcRegFilt32b1_1 = _mm256_packus_epi16(srcRegFilt32b1_1,
+ srcRegFilt32b2_1);
+
+ src_ptr+=src_stride;
+
+ // save 16 bytes
+ _mm_store_si128((__m128i*)output_ptr,
+ _mm256_castsi256_si128(srcRegFilt32b1_1));
+
+ // save the next 16 bytes
+ _mm_store_si128((__m128i*)(output_ptr+output_pitch),
+ _mm256_extractf128_si256(srcRegFilt32b1_1, 1));
+ output_ptr+=dst_stride;
+ }
+
+ // if the number of rows is odd,
+ // process the remaining 16 bytes
+ if (i > 0) {
+ __m128i srcReg1, srcReg2, srcRegFilt1_1, srcRegFilt2_1;
+ __m128i srcRegFilt2, srcRegFilt3;
+
+ srcReg1 = _mm_loadu_si128((__m128i *)(src_ptr-3));
+
+ // filter the source buffer
+ srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1,
+ _mm256_castsi256_si128(filt1Reg));
+ srcRegFilt2 = _mm_shuffle_epi8(srcReg1,
+ _mm256_castsi256_si128(filt2Reg));
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt1_1 = _mm_maddubs_epi16(srcRegFilt1_1,
+ _mm256_castsi256_si128(firstFilters));
+ srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2,
+ _mm256_castsi256_si128(secondFilters));
+
+ // add and saturate the results together
+ srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2);
+
+ // filter the source buffer
+ srcRegFilt3= _mm_shuffle_epi8(srcReg1,
+ _mm256_castsi256_si128(filt4Reg));
+ srcRegFilt2= _mm_shuffle_epi8(srcReg1,
+ _mm256_castsi256_si128(filt3Reg));
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3,
+ _mm256_castsi256_si128(forthFilters));
+ srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2,
+ _mm256_castsi256_si128(thirdFilters));
+
+ // add and saturate the results together
+ srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1,
+ _mm_min_epi16(srcRegFilt3, srcRegFilt2));
+
+ // load the next 16 bytes
+ // (part of this data was already read by the earlier load)
+ srcReg2 = _mm_loadu_si128((__m128i *)(src_ptr+5));
+
+ // add and saturate the results together
+ srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1,
+ _mm_max_epi16(srcRegFilt3, srcRegFilt2));
+
+ // filter the source buffer
+ srcRegFilt2_1 = _mm_shuffle_epi8(srcReg2,
+ _mm256_castsi256_si128(filt1Reg));
+ srcRegFilt2 = _mm_shuffle_epi8(srcReg2,
+ _mm256_castsi256_si128(filt2Reg));
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt2_1 = _mm_maddubs_epi16(srcRegFilt2_1,
+ _mm256_castsi256_si128(firstFilters));
+ srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2,
+ _mm256_castsi256_si128(secondFilters));
+
+ // add and saturate the results together
+ srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, srcRegFilt2);
+
+ // filter the source buffer
+ srcRegFilt3 = _mm_shuffle_epi8(srcReg2,
+ _mm256_castsi256_si128(filt4Reg));
+ srcRegFilt2 = _mm_shuffle_epi8(srcReg2,
+ _mm256_castsi256_si128(filt3Reg));
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3,
+ _mm256_castsi256_si128(forthFilters));
+ srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2,
+ _mm256_castsi256_si128(thirdFilters));
+
+ // add and saturate the results together
+ srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1,
+ _mm_min_epi16(srcRegFilt3, srcRegFilt2));
+ srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1,
+ _mm_max_epi16(srcRegFilt3, srcRegFilt2));
+
+
+ srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1,
+ _mm256_castsi256_si128(addFilterReg64));
+
+ srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1,
+ _mm256_castsi256_si128(addFilterReg64));
+
+ // shift each 16-bit value right by 7 bits
+ srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 7);
+ srcRegFilt2_1 = _mm_srai_epi16(srcRegFilt2_1, 7);
+
+ // pack each 16-bit value down to 8 bits; the low half contains the
+ // first convolve result and the high half contains the second
+ srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, srcRegFilt2_1);
+
+ // save 16 bytes
+ _mm_store_si128((__m128i*)output_ptr, srcRegFilt1_1);
+ }
+}
+
+void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr,
+ unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ int16_t *filter) {
+ __m128i filtersReg;
+ __m256i addFilterReg64;
+ __m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5;
+ __m256i srcReg32b6, srcReg32b7, srcReg32b8, srcReg32b9, srcReg32b10;
+ __m256i srcReg32b11, srcReg32b12, srcReg32b13, filtersReg32;
+ __m256i firstFilters, secondFilters, thirdFilters, forthFilters;
+ unsigned int i;
+ unsigned int src_stride, dst_stride;
+
+ // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
+ addFilterReg64 = _mm256_set1_epi32((int)0x0400040u);
+ filtersReg = _mm_loadu_si128((__m128i *)filter);
+ // convert the 16-bit (short) values to 8-bit (byte) values and
+ // duplicate the same data in both lanes of the 128-bit register.
+ filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
+ // have the same data in both lanes of a 256 bit register
+#if defined (__GNUC__)
+#if ( __GNUC__ < 4 || (__GNUC__ == 4 && \
+(__GNUC_MINOR__ < 6 || (__GNUC_MINOR__ == 6 && __GNUC_PATCHLEVEL__ > 0))))
+ filtersReg32 = _mm_broadcastsi128_si256((__m128i const *)&filtersReg);
+#elif(__GNUC__ == 4 && (__GNUC_MINOR__ == 7 && __GNUC_PATCHLEVEL__ > 0))
+ filtersReg32 = _mm_broadcastsi128_si256(filtersReg);
+#else
+ filtersReg32 = _mm256_broadcastsi128_si256(filtersReg);
+#endif
+#else
+ filtersReg32 = _mm256_broadcastsi128_si256(filtersReg);
+#endif
+
+ // duplicate only the first 16 bits (first and second byte)
+ // across 256 bit register
+ firstFilters = _mm256_shuffle_epi8(filtersReg32,
+ _mm256_set1_epi16(0x100u));
+ // duplicate only the second 16 bits (third and fourth bytes)
+ // across 256 bit register
+ secondFilters = _mm256_shuffle_epi8(filtersReg32,
+ _mm256_set1_epi16(0x302u));
+ // duplicate only the third 16 bits (fifth and sixth byte)
+ // across 256 bit register
+ thirdFilters = _mm256_shuffle_epi8(filtersReg32,
+ _mm256_set1_epi16(0x504u));
+ // duplicate only the fourth 16 bits (seventh and eighth bytes)
+ // across 256 bit register
+ forthFilters = _mm256_shuffle_epi8(filtersReg32,
+ _mm256_set1_epi16(0x706u));
+
+ // multiply the source and destination strides by two
+ src_stride = src_pitch << 1;
+ dst_stride = out_pitch << 1;
+
+ // load 16 bytes from each of the first 7 rows (stride of src_pitch)
+ srcReg32b1 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(src_ptr)));
+ srcReg32b2 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(src_ptr+src_pitch)));
+ srcReg32b3 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*2)));
+ srcReg32b4 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*3)));
+ srcReg32b5 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*4)));
+ srcReg32b6 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*5)));
+ srcReg32b7 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*6)));
+
+ // place each pair of consecutive loads in the same 256-bit register
+ srcReg32b1 = _mm256_inserti128_si256(srcReg32b1,
+ _mm256_castsi256_si128(srcReg32b2), 1);
+ srcReg32b2 = _mm256_inserti128_si256(srcReg32b2,
+ _mm256_castsi256_si128(srcReg32b3), 1);
+ srcReg32b3 = _mm256_inserti128_si256(srcReg32b3,
+ _mm256_castsi256_si128(srcReg32b4), 1);
+ srcReg32b4 = _mm256_inserti128_si256(srcReg32b4,
+ _mm256_castsi256_si128(srcReg32b5), 1);
+ srcReg32b5 = _mm256_inserti128_si256(srcReg32b5,
+ _mm256_castsi256_si128(srcReg32b6), 1);
+ srcReg32b6 = _mm256_inserti128_si256(srcReg32b6,
+ _mm256_castsi256_si128(srcReg32b7), 1);
+
+ // merge every two consecutive registers except the last one
+ srcReg32b10 = _mm256_unpacklo_epi8(srcReg32b1, srcReg32b2);
+ srcReg32b1 = _mm256_unpackhi_epi8(srcReg32b1, srcReg32b2);
+
+ // save
+ srcReg32b11 = _mm256_unpacklo_epi8(srcReg32b3, srcReg32b4);
+
+ // save
+ srcReg32b3 = _mm256_unpackhi_epi8(srcReg32b3, srcReg32b4);
+
+ // save
+ srcReg32b2 = _mm256_unpacklo_epi8(srcReg32b5, srcReg32b6);
+
+ // save
+ srcReg32b5 = _mm256_unpackhi_epi8(srcReg32b5, srcReg32b6);
+
+
+ for (i = output_height; i > 1; i-=2) {
+ // load the last two 16-byte rows and keep every two
+ // consecutive loads in the same 256-bit register
+ srcReg32b8 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*7)));
+ srcReg32b7 = _mm256_inserti128_si256(srcReg32b7,
+ _mm256_castsi256_si128(srcReg32b8), 1);
+ srcReg32b9 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*8)));
+ srcReg32b8 = _mm256_inserti128_si256(srcReg32b8,
+ _mm256_castsi256_si128(srcReg32b9), 1);
+
+ // merge every two consecutive registers
+ // save
+ srcReg32b4 = _mm256_unpacklo_epi8(srcReg32b7, srcReg32b8);
+ srcReg32b7 = _mm256_unpackhi_epi8(srcReg32b7, srcReg32b8);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcReg32b10 = _mm256_maddubs_epi16(srcReg32b10, firstFilters);
+ srcReg32b6 = _mm256_maddubs_epi16(srcReg32b4, forthFilters);
+ srcReg32b1 = _mm256_maddubs_epi16(srcReg32b1, firstFilters);
+ srcReg32b8 = _mm256_maddubs_epi16(srcReg32b7, forthFilters);
+
+ // add and saturate the results together
+ srcReg32b10 = _mm256_adds_epi16(srcReg32b10, srcReg32b6);
+ srcReg32b1 = _mm256_adds_epi16(srcReg32b1, srcReg32b8);
+
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcReg32b8 = _mm256_maddubs_epi16(srcReg32b11, secondFilters);
+ srcReg32b6 = _mm256_maddubs_epi16(srcReg32b3, secondFilters);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcReg32b12 = _mm256_maddubs_epi16(srcReg32b2, thirdFilters);
+ srcReg32b13 = _mm256_maddubs_epi16(srcReg32b5, thirdFilters);
+
+
+ // add and saturate the results together
+ srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
+ _mm256_min_epi16(srcReg32b8, srcReg32b12));
+ srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
+ _mm256_min_epi16(srcReg32b6, srcReg32b13));
+
+ // add and saturate the results together
+ srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
+ _mm256_max_epi16(srcReg32b8, srcReg32b12));
+ srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
+ _mm256_max_epi16(srcReg32b6, srcReg32b13));
+
+
+ srcReg32b10 = _mm256_adds_epi16(srcReg32b10, addFilterReg64);
+ srcReg32b1 = _mm256_adds_epi16(srcReg32b1, addFilterReg64);
+
+ // shift each 16-bit value right by 7 bits
+ srcReg32b10 = _mm256_srai_epi16(srcReg32b10, 7);
+ srcReg32b1 = _mm256_srai_epi16(srcReg32b1, 7);
+
+ // pack each 16-bit value down to 8 bits; the first 128-bit lane contains
+ // the first convolve result and the second lane contains the second
+ srcReg32b1 = _mm256_packus_epi16(srcReg32b10, srcReg32b1);
+
+ src_ptr+=src_stride;
+
+ // save 16 bytes
+ _mm_store_si128((__m128i*)output_ptr,
+ _mm256_castsi256_si128(srcReg32b1));
+
+ // save the next 16 bytes
+ _mm_store_si128((__m128i*)(output_ptr+out_pitch),
+ _mm256_extractf128_si256(srcReg32b1, 1));
+
+ output_ptr+=dst_stride;
+
+ // carry these registers over for the next pair of rows
+ srcReg32b10 = srcReg32b11;
+ srcReg32b1 = srcReg32b3;
+ srcReg32b11 = srcReg32b2;
+ srcReg32b3 = srcReg32b5;
+ srcReg32b2 = srcReg32b4;
+ srcReg32b5 = srcReg32b7;
+ srcReg32b7 = srcReg32b9;
+ }
+ if (i > 0) {
+ __m128i srcRegFilt1, srcRegFilt3, srcRegFilt4, srcRegFilt5;
+ __m128i srcRegFilt6, srcRegFilt7, srcRegFilt8;
+ // load the last 16 bytes
+ srcRegFilt8 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*7));
+
+ // merge the last 2 results together
+ srcRegFilt4 = _mm_unpacklo_epi8(
+ _mm256_castsi256_si128(srcReg32b7), srcRegFilt8);
+ srcRegFilt7 = _mm_unpackhi_epi8(
+ _mm256_castsi256_si128(srcReg32b7), srcRegFilt8);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt1 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b10),
+ _mm256_castsi256_si128(firstFilters));
+ srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4,
+ _mm256_castsi256_si128(forthFilters));
+ srcRegFilt3 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b1),
+ _mm256_castsi256_si128(firstFilters));
+ srcRegFilt7 = _mm_maddubs_epi16(srcRegFilt7,
+ _mm256_castsi256_si128(forthFilters));
+
+ // add and saturate the results together
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4);
+ srcRegFilt3 = _mm_adds_epi16(srcRegFilt3, srcRegFilt7);
+
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt4 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b11),
+ _mm256_castsi256_si128(secondFilters));
+ srcRegFilt5 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b3),
+ _mm256_castsi256_si128(secondFilters));
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt6 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b2),
+ _mm256_castsi256_si128(thirdFilters));
+ srcRegFilt7 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b5),
+ _mm256_castsi256_si128(thirdFilters));
+
+ // add and saturate the results together
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1,
+ _mm_min_epi16(srcRegFilt4, srcRegFilt6));
+ srcRegFilt3 = _mm_adds_epi16(srcRegFilt3,
+ _mm_min_epi16(srcRegFilt5, srcRegFilt7));
+
+ // add and saturate the results together
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1,
+ _mm_max_epi16(srcRegFilt4, srcRegFilt6));
+ srcRegFilt3 = _mm_adds_epi16(srcRegFilt3,
+ _mm_max_epi16(srcRegFilt5, srcRegFilt7));
+
+
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1,
+ _mm256_castsi256_si128(addFilterReg64));
+ srcRegFilt3 = _mm_adds_epi16(srcRegFilt3,
+ _mm256_castsi256_si128(addFilterReg64));
+
+ // shift each 16-bit value right by 7 bits
+ srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
+ srcRegFilt3 = _mm_srai_epi16(srcRegFilt3, 7);
+
+ // shrink each 16-bit value to 8 bits; the first lane contains the first
+ // convolve result and the second lane contains the second convolve
+ // result
+ srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt3);
+
+ // save 16 bytes
+ _mm_store_si128((__m128i*)output_ptr, srcRegFilt1);
+ }
+}
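
For reference, every kernel in this file evaluates the same per-pixel arithmetic; only the data movement differs. Below is a minimal scalar sketch of that arithmetic, assuming VP9's 7-bit filter scale; the helper is illustrative and not part of the patch:

    #include <stdint.h>

    /* 8-tap filter at one pixel: weighted sum, round by 64, shift by 7,
       then clamp to [0, 255] -- mirroring addFilterReg64,
       _mm256_srai_epi16 and _mm256_packus_epi16 above. */
    static unsigned char filter8_pixel(const unsigned char *src, int step,
                                       const int16_t *filter) {
      int k, sum = 0;
      for (k = 0; k < 8; ++k)
        sum += filter[k] * src[k * step];  /* step: 1 (horiz), pitch (vert) */
      sum = (sum + 64) >> 7;
      if (sum < 0) sum = 0;    /* saturation, as packus does */
      if (sum > 255) sum = 255;
      return (unsigned char)sum;
    }

The SIMD loop accumulates with saturating 16-bit adds and pairs the two middle partial sums through _mm256_min_epi16/_mm256_max_epi16, adding the smaller (possibly negative) term first; the scalar form above is the exact result those saturating adds approximate.
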
diff --git a/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c b/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
new file mode 100644
index 000000000..cf28d8d2b
--- /dev/null
+++ b/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
@@ -0,0 +1,490 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <tmmintrin.h>
+#include "vpx_ports/mem.h"
+#include "vpx_ports/emmintrin_compat.h"
+
+// filters only for the 4_h8 convolution
+DECLARE_ALIGNED(16, static const uint8_t, filt1_4_h8[16]) = {
+ 0, 1, 1, 2, 2, 3, 3, 4, 2, 3, 3, 4, 4, 5, 5, 6
+};
+
+DECLARE_ALIGNED(16, static const uint8_t, filt2_4_h8[16]) = {
+ 4, 5, 5, 6, 6, 7, 7, 8, 6, 7, 7, 8, 8, 9, 9, 10
+};
+
+// filters for 8_h8 and 16_h8
+DECLARE_ALIGNED(16, static const uint8_t, filt1_global[16]) = {
+ 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
+};
+
+DECLARE_ALIGNED(16, static const uint8_t, filt2_global[16]) = {
+ 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10
+};
+
+DECLARE_ALIGNED(16, static const uint8_t, filt3_global[16]) = {
+ 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12
+};
+
+DECLARE_ALIGNED(16, static const uint8_t, filt4_global[16]) = {
+ 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14
+};
+
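
These DECLARE_ALIGNED tables are pshufb masks: each one lays the source bytes out as overlapping (s[i], s[i+1]) pairs so that a single _mm_maddubs_epi16 computes two adjacent taps per output pixel. A scalar restatement of one shuffle+maddubs stage, using filt1_global and the (f0, f1) filter broadcast as in the functions below (helper name is illustrative):

    #include <stdint.h>

    /* 16-bit lane i of maddubs(shuffle(src, filt1_global), firstFilters)
       is f0*s[i] + f1*s[i+1], saturated to int16_t. */
    static void taps01_stage(const uint8_t *s, const int16_t *f,
                             int16_t out[8]) {
      int i;
      for (i = 0; i < 8; ++i)
        out[i] = (int16_t)(f[0] * s[i] + f[1] * s[i + 1]);
    }

The other three masks do the same for byte pairs starting at offsets 2, 4 and 6, i.e. tap pairs (2,3), (4,5) and (6,7).
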
+void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
+ unsigned int output_pitch,
+ unsigned int output_height,
+ int16_t *filter) {
+ __m128i firstFilters, secondFilters, thirdFilters, forthFilters;
+ __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4;
+ __m128i addFilterReg64, filtersReg, srcReg, minReg;
+ unsigned int i;
+
+ // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
+ addFilterReg64 =_mm_set1_epi32((int)0x0400040u);
+ filtersReg = _mm_loadu_si128((__m128i *)filter);
+ // convert the 16-bit (short) values to 8-bit (byte) and replicate the
+ // same data in both lanes of the 128-bit register
+ filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
+
+ // duplicate only the first 16 bits in the filter into the first lane
+ firstFilters = _mm_shufflelo_epi16(filtersReg, 0);
+ // duplicate only the third 16 bits in the filter into the first lane
+ secondFilters = _mm_shufflelo_epi16(filtersReg, 0xAAu);
+ // duplicate only the second 16 bits in the filter into the second lane
+ firstFilters = _mm_shufflehi_epi16(firstFilters, 0x55u);
+ // duplicate only the fourth 16 bits in the filter into the second lane
+ secondFilters = _mm_shufflehi_epi16(secondFilters, 0xFFu);
+
+ // loading the local filters
+ thirdFilters =_mm_load_si128((__m128i const *)filt1_4_h8);
+ forthFilters = _mm_load_si128((__m128i const *)filt2_4_h8);
+
+ for (i = 0; i < output_height; i++) {
+ srcReg = _mm_loadu_si128((__m128i *)(src_ptr-3));
+
+ // filter the source buffer
+ srcRegFilt1= _mm_shuffle_epi8(srcReg, thirdFilters);
+ srcRegFilt2= _mm_shuffle_epi8(srcReg, forthFilters);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
+ srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters);
+
+ // extract the upper half of each register
+ srcRegFilt3 = _mm_srli_si128(srcRegFilt1, 8);
+ srcRegFilt4 = _mm_srli_si128(srcRegFilt2, 8);
+
+ minReg = _mm_min_epi16(srcRegFilt3, srcRegFilt2);
+
+ // add and saturate all the results together
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4);
+ srcRegFilt3 = _mm_max_epi16(srcRegFilt3, srcRegFilt2);
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg);
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt3);
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
+
+ // shift each 16-bit value right by 7 bits
+ srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
+
+ // shrink each 16-bit value to 8 bits
+ srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1);
+ src_ptr+=src_pixels_per_line;
+
+ // save only 4 bytes
+ *((int*)&output_ptr[0])= _mm_cvtsi128_si32(srcRegFilt1);
+
+ output_ptr+=output_pitch;
+ }
+}
+
+void vp9_filter_block1d8_h8_intrin_ssse3(unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
+ unsigned int output_pitch,
+ unsigned int output_height,
+ int16_t *filter) {
+ __m128i firstFilters, secondFilters, thirdFilters, forthFilters, srcReg;
+ __m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg;
+ __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4;
+ __m128i addFilterReg64, filtersReg, minReg;
+ unsigned int i;
+
+ // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
+ addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
+ filtersReg = _mm_loadu_si128((__m128i *)filter);
+ // convert the 16-bit (short) values to 8-bit (byte) and replicate the
+ // same data in both lanes of the 128-bit register
+ filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
+
+ // duplicate only the first 16 bits (first and second byte)
+ // across 128 bit register
+ firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u));
+ // duplicate only the second 16 bits (third and fourth byte)
+ // across 128 bit register
+ secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u));
+ // duplicate only the third 16 bits (fifth and sixth byte)
+ // across 128 bit register
+ thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u));
+ // duplicate only the fourth 16 bits (seventh and eighth byte)
+ // across 128 bit register
+ forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u));
+
+ filt1Reg = _mm_load_si128((__m128i const *)filt1_global);
+ filt2Reg = _mm_load_si128((__m128i const *)filt2_global);
+ filt3Reg = _mm_load_si128((__m128i const *)filt3_global);
+ filt4Reg = _mm_load_si128((__m128i const *)filt4_global);
+
+ for (i = 0; i < output_height; i++) {
+ srcReg = _mm_loadu_si128((__m128i *)(src_ptr-3));
+
+ // filter the source buffer
+ srcRegFilt1= _mm_shuffle_epi8(srcReg, filt1Reg);
+ srcRegFilt2= _mm_shuffle_epi8(srcReg, filt2Reg);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
+ srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters);
+
+ // filter the source buffer
+ srcRegFilt3= _mm_shuffle_epi8(srcReg, filt3Reg);
+ srcRegFilt4= _mm_shuffle_epi8(srcReg, filt4Reg);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, thirdFilters);
+ srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, forthFilters);
+
+ // add and saturate all the results together
+ minReg = _mm_min_epi16(srcRegFilt4, srcRegFilt3);
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2);
+
+ srcRegFilt4= _mm_max_epi16(srcRegFilt4, srcRegFilt3);
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg);
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4);
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
+
+ // shift each 16-bit value right by 7 bits
+ srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
+
+ // shrink each 16-bit value to 8 bits
+ srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1);
+
+ src_ptr+=src_pixels_per_line;
+
+ // save only 8 bytes
+ _mm_storel_epi64((__m128i*)&output_ptr[0], srcRegFilt1);
+
+ output_ptr+=output_pitch;
+ }
+}
+
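Putting the four stages together, the add chain in the loop above combines the partial sums in a fixed order: first the taps-(0,1) and taps-(2,3) partials, then the smaller and then the larger of the (4,5) and (6,7) partials. A scalar sketch of that combination (sat_add16 models _mm_adds_epi16; all names are illustrative, and the min/max ordering is kept as in the code, which plausibly bounds where transient int16 saturation can occur):

    #include <stdint.h>

    static int16_t sat_add16(int a, int b) {
      const int s = a + b;
      return (int16_t)(s > 32767 ? 32767 : s < -32768 ? -32768 : s);
    }

    /* p01..p67 are the maddubs partial sums for tap pairs (0,1)..(6,7). */
    static unsigned char combine_row(int16_t p01, int16_t p23,
                                     int16_t p45, int16_t p67) {
      const int16_t lo = p45 < p67 ? p45 : p67;
      const int16_t hi = p45 < p67 ? p67 : p45;
      int sum =
          sat_add16(sat_add16(sat_add16(sat_add16(p01, p23), lo), hi), 64);
      sum >>= 7;
      return (unsigned char)(sum < 0 ? 0 : sum > 255 ? 255 : sum);
    }
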
+void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
+ unsigned int output_pitch,
+ unsigned int output_height,
+ int16_t *filter) {
+ __m128i addFilterReg64, filtersReg, srcReg1, srcReg2;
+ __m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg;
+ __m128i firstFilters, secondFilters, thirdFilters, forthFilters;
+ __m128i srcRegFilt1_1, srcRegFilt2_1, srcRegFilt2, srcRegFilt3;
+ unsigned int i;
+
+ // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
+ addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
+ filtersReg = _mm_loadu_si128((__m128i *)filter);
+ // convert the 16-bit (short) values to 8-bit (byte) and replicate the
+ // same data in both lanes of the 128-bit register
+ filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
+
+ // duplicate only the first 16 bits (first and second byte)
+ // across 128 bit register
+ firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u));
+ // duplicate only the second 16 bits (third and fourth byte)
+ // across 128 bit register
+ secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u));
+ // duplicate only the third 16 bits (fifth and sixth byte)
+ // across 128 bit register
+ thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u));
+ // duplicate only the fourth 16 bits (seventh and eighth byte)
+ // across 128 bit register
+ forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u));
+
+ filt1Reg = _mm_load_si128((__m128i const *)filt1_global);
+ filt2Reg = _mm_load_si128((__m128i const *)filt2_global);
+ filt3Reg = _mm_load_si128((__m128i const *)filt3_global);
+ filt4Reg = _mm_load_si128((__m128i const *)filt4_global);
+
+ for (i = 0; i < output_height; i++) {
+ srcReg1 = _mm_loadu_si128((__m128i *)(src_ptr-3));
+
+ // filter the source buffer
+ srcRegFilt1_1= _mm_shuffle_epi8(srcReg1, filt1Reg);
+ srcRegFilt2= _mm_shuffle_epi8(srcReg1, filt2Reg);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt1_1 = _mm_maddubs_epi16(srcRegFilt1_1, firstFilters);
+ srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters);
+
+ // add and saturate the results together
+ srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2);
+
+ // filter the source buffer
+ srcRegFilt3= _mm_shuffle_epi8(srcReg1, filt4Reg);
+ srcRegFilt2= _mm_shuffle_epi8(srcReg1, filt3Reg);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, forthFilters);
+ srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters);
+
+ // add and saturate the results together
+ srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1,
+ _mm_min_epi16(srcRegFilt3, srcRegFilt2));
+
+ // reading the next 16 bytes
+ // (part of them overlaps the earlier load)
+ srcReg2 = _mm_loadu_si128((__m128i *)(src_ptr+5));
+
+ // add and saturate the results together
+ srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1,
+ _mm_max_epi16(srcRegFilt3, srcRegFilt2));
+
+ // filter the source buffer
+ srcRegFilt2_1= _mm_shuffle_epi8(srcReg2, filt1Reg);
+ srcRegFilt2= _mm_shuffle_epi8(srcReg2, filt2Reg);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt2_1 = _mm_maddubs_epi16(srcRegFilt2_1, firstFilters);
+ srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters);
+
+ // add and saturate the results together
+ srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, srcRegFilt2);
+
+ // filter the source buffer
+ srcRegFilt3= _mm_shuffle_epi8(srcReg2, filt4Reg);
+ srcRegFilt2= _mm_shuffle_epi8(srcReg2, filt3Reg);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, forthFilters);
+ srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters);
+
+ // add and saturate the results together
+ srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1,
+ _mm_min_epi16(srcRegFilt3, srcRegFilt2));
+ srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1,
+ _mm_max_epi16(srcRegFilt3, srcRegFilt2));
+
+ srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, addFilterReg64);
+ srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, addFilterReg64);
+
+ // shift each 16-bit value right by 7 bits
+ srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 7);
+ srcRegFilt2_1 = _mm_srai_epi16(srcRegFilt2_1, 7);
+
+ // shrink each 16-bit value to 8 bits; the first lane contains the first
+ // convolve result and the second lane contains the second convolve
+ // result
+ srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, srcRegFilt2_1);
+
+ src_ptr+=src_pixels_per_line;
+
+ // save 16 bytes
+ _mm_store_si128((__m128i*)output_ptr, srcRegFilt1_1);
+
+ output_ptr+=output_pitch;
+ }
+}
+
+void vp9_filter_block1d8_v8_intrin_ssse3(unsigned char *src_ptr,
+ unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ int16_t *filter) {
+ __m128i addFilterReg64, filtersReg, minReg, srcRegFilt6;
+ __m128i firstFilters, secondFilters, thirdFilters, forthFilters;
+ __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4, srcRegFilt5;
+ unsigned int i;
+
+ // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
+ addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
+ filtersReg = _mm_loadu_si128((__m128i *)filter);
+ // convert the 16-bit (short) values to 8-bit (byte) and replicate the
+ // same data in both lanes of the 128-bit register
+ filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
+
+ // duplicate only the first 16 bits in the filter
+ firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u));
+ // duplicate only the second 16 bits in the filter
+ secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u));
+ // duplicate only the third 16 bits in the filter
+ thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u));
+ // duplicate only the fourth 16 bits in the filter
+ forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u));
+
+ for (i = 0; i < output_height; i++) {
+ // load the first 8 bytes
+ srcRegFilt1 = _mm_loadl_epi64((__m128i *)&src_ptr[0]);
+ // load the next 8 bytes in stride of src_pitch
+ srcRegFilt2 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch)[0]);
+ srcRegFilt3 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*2)[0]);
+ srcRegFilt4 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*3)[0]);
+
+ // merge the results together
+ srcRegFilt1 = _mm_unpacklo_epi8(srcRegFilt1, srcRegFilt2);
+ srcRegFilt3 = _mm_unpacklo_epi8(srcRegFilt3, srcRegFilt4);
+
+ // load the next 8 bytes in stride of src_pitch
+ srcRegFilt2 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*4)[0]);
+ srcRegFilt4 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*5)[0]);
+ srcRegFilt5 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*6)[0]);
+ srcRegFilt6 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*7)[0]);
+
+ // merge the results together
+ srcRegFilt2 = _mm_unpacklo_epi8(srcRegFilt2, srcRegFilt4);
+ srcRegFilt5 = _mm_unpacklo_epi8(srcRegFilt5, srcRegFilt6);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
+ srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters);
+ srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters);
+ srcRegFilt5 = _mm_maddubs_epi16(srcRegFilt5, forthFilters);
+
+ // add and saturate the results together
+ minReg = _mm_min_epi16(srcRegFilt2, srcRegFilt3);
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt5);
+ srcRegFilt2 = _mm_max_epi16(srcRegFilt2, srcRegFilt3);
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg);
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2);
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
+
+ // shift each 16-bit value right by 7 bits
+ srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
+
+ // shrink each 16-bit value to 8 bits
+ srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1);
+
+ src_ptr+=src_pitch;
+
+ // save only 8 bytes of the convolve result
+ _mm_storel_epi64((__m128i*)&output_ptr[0], srcRegFilt1);
+
+ output_ptr+=out_pitch;
+ }
+}
+
+void vp9_filter_block1d16_v8_intrin_ssse3(unsigned char *src_ptr,
+ unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ int16_t *filter) {
+ __m128i addFilterReg64, filtersReg, srcRegFilt1, srcRegFilt2, srcRegFilt3;
+ __m128i firstFilters, secondFilters, thirdFilters, forthFilters;
+ __m128i srcRegFilt4, srcRegFilt5, srcRegFilt6, srcRegFilt7, srcRegFilt8;
+ unsigned int i;
+
+ // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
+ addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
+ filtersReg = _mm_loadu_si128((__m128i *)filter);
+ // convert the 16-bit (short) values to 8-bit (byte) and replicate the
+ // same data in both lanes of the 128-bit register
+ filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
+
+ // duplicate only the first 16 bits in the filter
+ firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u));
+ // duplicate only the second 16 bits in the filter
+ secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u));
+ // duplicate only the third 16 bits in the filter
+ thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u));
+ // duplicate only the fourth 16 bits in the filter
+ forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u));
+
+ for (i = 0; i < output_height; i++) {
+ // load the first 16 bytes
+ srcRegFilt1 = _mm_loadu_si128((__m128i *)(src_ptr));
+ // load rows 1, 6 and 7 of the source (16 bytes each)
+ srcRegFilt2 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch));
+ srcRegFilt3 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*6));
+ srcRegFilt4 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*7));
+
+ // merge the results together
+ srcRegFilt5 = _mm_unpacklo_epi8(srcRegFilt1, srcRegFilt2);
+ srcRegFilt6 = _mm_unpacklo_epi8(srcRegFilt3, srcRegFilt4);
+ srcRegFilt1 = _mm_unpackhi_epi8(srcRegFilt1, srcRegFilt2);
+ srcRegFilt3 = _mm_unpackhi_epi8(srcRegFilt3, srcRegFilt4);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt5 = _mm_maddubs_epi16(srcRegFilt5, firstFilters);
+ srcRegFilt6 = _mm_maddubs_epi16(srcRegFilt6, forthFilters);
+ srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
+ srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, forthFilters);
+
+ // add and saturate the results together
+ srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, srcRegFilt6);
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt3);
+
+ // load the next 16 bytes at strides two and three of src_pitch
+ srcRegFilt2 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*2));
+ srcRegFilt3 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*3));
+
+ // merge the results together
+ srcRegFilt4 = _mm_unpacklo_epi8(srcRegFilt2, srcRegFilt3);
+ srcRegFilt6 = _mm_unpackhi_epi8(srcRegFilt2, srcRegFilt3);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, secondFilters);
+ srcRegFilt6 = _mm_maddubs_epi16(srcRegFilt6, secondFilters);
+
+ // load the next 16 bytes at strides four and five of src_pitch
+ srcRegFilt2 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*4));
+ srcRegFilt3 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*5));
+
+ // merge the results together
+ srcRegFilt7 = _mm_unpacklo_epi8(srcRegFilt2, srcRegFilt3);
+ srcRegFilt8 = _mm_unpackhi_epi8(srcRegFilt2, srcRegFilt3);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcRegFilt7 = _mm_maddubs_epi16(srcRegFilt7, thirdFilters);
+ srcRegFilt8 = _mm_maddubs_epi16(srcRegFilt8, thirdFilters);
+
+ // add and saturate the results together
+ srcRegFilt5 = _mm_adds_epi16(srcRegFilt5,
+ _mm_min_epi16(srcRegFilt4, srcRegFilt7));
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1,
+ _mm_min_epi16(srcRegFilt6, srcRegFilt8));
+
+ // add and saturate the results together
+ srcRegFilt5 = _mm_adds_epi16(srcRegFilt5,
+ _mm_max_epi16(srcRegFilt4, srcRegFilt7));
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1,
+ _mm_max_epi16(srcRegFilt6, srcRegFilt8));
+ srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, addFilterReg64);
+ srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
+
+ // shift each 16-bit value right by 7 bits
+ srcRegFilt5 = _mm_srai_epi16(srcRegFilt5, 7);
+ srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
+
+ // shrink each 16-bit value to 8 bits; the first lane contains the first
+ // convolve result and the second lane contains the second convolve
+ // result
+ srcRegFilt1 = _mm_packus_epi16(srcRegFilt5, srcRegFilt1);
+
+ src_ptr+=src_pitch;
+
+ // save the 16-byte convolve result
+ _mm_store_si128((__m128i*)output_ptr, srcRegFilt1);
+
+ output_ptr+=out_pitch;
+ }
+}
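
One detail shared by all of these kernels and the assembly that follows: the rounding constant 0x0400040 (i.e. 0x00400040) is simply the 16-bit value 64 = 1 << (FILTER_BITS - 1), with the codec's FILTER_BITS of 7, replicated across lanes, so the add-then-shift implements round-to-nearest. A small standalone check (illustrative only):

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
      const uint32_t c = 0x00400040u;
      assert((int16_t)(c & 0xffffu) == 64);  /* low 16-bit lane */
      assert((int16_t)(c >> 16) == 64);      /* high 16-bit lane */
      /* VP9 subpel filter taps sum to 128, so a flat signal is preserved: */
      assert(((100 * 128) + 64) >> 7 == 100);
      return 0;
    }
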
diff --git a/vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm b/vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm
new file mode 100644
index 000000000..b5e18fe6d
--- /dev/null
+++ b/vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm
@@ -0,0 +1,422 @@
+;
+; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "vpx_ports/x86_abi_support.asm"
+
+%macro GET_PARAM_4 0
+ mov rdx, arg(5) ;filter ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ mov rcx, 0x0400040
+
+ movdqa xmm3, [rdx] ;load filters
+ psrldq xmm3, 6
+ packsswb xmm3, xmm3
+ pshuflw xmm3, xmm3, 0b ;k3_k4
+
+ movq xmm2, rcx ;rounding
+ pshufd xmm2, xmm2, 0
+
+ movsxd rax, DWORD PTR arg(1) ;pixels_per_line
+ movsxd rdx, DWORD PTR arg(3) ;out_pitch
+ movsxd rcx, DWORD PTR arg(4) ;output_height
+%endm
+
+%macro APPLY_FILTER_4 1
+ punpcklbw xmm0, xmm1
+ pmaddubsw xmm0, xmm3
+
+ paddsw xmm0, xmm2 ;rounding
+ psraw xmm0, 7 ;shift
+ packuswb xmm0, xmm0 ;pack to byte
+
+%if %1
+ movd xmm1, [rdi]
+ pavgb xmm0, xmm1
+%endif
+ movd [rdi], xmm0
+ lea rsi, [rsi + rax]
+ lea rdi, [rdi + rdx]
+ dec rcx
+%endm
+
+%macro GET_PARAM 0
+ mov rdx, arg(5) ;filter ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ mov rcx, 0x0400040
+
+ movdqa xmm7, [rdx] ;load filters
+ psrldq xmm7, 6
+ packsswb xmm7, xmm7
+ pshuflw xmm7, xmm7, 0b ;k3_k4
+ punpcklwd xmm7, xmm7
+
+ movq xmm6, rcx ;rounding
+ pshufd xmm6, xmm6, 0
+
+ movsxd rax, DWORD PTR arg(1) ;pixels_per_line
+ movsxd rdx, DWORD PTR arg(3) ;out_pitch
+ movsxd rcx, DWORD PTR arg(4) ;output_height
+%endm
+
+%macro APPLY_FILTER_8 1
+ punpcklbw xmm0, xmm1
+ pmaddubsw xmm0, xmm7
+
+ paddsw xmm0, xmm6 ;rounding
+ psraw xmm0, 7 ;shift
+ packuswb xmm0, xmm0 ;pack back to byte
+
+%if %1
+ movq xmm1, [rdi]
+ pavgb xmm0, xmm1
+%endif
+ movq [rdi], xmm0 ;store the result
+
+ lea rsi, [rsi + rax]
+ lea rdi, [rdi + rdx]
+ dec rcx
+%endm
+
+%macro APPLY_FILTER_16 1
+ punpcklbw xmm0, xmm1
+ punpckhbw xmm2, xmm1
+ pmaddubsw xmm0, xmm7
+ pmaddubsw xmm2, xmm7
+
+ paddsw xmm0, xmm6 ;rounding
+ paddsw xmm2, xmm6
+ psraw xmm0, 7 ;shift
+ psraw xmm2, 7
+ packuswb xmm0, xmm2 ;pack back to byte
+
+%if %1
+ movdqu xmm1, [rdi]
+ pavgb xmm0, xmm1
+%endif
+ movdqu [rdi], xmm0 ;store the result
+
+ lea rsi, [rsi + rax]
+ lea rdi, [rdi + rdx]
+ dec rcx
+%endm
+
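The three APPLY_FILTER_* macros reduce to the same two-tap arithmetic: GET_PARAM/GET_PARAM_4 shift the eight-tap filter right by three words, leaving only taps k3 and k4, and each output byte is (k3*a + k4*b + 64) >> 7 clamped to a byte, optionally averaged with the destination in the _avg variants. A scalar sketch (the helper is illustrative, not part of the patch):

    #include <stdint.h>

    static uint8_t bilinear_pixel(uint8_t a, uint8_t b,
                                  const int16_t *filter,  /* 8 taps */
                                  uint8_t dst, int avg) {
      int sum = filter[3] * a + filter[4] * b;  /* the two non-zero taps */
      sum = (sum + 64) >> 7;                    /* round, as 0x0400040 */
      if (sum < 0) sum = 0;
      if (sum > 255) sum = 255;                 /* packuswb */
      return avg ? (uint8_t)((sum + dst + 1) >> 1)  /* pavgb */
                 : (uint8_t)sum;
    }
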
+global sym(vp9_filter_block1d4_v2_ssse3) PRIVATE
+sym(vp9_filter_block1d4_v2_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM_4
+.loop:
+ movd xmm0, [rsi] ;load src
+ movd xmm1, [rsi + rax]
+
+ APPLY_FILTER_4 0
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d8_v2_ssse3) PRIVATE
+sym(vp9_filter_block1d8_v2_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM
+.loop:
+ movq xmm0, [rsi] ;0
+ movq xmm1, [rsi + rax] ;1
+
+ APPLY_FILTER_8 0
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d16_v2_ssse3) PRIVATE
+sym(vp9_filter_block1d16_v2_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM
+.loop:
+ movdqu xmm0, [rsi] ;0
+ movdqu xmm1, [rsi + rax] ;1
+ movdqa xmm2, xmm0
+
+ APPLY_FILTER_16 0
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d4_v2_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d4_v2_avg_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM_4
+.loop:
+ movd xmm0, [rsi] ;load src
+ movd xmm1, [rsi + rax]
+
+ APPLY_FILTER_4 1
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d8_v2_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d8_v2_avg_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM
+.loop:
+ movq xmm0, [rsi] ;0
+ movq xmm1, [rsi + rax] ;1
+
+ APPLY_FILTER_8 1
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d16_v2_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d16_v2_avg_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM
+.loop:
+ movdqu xmm0, [rsi] ;0
+ movdqu xmm1, [rsi + rax] ;1
+ movdqa xmm2, xmm0
+
+ APPLY_FILTER_16 1
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d4_h2_ssse3) PRIVATE
+sym(vp9_filter_block1d4_h2_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM_4
+.loop:
+ movdqu xmm0, [rsi] ;load src
+ movdqa xmm1, xmm0
+ psrldq xmm1, 1
+
+ APPLY_FILTER_4 0
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d8_h2_ssse3) PRIVATE
+sym(vp9_filter_block1d8_h2_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM
+.loop:
+ movdqu xmm0, [rsi] ;load src
+ movdqa xmm1, xmm0
+ psrldq xmm1, 1
+
+ APPLY_FILTER_8 0
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d16_h2_ssse3) PRIVATE
+sym(vp9_filter_block1d16_h2_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM
+.loop:
+ movdqu xmm0, [rsi] ;load src
+ movdqu xmm1, [rsi + 1]
+ movdqa xmm2, xmm0
+
+ APPLY_FILTER_16 0
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d4_h2_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d4_h2_avg_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM_4
+.loop:
+ movdqu xmm0, [rsi] ;load src
+ movdqa xmm1, xmm0
+ psrldq xmm1, 1
+
+ APPLY_FILTER_4 1
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d8_h2_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d8_h2_avg_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM
+.loop:
+ movdqu xmm0, [rsi] ;load src
+ movdqa xmm1, xmm0
+ psrldq xmm1, 1
+
+ APPLY_FILTER_8 1
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d16_h2_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d16_h2_avg_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM
+.loop:
+ movdqu xmm0, [rsi] ;load src
+ movdqu xmm1, [rsi + 1]
+ movdqa xmm2, xmm0
+
+ APPLY_FILTER_16 1
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 93ef7503f..e52b3f759 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -15,6 +15,7 @@
#include "./vpx_scale_rtcd.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem_ops.h"
#include "vpx_scale/vpx_scale.h"
#include "vp9/common/vp9_alloccommon.h"
@@ -39,20 +40,16 @@
#include "vp9/decoder/vp9_reader.h"
#include "vp9/decoder/vp9_thread.h"
-static int read_be32(const uint8_t *p) {
- return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
-}
-
static int is_compound_reference_allowed(const VP9_COMMON *cm) {
int i;
for (i = 1; i < REFS_PER_FRAME; ++i)
- if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1])
+ if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1])
return 1;
return 0;
}
-static void setup_compound_reference(VP9_COMMON *cm) {
+static void setup_compound_reference_mode(VP9_COMMON *cm) {
if (cm->ref_frame_sign_bias[LAST_FRAME] ==
cm->ref_frame_sign_bias[GOLDEN_FRAME]) {
cm->comp_fixed_ref = ALTREF_FRAME;
@@ -116,33 +113,34 @@ static void read_inter_mode_probs(FRAME_CONTEXT *fc, vp9_reader *r) {
vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]);
}
-static REFERENCE_MODE read_reference_mode(VP9_COMMON *cm, vp9_reader *r) {
+static REFERENCE_MODE read_frame_reference_mode(const VP9_COMMON *cm,
+ vp9_reader *r) {
if (is_compound_reference_allowed(cm)) {
- REFERENCE_MODE mode = vp9_read_bit(r);
- if (mode)
- mode += vp9_read_bit(r);
- setup_compound_reference(cm);
- return mode;
+ return vp9_read_bit(r) ? (vp9_read_bit(r) ? REFERENCE_MODE_SELECT
+ : COMPOUND_REFERENCE)
+ : SINGLE_REFERENCE;
} else {
return SINGLE_REFERENCE;
}
}
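
An equivalent flat reading of the nested conditional in read_frame_reference_mode, for reference (same calls, restated):

    /* bit 0   -> SINGLE_REFERENCE
       bits 10 -> COMPOUND_REFERENCE
       bits 11 -> REFERENCE_MODE_SELECT */
    if (!vp9_read_bit(r))
      return SINGLE_REFERENCE;
    return vp9_read_bit(r) ? REFERENCE_MODE_SELECT : COMPOUND_REFERENCE;
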
-static void read_reference_mode_probs(VP9_COMMON *cm, vp9_reader *r) {
+static void read_frame_reference_mode_probs(VP9_COMMON *cm, vp9_reader *r) {
+ FRAME_CONTEXT *const fc = &cm->fc;
int i;
+
if (cm->reference_mode == REFERENCE_MODE_SELECT)
- for (i = 0; i < COMP_INTER_CONTEXTS; i++)
- vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]);
+ for (i = 0; i < COMP_INTER_CONTEXTS; ++i)
+ vp9_diff_update_prob(r, &fc->comp_inter_prob[i]);
if (cm->reference_mode != COMPOUND_REFERENCE)
- for (i = 0; i < REF_CONTEXTS; i++) {
- vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][0]);
- vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][1]);
+ for (i = 0; i < REF_CONTEXTS; ++i) {
+ vp9_diff_update_prob(r, &fc->single_ref_prob[i][0]);
+ vp9_diff_update_prob(r, &fc->single_ref_prob[i][1]);
}
if (cm->reference_mode != SINGLE_REFERENCE)
- for (i = 0; i < REF_CONTEXTS; i++)
- vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]);
+ for (i = 0; i < REF_CONTEXTS; ++i)
+ vp9_diff_update_prob(r, &fc->comp_ref_prob[i]);
}
static void update_mv_probs(vp9_prob *p, int n, vp9_reader *r) {
@@ -303,7 +301,7 @@ static void predict_and_reconstruct_intra_block(int plane, int block,
dst, pd->dst.stride, dst, pd->dst.stride,
x, y, plane);
- if (!mi->mbmi.skip_coeff) {
+ if (!mi->mbmi.skip) {
const int eob = vp9_decode_block_tokens(cm, xd, plane, block,
plane_bsize, x, y, tx_size,
args->r);
@@ -350,9 +348,9 @@ static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
xd->mi_8x8 = cm->mi_grid_visible + offset;
xd->prev_mi_8x8 = cm->prev_mi_grid_visible + offset;
- // Special case: if prev_mi is NULL, the previous mode info context
- // cannot be used.
- xd->last_mi = cm->prev_mi ? xd->prev_mi_8x8[0] : NULL;
+
+ xd->last_mi = cm->coding_use_prev_mi && cm->prev_mi ?
+ xd->prev_mi_8x8[0] : NULL;
xd->mi_8x8[0] = xd->mi_stream + offset - tile_offset;
xd->mi_8x8[0]->mbmi.sb_type = bsize;
@@ -397,7 +395,7 @@ static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd,
// Has to be called after set_offsets
mbmi = &xd->mi_8x8[0]->mbmi;
- if (mbmi->skip_coeff) {
+ if (mbmi->skip) {
reset_skip_context(xd, bsize);
} else {
if (cm->seg.enabled)
@@ -421,12 +419,12 @@ static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd,
vp9_dec_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
// Reconstruction
- if (!mbmi->skip_coeff) {
+ if (!mbmi->skip) {
int eobtotal = 0;
struct inter_args arg = { cm, xd, r, &eobtotal };
vp9_foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg);
if (!less8x8 && eobtotal == 0)
- mbmi->skip_coeff = 1; // skip loopfilter
+ mbmi->skip = 1; // skip loopfilter
}
}
@@ -691,9 +689,14 @@ static void apply_frame_size(VP9D_COMP *pbi, int width, int height) {
vp9_update_frame_size(cm);
}
- vp9_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
- VP9_DEC_BORDER_IN_PIXELS);
+ if (vp9_realloc_frame_buffer(
+ get_frame_new_buffer(cm), cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y, VP9_DEC_BORDER_IN_PIXELS,
+ &cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer, cm->get_fb_cb,
+ cm->cb_priv)) {
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate frame buffer");
+ }
}
static void setup_frame_size(VP9D_COMP *pbi,
@@ -831,7 +834,7 @@ static size_t get_tile(const uint8_t *const data_end,
vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME,
"Truncated packet or corrupt tile length");
- size = read_be32(*data);
+ size = mem_get_be32(*data);
*data += 4;
if (size > (size_t)(data_end - *data))
@@ -1114,7 +1117,13 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
cm->show_existing_frame = vp9_rb_read_bit(rb);
if (cm->show_existing_frame) {
// Show an existing frame directly.
- int frame_to_show = cm->ref_frame_map[vp9_rb_read_literal(rb, 3)];
+ const int frame_to_show = cm->ref_frame_map[vp9_rb_read_literal(rb, 3)];
+
+ if (cm->frame_bufs[frame_to_show].ref_count < 1)
+ vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+ "Buffer %d does not contain a decoded frame",
+ frame_to_show);
+
ref_cnt_fb(cm->frame_bufs, &cm->new_fb_idx, frame_to_show);
pbi->refresh_frame_flags = 0;
cm->lf.filter_level = 0;
@@ -1198,9 +1207,11 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
}
if (!cm->error_resilient_mode) {
+ cm->coding_use_prev_mi = 1;
cm->refresh_frame_context = vp9_rb_read_bit(rb);
cm->frame_parallel_decoding_mode = vp9_rb_read_bit(rb);
} else {
+ cm->coding_use_prev_mi = 0;
cm->refresh_frame_context = 0;
cm->frame_parallel_decoding_mode = 1;
}
@@ -1258,8 +1269,10 @@ static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data,
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
vp9_diff_update_prob(&r, &fc->intra_inter_prob[i]);
- cm->reference_mode = read_reference_mode(cm, &r);
- read_reference_mode_probs(cm, &r);
+ cm->reference_mode = read_frame_reference_mode(cm, &r);
+ if (cm->reference_mode != SINGLE_REFERENCE)
+ setup_compound_reference_mode(cm);
+ read_frame_reference_mode_probs(cm, &r);
for (j = 0; j < BLOCK_SIZE_GROUPS; j++)
for (i = 0; i < INTRA_MODES - 1; ++i)
@@ -1368,7 +1381,10 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
alloc_tile_storage(pbi, tile_rows, tile_cols);
xd->mode_info_stride = cm->mode_info_stride;
- set_prev_mi(cm);
+ if (cm->coding_use_prev_mi)
+ set_prev_mi(cm);
+ else
+ cm->prev_mi = NULL;
setup_plane_dequants(cm, xd, cm->base_qindex);
vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index c7fb71ddf..0fb7a1580 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -146,8 +146,8 @@ static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd,
return segment_id;
}
-static int read_skip_coeff(VP9_COMMON *cm, const MACROBLOCKD *xd,
- int segment_id, vp9_reader *r) {
+static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd,
+ int segment_id, vp9_reader *r) {
if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
return 1;
} else {
@@ -169,7 +169,7 @@ static void read_intra_frame_mode_info(VP9_COMMON *const cm,
const BLOCK_SIZE bsize = mbmi->sb_type;
mbmi->segment_id = read_intra_segment_id(cm, xd, mi_row, mi_col, r);
- mbmi->skip_coeff = read_skip_coeff(cm, xd, mbmi->segment_id, r);
+ mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
mbmi->tx_size = read_tx_size(cm, xd, cm->tx_mode, bsize, 1, r);
mbmi->ref_frame[0] = INTRA_FRAME;
mbmi->ref_frame[1] = NONE;
@@ -257,13 +257,18 @@ static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref,
mv->col = ref->col + diff.col;
}
-static REFERENCE_MODE read_reference_mode(VP9_COMMON *cm, const MACROBLOCKD *xd,
- vp9_reader *r) {
- const int ctx = vp9_get_reference_mode_context(cm, xd);
- const int mode = vp9_read(r, cm->fc.comp_inter_prob[ctx]);
- if (!cm->frame_parallel_decoding_mode)
- ++cm->counts.comp_inter[ctx][mode];
- return mode; // SINGLE_REFERENCE or COMPOUND_REFERENCE
+static REFERENCE_MODE read_block_reference_mode(VP9_COMMON *cm,
+ const MACROBLOCKD *xd,
+ vp9_reader *r) {
+ if (cm->reference_mode == REFERENCE_MODE_SELECT) {
+ const int ctx = vp9_get_reference_mode_context(cm, xd);
+ const int mode = vp9_read(r, cm->fc.comp_inter_prob[ctx]);
+ if (!cm->frame_parallel_decoding_mode)
+ ++cm->counts.comp_inter[ctx][mode];
+ return mode; // SINGLE_REFERENCE or COMPOUND_REFERENCE
+ } else {
+ return cm->reference_mode;
+ }
}
// Read the reference frame
@@ -277,10 +282,7 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd,
ref_frame[0] = vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
ref_frame[1] = NONE;
} else {
- const REFERENCE_MODE mode = (cm->reference_mode == REFERENCE_MODE_SELECT)
- ? read_reference_mode(cm, xd, r)
- : cm->reference_mode;
-
+ const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r);
// FIXME(rbultje) I'm pretty sure this breaks segmentation ref frame coding
if (mode == COMPOUND_REFERENCE) {
const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
@@ -356,6 +358,11 @@ static void read_intra_block_mode_info(VP9_COMMON *const cm, MODE_INFO *mi,
mbmi->uv_mode = read_intra_mode_uv(cm, r, mbmi->mode);
}
+static INLINE int is_mv_valid(const MV *mv) {
+ return mv->row > MV_LOW && mv->row < MV_UPP &&
+ mv->col > MV_LOW && mv->col < MV_UPP;
+}
+
static INLINE int assign_mv(VP9_COMMON *cm, MB_PREDICTION_MODE mode,
int_mv mv[2], int_mv ref_mv[2],
int_mv nearest_mv[2], int_mv near_mv[2],
@@ -367,14 +374,10 @@ static INLINE int assign_mv(VP9_COMMON *cm, MB_PREDICTION_MODE mode,
case NEWMV: {
nmv_context_counts *const mv_counts = cm->frame_parallel_decoding_mode ?
NULL : &cm->counts.mv;
- read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv,
- &cm->fc.nmvc, mv_counts, allow_hp);
- if (is_compound)
- read_mv(r, &mv[1].as_mv, &ref_mv[1].as_mv,
- &cm->fc.nmvc, mv_counts, allow_hp);
for (i = 0; i < 1 + is_compound; ++i) {
- ret = ret && mv[i].as_mv.row < MV_UPP && mv[i].as_mv.row > MV_LOW;
- ret = ret && mv[i].as_mv.col < MV_UPP && mv[i].as_mv.col > MV_LOW;
+ read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc.nmvc, mv_counts,
+ allow_hp);
+ ret = ret && is_mv_valid(&mv[i].as_mv);
}
break;
}
@@ -520,10 +523,10 @@ static void read_inter_frame_mode_info(VP9_COMMON *const cm,
mbmi->mv[0].as_int = 0;
mbmi->mv[1].as_int = 0;
mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r);
- mbmi->skip_coeff = read_skip_coeff(cm, xd, mbmi->segment_id, r);
+ mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r);
mbmi->tx_size = read_tx_size(cm, xd, cm->tx_mode, mbmi->sb_type,
- !mbmi->skip_coeff || !inter_block, r);
+ !mbmi->skip || !inter_block, r);
if (inter_block)
read_inter_block_mode_info(cm, xd, tile, mi, mi_row, mi_col, r);
diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c
index 128b9f8af..542732aa0 100644
--- a/vp9/decoder/vp9_dthread.c
+++ b/vp9/decoder/vp9_dthread.c
@@ -220,11 +220,13 @@ void vp9_loop_filter_alloc(VP9_COMMON *cm, VP9LfSync *lf_sync, int rows,
CHECK_MEM_ERROR(cm, lf_sync->mutex_,
vpx_malloc(sizeof(*lf_sync->mutex_) * rows));
+ for (i = 0; i < rows; ++i) {
+ pthread_mutex_init(&lf_sync->mutex_[i], NULL);
+ }
+
CHECK_MEM_ERROR(cm, lf_sync->cond_,
vpx_malloc(sizeof(*lf_sync->cond_) * rows));
-
for (i = 0; i < rows; ++i) {
- pthread_mutex_init(&lf_sync->mutex_[i], NULL);
pthread_cond_init(&lf_sync->cond_[i], NULL);
}
#endif // CONFIG_MULTITHREAD
@@ -242,18 +244,29 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows) {
if (lf_sync != NULL) {
int i;
- for (i = 0; i < rows; ++i) {
- pthread_mutex_destroy(&lf_sync->mutex_[i]);
- pthread_cond_destroy(&lf_sync->cond_[i]);
+ if (lf_sync->mutex_ != NULL) {
+ for (i = 0; i < rows; ++i) {
+ pthread_mutex_destroy(&lf_sync->mutex_[i]);
+ }
+ vpx_free(lf_sync->mutex_);
+ }
+ if (lf_sync->cond_ != NULL) {
+ for (i = 0; i < rows; ++i) {
+ pthread_cond_destroy(&lf_sync->cond_[i]);
+ }
+ vpx_free(lf_sync->cond_);
}
- vpx_free(lf_sync->mutex_);
- vpx_free(lf_sync->cond_);
vpx_free(lf_sync->cur_sb_col);
+ // clear the structure: the caller may be handling a resize, in which
+ // case this call will be followed by an _alloc() that may fail.
+ vpx_memset(lf_sync, 0, sizeof(*lf_sync));
}
#else
(void)rows;
- if (lf_sync != NULL)
+ if (lf_sync != NULL) {
vpx_free(lf_sync->cur_sb_col);
+ vpx_memset(lf_sync, 0, sizeof(*lf_sync));
+ }
#endif // CONFIG_MULTITHREAD
}
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index 803d536ba..1d3522e13 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -290,9 +290,14 @@ static void swap_frame_buffers(VP9D_COMP *pbi) {
VP9_COMMON *const cm = &pbi->common;
for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
- if (mask & 1)
+ if (mask & 1) {
+ const int old_idx = cm->ref_frame_map[ref_index];
ref_cnt_fb(cm->frame_bufs, &cm->ref_frame_map[ref_index],
cm->new_fb_idx);
+ if (old_idx >= 0 && cm->frame_bufs[old_idx].ref_count == 0)
+ cm->release_fb_cb(cm->cb_priv,
+ &cm->frame_bufs[old_idx].raw_frame_buffer);
+ }
++ref_index;
}
@@ -337,6 +342,10 @@ int vp9_receive_compressed_data(VP9D_PTR ptr,
cm->frame_refs[0].buf->corrupted = 1;
}
+ // If no references to the previous frame remain, release its buffer.
+ if (cm->new_fb_idx >= 0 && cm->frame_bufs[cm->new_fb_idx].ref_count == 0)
+ cm->release_fb_cb(cm->cb_priv,
+ &cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer);
cm->new_fb_idx = get_free_fb(cm);
if (setjmp(cm->error.jmp)) {
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index dc64a107c..34d1da7bd 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -14,6 +14,7 @@
#include "vpx/vpx_encoder.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem_ops.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_entropymv.h"
@@ -33,13 +34,7 @@
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_write_bit_buffer.h"
-
-#if defined(SECTIONBITS_OUTPUT)
-unsigned __int64 Sectionbits[500];
-#endif
-
#ifdef ENTROPY_STATS
-vp9_coeff_stats tree_update_hist[TX_SIZES][PLANE_TYPES];
extern unsigned int active_section;
#endif
@@ -67,13 +62,6 @@ static void write_inter_mode(vp9_writer *w, MB_PREDICTION_MODE mode,
&inter_mode_encodings[INTER_OFFSET(mode)]);
}
-static INLINE void write_be32(uint8_t *p, int value) {
- p[0] = value >> 24;
- p[1] = value >> 16;
- p[2] = value >> 8;
- p[3] = value;
-}
-
void vp9_encode_unsigned_max(struct vp9_write_bit_buffer *wb,
int data, int max) {
vp9_wb_write_literal(wb, data, get_unsigned_bits(max));
@@ -109,13 +97,13 @@ static void write_selected_tx_size(const VP9_COMP *cpi, MODE_INFO *m,
}
}
-static int write_skip_coeff(const VP9_COMP *cpi, int segment_id, MODE_INFO *m,
- vp9_writer *w) {
+static int write_skip(const VP9_COMP *cpi, int segment_id, MODE_INFO *m,
+ vp9_writer *w) {
const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
if (vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
return 1;
} else {
- const int skip = m->mbmi.skip_coeff;
+ const int skip = m->mbmi.skip;
vp9_write(w, skip, vp9_get_skip_prob(&cpi->common, xd));
return skip;
}
@@ -252,15 +240,15 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
const nmv_context *nmvc = &cm->fc.nmvc;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- struct segmentation *seg = &cm->seg;
+ const struct segmentation *const seg = &cm->seg;
MB_MODE_INFO *const mi = &m->mbmi;
- const MV_REFERENCE_FRAME rf = mi->ref_frame[0];
- const MV_REFERENCE_FRAME sec_rf = mi->ref_frame[1];
+ const MV_REFERENCE_FRAME ref0 = mi->ref_frame[0];
+ const MV_REFERENCE_FRAME ref1 = mi->ref_frame[1];
const MB_PREDICTION_MODE mode = mi->mode;
const int segment_id = mi->segment_id;
- int skip_coeff;
const BLOCK_SIZE bsize = mi->sb_type;
const int allow_hp = cm->allow_high_precision_mv;
+ int skip;
#ifdef ENTROPY_STATS
active_section = 9;
@@ -278,18 +266,18 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
}
}
- skip_coeff = write_skip_coeff(cpi, segment_id, m, bc);
+ skip = write_skip(cpi, segment_id, m, bc);
if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
- vp9_write(bc, rf != INTRA_FRAME, vp9_get_intra_inter_prob(cm, xd));
+ vp9_write(bc, ref0 != INTRA_FRAME, vp9_get_intra_inter_prob(cm, xd));
if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
- !(rf != INTRA_FRAME &&
- (skip_coeff || vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)))) {
+ !(ref0 != INTRA_FRAME &&
+ (skip || vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)))) {
write_selected_tx_size(cpi, m, mi->tx_size, bsize, bc);
}
- if (rf == INTRA_FRAME) {
+ if (ref0 == INTRA_FRAME) {
#ifdef ENTROPY_STATS
active_section = 6;
#endif
@@ -311,7 +299,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
} else {
vp9_prob *mv_ref_p;
encode_ref_frame(cpi, bc);
- mv_ref_p = cpi->common.fc.inter_mode_probs[mi->mode_context[rf]];
+ mv_ref_p = cm->fc.inter_mode_probs[mi->mode_context[ref0]];
#ifdef ENTROPY_STATS
active_section = 3;
@@ -321,7 +309,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
if (bsize >= BLOCK_8X8) {
write_inter_mode(bc, mode, mv_ref_p);
- ++cm->counts.inter_mode[mi->mode_context[rf]][INTER_OFFSET(mode)];
+ ++cm->counts.inter_mode[mi->mode_context[ref0]][INTER_OFFSET(mode)];
}
}
@@ -341,21 +329,19 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
const int j = idy * 2 + idx;
- const MB_PREDICTION_MODE blockmode = m->bmi[j].as_mode;
- write_inter_mode(bc, blockmode, mv_ref_p);
- ++cm->counts.inter_mode[mi->mode_context[rf]]
- [INTER_OFFSET(blockmode)];
-
- if (blockmode == NEWMV) {
+ const MB_PREDICTION_MODE b_mode = m->bmi[j].as_mode;
+ write_inter_mode(bc, b_mode, mv_ref_p);
+ ++cm->counts.inter_mode[mi->mode_context[ref0]][INTER_OFFSET(b_mode)];
+ if (b_mode == NEWMV) {
#ifdef ENTROPY_STATS
active_section = 11;
#endif
vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[0].as_mv,
- &mi->ref_mvs[rf][0].as_mv, nmvc, allow_hp);
+ &mi->ref_mvs[ref0][0].as_mv, nmvc, allow_hp);
if (has_second_ref(mi))
vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[1].as_mv,
- &mi->ref_mvs[sec_rf][0].as_mv, nmvc, allow_hp);
+ &mi->ref_mvs[ref1][0].as_mv, nmvc, allow_hp);
}
}
}
@@ -364,11 +350,11 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
active_section = 5;
#endif
vp9_encode_mv(cpi, bc, &mi->mv[0].as_mv,
- &mi->ref_mvs[rf][0].as_mv, nmvc, allow_hp);
+ &mi->ref_mvs[ref0][0].as_mv, nmvc, allow_hp);
if (has_second_ref(mi))
vp9_encode_mv(cpi, bc, &mi->mv[1].as_mv,
- &mi->ref_mvs[sec_rf][0].as_mv, nmvc, allow_hp);
+ &mi->ref_mvs[ref1][0].as_mv, nmvc, allow_hp);
}
}
}
@@ -387,7 +373,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO **mi_8x8,
if (seg->update_map)
write_segment_id(bc, seg, m->mbmi.segment_id);
- write_skip_coeff(cpi, segment_id, m, bc);
+ write_skip(cpi, segment_id, m, bc);
if (m->mbmi.sb_type >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT)
write_selected_tx_size(cpi, m, m->mbmi.tx_size, m->mbmi.sb_type, bc);
@@ -555,16 +541,6 @@ static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size) {
coef_probs[i][j][k][l][m] = get_binary_prob(
coef_branch_ct[i][j][k][l][m][0],
coef_branch_ct[i][j][k][l][m][1]);
-#ifdef ENTROPY_STATS
- if (!cpi->dummy_packing) {
- int t;
- for (t = 0; t < ENTROPY_TOKENS; ++t)
- context_counters[tx_size][i][j][k][l][t] +=
- coef_counts[i][j][k][l][t];
- context_counters[tx_size][i][j][k][l][ENTROPY_TOKENS] +=
- eob_branch_ct[i][j][k][l];
- }
-#endif
}
}
}
@@ -643,10 +619,6 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi,
if (s > 0 && newp != *oldp)
u = 1;
vp9_write(bc, u, upd);
-#ifdef ENTROPY_STATS
- if (!cpi->dummy_packing)
- ++tree_update_hist[tx_size][i][j][k][l][t][u];
-#endif
if (u) {
/* send/use new probability */
vp9_write_prob_diff_update(bc, newp, *oldp);
@@ -698,10 +670,6 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi,
updates += u;
if (u == 0 && updates == 0) {
noupdates_before_first++;
-#ifdef ENTROPY_STATS
- if (!cpi->dummy_packing)
- ++tree_update_hist[tx_size][i][j][k][l][t][u];
-#endif
continue;
}
if (u == 1 && updates == 1) {
@@ -712,10 +680,6 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi,
vp9_write(bc, 0, upd);
}
vp9_write(bc, u, upd);
-#ifdef ENTROPY_STATS
- if (!cpi->dummy_packing)
- ++tree_update_hist[tx_size][i][j][k][l][t][u];
-#endif
if (u) {
/* send/use new probability */
vp9_write_prob_diff_update(bc, newp, *oldp);
@@ -1037,7 +1001,7 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
vp9_stop_encode(&residual_bc);
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) {
// size of this tile
- write_be32(data_ptr + total_size, residual_bc.pos);
+ mem_put_be32(data_ptr + total_size, residual_bc.pos);
total_size += 4;
}
@@ -1287,11 +1251,12 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) {
active_section = 7;
#endif
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
first_part_size = write_compressed_header(cpi, data);
data += first_part_size;
- vp9_wb_write_literal(&saved_wb, first_part_size, 16);
+ // TODO(jbb): Figure out what to do if first_part_size > 16 bits.
+ vp9_wb_write_literal(&saved_wb, (int)first_part_size, 16);
data += encode_tiles(cpi, data);
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 713cc5132..85f6c97af 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -49,7 +49,6 @@ typedef struct {
int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
int rate;
int distortion;
- int64_t intra_error;
int best_mode_index;
int rddiv;
int rdmult;
@@ -63,9 +62,6 @@ typedef struct {
// search loop
int_mv pred_mv[MAX_REF_FRAMES];
INTERP_FILTER pred_interp_filter;
-
- // Bit flag for each mode whether it has high error in comparison to others.
- unsigned int modes_with_high_error;
} PICK_MODE_CONTEXT;
struct macroblock_plane {
@@ -172,9 +168,7 @@ struct macroblock {
int skip_encode;
// Used to store sub partition's choices.
- int fast_ms;
int_mv pred_mv[MAX_REF_FRAMES];
- int subblock_ref;
// TODO(jingning): Need to refactor the structure arrays that buffers the
// coding mode decisions of each partition type.
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index a840b480a..d5232393f 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -18,8 +18,6 @@
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_systemdependent.h"
-#include "vp9/encoder/vp9_dct.h"
-
static INLINE int fdct_round_shift(int input) {
int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
assert(INT16_MIN <= rv && rv <= INT16_MAX);
@@ -49,7 +47,7 @@ void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
- // as the first pass results are transposed, we tranpose the columns (that
+ // as the first pass results are transposed, we transpose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int pass;
@@ -157,32 +155,36 @@ static const transform_2d FHT_4[] = {
{ fadst4, fadst4 } // ADST_ADST = 3
};
-void vp9_short_fht4x4_c(const int16_t *input, int16_t *output,
- int stride, int tx_type) {
- int16_t out[4 * 4];
- int16_t *outptr = &out[0];
- int i, j;
- int16_t temp_in[4], temp_out[4];
- const transform_2d ht = FHT_4[tx_type];
+void vp9_fht4x4_c(const int16_t *input, int16_t *output,
+ int stride, int tx_type) {
+ if (tx_type == DCT_DCT) {
+ vp9_fdct4x4_c(input, output, stride);
+ } else {
+ int16_t out[4 * 4];
+ int16_t *outptr = &out[0];
+ int i, j;
+ int16_t temp_in[4], temp_out[4];
+ const transform_2d ht = FHT_4[tx_type];
- // Columns
- for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = input[j * stride + i] * 16;
- if (i == 0 && temp_in[0])
- temp_in[0] += 1;
- ht.cols(temp_in, temp_out);
- for (j = 0; j < 4; ++j)
- outptr[j * 4 + i] = temp_out[j];
- }
+ // Columns
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = input[j * stride + i] * 16;
+ if (i == 0 && temp_in[0])
+ temp_in[0] += 1;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ outptr[j * 4 + i] = temp_out[j];
+ }
- // Rows
- for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = out[j + i * 4];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < 4; ++j)
- output[j + i * 4] = (temp_out[j] + 1) >> 2;
+ // Rows
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = out[j + i * 4];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ output[j + i * 4] = (temp_out[j] + 1) >> 2;
+ }
}
}
@@ -313,7 +315,7 @@ void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
- // as the first pass results are transposed, we tranpose the columns (that
+ // as the first pass results are transposed, we transpose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int pass;
@@ -565,30 +567,34 @@ static const transform_2d FHT_8[] = {
{ fadst8, fadst8 } // ADST_ADST = 3
};
-void vp9_short_fht8x8_c(const int16_t *input, int16_t *output,
- int stride, int tx_type) {
- int16_t out[64];
- int16_t *outptr = &out[0];
- int i, j;
- int16_t temp_in[8], temp_out[8];
- const transform_2d ht = FHT_8[tx_type];
-
- // Columns
- for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j)
- temp_in[j] = input[j * stride + i] * 4;
- ht.cols(temp_in, temp_out);
- for (j = 0; j < 8; ++j)
- outptr[j * 8 + i] = temp_out[j];
- }
+void vp9_fht8x8_c(const int16_t *input, int16_t *output,
+ int stride, int tx_type) {
+ if (tx_type == DCT_DCT) {
+ vp9_fdct8x8_c(input, output, stride);
+ } else {
+ int16_t out[64];
+ int16_t *outptr = &out[0];
+ int i, j;
+ int16_t temp_in[8], temp_out[8];
+ const transform_2d ht = FHT_8[tx_type];
+
+ // Columns
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j)
+ temp_in[j] = input[j * stride + i] * 4;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 8; ++j)
+ outptr[j * 8 + i] = temp_out[j];
+ }
- // Rows
- for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j)
- temp_in[j] = out[j + i * 8];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < 8; ++j)
- output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
+ // Rows
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j)
+ temp_in[j] = out[j + i * 8];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < 8; ++j)
+ output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
+ }
}
}
@@ -958,31 +964,34 @@ static const transform_2d FHT_16[] = {
{ fadst16, fadst16 } // ADST_ADST = 3
};
-void vp9_short_fht16x16_c(const int16_t *input, int16_t *output,
- int stride, int tx_type) {
- int16_t out[256];
- int16_t *outptr = &out[0];
- int i, j;
- int16_t temp_in[16], temp_out[16];
- const transform_2d ht = FHT_16[tx_type];
-
- // Columns
- for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j)
- temp_in[j] = input[j * stride + i] * 4;
- ht.cols(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
-// outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
- }
+void vp9_fht16x16_c(const int16_t *input, int16_t *output,
+ int stride, int tx_type) {
+ if (tx_type == DCT_DCT) {
+ vp9_fdct16x16_c(input, output, stride);
+ } else {
+ int16_t out[256];
+ int16_t *outptr = &out[0];
+ int i, j;
+ int16_t temp_in[16], temp_out[16];
+ const transform_2d ht = FHT_16[tx_type];
+
+ // Columns
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = input[j * stride + i] * 4;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 16; ++j)
+ outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
+ }
- // Rows
- for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j)
- temp_in[j] = out[j + i * 16];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- output[j + i * 16] = temp_out[j];
+ // Rows
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = out[j + i * 16];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < 16; ++j)
+ output[j + i * 16] = temp_out[j];
+ }
}
}
@@ -1375,27 +1384,3 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
out[j + i * 32] = temp_out[j];
}
}
-
-void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output,
- int stride) {
- if (tx_type == DCT_DCT)
- vp9_fdct4x4(input, output, stride);
- else
- vp9_short_fht4x4(input, output, stride, tx_type);
-}
-
-void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output,
- int stride) {
- if (tx_type == DCT_DCT)
- vp9_fdct8x8(input, output, stride);
- else
- vp9_short_fht8x8(input, output, stride, tx_type);
-}
-
-void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,
- int stride) {
- if (tx_type == DCT_DCT)
- vp9_fdct16x16(input, output, stride);
- else
- vp9_short_fht16x16(input, output, stride, tx_type);
-}
diff --git a/vp9/encoder/vp9_dct.h b/vp9/encoder/vp9_dct.h
deleted file mode 100644
index cf5f001a9..000000000
--- a/vp9/encoder/vp9_dct.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VP9_ENCODER_VP9_DCT_H_
-#define VP9_ENCODER_VP9_DCT_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output,
- int stride);
-
-void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output,
- int stride);
-
-void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,
- int stride);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // VP9_ENCODER_VP9_DCT_H_
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 7fb5a03ba..57865138d 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -40,8 +40,6 @@
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_vaq.h"
-#define DBG_PRNT_SEGMAP 0
-
static INLINE uint8_t *get_sb_index(MACROBLOCK *x, BLOCK_SIZE subsize) {
switch (subsize) {
case BLOCK_64X64:
@@ -96,7 +94,8 @@ static const uint8_t VP9_VAR_OFFS[64] = {
128, 128, 128, 128, 128, 128, 128, 128
};
-static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, MACROBLOCK *x,
+static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,
+ MACROBLOCK *x,
BLOCK_SIZE bs) {
unsigned int var, sse;
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride,
@@ -104,6 +103,52 @@ static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, MACROBLOCK *x,
return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}
+static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
+ MACROBLOCK *x,
+ int mi_row,
+ int mi_col,
+ BLOCK_SIZE bs) {
+ const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
+ int offset = (mi_row * MI_SIZE) * yv12->y_stride + (mi_col * MI_SIZE);
+ unsigned int var, sse;
+ var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
+ x->plane[0].src.stride,
+ yv12->y_buffer + offset,
+ yv12->y_stride,
+ &sse);
+ return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
+}
+
+static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi,
+ int mi_row,
+ int mi_col) {
+ unsigned int var = get_sby_perpixel_diff_variance(cpi, &cpi->mb,
+ mi_row, mi_col,
+ BLOCK_64X64);
+ if (var < 8)
+ return BLOCK_64X64;
+ else if (var < 128)
+ return BLOCK_32X32;
+ else if (var < 2048)
+ return BLOCK_16X16;
+ else
+ return BLOCK_8X8;
+}
+
+static BLOCK_SIZE get_nonrd_var_based_fixed_partition(VP9_COMP *cpi,
+ int mi_row,
+ int mi_col) {
+ unsigned int var = get_sby_perpixel_diff_variance(cpi, &cpi->mb,
+ mi_row, mi_col,
+ BLOCK_64X64);
+ if (var < 4)
+ return BLOCK_64X64;
+ else if (var < 10)
+ return BLOCK_32X32;
+ else
+ return BLOCK_16X16;
+}
+
// Original activity measure from Tim T's code.
static unsigned int tt_activity_measure(MACROBLOCK *x) {
unsigned int sse;
@@ -321,7 +366,7 @@ static void build_activity_map(VP9_COMP *cpi) {
}
// Macroblock activity masking
-void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x) {
+static void activity_masking(VP9_COMP *cpi, MACROBLOCK *x) {
#if USE_ACT_INDEX
x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2);
x->errorperbit = x->rdmult * 100 / (110 * x->rddiv);
@@ -347,7 +392,6 @@ static void select_in_frame_q_segment(VP9_COMP *cpi,
int mi_row, int mi_col,
int output_enabled, int projected_rate) {
VP9_COMMON *const cm = &cpi->common;
- int target_rate = cpi->rc.sb64_target_rate << 8; // convert to bits << 8
const int mi_offset = mi_row * cm->mi_cols + mi_col;
const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
@@ -364,11 +408,10 @@ static void select_in_frame_q_segment(VP9_COMP *cpi,
} else {
// Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh).
// It is converted to bits * 256 units
- target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) / (bw * bh);
+ const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) /
+ (bw * bh);
if (projected_rate < (target_rate / 4)) {
- segment = 2;
- } else if (projected_rate < (target_rate / 2)) {
segment = 1;
} else {
segment = 0;
@@ -402,7 +445,6 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
MODE_INFO *mi_addr = xd->mi_8x8[0];
- const int mb_mode_index = ctx->best_mode_index;
const int mis = cm->mode_info_stride;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
@@ -474,8 +516,8 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
cpi->rd_tx_select_diff[i] += ctx->tx_rd_diff[i];
}
- if (frame_is_intra_only(cm)) {
#if CONFIG_INTERNAL_STATS
+ if (frame_is_intra_only(cm)) {
static const int kf_mode_index[] = {
THR_DC /*DC_PRED*/,
THR_V_PRED /*V_PRED*/,
@@ -488,29 +530,32 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
THR_D63_PRED /*D63_PRED*/,
THR_TM /*TM_PRED*/,
};
- cpi->mode_chosen_counts[kf_mode_index[mbmi->mode]]++;
-#endif
+ ++cpi->mode_chosen_counts[kf_mode_index[mbmi->mode]];
} else {
// Note how often each mode is chosen as best
- cpi->mode_chosen_counts[mb_mode_index]++;
- if (is_inter_block(mbmi) &&
- (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) {
- int_mv best_mv[2];
- for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
- best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
- vp9_update_mv_count(cpi, x, best_mv);
- }
+ ++cpi->mode_chosen_counts[ctx->best_mode_index];
+ }
+#endif
+ if (!frame_is_intra_only(cm)) {
+ if (is_inter_block(mbmi)) {
+ if (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV) {
+ int_mv best_mv[2];
+ for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
+ best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
+ vp9_update_mv_count(cpi, x, best_mv);
+ }
- if (cm->interp_filter == SWITCHABLE && is_inter_mode(mbmi->mode)) {
- const int ctx = vp9_get_pred_context_switchable_interp(xd);
- ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
+ if (cm->interp_filter == SWITCHABLE) {
+ const int ctx = vp9_get_pred_context_switchable_interp(xd);
+ ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
+ }
}
cpi->rd_comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
cpi->rd_comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
cpi->rd_comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
cpi->rd_filter_diff[i] += ctx->best_filter_diff[i];
}
}
@@ -555,8 +600,6 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
xd->mi_8x8 = cm->mi_grid_visible + idx_str;
xd->prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
- // Special case: if prev_mi is NULL, the previous mode info context
- // cannot be used.
xd->last_mi = cm->prev_mi ? xd->prev_mi_8x8[0] : NULL;
xd->mi_8x8[0] = cm->mi + idx_str;
@@ -613,7 +656,7 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id];
} else {
mbmi->segment_id = 0;
- x->encode_breakout = cpi->oxcf.encode_breakout;
+ x->encode_breakout = cpi->encode_breakout;
}
}
@@ -631,7 +674,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
int orig_rdmult = x->rdmult;
double rdmult_ratio;
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
rdmult_ratio = 1.0; // avoid uninitialized warnings
// Use the lower precision, but faster, 32x32 fdct for mode selection.
@@ -660,32 +703,44 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
x->skip_recode = 0;
// Set to zero to make sure we do not use the previous encoded frame stats
- xd->mi_8x8[0]->mbmi.skip_coeff = 0;
+ xd->mi_8x8[0]->mbmi.skip = 0;
x->source_variance = get_sby_perpixel_variance(cpi, x, bsize);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
const int energy = bsize <= BLOCK_16X16 ? x->mb_energy
: vp9_block_energy(cpi, x, bsize);
- xd->mi_8x8[0]->mbmi.segment_id = vp9_vaq_segment_id(energy);
+
+ if (cm->frame_type == KEY_FRAME ||
+ cpi->refresh_alt_ref_frame ||
+ (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
+ xd->mi_8x8[0]->mbmi.segment_id = vp9_vaq_segment_id(energy);
+ } else {
+ const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map
+ : cm->last_frame_seg_map;
+ xd->mi_8x8[0]->mbmi.segment_id =
+ vp9_get_segment_id(cm, map, bsize, mi_row, mi_col);
+ }
+
rdmult_ratio = vp9_vaq_rdmult_ratio(energy);
vp9_mb_init_quantizer(cpi, x);
}
if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
- vp9_activity_masking(cpi, x);
+ activity_masking(cpi, x);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
- vp9_clear_system_state(); // __asm emms;
- x->rdmult = round(x->rdmult * rdmult_ratio);
+ vp9_clear_system_state();
+ x->rdmult = (int)round(x->rdmult * rdmult_ratio);
} else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
const int mi_offset = mi_row * cm->mi_cols + mi_col;
unsigned char complexity = cpi->complexity_map[mi_offset];
- const int is_edge = (mi_row == 0) || (mi_row == (cm->mi_rows - 1)) ||
- (mi_col == 0) || (mi_col == (cm->mi_cols - 1));
+ const int is_edge = (mi_row <= 1) || (mi_row >= (cm->mi_rows - 2)) ||
+ (mi_col <= 1) || (mi_col >= (cm->mi_cols - 2));
- if (!is_edge && (complexity > 128))
+ if (!is_edge && (complexity > 128)) {
x->rdmult = x->rdmult + ((x->rdmult * (complexity - 128)) / 256);
+ }
}
// Find best coding mode & reconstruct the MB so it is available
@@ -705,44 +760,51 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
x->rdmult = orig_rdmult;
if (*totalrate != INT_MAX) {
- vp9_clear_system_state(); // __asm emms;
- *totalrate = round(*totalrate * rdmult_ratio);
+ vp9_clear_system_state();
+ *totalrate = (int)round(*totalrate * rdmult_ratio);
}
}
+ else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
+ x->rdmult = orig_rdmult;
+ }
}
static void update_stats(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
- MACROBLOCK *const x = &cpi->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- MODE_INFO *mi = xd->mi_8x8[0];
- MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const MACROBLOCK *const x = &cpi->mb;
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const MODE_INFO *const mi = xd->mi_8x8[0];
+ const MB_MODE_INFO *const mbmi = &mi->mbmi;
if (!frame_is_intra_only(cm)) {
const int seg_ref_active = vp9_segfeature_active(&cm->seg, mbmi->segment_id,
SEG_LVL_REF_FRAME);
+ if (!seg_ref_active) {
+ FRAME_COUNTS *const counts = &cm->counts;
+ const int inter_block = is_inter_block(mbmi);
- if (!seg_ref_active)
- cm->counts.intra_inter[vp9_get_intra_inter_context(xd)]
- [is_inter_block(mbmi)]++;
-
- // If the segment reference feature is enabled we have only a single
- // reference frame allowed for the segment so exclude it from
- // the reference frame counts used to work out probabilities.
- if (is_inter_block(mbmi) && !seg_ref_active) {
- if (cm->reference_mode == REFERENCE_MODE_SELECT)
- cm->counts.comp_inter[vp9_get_reference_mode_context(cm, xd)]
- [has_second_ref(mbmi)]++;
-
- if (has_second_ref(mbmi)) {
- cm->counts.comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)]
- [mbmi->ref_frame[0] == GOLDEN_FRAME]++;
- } else {
- cm->counts.single_ref[vp9_get_pred_context_single_ref_p1(xd)][0]
- [mbmi->ref_frame[0] != LAST_FRAME]++;
- if (mbmi->ref_frame[0] != LAST_FRAME)
- cm->counts.single_ref[vp9_get_pred_context_single_ref_p2(xd)][1]
- [mbmi->ref_frame[0] != GOLDEN_FRAME]++;
+ counts->intra_inter[vp9_get_intra_inter_context(xd)][inter_block]++;
+
+ // If the segment reference feature is enabled we have only a single
+ // reference frame allowed for the segment so exclude it from
+ // the reference frame counts used to work out probabilities.
+ if (inter_block) {
+ const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0];
+
+ if (cm->reference_mode == REFERENCE_MODE_SELECT)
+ counts->comp_inter[vp9_get_reference_mode_context(cm, xd)]
+ [has_second_ref(mbmi)]++;
+
+ if (has_second_ref(mbmi)) {
+ counts->comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)]
+ [ref0 == GOLDEN_FRAME]++;
+ } else {
+ counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0]
+ [ref0 != LAST_FRAME]++;
+ if (ref0 != LAST_FRAME)
+ counts->single_ref[vp9_get_pred_context_single_ref_p2(xd)][1]
+ [ref0 != GOLDEN_FRAME]++;
+ }
}
}
}
@@ -950,9 +1012,9 @@ static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize,
// may not be allowed, in which case this code attempts to choose the largest
// allowable partition.
static void set_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
- MODE_INFO **mi_8x8, int mi_row, int mi_col) {
+ MODE_INFO **mi_8x8, int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
VP9_COMMON *const cm = &cpi->common;
- BLOCK_SIZE bsize = cpi->sf.always_this_block_size;
const int mis = cm->mode_info_stride;
int row8x8_remaining = tile->mi_row_end - mi_row;
int col8x8_remaining = tile->mi_col_end - mi_col;
@@ -979,7 +1041,7 @@ static void set_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
int index = block_row * mis + block_col;
// Find a partition size that fits
- bsize = find_partition_size(cpi->sf.always_this_block_size,
+ bsize = find_partition_size(bsize,
(row8x8_remaining - block_row),
(col8x8_remaining - block_col), &bh, &bw);
mi_8x8[index] = mi_upper_left + index;
@@ -1025,38 +1087,19 @@ static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) {
}
return 0;
}
+
static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
BLOCK_SIZE bsize, int output_enabled) {
int i;
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- struct macroblock_plane *const p = x->plane;
- struct macroblockd_plane *const pd = xd->plane;
MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
- const int mb_mode_index = ctx->best_mode_index;
- int max_plane;
-
- max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1;
- for (i = 0; i < max_plane; ++i) {
- p[i].coeff = ctx->coeff_pbuf[i][1];
- p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
- pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
- p[i].eobs = ctx->eobs_pbuf[i][1];
- }
-
- for (i = max_plane; i < MAX_MB_PLANE; ++i) {
- p[i].coeff = ctx->coeff_pbuf[i][2];
- p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
- pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
- p[i].eobs = ctx->eobs_pbuf[i][2];
- }
-
x->skip = ctx->skip;
- if (frame_is_intra_only(cm)) {
#if CONFIG_INTERNAL_STATS
+ if (frame_is_intra_only(cm)) {
static const int kf_mode_index[] = {
THR_DC /*DC_PRED*/,
THR_V_PRED /*V_PRED*/,
@@ -1070,21 +1113,24 @@ static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
THR_TM /*TM_PRED*/,
};
++cpi->mode_chosen_counts[kf_mode_index[mbmi->mode]];
-#endif
} else {
// Note how often each mode is chosen as best
- cpi->mode_chosen_counts[mb_mode_index]++;
- if (is_inter_block(mbmi) &&
- (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) {
- int_mv best_mv[2];
- for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
- best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
- vp9_update_mv_count(cpi, x, best_mv);
- }
+ ++cpi->mode_chosen_counts[ctx->best_mode_index];
+ }
+#endif
+ if (!frame_is_intra_only(cm)) {
+ if (is_inter_block(mbmi)) {
+ if (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV) {
+ int_mv best_mv[2];
+ for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
+ best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
+ vp9_update_mv_count(cpi, x, best_mv);
+ }
- if (cm->interp_filter == SWITCHABLE && is_inter_mode(mbmi->mode)) {
- const int ctx = vp9_get_pred_context_switchable_interp(xd);
- ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
+ if (cm->interp_filter == SWITCHABLE) {
+ const int ctx = vp9_get_pred_context_switchable_interp(xd);
+ ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
+ }
}
}
}
@@ -1111,8 +1157,8 @@ static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile,
}
static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile,
- TOKENEXTRA **tp, int mi_row, int mi_col,
- int output_enabled, BLOCK_SIZE bsize) {
+ TOKENEXTRA **tp, int mi_row, int mi_col,
+ int output_enabled, BLOCK_SIZE bsize) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
@@ -1130,7 +1176,6 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile,
ctx = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
mi_row, mi_col, bsize);
subsize = mi_8x8[0]->mbmi.sb_type;
-
} else {
ctx = 0;
subsize = BLOCK_4X4;
@@ -1181,7 +1226,7 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile,
subsize);
*get_sb_index(x, subsize) = 3;
encode_sb_rt(cpi, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled,
- subsize);
+ subsize);
break;
default:
assert(0 && "Invalid partition type.");
@@ -1213,13 +1258,14 @@ static void rd_use_partition(VP9_COMP *cpi,
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8];
int last_part_rate = INT_MAX;
- int64_t last_part_dist = INT_MAX;
- int split_rate = INT_MAX;
- int64_t split_dist = INT_MAX;
+ int64_t last_part_dist = INT64_MAX;
+ int64_t last_part_rd = INT64_MAX;
int none_rate = INT_MAX;
- int64_t none_dist = INT_MAX;
+ int64_t none_dist = INT64_MAX;
+ int64_t none_rd = INT64_MAX;
int chosen_rate = INT_MAX;
- int64_t chosen_dist = INT_MAX;
+ int64_t chosen_dist = INT64_MAX;
+ int64_t chosen_rd = INT64_MAX;
BLOCK_SIZE sub_subsize = BLOCK_4X4;
int splits_below = 0;
BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type;
@@ -1248,10 +1294,8 @@ static void rd_use_partition(VP9_COMP *cpi,
x->mb_energy = vp9_block_energy(cpi, x, bsize);
}
- x->fast_ms = 0;
- x->subblock_ref = 0;
-
- if (cpi->sf.adjust_partitioning_from_last_frame) {
+ if (cpi->sf.partition_search_type == SEARCH_PARTITION &&
+ cpi->sf.adjust_partitioning_from_last_frame) {
// Check if any of the sub blocks are further split.
if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
sub_subsize = get_subsize(subsize, PARTITION_SPLIT);
@@ -1277,7 +1321,11 @@ static void rd_use_partition(VP9_COMP *cpi,
pl = partition_plane_context(cpi->above_seg_context,
cpi->left_seg_context,
mi_row, mi_col, bsize);
- none_rate += x->partition_cost[pl][PARTITION_NONE];
+
+ if (none_rate < INT_MAX) {
+ none_rate += x->partition_cost[pl][PARTITION_NONE];
+ none_rd = RDCOST(x->rdmult, x->rddiv, none_rate, none_dist);
+ }
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
mi_8x8[0]->mbmi.sb_type = bs_type;
@@ -1305,9 +1353,9 @@ static void rd_use_partition(VP9_COMP *cpi,
*get_sb_index(x, subsize) = 1;
rd_pick_sb_modes(cpi, tile, mi_row + (ms >> 1), mi_col, &rt, &dt,
subsize, get_block_context(x, subsize), INT64_MAX);
- if (rt == INT_MAX || dt == INT_MAX) {
+ if (rt == INT_MAX || dt == INT64_MAX) {
last_part_rate = INT_MAX;
- last_part_dist = INT_MAX;
+ last_part_dist = INT64_MAX;
break;
}
@@ -1329,9 +1377,9 @@ static void rd_use_partition(VP9_COMP *cpi,
*get_sb_index(x, subsize) = 1;
rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (ms >> 1), &rt, &dt,
subsize, get_block_context(x, subsize), INT64_MAX);
- if (rt == INT_MAX || dt == INT_MAX) {
+ if (rt == INT_MAX || dt == INT64_MAX) {
last_part_rate = INT_MAX;
- last_part_dist = INT_MAX;
+ last_part_dist = INT64_MAX;
break;
}
last_part_rate += rt;
@@ -1357,9 +1405,9 @@ static void rd_use_partition(VP9_COMP *cpi,
rd_use_partition(cpi, tile, mi_8x8 + jj * bss * mis + ii * bss, tp,
mi_row + y_idx, mi_col + x_idx, subsize, &rt, &dt,
i != 3);
- if (rt == INT_MAX || dt == INT_MAX) {
+ if (rt == INT_MAX || dt == INT64_MAX) {
last_part_rate = INT_MAX;
- last_part_dist = INT_MAX;
+ last_part_dist = INT64_MAX;
break;
}
last_part_rate += rt;
@@ -1372,16 +1420,19 @@ static void rd_use_partition(VP9_COMP *cpi,
pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
mi_row, mi_col, bsize);
- if (last_part_rate < INT_MAX)
+ if (last_part_rate < INT_MAX) {
last_part_rate += x->partition_cost[pl][partition];
+ last_part_rd = RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist);
+ }
if (cpi->sf.adjust_partitioning_from_last_frame
+ && cpi->sf.partition_search_type == SEARCH_PARTITION
&& partition != PARTITION_SPLIT && bsize > BLOCK_8X8
&& (mi_row + ms < cm->mi_rows || mi_row + (ms >> 1) == cm->mi_rows)
&& (mi_col + ms < cm->mi_cols || mi_col + (ms >> 1) == cm->mi_cols)) {
BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
- split_rate = 0;
- split_dist = 0;
+ chosen_rate = 0;
+ chosen_dist = 0;
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
// Split partition.
@@ -1408,46 +1459,44 @@ static void rd_use_partition(VP9_COMP *cpi,
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
- if (rt == INT_MAX || dt == INT_MAX) {
- split_rate = INT_MAX;
- split_dist = INT_MAX;
+ if (rt == INT_MAX || dt == INT64_MAX) {
+ chosen_rate = INT_MAX;
+ chosen_dist = INT64_MAX;
break;
}
+ chosen_rate += rt;
+ chosen_dist += dt;
+
if (i != 3)
encode_sb(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, 0,
split_subsize);
- split_rate += rt;
- split_dist += dt;
pl = partition_plane_context(cpi->above_seg_context,
cpi->left_seg_context,
mi_row + y_idx, mi_col + x_idx,
split_subsize);
- split_rate += x->partition_cost[pl][PARTITION_NONE];
+ chosen_rate += x->partition_cost[pl][PARTITION_NONE];
}
pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
mi_row, mi_col, bsize);
- if (split_rate < INT_MAX) {
- split_rate += x->partition_cost[pl][PARTITION_SPLIT];
-
- chosen_rate = split_rate;
- chosen_dist = split_dist;
+ if (chosen_rate < INT_MAX) {
+ chosen_rate += x->partition_cost[pl][PARTITION_SPLIT];
+ chosen_rd = RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist);
}
}
// If last_part is better set the partitioning to that...
- if (RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist)
- < RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist)) {
+ if (last_part_rd < chosen_rd) {
mi_8x8[0]->mbmi.sb_type = bsize;
if (bsize >= BLOCK_8X8)
*(get_sb_partitioning(x, bsize)) = subsize;
chosen_rate = last_part_rate;
chosen_dist = last_part_dist;
+ chosen_rd = last_part_rd;
}
// If none was better set the partitioning to that...
- if (RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist)
- > RDCOST(x->rdmult, x->rddiv, none_rate, none_dist)) {
+ if (none_rd < chosen_rd) {
if (bsize >= BLOCK_8X8)
*(get_sb_partitioning(x, bsize)) = bsize;
chosen_rate = none_rate;
@@ -1459,7 +1508,7 @@ static void rd_use_partition(VP9_COMP *cpi,
// We must have chosen a partitioning and encoding or we'll fail later on.
// No other opportunities for success.
if (bsize == BLOCK_64X64)
- assert(chosen_rate < INT_MAX && chosen_dist < INT_MAX);
+ assert(chosen_rate < INT_MAX && chosen_dist < INT64_MAX);
if (do_recon) {
int output_enabled = (bsize == BLOCK_64X64);
@@ -1523,6 +1572,15 @@ static void get_sb_partition_size_range(VP9_COMP *cpi, MODE_INFO ** mi_8x8,
}
}
+// Next square block size less than or equal to the current block size.
+static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = {
+ BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
+ BLOCK_8X8, BLOCK_8X8, BLOCK_8X8,
+ BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,
+ BLOCK_32X32, BLOCK_32X32, BLOCK_32X32,
+ BLOCK_64X64
+};
+
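As a quick illustration of the table, assuming the standard BLOCK_SIZES ordering (BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, BLOCK_8X8, ...): each size maps to the largest square that fits inside it, and squares map to themselves.

BLOCK_SIZE a = next_square_size[BLOCK_16X8];   /* -> BLOCK_8X8   */
BLOCK_SIZE b = next_square_size[BLOCK_8X16];   /* -> BLOCK_8X8   */
BLOCK_SIZE c = next_square_size[BLOCK_32X32];  /* -> BLOCK_32X32 */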
// Look at neighboring blocks and set a min and max partition size based on
// what they chose.
static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
@@ -1589,95 +1647,13 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
row8x8_remaining, col8x8_remaining,
&bh, &bw);
*min_block_size = MIN(*min_block_size, *max_block_size);
-}
-static void compute_fast_motion_search_level(VP9_COMP *cpi, BLOCK_SIZE bsize) {
- VP9_COMMON *const cm = &cpi->common;
- MACROBLOCK *const x = &cpi->mb;
-
- // Only use 8x8 result for non HD videos.
- // int use_8x8 = (MIN(cpi->common.width, cpi->common.height) < 720) ? 1 : 0;
- int use_8x8 = 1;
-
- if (cm->frame_type && !cpi->rc.is_src_frame_alt_ref &&
- ((use_8x8 && bsize == BLOCK_16X16) ||
- bsize == BLOCK_32X32 || bsize == BLOCK_64X64)) {
- int ref0 = 0, ref1 = 0, ref2 = 0, ref3 = 0;
- PICK_MODE_CONTEXT *block_context = NULL;
-
- if (bsize == BLOCK_16X16) {
- block_context = x->sb8x8_context[x->sb_index][x->mb_index];
- } else if (bsize == BLOCK_32X32) {
- block_context = x->mb_context[x->sb_index];
- } else if (bsize == BLOCK_64X64) {
- block_context = x->sb32_context;
- }
-
- if (block_context) {
- ref0 = block_context[0].mic.mbmi.ref_frame[0];
- ref1 = block_context[1].mic.mbmi.ref_frame[0];
- ref2 = block_context[2].mic.mbmi.ref_frame[0];
- ref3 = block_context[3].mic.mbmi.ref_frame[0];
- }
-
- // Currently, only consider 4 inter reference frames.
- if (ref0 && ref1 && ref2 && ref3) {
- int d01, d23, d02, d13;
-
- // Motion vectors for the four subblocks.
- int16_t mvr0 = block_context[0].mic.mbmi.mv[0].as_mv.row;
- int16_t mvc0 = block_context[0].mic.mbmi.mv[0].as_mv.col;
- int16_t mvr1 = block_context[1].mic.mbmi.mv[0].as_mv.row;
- int16_t mvc1 = block_context[1].mic.mbmi.mv[0].as_mv.col;
- int16_t mvr2 = block_context[2].mic.mbmi.mv[0].as_mv.row;
- int16_t mvc2 = block_context[2].mic.mbmi.mv[0].as_mv.col;
- int16_t mvr3 = block_context[3].mic.mbmi.mv[0].as_mv.row;
- int16_t mvc3 = block_context[3].mic.mbmi.mv[0].as_mv.col;
-
- // Adjust sign if ref is alt_ref.
- if (cm->ref_frame_sign_bias[ref0]) {
- mvr0 *= -1;
- mvc0 *= -1;
- }
-
- if (cm->ref_frame_sign_bias[ref1]) {
- mvr1 *= -1;
- mvc1 *= -1;
- }
-
- if (cm->ref_frame_sign_bias[ref2]) {
- mvr2 *= -1;
- mvc2 *= -1;
- }
-
- if (cm->ref_frame_sign_bias[ref3]) {
- mvr3 *= -1;
- mvc3 *= -1;
- }
-
- // Calculate mv distances.
- d01 = MAX(abs(mvr0 - mvr1), abs(mvc0 - mvc1));
- d23 = MAX(abs(mvr2 - mvr3), abs(mvc2 - mvc3));
- d02 = MAX(abs(mvr0 - mvr2), abs(mvc0 - mvc2));
- d13 = MAX(abs(mvr1 - mvr3), abs(mvc1 - mvc3));
-
- if (d01 < FAST_MOTION_MV_THRESH && d23 < FAST_MOTION_MV_THRESH &&
- d02 < FAST_MOTION_MV_THRESH && d13 < FAST_MOTION_MV_THRESH) {
- // Set fast motion search level.
- x->fast_ms = 1;
-
- if (ref0 == ref1 && ref1 == ref2 && ref2 == ref3 &&
- d01 < 2 && d23 < 2 && d02 < 2 && d13 < 2) {
- // Set fast motion search level.
- x->fast_ms = 2;
-
- if (!d01 && !d23 && !d02 && !d13) {
- x->fast_ms = 3;
- x->subblock_ref = ref0;
- }
- }
- }
- }
+ // When use_square_partition_only is true, make sure at least one square
+ // partition is allowed by selecting the next smaller square size as
+ // *min_block_size.
+ if (cpi->sf.use_square_partition_only &&
+ (*max_block_size - *min_block_size) < 2) {
+ *min_block_size = next_square_size[*min_block_size];
}
}
@@ -1720,8 +1696,6 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
bsize >= BLOCK_8X8;
int partition_vert_allowed = !force_horz_split && xss <= yss &&
bsize >= BLOCK_8X8;
-
- int partition_split_done = 0;
(void) tp_orig;
if (bsize < BLOCK_8X8) {
@@ -1863,18 +1837,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
if (cpi->sf.less_rectangular_check)
do_rect &= !partition_none_allowed;
}
- partition_split_done = 1;
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
- x->fast_ms = 0;
- x->subblock_ref = 0;
-
- if (partition_split_done &&
- cpi->sf.using_small_partition_info) {
- compute_fast_motion_search_level(cpi, bsize);
- }
-
// PARTITION_HORZ
if (partition_horz_allowed && do_rect) {
subsize = get_subsize(bsize, PARTITION_HORZ);
@@ -1979,7 +1944,11 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
-
+  // TODO(jbb): This code was added to avoid a static analysis
+  // warning that best_rd is unused after this point. It should be
+  // refactored so that the duplicate checks occur in a sub-function
+  // and are actually used...
+ (void) best_rd;
*rate = best_rate;
*dist = best_dist;
@@ -1997,49 +1966,14 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
if (bsize == BLOCK_64X64) {
assert(tp_orig < *tp);
assert(best_rate < INT_MAX);
- assert(best_dist < INT_MAX);
+ assert(best_dist < INT64_MAX);
} else {
assert(tp_orig == *tp);
}
}
-// Examines 64x64 block and chooses a best reference frame
-static void rd_pick_reference_frame(VP9_COMP *cpi, const TileInfo *const tile,
- int mi_row, int mi_col) {
- VP9_COMMON * const cm = &cpi->common;
- MACROBLOCK * const x = &cpi->mb;
- int bsl = b_width_log2(BLOCK_64X64), bs = 1 << bsl;
- int ms = bs / 2;
- ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
- PARTITION_CONTEXT sl[8], sa[8];
- int pl;
- int r;
- int64_t d;
-
- save_context(cpi, mi_row, mi_col, a, l, sa, sl, BLOCK_64X64);
-
-  // Default is no mask (all reference frames allowed).
- cpi->ref_frame_mask = 0;
-
- // Do RD search for 64x64.
- if ((mi_row + (ms >> 1) < cm->mi_rows) &&
- (mi_col + (ms >> 1) < cm->mi_cols)) {
- cpi->set_ref_frame_mask = 1;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &r, &d, BLOCK_64X64,
- get_block_context(x, BLOCK_64X64), INT64_MAX);
- pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
- mi_row, mi_col, BLOCK_64X64);
- r += x->partition_cost[pl][PARTITION_NONE];
-
- *(get_sb_partitioning(x, BLOCK_64X64)) = BLOCK_64X64;
- cpi->set_ref_frame_mask = 0;
- }
-
- restore_context(cpi, mi_row, mi_col, a, l, sa, sl, BLOCK_64X64);
-}
-
-static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
- int mi_row, TOKENEXTRA **tp) {
+static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
+ int mi_row, TOKENEXTRA **tp) {
VP9_COMMON *const cm = &cpi->common;
int mi_col;
@@ -2055,28 +1989,45 @@ static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
BLOCK_SIZE i;
MACROBLOCK *x = &cpi->mb;
- for (i = BLOCK_4X4; i < BLOCK_8X8; ++i) {
- const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
- const int num_4x4_h = num_4x4_blocks_high_lookup[i];
- const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);
- for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index)
- for (x->mb_index = 0; x->mb_index < 4; ++x->mb_index)
- for (x->b_index = 0; x->b_index < 16 / num_4x4_blk; ++x->b_index)
- get_block_context(x, i)->pred_interp_filter = SWITCHABLE;
+
+ if (cpi->sf.adaptive_pred_interp_filter) {
+ for (i = BLOCK_4X4; i < BLOCK_8X8; ++i) {
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[i];
+ const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);
+ for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index)
+ for (x->mb_index = 0; x->mb_index < 4; ++x->mb_index)
+ for (x->b_index = 0; x->b_index < 16 / num_4x4_blk; ++x->b_index)
+ get_block_context(x, i)->pred_interp_filter = SWITCHABLE;
+ }
}
vp9_zero(cpi->mb.pred_mv);
- if (cpi->sf.use_lastframe_partitioning ||
- cpi->sf.use_one_partition_size_always ) {
+ if ((cpi->sf.partition_search_type == SEARCH_PARTITION &&
+ cpi->sf.use_lastframe_partitioning) ||
+ cpi->sf.partition_search_type == FIXED_PARTITION ||
+ cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) {
const int idx_str = cm->mode_info_stride * mi_row + mi_col;
MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
cpi->mb.source_variance = UINT_MAX;
- if (cpi->sf.use_one_partition_size_always) {
+ if (cpi->sf.partition_search_type == FIXED_PARTITION) {
+ set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
+ set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
+ cpi->sf.always_this_block_size);
+ rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
+ &dummy_rate, &dummy_dist, 1);
+ } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION ||
+ cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
+ // TODO(debargha): Implement VAR_BASED_PARTITION as a separate case.
+ // Currently both VAR_BASED_FIXED_PARTITION/VAR_BASED_PARTITION
+ // map to the same thing.
+ BLOCK_SIZE bsize;
set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
- set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col);
+ bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
+ set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1);
} else {
@@ -2183,118 +2134,6 @@ static void switch_tx_mode(VP9_COMP *cpi) {
cpi->common.tx_mode = ALLOW_32X32;
}
-static void encode_frame_internal(VP9_COMP *cpi) {
- int mi_row;
- MACROBLOCK *const x = &cpi->mb;
- VP9_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &x->e_mbd;
-
-// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
-// cpi->common.current_video_frame, cpi->common.show_frame,
-// cm->frame_type);
-
-// debug output
-#if DBG_PRNT_SEGMAP
- {
- FILE *statsfile;
- statsfile = fopen("segmap2.stt", "a");
- fprintf(statsfile, "\n");
- fclose(statsfile);
- }
-#endif
-
- vp9_zero(cm->counts.switchable_interp);
- vp9_zero(cpi->tx_stepdown_count);
-
- xd->mi_8x8 = cm->mi_grid_visible;
- // required for vp9_frame_init_quantizer
- xd->mi_8x8[0] = cm->mi;
-
- xd->last_mi = cm->prev_mi;
-
- vp9_zero(cm->counts.mv);
- vp9_zero(cpi->coef_counts);
- vp9_zero(cm->counts.eob_branch);
-
- cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0
- && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
- switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless);
-
- vp9_frame_init_quantizer(cpi);
-
- vp9_initialize_rd_consts(cpi);
- vp9_initialize_me_consts(cpi, cm->base_qindex);
- switch_tx_mode(cpi);
-
- if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
- // Initialize encode frame context.
- init_encode_frame_mb_context(cpi);
-
- // Build a frame level activity map
- build_activity_map(cpi);
- }
-
- // Re-initialize encode frame context.
- init_encode_frame_mb_context(cpi);
-
- vp9_zero(cpi->rd_comp_pred_diff);
- vp9_zero(cpi->rd_filter_diff);
- vp9_zero(cpi->rd_tx_select_diff);
- vp9_zero(cpi->rd_tx_select_threshes);
-
- set_prev_mi(cm);
-
- {
- struct vpx_usec_timer emr_timer;
- vpx_usec_timer_start(&emr_timer);
-
- {
- // Take tiles into account and give start/end MB
- int tile_col, tile_row;
- TOKENEXTRA *tp = cpi->tok;
- const int tile_cols = 1 << cm->log2_tile_cols;
- const int tile_rows = 1 << cm->log2_tile_rows;
-
- for (tile_row = 0; tile_row < tile_rows; tile_row++) {
- for (tile_col = 0; tile_col < tile_cols; tile_col++) {
- TileInfo tile;
- TOKENEXTRA *tp_old = tp;
-
- // For each row of SBs in the frame
- vp9_tile_init(&tile, cm, tile_row, tile_col);
- for (mi_row = tile.mi_row_start;
- mi_row < tile.mi_row_end; mi_row += 8)
- encode_sb_row(cpi, &tile, mi_row, &tp);
-
- cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
- assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
- }
- }
- }
-
- vpx_usec_timer_mark(&emr_timer);
- cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
- }
-
- if (cpi->sf.skip_encode_sb) {
- int j;
- unsigned int intra_count = 0, inter_count = 0;
- for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
- intra_count += cm->counts.intra_inter[j][0];
- inter_count += cm->counts.intra_inter[j][1];
- }
- cpi->sf.skip_encode_frame = ((intra_count << 2) < inter_count);
- cpi->sf.skip_encode_frame &= (cm->frame_type != KEY_FRAME);
- cpi->sf.skip_encode_frame &= cm->show_frame;
- } else {
- cpi->sf.skip_encode_frame = 0;
- }
-
-#if 0
- // Keep record of the total distortion this time around for future use
- cpi->last_frame_distortion = cpi->frame_distortion;
-#endif
-}
static int check_dual_ref_flags(VP9_COMP *cpi) {
const int ref_flags = cpi->ref_frame_flags;
@@ -2312,7 +2151,7 @@ static int get_skip_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs) {
for (y = 0; y < ymbs; y++) {
for (x = 0; x < xmbs; x++) {
- if (!mi_8x8[y * mis + x]->mbmi.skip_coeff)
+ if (!mi_8x8[y * mis + x]->mbmi.skip)
return 0;
}
}
@@ -2443,6 +2282,7 @@ static void select_tx_mode(VP9_COMP *cpi) {
}
}
}
+
// Start RTC Exploration
typedef enum {
BOTH_ZERO = 0,
@@ -2470,98 +2310,75 @@ static void set_mode_info(MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,
mbmi->ref_frame[1] = INTRA_FRAME;
mbmi->tx_size = max_txsize_lookup[bsize];
mbmi->uv_mode = mode;
- mbmi->skip_coeff = 0;
+ mbmi->skip = 0;
mbmi->sb_type = bsize;
mbmi->segment_id = 0;
}
+
static INLINE int get_block_row(int b32i, int b16i, int b8i) {
return ((b32i >> 1) << 2) + ((b16i >> 1) << 1) + (b8i >> 1);
}
+
static INLINE int get_block_col(int b32i, int b16i, int b8i) {
return ((b32i & 1) << 2) + ((b16i & 1) << 1) + (b8i & 1);
}
-static void rtc_use_partition(VP9_COMP *cpi,
- const TileInfo *const tile,
- MODE_INFO **mi_8x8,
- TOKENEXTRA **tp, int mi_row, int mi_col,
- BLOCK_SIZE bsize, int *rate, int64_t *dist,
- int do_recon) {
+
+static void nonrd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
+ TOKENEXTRA **tp, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int *rate, int64_t *dist) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
- const int mis = cm->mode_info_stride;
- int mi_width = num_8x8_blocks_wide_lookup[cpi->sf.always_this_block_size];
- int mi_height = num_8x8_blocks_high_lookup[cpi->sf.always_this_block_size];
+ int mis = cm->mode_info_stride;
+ int br, bc;
int i, j;
int chosen_rate = INT_MAX;
- int64_t chosen_dist = INT_MAX;
+ int64_t chosen_dist = INT64_MAX;
MB_PREDICTION_MODE mode = DC_PRED;
- int row8x8_remaining = tile->mi_row_end - mi_row;
- int col8x8_remaining = tile->mi_col_end - mi_col;
- int b32i;
- x->fast_ms = 0;
- x->subblock_ref = 0;
- for (b32i = 0; b32i < 4; b32i++) {
- int b16i;
- for (b16i = 0; b16i < 4; b16i++) {
- int b8i;
- int block_row = get_block_row(b32i, b16i, 0);
- int block_col = get_block_col(b32i, b16i, 0);
- int index = block_row * mis + block_col;
- int rate;
- int64_t dist;
-
- int_mv frame_nearest_mv[MAX_REF_FRAMES];
- int_mv frame_near_mv[MAX_REF_FRAMES];
- struct buf_2d yv12_mb[MAX_REF_FRAMES][MAX_MB_PLANE];
-
- // Find a partition size that fits
- bsize = find_partition_size(cpi->sf.always_this_block_size,
- (row8x8_remaining - block_row),
- (col8x8_remaining - block_col),
- &mi_height, &mi_width);
- mi_8x8[index] = mi_8x8[0] + index;
-
- set_mi_row_col(xd, tile, mi_row + block_row, mi_height,
- mi_col + block_col, mi_width, cm->mi_rows, cm->mi_cols);
-
- xd->mi_8x8 = mi_8x8 + index;
-
- if (cm->frame_type != KEY_FRAME) {
- set_offsets(cpi, tile, mi_row + block_row, mi_col + block_col, bsize);
-
- vp9_pick_inter_mode(cpi, x, tile,
- mi_row + block_row, mi_col + block_col,
- &rate, &dist, bsize);
- } else {
- set_mode_info(&mi_8x8[index]->mbmi, bsize, mode,
- mi_row + block_row, mi_col + block_col);
- vp9_setup_buffer_inter(cpi, x, tile,
- LAST_FRAME, cpi->sf.always_this_block_size,
- mi_row + block_row, mi_col + block_col,
- frame_nearest_mv, frame_near_mv, yv12_mb);
- }
+ int rows = MIN(MI_BLOCK_SIZE, tile->mi_row_end - mi_row);
+ int cols = MIN(MI_BLOCK_SIZE, tile->mi_col_end - mi_col);
- for (j = 0; j < mi_height; j++)
- for (i = 0; i < mi_width; i++)
- if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > i
- && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > j) {
- mi_8x8[index+ i + j * mis] = mi_8x8[index];
- }
+ int bw = num_8x8_blocks_wide_lookup[bsize];
+ int bh = num_8x8_blocks_high_lookup[bsize];
- for (b8i = 0; b8i < 4; b8i++) {
- }
+ int brate = 0;
+ int64_t bdist = 0;
+ *rate = 0;
+ *dist = 0;
+
+  // Pick a prediction mode for each block of the fixed partition size.
+ for (br = 0; br < rows; br += bh) {
+ for (bc = 0; bc < cols; bc += bw) {
+ int row = mi_row + br;
+ int col = mi_col + bc;
+
+ BLOCK_SIZE bs = find_partition_size(bsize, rows - br, cols - bc,
+ &bh, &bw);
+ set_offsets(cpi, tile, row, col, bs);
+
+ if (cm->frame_type != KEY_FRAME)
+ vp9_pick_inter_mode(cpi, x, tile, row, col, &brate, &bdist, bs);
+ else
+ set_mode_info(&xd->mi_8x8[0]->mbmi, bs, mode, row, col);
+
+ *rate += brate;
+ *dist += bdist;
+
+ for (j = 0; j < bh; ++j)
+ for (i = 0; i < bw; ++i) {
+ xd->mi_8x8[j * mis + i] = xd->mi_8x8[0];
+ }
}
}
- encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, BLOCK_64X64);
*rate = chosen_rate;
*dist = chosen_dist;
+
+ encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, BLOCK_64X64);
}
-static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
- int mi_row, TOKENEXTRA **tp) {
- VP9_COMMON * const cm = &cpi->common;
+static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
+ int mi_row, TOKENEXTRA **tp) {
int mi_col;
// Initialize the left context for the new SB row
@@ -2574,38 +2391,39 @@ static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
int dummy_rate;
int64_t dummy_dist;
- const int idx_str = cm->mode_info_stride * mi_row + mi_col;
- MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
-
cpi->mb.source_variance = UINT_MAX;
- set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
- set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col);
- rtc_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1);
+
+ if (cpi->sf.partition_search_type == FIXED_PARTITION) {
+ nonrd_use_partition(cpi, tile, tp, mi_row, mi_col,
+ cpi->sf.always_this_block_size,
+ &dummy_rate, &dummy_dist);
+ } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION ||
+ cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
+ // TODO(debargha): Implement VAR_BASED_PARTITION as a separate case.
+ // Currently both VAR_BASED_FIXED_PARTITION/VAR_BASED_PARTITION
+ // map to the same thing.
+ BLOCK_SIZE bsize = get_nonrd_var_based_fixed_partition(cpi,
+ mi_row,
+ mi_col);
+ nonrd_use_partition(cpi, tile, tp, mi_row, mi_col,
+ bsize, &dummy_rate, &dummy_dist);
+ } else {
+ assert(0);
+ }
}
}
+// end RTC play code
-
-static void encode_rtc_frame_internal(VP9_COMP *cpi) {
+static void encode_frame_internal(VP9_COMP *cpi) {
int mi_row;
- MACROBLOCK * const x = &cpi->mb;
- VP9_COMMON * const cm = &cpi->common;
- MACROBLOCKD * const xd = &x->e_mbd;
+ MACROBLOCK *const x = &cpi->mb;
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
// cpi->common.current_video_frame, cpi->common.show_frame,
// cm->frame_type);
-// debug output
-#if DBG_PRNT_SEGMAP
- {
- FILE *statsfile;
- statsfile = fopen("segmap2.stt", "a");
- fprintf(statsfile, "\n");
- fclose(statsfile);
- }
-#endif
-
vp9_zero(cm->counts.switchable_interp);
vp9_zero(cpi->tx_stepdown_count);
@@ -2615,7 +2433,7 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
xd->last_mi = cm->prev_mi;
- vp9_zero(cpi->common.counts.mv);
+ vp9_zero(cm->counts.mv);
vp9_zero(cpi->coef_counts);
vp9_zero(cm->counts.eob_branch);
@@ -2628,7 +2446,6 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
vp9_initialize_rd_consts(cpi);
vp9_initialize_me_consts(cpi, cm->base_qindex);
switch_tx_mode(cpi);
- cpi->sf.always_this_block_size = BLOCK_16X16;
if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
// Initialize encode frame context.
@@ -2648,6 +2465,22 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
set_prev_mi(cm);
+ if (cpi->sf.use_nonrd_pick_mode) {
+ // Initialize internal buffer pointers for rtc coding, where non-RD
+ // mode decision is used and hence no buffer pointer swap needed.
+ int i;
+ struct macroblock_plane *const p = x->plane;
+ struct macroblockd_plane *const pd = xd->plane;
+ PICK_MODE_CONTEXT *ctx = &cpi->mb.sb64_context;
+
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ p[i].coeff = ctx->coeff_pbuf[i][0];
+ p[i].qcoeff = ctx->qcoeff_pbuf[i][0];
+ pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
+ p[i].eobs = ctx->eobs_pbuf[i][0];
+ }
+ }
+
{
struct vpx_usec_timer emr_timer;
vpx_usec_timer_start(&emr_timer);
@@ -2667,9 +2500,12 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
// For each row of SBs in the frame
vp9_tile_init(&tile, cm, tile_row, tile_col);
for (mi_row = tile.mi_row_start;
- mi_row < tile.mi_row_end; mi_row += 8)
- encode_rtc_sb_row(cpi, &tile, mi_row, &tp);
-
+ mi_row < tile.mi_row_end; mi_row += MI_BLOCK_SIZE) {
+ if (cpi->sf.use_nonrd_pick_mode)
+ encode_nonrd_sb_row(cpi, &tile, mi_row, &tp);
+ else
+ encode_rd_sb_row(cpi, &tile, mi_row, &tp);
+ }
cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
}
@@ -2699,8 +2535,6 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
cpi->last_frame_distortion = cpi->frame_distortion;
#endif
}
-// end RTC play code
-
void vp9_encode_frame(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
@@ -2725,7 +2559,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
}
}
- if (cpi->sf.RD) {
+ if (cpi->sf.frame_parameter_update) {
int i;
REFERENCE_MODE reference_mode;
/*
@@ -2775,10 +2609,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
select_tx_mode(cpi);
cm->reference_mode = reference_mode;
- if (cpi->sf.super_fast_rtc)
- encode_rtc_frame_internal(cpi);
- else
- encode_frame_internal(cpi);
+ encode_frame_internal(cpi);
for (i = 0; i < REFERENCE_MODES; ++i) {
const int diff = (int) (cpi->rd_comp_pred_diff[i] / cm->MBs);
@@ -2858,10 +2689,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
} else {
// Force the usage of the BILINEAR interp_filter.
cm->interp_filter = BILINEAR;
- if (cpi->sf.super_fast_rtc)
- encode_rtc_frame_internal(cpi);
- else
- encode_frame_internal(cpi);
+ encode_frame_internal(cpi);
}
}
@@ -2936,9 +2764,10 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
const int mis = cm->mode_info_stride;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
+
x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 &&
(cpi->oxcf.aq_mode != COMPLEXITY_AQ) &&
- !cpi->sf.super_fast_rtc;
+ !cpi->sf.use_nonrd_pick_mode;
x->skip_optimize = ctx->is_coded;
ctx->is_coded = 1;
x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
@@ -2969,11 +2798,13 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
}
if (!is_inter_block(mbmi)) {
- mbmi->skip_coeff = 1;
- vp9_encode_intra_block_y(x, MAX(bsize, BLOCK_8X8));
- vp9_encode_intra_block_uv(x, MAX(bsize, BLOCK_8X8));
+ int plane;
+ mbmi->skip = 1;
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+ vp9_encode_intra_block_plane(x, MAX(bsize, BLOCK_8X8), plane);
if (output_enabled)
sum_intra_stats(&cm->counts, mi);
+ vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8));
} else {
int ref;
const int is_compound = has_second_ref(mbmi);
@@ -2983,26 +2814,24 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
setup_pre_planes(xd, ref, cfg, mi_row, mi_col, &xd->block_refs[ref]->sf);
}
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
- }
- if (!is_inter_block(mbmi)) {
- vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8));
- } else if (!x->skip) {
- mbmi->skip_coeff = 1;
- vp9_encode_sb(x, MAX(bsize, BLOCK_8X8));
- vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8));
- } else {
- mbmi->skip_coeff = 1;
- if (output_enabled)
- cm->counts.skip[vp9_get_skip_context(xd)][1]++;
- reset_skip_context(xd, MAX(bsize, BLOCK_8X8));
+ if (!x->skip) {
+ mbmi->skip = 1;
+ vp9_encode_sb(x, MAX(bsize, BLOCK_8X8));
+ vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8));
+ } else {
+ mbmi->skip = 1;
+ if (output_enabled)
+ cm->counts.skip[vp9_get_skip_context(xd)][1]++;
+ reset_skip_context(xd, MAX(bsize, BLOCK_8X8));
+ }
}
if (output_enabled) {
if (cm->tx_mode == TX_MODE_SELECT &&
mbmi->sb_type >= BLOCK_8X8 &&
!(is_inter_block(mbmi) &&
- (mbmi->skip_coeff ||
+ (mbmi->skip ||
vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)))) {
++get_tx_counts(max_txsize_lookup[bsize], vp9_get_tx_size_context(xd),
&cm->counts.tx)[mbmi->tx_size];
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 376a899e0..13eabe05d 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -19,29 +19,39 @@
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_systemdependent.h"
-#include "vp9/encoder/vp9_dct.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_tokenize.h"
+struct optimize_ctx {
+ ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
+ ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
+};
+
+struct encode_b_args {
+ MACROBLOCK *x;
+ struct optimize_ctx *ctx;
+ unsigned char *skip;
+};
+
void vp9_subtract_block_c(int rows, int cols,
- int16_t *diff_ptr, ptrdiff_t diff_stride,
- const uint8_t *src_ptr, ptrdiff_t src_stride,
- const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
+ int16_t *diff, ptrdiff_t diff_stride,
+ const uint8_t *src, ptrdiff_t src_stride,
+ const uint8_t *pred, ptrdiff_t pred_stride) {
int r, c;
for (r = 0; r < rows; r++) {
for (c = 0; c < cols; c++)
- diff_ptr[c] = src_ptr[c] - pred_ptr[c];
+ diff[c] = src[c] - pred[c];
- diff_ptr += diff_stride;
- pred_ptr += pred_stride;
- src_ptr += src_stride;
+ diff += diff_stride;
+ pred += pred_stride;
+ src += src_stride;
}
}
-static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
+void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
@@ -52,22 +62,6 @@ static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
pd->dst.buf, pd->dst.stride);
}
-void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE bsize) {
- subtract_plane(x, bsize, 0);
-}
-
-void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE bsize) {
- int i;
-
- for (i = 1; i < MAX_MB_PLANE; i++)
- subtract_plane(x, bsize, i);
-}
-
-void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
- vp9_subtract_sby(x, bsize);
- vp9_subtract_sbuv(x, bsize);
-}
-
#define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
typedef struct vp9_token_state vp9_token_state;
@@ -111,19 +105,18 @@ static int trellis_get_coeff_context(const int16_t *scan,
return pt;
}
-static void optimize_b(MACROBLOCK *mb,
- int plane, int block, BLOCK_SIZE plane_bsize,
- ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
- TX_SIZE tx_size) {
+static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, MACROBLOCK *mb,
+ struct optimize_ctx *ctx) {
MACROBLOCKD *const xd = &mb->e_mbd;
struct macroblock_plane *p = &mb->plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi);
vp9_token_state tokens[1025][2];
unsigned best_index[1025][2];
- const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, block);
- int16_t *qcoeff_ptr;
- int16_t *dqcoeff_ptr;
+ const int16_t *coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
+ int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+ int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
int eob = p->eobs[block], final_eob, sz = 0;
const int i0 = 0;
int rc, x, next, i;
@@ -133,7 +126,6 @@ static void optimize_b(MACROBLOCK *mb,
PLANE_TYPE type = pd->plane_type;
int err_mult = plane_rd_mult[type];
const int default_eob = 16 << (tx_size << 1);
-
const int mul = 1 + (tx_size == TX_32X32);
uint8_t token_cache[1024];
const int16_t *dequant_ptr = pd->dequant;
@@ -141,10 +133,13 @@ static void optimize_b(MACROBLOCK *mb,
const scan_order *so = get_scan(xd, tx_size, type, block);
const int16_t *scan = so->scan;
const int16_t *nb = so->neighbors;
+ ENTROPY_CONTEXT *a, *l;
+ int tx_x, tx_y;
+ txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &tx_x, &tx_y);
+ a = &ctx->ta[plane][tx_x];
+ l = &ctx->tl[plane][tx_y];
assert((!type && !plane) || (type && plane));
- dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block);
- qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block);
assert(eob <= default_eob);
/* Now set up a Viterbi trellis to evaluate alternative roundings. */
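For orientation before the DP that follows: optimize_b() runs a two-state Viterbi trellis over the coefficients, where each position may either keep its rounded quantization level or shrink its magnitude by one, and the cheaper predecessor is chosen per state under an RD cost. A stripped-down sketch of that pattern, with a made-up cost() standing in for the token rate and dequantization error of the real code, a fixed NUM_COEFFS, and none of the real EOB handling:

#include <stdint.h>

#define NUM_COEFFS 16

/* Made-up cost model: squared level plus a small token penalty. The real
 * encoder uses entropy-coded rate and dequantized distortion here. */
static int64_t cost(int i, int lvl, int prev_state) {
  (void)i;
  return (int64_t)lvl * lvl + 4 * (lvl != 0) + prev_state;
}

/* Two-state Viterbi: state 0 keeps the rounded level, state 1 shrinks its
 * magnitude by one. backptr[] records the cheaper predecessor per state. */
static void trellis_optimize(const int levels[NUM_COEFFS],
                             int best_levels[NUM_COEFFS]) {
  int64_t cost_so_far[2] = { 0, 0 };
  int backptr[NUM_COEFFS][2];
  int i, s;

  for (i = 0; i < NUM_COEFFS; ++i) {
    const int shrink = levels[i] > 0 ? 1 : levels[i] < 0 ? -1 : 0;
    int64_t next_cost[2];
    for (s = 0; s < 2; ++s) {
      const int lvl = levels[i] - s * shrink;
      const int64_t via0 = cost_so_far[0] + cost(i, lvl, 0);
      const int64_t via1 = cost_so_far[1] + cost(i, lvl, 1);
      backptr[i][s] = via1 < via0;
      next_cost[s] = via1 < via0 ? via1 : via0;
    }
    cost_so_far[0] = next_cost[0];
    cost_so_far[1] = next_cost[1];
  }

  /* Trace the cheaper terminal state back to the first coefficient. */
  s = cost_so_far[1] < cost_so_far[0];
  for (i = NUM_COEFFS - 1; i >= 0; --i) {
    const int shrink = levels[i] > 0 ? 1 : levels[i] < 0 ? -1 : 0;
    best_levels[i] = levels[i] - s * shrink;
    s = backptr[i][s];
  }
}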
@@ -162,13 +157,13 @@ static void optimize_b(MACROBLOCK *mb,
next = eob;
for (i = 0; i < eob; i++)
token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[
- qcoeff_ptr[scan[i]]].token];
+ qcoeff[scan[i]]].token];
for (i = eob; i-- > i0;) {
int base_bits, d2, dx;
rc = scan[i];
- x = qcoeff_ptr[rc];
+ x = qcoeff[rc];
/* Only add a trellis state for non-zero coefficients. */
if (x) {
int shortcut = 0;
@@ -193,7 +188,7 @@ static void optimize_b(MACROBLOCK *mb,
/* And pick the best. */
best = rd_cost1 < rd_cost0;
base_bits = *(vp9_dct_value_cost_ptr + x);
- dx = mul * (dqcoeff_ptr[rc] - coeff_ptr[rc]);
+ dx = mul * (dqcoeff[rc] - coeff[rc]);
d2 = dx * dx;
tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
tokens[i][0].error = d2 + (best ? error1 : error0);
@@ -206,8 +201,8 @@ static void optimize_b(MACROBLOCK *mb,
rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate;
- if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc]) * mul) &&
- (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) * mul +
+ if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
+ (abs(x)*dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
dequant_ptr[rc != 0]))
shortcut = 1;
else
@@ -296,16 +291,16 @@ static void optimize_b(MACROBLOCK *mb,
UPDATE_RD_COST();
best = rd_cost1 < rd_cost0;
final_eob = i0 - 1;
- vpx_memset(qcoeff_ptr, 0, sizeof(*qcoeff_ptr) * (16 << (tx_size * 2)));
- vpx_memset(dqcoeff_ptr, 0, sizeof(*dqcoeff_ptr) * (16 << (tx_size * 2)));
+ vpx_memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2)));
+ vpx_memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2)));
for (i = next; i < eob; i = next) {
x = tokens[i][best].qc;
if (x) {
final_eob = i;
}
rc = scan[i];
- qcoeff_ptr[rc] = x;
- dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul;
+ qcoeff[rc] = x;
+ dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;
next = tokens[i][best].next;
best = best_index[i][best];
@@ -316,60 +311,39 @@ static void optimize_b(MACROBLOCK *mb,
*a = *l = (final_eob > 0);
}
-void vp9_optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, MACROBLOCK *mb, struct optimize_ctx *ctx) {
- int x, y;
- txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
- optimize_b(mb, plane, block, plane_bsize,
- &ctx->ta[plane][x], &ctx->tl[plane][y], tx_size);
+static INLINE void fdct32x32(int rd_transform,
+ const int16_t *src, int16_t *dst, int src_stride) {
+ if (rd_transform)
+ vp9_fdct32x32_rd(src, dst, src_stride);
+ else
+ vp9_fdct32x32(src, dst, src_stride);
}
-static void optimize_init_b(int plane, BLOCK_SIZE bsize,
- struct encode_b_args *args) {
- const MACROBLOCKD *xd = &args->x->e_mbd;
- const struct macroblockd_plane* const pd = &xd->plane[plane];
- const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
- const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
- const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
- const MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
- const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size;
-
- vp9_get_entropy_contexts(tx_size, args->ctx->ta[plane], args->ctx->tl[plane],
- pd->above_context, pd->left_context,
- num_4x4_w, num_4x4_h);
-}
-void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, void *arg) {
- struct encode_b_args* const args = arg;
- MACROBLOCK* const x = args->x;
- MACROBLOCKD* const xd = &x->e_mbd;
- struct macroblock_plane *const p = &x->plane[plane];
- struct macroblockd_plane *const pd = &xd->plane[plane];
- int16_t *coeff = BLOCK_OFFSET(p->coeff, block);
- int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
- int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- const scan_order *scan_order;
- uint16_t *eob = &p->eobs[block];
+void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const struct macroblock_plane *const p = &x->plane[plane];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
+ int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+ int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+ int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
int i, j;
- int16_t *src_diff;
+ const int16_t *src_diff;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
src_diff = &p->src_diff[4 * (j * diff_stride + i)];
switch (tx_size) {
case TX_32X32:
- scan_order = &vp9_default_scan_orders[TX_32X32];
- if (x->use_lp32x32fdct)
- vp9_fdct32x32_rd(src_diff, coeff, diff_stride);
- else
- vp9_fdct32x32(src_diff, coeff, diff_stride);
+ fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan_order->scan,
scan_order->iscan);
break;
case TX_16X16:
- scan_order = &vp9_default_scan_orders[TX_16X16];
vp9_fdct16x16(src_diff, coeff, diff_stride);
vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
@@ -377,7 +351,6 @@ void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize,
scan_order->scan, scan_order->iscan);
break;
case TX_8X8:
- scan_order = &vp9_default_scan_orders[TX_8X8];
vp9_fdct8x8(src_diff, coeff, diff_stride);
vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
@@ -385,7 +358,6 @@ void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize,
scan_order->scan, scan_order->iscan);
break;
case TX_4X4:
- scan_order = &vp9_default_scan_orders[TX_4X4];
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
@@ -421,17 +393,17 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
}
if (!x->skip_recode)
- vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
+ vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
- vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx);
+ optimize_b(plane, block, plane_bsize, tx_size, x, ctx);
} else {
ctx->ta[plane][i] = p->eobs[block] > 0;
ctx->tl[plane][j] = p->eobs[block] > 0;
}
if (p->eobs[block])
- *(args->skip_coeff) = 0;
+ *(args->skip) = 0;
if (x->skip_encode || p->eobs[block] == 0)
return;
@@ -458,8 +430,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
}
static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
- struct encode_b_args *const args = arg;
- MACROBLOCK *const x = args->x;
+ MACROBLOCK *const x = (MACROBLOCK *)arg;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
@@ -469,48 +440,43 @@ static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
- vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
+ vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
- if (p->eobs[block] == 0)
- return;
-
- xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
+ if (p->eobs[block] > 0)
+ xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
}
-void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize) {
- MACROBLOCKD *const xd = &x->e_mbd;
- struct optimize_ctx ctx;
- MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
- struct encode_b_args arg = {x, &ctx, &mbmi->skip_coeff};
-
- vp9_subtract_sby(x, bsize);
- if (x->optimize)
- optimize_init_b(0, bsize, &arg);
-
- vp9_foreach_transformed_block_in_plane(xd, bsize, 0, encode_block_pass1,
- &arg);
+void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
+ vp9_subtract_plane(x, bsize, 0);
+ vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
+ encode_block_pass1, x);
}
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
MACROBLOCKD *const xd = &x->e_mbd;
struct optimize_ctx ctx;
MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
- struct encode_b_args arg = {x, &ctx, &mbmi->skip_coeff};
-
- if (!x->skip_recode)
- vp9_subtract_sb(x, bsize);
+ struct encode_b_args arg = {x, &ctx, &mbmi->skip};
+ int plane;
+
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ if (!x->skip_recode)
+ vp9_subtract_plane(x, bsize, plane);
+
+ if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
+ const struct macroblockd_plane* const pd = &xd->plane[plane];
+ const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size;
+ vp9_get_entropy_contexts(bsize, tx_size, pd,
+ ctx.ta[plane], ctx.tl[plane]);
+ }
- if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
- int i;
- for (i = 0; i < MAX_MB_PLANE; ++i)
- optimize_init_b(i, bsize, &arg);
+ vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
+ &arg);
}
-
- vp9_foreach_transformed_block(xd, bsize, encode_block, &arg);
}
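/* Editor's sketch, not part of this patch: the control flow vp9_encode_sb()
 * now follows, one plane at a time. The callback types are hypothetical
 * stand-ins for the real subtract / entropy-context / foreach helpers used
 * above, and the skip_recode/skip_optimize guards are folded into the two
 * flags for brevity. */
typedef void (*plane_fn)(int plane);

static void encode_sb_shape(int num_planes, int recode, int optimize,
                            plane_fn subtract, plane_fn load_contexts,
                            plane_fn encode_blocks) {
  int plane;
  for (plane = 0; plane < num_planes; ++plane) {
    if (recode)
      subtract(plane);         /* residual = source - prediction */
    if (optimize)
      load_contexts(plane);    /* ta[]/tl[] feeding the trellis costing */
    encode_blocks(plane);      /* xform+quant (+optimize) per tx block */
  }
}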
-void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, void *arg) {
+static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, void *arg) {
struct encode_b_args* const args = arg;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -528,14 +494,16 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
uint8_t *src, *dst;
int16_t *src_diff;
uint16_t *eob = &p->eobs[block];
+ const int src_stride = p->src.stride;
+ const int dst_stride = pd->dst.stride;
int i, j;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
- dst = &pd->dst.buf[4 * (j * pd->dst.stride + i)];
- src = &p->src.buf[4 * (j * p->src.stride + i)];
+ dst = &pd->dst.buf[4 * (j * dst_stride + i)];
+ src = &p->src.buf[4 * (j * src_stride + i)];
src_diff = &p->src_diff[4 * (j * diff_stride + i)];
// if (x->optimize)
- // vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx);
+ // optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx);
switch (tx_size) {
case TX_32X32:
@@ -543,22 +511,19 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode,
x->skip_encode ? src : dst,
- x->skip_encode ? p->src.stride : pd->dst.stride,
- dst, pd->dst.stride, i, j, plane);
+ x->skip_encode ? src_stride : dst_stride,
+ dst, dst_stride, i, j, plane);
if (!x->skip_recode) {
vp9_subtract_block(32, 32, src_diff, diff_stride,
- src, p->src.stride, dst, pd->dst.stride);
- if (x->use_lp32x32fdct)
- vp9_fdct32x32_rd(src_diff, coeff, diff_stride);
- else
- vp9_fdct32x32(src_diff, coeff, diff_stride);
+ src, src_stride, dst, dst_stride);
+ fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan_order->scan,
scan_order->iscan);
}
if (!x->skip_encode && *eob)
- vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, *eob);
+ vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
break;
case TX_16X16:
tx_type = get_tx_type_16x16(pd->plane_type, xd);
@@ -566,19 +531,19 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode,
x->skip_encode ? src : dst,
- x->skip_encode ? p->src.stride : pd->dst.stride,
- dst, pd->dst.stride, i, j, plane);
+ x->skip_encode ? src_stride : dst_stride,
+ dst, dst_stride, i, j, plane);
if (!x->skip_recode) {
vp9_subtract_block(16, 16, src_diff, diff_stride,
- src, p->src.stride, dst, pd->dst.stride);
- vp9_fht16x16(tx_type, src_diff, coeff, diff_stride);
+ src, src_stride, dst, dst_stride);
+ vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan_order->scan,
scan_order->iscan);
}
if (!x->skip_encode && *eob)
- vp9_iht16x16_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
+ vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
break;
case TX_8X8:
tx_type = get_tx_type_8x8(pd->plane_type, xd);
@@ -586,19 +551,19 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode,
x->skip_encode ? src : dst,
- x->skip_encode ? p->src.stride : pd->dst.stride,
- dst, pd->dst.stride, i, j, plane);
+ x->skip_encode ? src_stride : dst_stride,
+ dst, dst_stride, i, j, plane);
if (!x->skip_recode) {
vp9_subtract_block(8, 8, src_diff, diff_stride,
- src, p->src.stride, dst, pd->dst.stride);
- vp9_fht8x8(tx_type, src_diff, coeff, diff_stride);
+ src, src_stride, dst, dst_stride);
+ vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan_order->scan,
scan_order->iscan);
}
if (!x->skip_encode && *eob)
- vp9_iht8x8_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
+ vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
break;
case TX_4X4:
tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
@@ -610,14 +575,14 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
x->skip_encode ? src : dst,
- x->skip_encode ? p->src.stride : pd->dst.stride,
- dst, pd->dst.stride, i, j, plane);
+ x->skip_encode ? src_stride : dst_stride,
+ dst, dst_stride, i, j, plane);
if (!x->skip_recode) {
vp9_subtract_block(4, 4, src_diff, diff_stride,
- src, p->src.stride, dst, pd->dst.stride);
+ src, src_stride, dst, dst_stride);
if (tx_type != DCT_DCT)
- vp9_short_fht4x4(src_diff, coeff, diff_stride, tx_type);
+ vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
else
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
@@ -631,33 +596,32 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
// this is like vp9_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
// case.
- xd->itxm_add(dqcoeff, dst, pd->dst.stride, *eob);
+ xd->itxm_add(dqcoeff, dst, dst_stride, *eob);
else
- vp9_iht4x4_16_add(dqcoeff, dst, pd->dst.stride, tx_type);
+ vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
}
break;
default:
assert(0);
}
if (*eob)
- *(args->skip_coeff) = 0;
+ *(args->skip) = 0;
}
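/* Editor's sketch, not part of this patch: the per-transform-size cases in
 * encode_block_intra() above all share this sequence; the function-pointer
 * struct is a hypothetical simplification of the real vp9 entry points. */
struct intra_tx_ops {
  void (*predict)(void);       /* vp9_predict_intra_block(...) */
  void (*subtract)(void);      /* vp9_subtract_block(...) */
  void (*fwd_txfm)(void);      /* fdct or fht, chosen by tx_type */
  void (*quantize)(void);      /* vp9_quantize_b / vp9_quantize_b_32x32 */
  void (*inv_txfm_add)(void);  /* idct/iht add into the prediction */
};

static void intra_tx_block_sketch(const struct intra_tx_ops *ops,
                                  int skip_recode, int skip_encode, int eob) {
  ops->predict();              /* prediction lands in dst */
  if (!skip_recode) {
    ops->subtract();           /* residual = src - dst */
    ops->fwd_txfm();
    ops->quantize();           /* fills qcoeff/dqcoeff and the eob */
  }
  if (!skip_encode && eob > 0)
    ops->inv_txfm_add();       /* reconstruct: dst += inv_txfm(dqcoeff) */
}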
-void vp9_encode_intra_block_y(MACROBLOCK *x, BLOCK_SIZE bsize) {
- MACROBLOCKD* const xd = &x->e_mbd;
- struct optimize_ctx ctx;
- MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
- struct encode_b_args arg = {x, &ctx, &mbmi->skip_coeff};
-
- vp9_foreach_transformed_block_in_plane(xd, bsize, 0, vp9_encode_block_intra,
- &arg);
+void vp9_encode_block_intra(MACROBLOCK *x, int plane, int block,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ unsigned char *skip) {
+ struct encode_b_args arg = {x, NULL, skip};
+ encode_block_intra(plane, block, plane_bsize, tx_size, &arg);
}
-void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize) {
- MACROBLOCKD* const xd = &x->e_mbd;
- struct optimize_ctx ctx;
- MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
- struct encode_b_args arg = {x, &ctx, &mbmi->skip_coeff};
- vp9_foreach_transformed_block_uv(xd, bsize, vp9_encode_block_intra, &arg);
+
+
+void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ struct encode_b_args arg = {x, NULL, &xd->mi_8x8[0]->mbmi.skip};
+
+ vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block_intra,
+ &arg);
}
int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred) {
@@ -668,6 +632,6 @@ int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred) {
mbmi->tx_size = use_16x16_pred ? (mbmi->sb_type >= BLOCK_16X16 ? TX_16X16
: TX_8X8)
: TX_4X4;
- vp9_encode_intra_block_y(x, mbmi->sb_type);
+ vp9_encode_intra_block_plane(x, mbmi->sb_type, 0);
return vp9_get_mb_ss(x->plane[0].src_diff);
}
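/* Editor's sketch, not part of this patch: vp9_encode_intra() returns the
 * sum of squared luma residual samples. For the 16x16 macroblock handled
 * here that is 256 values; this is what vp9_get_mb_ss() is assumed to
 * compute over src_diff. */
#include <stdint.h>

static unsigned int mb_sum_squares_sketch(const int16_t *src_diff) {
  unsigned int ss = 0;
  int i;
  for (i = 0; i < 16 * 16; ++i)
    ss += (unsigned int)(src_diff[i] * src_diff[i]);
  return ss;
}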
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index 9f6c9f069..dcf6e8759 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -20,32 +20,19 @@
extern "C" {
#endif
-struct optimize_ctx {
- ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
- ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
-};
-
-struct encode_b_args {
- MACROBLOCK *x;
- struct optimize_ctx *ctx;
- unsigned char *skip_coeff;
-};
-
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize);
-void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize);
+void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize);
-void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, void *arg);
+void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
-void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE bsize);
-void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE bsize);
-void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE bsize);
+void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
-void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, void *arg);
+void vp9_encode_block_intra(MACROBLOCK *x, int plane, int block,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ unsigned char *skip);
-void vp9_encode_intra_block_y(MACROBLOCK *x, BLOCK_SIZE bsize);
-void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize);
+void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred);
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index af710a8f4..be6abc2a1 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -224,18 +224,11 @@ void vp9_encode_mv(VP9_COMP* cpi, vp9_writer* w,
}
}
-void vp9_build_nmv_cost_table(int *mvjoint,
- int *mvcost[2],
- const nmv_context* const mvctx,
- int usehp,
- int mvc_flag_v,
- int mvc_flag_h) {
- vp9_clear_system_state();
- vp9_cost_tokens(mvjoint, mvctx->joints, vp9_mv_joint_tree);
- if (mvc_flag_v)
- build_nmv_component_cost_table(mvcost[0], &mvctx->comps[0], usehp);
- if (mvc_flag_h)
- build_nmv_component_cost_table(mvcost[1], &mvctx->comps[1], usehp);
+void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
+ const nmv_context* ctx, int usehp) {
+ vp9_cost_tokens(mvjoint, ctx->joints, vp9_mv_joint_tree);
+ build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], usehp);
+ build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], usehp);
}
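/* Editor's sketch, not part of this patch: "building a cost table" here
 * means converting branch probabilities of the mv joint/component trees
 * into fixed-point bit costs. The 1/256th-bit scale below is an assumption
 * for illustration, not the exact vp9_cost_tokens() internals. */
#include <math.h>

static int prob_to_cost_sketch(double p) {  /* p in (0, 1] */
  return (int)(-log2(p) * 256.0 + 0.5);     /* bits, scaled by 256 */
}
/* A symbol's table entry is the sum of branch costs along its tree path,
 * e.g. two branches taken with probabilities 0.8 and 0.3 cost about
 * 82 + 445 in this scale. */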
static void inc_mvs(int_mv mv[2], int_mv ref[2], int is_compound,
diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h
index f0463bbd3..7f997ff37 100644
--- a/vp9/encoder/vp9_encodemv.h
+++ b/vp9/encoder/vp9_encodemv.h
@@ -25,12 +25,8 @@ void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w);
void vp9_encode_mv(VP9_COMP *cpi, vp9_writer* w, const MV* mv, const MV* ref,
const nmv_context* mvctx, int usehp);
-void vp9_build_nmv_cost_table(int *mvjoint,
- int *mvcost[2],
- const nmv_context* const mvctx,
- int usehp,
- int mvc_flag_v,
- int mvc_flag_h);
+void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
+ const nmv_context* mvctx, int usehp);
void vp9_update_mv_count(VP9_COMP *cpi, MACROBLOCK *x, int_mv best_ref_mv[2]);
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 153046440..32ed96999 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -49,8 +49,9 @@
#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001)
-#define MIN_BOOST 300
-#define KEY_FRAME_BOOST 2000
+#define MIN_KF_BOOST 300
+
+#define DISABLE_RC_LONG_TERM_MEM 0
static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) {
YV12_BUFFER_CONFIG temp = *a;
@@ -64,7 +65,7 @@ static int select_cq_level(int qindex) {
double target_q = (vp9_convert_qindex_to_q(qindex) * 0.5847) + 1.0;
- for (i = 0; i < QINDEX_RANGE; i++) {
+ for (i = 0; i < QINDEX_RANGE; ++i) {
if (target_q <= vp9_convert_qindex_to_q(i)) {
ret_val = i;
break;
@@ -105,12 +106,12 @@ static int lookup_next_frame_stats(const struct twopass_rc *p,
}
-// Read frame stats at an offset from the current position
+// Read frame stats at an offset from the current position.
static int read_frame_stats(const struct twopass_rc *p,
FIRSTPASS_STATS *frame_stats, int offset) {
const FIRSTPASS_STATS *fps_ptr = p->stats_in;
- // Check legality of offset
+ // Check legality of offset.
if (offset >= 0) {
if (&fps_ptr[offset] >= p->stats_in_end)
return EOF;
@@ -132,9 +133,9 @@ static int input_stats(struct twopass_rc *p, FIRSTPASS_STATS *fps) {
return 1;
}
-static void output_stats(const VP9_COMP *cpi,
+static void output_stats(const VP9_COMP *cpi,
struct vpx_codec_pkt_list *pktlist,
- FIRSTPASS_STATS *stats) {
+ FIRSTPASS_STATS *stats) {
struct vpx_codec_cx_pkt pkt;
pkt.kind = VPX_CODEC_STATS_PKT;
pkt.data.twopass_stats.buf = stats;
@@ -143,7 +144,6 @@ static void output_stats(const VP9_COMP *cpi,
// TEMP debug code
#if OUTPUT_FPF
-
{
FILE *fpfile;
fpfile = fopen("firstpass.stt", "a");
@@ -265,9 +265,9 @@ static void avg_stats(FIRSTPASS_STATS *section) {
// Calculate a modified Error used in distributing bits between easier and
// harder frames.
-static double calculate_modified_err(VP9_COMP *cpi,
- FIRSTPASS_STATS *this_frame) {
- struct twopass_rc *const twopass = &cpi->twopass;
+static double calculate_modified_err(const VP9_COMP *cpi,
+ const FIRSTPASS_STATS *this_frame) {
+ const struct twopass_rc *const twopass = &cpi->twopass;
const FIRSTPASS_STATS *const stats = &twopass->total_stats;
const double av_err = stats->ssim_weighted_pred_err / stats->count;
double modified_error = av_err * pow(this_frame->ssim_weighted_pred_err /
@@ -336,7 +336,7 @@ static double simple_weight(const YV12_BUFFER_CONFIG *buf) {
}
// This function returns the maximum target rate per frame.
-static int frame_max_bits(VP9_COMP *cpi) {
+static int frame_max_bits(const VP9_COMP *cpi) {
int64_t max_bits =
((int64_t)cpi->rc.av_per_frame_bandwidth *
(int64_t)cpi->oxcf.two_pass_vbrmax_section) / 100;
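/* Editor's worked example, not part of this patch, for the cap computed
 * above: with a hypothetical average per-frame budget of 40000 bits and
 * two_pass_vbrmax_section == 400 (a single frame may use up to 400% of the
 * average), the cap is 160000 bits. Any further clamping the function
 * applies is omitted here. */
#include <stdint.h>

static int64_t max_frame_bits_sketch(int64_t avg_per_frame_bits,
                                     int vbrmax_section_pct) {
  return avg_per_frame_bits * vbrmax_section_pct / 100;
}
/* max_frame_bits_sketch(40000, 400) == 160000 */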
@@ -376,7 +376,6 @@ static unsigned int zz_motion_search(const VP9_COMP *cpi, const MACROBLOCK *x) {
const int src_stride = x->plane[0].src.stride;
const uint8_t *const ref = xd->plane[0].pre[0].buf;
const int ref_stride = xd->plane[0].pre[0].stride;
-
unsigned int sse;
vp9_variance_fn_t fn = get_block_variance_fn(xd->mi_8x8[0]->mbmi.sb_type);
fn(src, src_stride, ref, ref_stride, &sse);
@@ -397,18 +396,18 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
int new_mv_mode_penalty = 256;
const int quart_frm = MIN(cpi->common.width, cpi->common.height);
- // refine the motion search range accroding to the frame dimension
- // for first pass test
+ // Refine the motion search range according to the frame dimension
+ // for first pass test.
while ((quart_frm << sr) < MAX_FULL_PEL_VAL)
- sr++;
+ ++sr;
step_param += sr;
further_steps -= sr;
- // override the default variance function to use MSE
+ // Override the default variance function to use MSE.
v_fn_ptr.vf = get_block_variance_fn(bsize);
- // Initial step/diamond search centred on best mv
+ // Center the initial step/diamond search on best mv.
tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv,
step_param,
x->sadperbit16, &num00, &v_fn_ptr,
@@ -423,15 +422,15 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
best_mv->col = tmp_mv.col;
}
- // Further step/diamond searches as necessary
+ // Carry out further step/diamond searches as necessary.
n = num00;
num00 = 0;
while (n < further_steps) {
- n++;
+ ++n;
if (num00) {
- num00--;
+ --num00;
} else {
tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv,
step_param + n, x->sadperbit16,
@@ -468,7 +467,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
TileInfo tile;
struct macroblock_plane *const p = x->plane;
struct macroblockd_plane *const pd = xd->plane;
- PICK_MODE_CONTEXT *ctx = &x->sb64_context;
+ const PICK_MODE_CONTEXT *ctx = &x->sb64_context;
int i;
int recon_yoffset, recon_uvoffset;
@@ -496,14 +495,14 @@ void vp9_first_pass(VP9_COMP *cpi) {
struct twopass_rc *const twopass = &cpi->twopass;
const MV zero_mv = {0, 0};
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
vp9_setup_src_planes(x, cpi->Source, 0, 0);
setup_pre_planes(xd, 0, lst_yv12, 0, 0, NULL);
setup_dst_planes(xd, new_yv12, 0, 0);
xd->mi_8x8 = cm->mi_grid_visible;
- xd->mi_8x8[0] = cm->mi; // required for vp9_frame_init_quantizer
+ xd->mi_8x8[0] = cm->mi;
vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
@@ -520,34 +519,32 @@ void vp9_first_pass(VP9_COMP *cpi) {
vp9_init_mv_probs(cm);
vp9_initialize_rd_consts(cpi);
- // tiling is ignored in the first pass
+ // Tiling is ignored in the first pass.
vp9_tile_init(&tile, cm, 0, 0);
- // for each macroblock row in image
- for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
+ for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
int_mv best_ref_mv;
best_ref_mv.as_int = 0;
- // reset above block coeffs
+ // Reset above block coeffs.
xd->up_available = (mb_row != 0);
recon_yoffset = (mb_row * recon_y_stride * 16);
recon_uvoffset = (mb_row * recon_uv_stride * uv_mb_height);
// Set up limit values for motion vectors to prevent them extending
- // outside the UMV borders
+ // outside the UMV borders.
x->mv_row_min = -((mb_row * 16) + BORDER_MV_PIXELS_B16);
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
+ BORDER_MV_PIXELS_B16;
- // for each macroblock col in image
- for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
+ for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
int this_error;
const int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
double error_weight = 1.0;
const BLOCK_SIZE bsize = get_bsize(cm, mb_row, mb_col);
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset;
@@ -565,15 +562,15 @@ void vp9_first_pass(VP9_COMP *cpi) {
error_weight = vp9_vaq_inv_q_ratio(energy);
}
- // do intra 16x16 prediction
+ // Do intra 16x16 prediction.
this_error = vp9_encode_intra(x, use_dc_pred);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
- vp9_clear_system_state(); // __asm emms;
- this_error *= error_weight;
+ vp9_clear_system_state();
+ this_error = (int)(this_error * error_weight);
}
- // intrapenalty below deals with situations where the intra and inter
- // error scores are very low (eg a plain black frame).
+ // Intrapenalty below deals with situations where the intra and inter
+ // error scores are very low (e.g. a plain black frame).
// We do not have special cases in first pass for 0,0 and nearest etc so
// all inter modes carry an overhead cost estimate for the mv.
// When the error score is very low this causes us to pick all or lots of
@@ -581,7 +578,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
// This penalty adds a cost matching that of a 0,0 mv to the intra case.
this_error += intrapenalty;
- // Cumulative intra error total
+ // Accumulate the intra error.
intra_error += (int64_t)this_error;
// Set up limit values for motion vectors to prevent them extending
@@ -589,23 +586,23 @@ void vp9_first_pass(VP9_COMP *cpi) {
x->mv_col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16);
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16;
- // Other than for the first frame do a motion search
+ // Other than for the first frame do a motion search.
if (cm->current_video_frame > 0) {
int tmp_err, motion_error;
int_mv mv, tmp_mv;
xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset;
motion_error = zz_motion_search(cpi, x);
- // Simple 0,0 motion with no mv overhead
+ // Assume 0,0 motion with no mv overhead.
mv.as_int = tmp_mv.as_int = 0;
// Test last reference frame using the previous best mv as the
- // starting point (best reference) for the search
+ // starting point (best reference) for the search.
first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv,
&motion_error);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
- vp9_clear_system_state(); // __asm emms;
- motion_error *= error_weight;
+ vp9_clear_system_state();
+ motion_error = (int)(motion_error * error_weight);
}
// If the current best reference mv is not centered on 0,0 then do a 0,0
@@ -615,8 +612,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
&tmp_err);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
- vp9_clear_system_state(); // __asm emms;
- tmp_err *= error_weight;
+ vp9_clear_system_state();
+ tmp_err = (int)(tmp_err * error_weight);
}
if (tmp_err < motion_error) {
@@ -625,9 +622,9 @@ void vp9_first_pass(VP9_COMP *cpi) {
}
}
- // Experimental search in an older reference frame
+ // Search in an older reference frame.
if (cm->current_video_frame > 1) {
- // Simple 0,0 motion with no mv overhead
+ // Assume 0,0 motion with no mv overhead.
int gf_motion_error;
xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
@@ -636,22 +633,22 @@ void vp9_first_pass(VP9_COMP *cpi) {
first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
&gf_motion_error);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
- vp9_clear_system_state(); // __asm emms;
- gf_motion_error *= error_weight;
+ vp9_clear_system_state();
+ gf_motion_error = (int)(gf_motion_error * error_weight);
}
if (gf_motion_error < motion_error && gf_motion_error < this_error)
- second_ref_count++;
+ ++second_ref_count;
- // Reset to last frame as reference buffer
+ // Reset to last frame as reference buffer.
xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset;
xd->plane[1].pre[0].buf = lst_yv12->u_buffer + recon_uvoffset;
xd->plane[2].pre[0].buf = lst_yv12->v_buffer + recon_uvoffset;
- // In accumulating a score for the older reference frame
- // take the best of the motion predicted score and
- // the intra coded error (just as will be done for)
- // accumulation of "coded_error" for the last frame.
+      // In accumulating a score for the older reference frame take the
+      // best of the motion predicted score and the intra coded error
+      // (just as will be done for accumulation of "coded_error" for
+      // the last frame).
if (gf_motion_error < this_error)
sr_coded_error += gf_motion_error;
else
@@ -659,17 +656,16 @@ void vp9_first_pass(VP9_COMP *cpi) {
} else {
sr_coded_error += motion_error;
}
- /* Intra assumed best */
+ // Start by assuming that intra mode is best.
best_ref_mv.as_int = 0;
if (motion_error <= this_error) {
- // Keep a count of cases where the inter and intra were
- // very close and very low. This helps with scene cut
- // detection for example in cropped clips with black bars
- // at the sides or top and bottom.
+ // Keep a count of cases where the inter and intra were very close
+ // and very low. This helps with scene cut detection for example in
+ // cropped clips with black bars at the sides or top and bottom.
if (((this_error - intrapenalty) * 9 <= motion_error * 10) &&
this_error < 2 * intrapenalty)
- neutral_count++;
+ ++neutral_count;
mv.as_mv.row *= 8;
mv.as_mv.col *= 8;
@@ -679,50 +675,49 @@ void vp9_first_pass(VP9_COMP *cpi) {
xd->mi_8x8[0]->mbmi.ref_frame[0] = LAST_FRAME;
xd->mi_8x8[0]->mbmi.ref_frame[1] = NONE;
vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize);
- vp9_encode_sby(x, bsize);
+ vp9_encode_sby_pass1(x, bsize);
sum_mvr += mv.as_mv.row;
sum_mvr_abs += abs(mv.as_mv.row);
sum_mvc += mv.as_mv.col;
sum_mvc_abs += abs(mv.as_mv.col);
sum_mvrs += mv.as_mv.row * mv.as_mv.row;
sum_mvcs += mv.as_mv.col * mv.as_mv.col;
- intercount++;
+ ++intercount;
best_ref_mv.as_int = mv.as_int;
- // Was the vector non-zero
if (mv.as_int) {
- mvcount++;
+ ++mvcount;
- // Was it different from the last non zero vector
+          // Non-zero vector, was it different from the last non-zero vector?
if (mv.as_int != lastmv_as_int)
- new_mv_count++;
+ ++new_mv_count;
lastmv_as_int = mv.as_int;
- // Does the Row vector point inwards or outwards
+ // Does the row vector point inwards or outwards?
if (mb_row < cm->mb_rows / 2) {
if (mv.as_mv.row > 0)
- sum_in_vectors--;
+ --sum_in_vectors;
else if (mv.as_mv.row < 0)
- sum_in_vectors++;
+ ++sum_in_vectors;
} else if (mb_row > cm->mb_rows / 2) {
if (mv.as_mv.row > 0)
- sum_in_vectors++;
+ ++sum_in_vectors;
else if (mv.as_mv.row < 0)
- sum_in_vectors--;
+ --sum_in_vectors;
}
- // Does the Row vector point inwards or outwards
+ // Does the col vector point inwards or outwards?
if (mb_col < cm->mb_cols / 2) {
if (mv.as_mv.col > 0)
- sum_in_vectors--;
+ --sum_in_vectors;
else if (mv.as_mv.col < 0)
- sum_in_vectors++;
+ ++sum_in_vectors;
} else if (mb_col > cm->mb_cols / 2) {
if (mv.as_mv.col > 0)
- sum_in_vectors++;
+ ++sum_in_vectors;
else if (mv.as_mv.col < 0)
- sum_in_vectors--;
+ --sum_in_vectors;
}
}
}
@@ -731,7 +726,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
}
coded_error += (int64_t)this_error;
- // adjust to the next column of macroblocks
+ // Adjust to the next column of MBs.
x->plane[0].src.buf += 16;
x->plane[1].src.buf += uv_mb_height;
x->plane[2].src.buf += uv_mb_height;
@@ -740,24 +735,24 @@ void vp9_first_pass(VP9_COMP *cpi) {
recon_uvoffset += uv_mb_height;
}
- // adjust to the next row of mbs
+ // Adjust to the next row of MBs.
x->plane[0].src.buf += 16 * x->plane[0].src.stride - 16 * cm->mb_cols;
x->plane[1].src.buf += uv_mb_height * x->plane[1].src.stride -
uv_mb_height * cm->mb_cols;
x->plane[2].src.buf += uv_mb_height * x->plane[1].src.stride -
uv_mb_height * cm->mb_cols;
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
}
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
{
FIRSTPASS_STATS fps;
fps.frame = cm->current_video_frame;
- fps.intra_error = intra_error >> 8;
- fps.coded_error = coded_error >> 8;
- fps.sr_coded_error = sr_coded_error >> 8;
+ fps.intra_error = (double)(intra_error >> 8);
+ fps.coded_error = (double)(coded_error >> 8);
+ fps.sr_coded_error = (double)(sr_coded_error >> 8);
fps.ssim_weighted_pred_err = fps.coded_error * simple_weight(cpi->Source);
fps.count = 1.0;
fps.pcnt_inter = (double)intercount / cm->MBs;
@@ -791,14 +786,14 @@ void vp9_first_pass(VP9_COMP *cpi) {
// cpi->source_time_stamp.
fps.duration = (double)(cpi->source->ts_end - cpi->source->ts_start);
- // don't want to do output stats with a stack variable!
+ // Don't want to do output stats with a stack variable!
twopass->this_frame_stats = fps;
output_stats(cpi, cpi->output_pkt_list, &twopass->this_frame_stats);
accumulate_stats(&twopass->total_stats, &fps);
}
  // Copy the previous Last Frame back into gf and arf buffers if
- // the prediction is good enough... but also dont allow it to lag too far
+ // the prediction is good enough... but also don't allow it to lag too far.
if ((twopass->sr_update_lag > 3) ||
((cm->current_video_frame > 0) &&
(twopass->this_frame_stats.pcnt_inter > 0.20) &&
@@ -807,9 +802,9 @@ void vp9_first_pass(VP9_COMP *cpi) {
vp8_yv12_copy_frame(lst_yv12, gld_yv12);
twopass->sr_update_lag = 1;
} else {
- twopass->sr_update_lag++;
+ ++twopass->sr_update_lag;
}
- // swap frame pointers so last frame refers to the frame we just compressed
+ // Swap frame pointers so last frame refers to the frame we just compressed.
swap_yv12(lst_yv12, new_yv12);
vp9_extend_frame_borders(lst_yv12, cm->subsampling_x, cm->subsampling_y);
@@ -819,7 +814,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
if (cm->current_video_frame == 0)
vp8_yv12_copy_frame(lst_yv12, gld_yv12);
- // use this to see what the first pass reconstruction looks like
+ // Use this to see what the first pass reconstruction looks like.
if (0) {
char filename[512];
FILE *recon_file;
@@ -835,15 +830,11 @@ void vp9_first_pass(VP9_COMP *cpi) {
fclose(recon_file);
}
- cm->current_video_frame++;
+ ++cm->current_video_frame;
}
-// Estimate a cost per mb attributable to overheads such as the coding of
-// modes and motion vectors.
-// Currently simplistic in its assumptions for testing.
-//
-
-
+// Estimate a cost per mb attributable to overheads such as the coding of modes
+// and motion vectors. This currently makes simplistic assumptions for testing.
static double bitcost(double prob) {
return -(log(prob) / log(2.0));
}
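/* Editor's note, not part of this patch: bitcost() is the plain Shannon
 * cost -log2(p), restated and spot-checked below. */
#include <assert.h>
#include <math.h>

static double bitcost_sketch(double prob) {
  return -(log(prob) / log(2.0));
}

static void bitcost_examples(void) {
  assert(fabs(bitcost_sketch(0.5) - 1.0) < 1e-9);   /* a coin flip: 1 bit */
  assert(fabs(bitcost_sketch(0.25) - 2.0) < 1e-9);  /* p = 1/4: 2 bits */
  assert(bitcost_sketch(1.0) == 0.0);               /* certainty is free */
}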
@@ -866,18 +857,17 @@ static int64_t estimate_modemvcost(VP9_COMP *cpi,
motion_cost = bitcost(av_pct_motion);
intra_cost = bitcost(av_intra);
- // Estimate of extra bits per mv overhead for mbs
- // << 9 is the normalization to the (bits * 512) used in vp9_rc_bits_per_mb
+ // Estimate the number of extra bits per mv overhead for mbs. We shift (<< 9)
+ // to match the scaling of number of bits by 512.
mv_cost = ((int)(fpstats->new_mv_count / fpstats->count) * 8) << 9;
- // Crude estimate of overhead cost from modes
- // << 9 is the normalization to (bits * 512) used in vp9_rc_bits_per_mb
+ // Produce a crude estimate of the overhead cost from modes. We shift (<< 9)
+ // to match the scaling of number of bits by 512.
mode_cost =
(int)((((av_pct_inter - av_pct_motion) * zz_cost) +
(av_pct_motion * motion_cost) +
(av_intra * intra_cost)) * cpi->common.MBs) << 9;
- // return mv_cost + mode_cost;
// TODO(paulwilkins): Fix overhead costs for extended Q range.
#endif
return 0;
@@ -894,19 +884,19 @@ static double calc_correction_factor(double err_per_mb,
const double power_term = MIN(vp9_convert_qindex_to_q(q) * 0.0125 + pt_low,
pt_high);
- // Calculate correction factor
+ // Calculate correction factor.
if (power_term < 1.0)
assert(error_term >= 0.0);
return fclamp(pow(error_term, power_term), 0.05, 5.0);
}
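/* Editor's sketch, not part of this patch, restating the function above as
 * one self-contained expression. q_val stands for
 * vp9_convert_qindex_to_q(q); the caller below passes ERR_DIVISOR, 0.5 and
 * 0.90 for err_divisor, pt_low and pt_high. */
#include <math.h>

static double correction_factor_sketch(double err_per_mb, double err_divisor,
                                       double pt_low, double pt_high,
                                       double q_val) {
  const double error_term = err_per_mb / err_divisor;
  const double power_term = fmin(q_val * 0.0125 + pt_low, pt_high);
  const double f = pow(error_term, power_term);
  return f < 0.05 ? 0.05 : (f > 5.0 ? 5.0 : f);  /* fclamp(f, 0.05, 5.0) */
}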
-static int estimate_max_q(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
- int section_target_bandwitdh) {
+int vp9_twopass_worst_quality(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
+ int section_target_bandwitdh) {
int q;
const int num_mbs = cpi->common.MBs;
int target_norm_bits_per_mb;
- RATE_CONTROL *const rc = &cpi->rc;
+ const RATE_CONTROL *const rc = &cpi->rc;
const double section_err = fpstats->coded_error / fpstats->count;
const double err_per_mb = section_err / num_mbs;
@@ -920,7 +910,7 @@ static int estimate_max_q(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
// Try and pick a max Q that will be high enough to encode the
// content at the given rate.
- for (q = rc->best_quality; q < rc->worst_quality; q++) {
+ for (q = rc->best_quality; q < rc->worst_quality; ++q) {
const double err_correction_factor = calc_correction_factor(err_per_mb,
ERR_DIVISOR, 0.5, 0.90, q);
const int bits_per_mb_at_this_q = vp9_rc_bits_per_mb(INTER_FRAME, q,
@@ -936,58 +926,6 @@ static int estimate_max_q(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
return q;
}
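/* Editor's sketch, not part of this patch: the search above in two steps.
 * First normalize the section budget to the (bits * 512) per-MB scale used
 * by vp9_rc_bits_per_mb() (the overflow-aware split mirrors the removed
 * estimate_cq() below); then walk q upward until the predicted rate fits. */
static int target_norm_bits_per_mb_sketch(int section_bandwidth,
                                          int num_mbs) {
  return section_bandwidth < (1 << 20)
             ? (512 * section_bandwidth) / num_mbs
             : 512 * (section_bandwidth / num_mbs);
}

static int worst_quality_sketch(int best_q, int worst_q, int target_bits,
                                int (*bits_per_mb_at)(int q)) {
  int q;
  for (q = best_q; q < worst_q; ++q)
    if (bits_per_mb_at(q) <= target_bits)
      break;
  return q;  /* the lowest q whose predicted rate fits the budget */
}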
-// For cq mode estimate a cq level that matches the observed
-// complexity and data rate.
-static int estimate_cq(VP9_COMP *cpi,
- FIRSTPASS_STATS *fpstats,
- int section_target_bandwitdh) {
- int q;
- int num_mbs = cpi->common.MBs;
- int target_norm_bits_per_mb;
-
- double section_err = (fpstats->coded_error / fpstats->count);
- double err_per_mb = section_err / num_mbs;
- double err_correction_factor;
- double clip_iiratio;
- double clip_iifactor;
-
- target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20))
- ? (512 * section_target_bandwitdh) / num_mbs
- : 512 * (section_target_bandwitdh / num_mbs);
-
-
- // II ratio correction factor for clip as a whole
- clip_iiratio = cpi->twopass.total_stats.intra_error /
- DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats.coded_error);
- clip_iifactor = 1.0 - ((clip_iiratio - 10.0) * 0.025);
- if (clip_iifactor < 0.80)
- clip_iifactor = 0.80;
-
- // Try and pick a Q that can encode the content at the given rate.
- for (q = 0; q < MAXQ; q++) {
- int bits_per_mb_at_this_q;
-
- // Error per MB based correction factor
- err_correction_factor =
- calc_correction_factor(err_per_mb, 100.0, 0.5, 0.90, q) * clip_iifactor;
-
- bits_per_mb_at_this_q =
- vp9_rc_bits_per_mb(INTER_FRAME, q, err_correction_factor);
-
- if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
- break;
- }
-
- // Clip value to range "best allowed to (worst allowed - 1)"
- q = select_cq_level(q);
- if (q >= cpi->rc.worst_quality)
- q = cpi->rc.worst_quality - 1;
- if (q < cpi->rc.best_quality)
- q = cpi->rc.best_quality;
-
- return q;
-}
-
extern void vp9_new_framerate(VP9_COMP *cpi, double framerate);
void vp9_init_second_pass(VP9_COMP *cpi) {
@@ -1005,11 +943,11 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
twopass->total_stats = *twopass->stats_in_end;
twopass->total_left_stats = twopass->total_stats;
- // each frame can have a different duration, as the frame rate in the source
- // isn't guaranteed to be constant. The frame rate prior to the first frame
- // encoded in the second pass is a guess. However the sum duration is not.
- // Its calculated based on the actual durations of all frames from the first
- // pass.
+ // Each frame can have a different duration, as the frame rate in the source
+ // isn't guaranteed to be constant. The frame rate prior to the first frame
+ // encoded in the second pass is a guess. However, the sum duration is not.
+ // It is calculated based on the actual durations of all frames from the
+ // first pass.
vp9_new_framerate(cpi, 10000000.0 * twopass->total_stats.count /
twopass->total_stats.duration);
@@ -1020,18 +958,18 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
// Calculate a minimum intra value to be used in determining the IIratio
// scores used in the second pass. We have this minimum to make sure
// that clips that are static but "low complexity" in the intra domain
- // are still boosted appropriately for KF/GF/ARF
+ // are still boosted appropriately for KF/GF/ARF.
twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs;
twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs;
- // This variable monitors how far behind the second ref update is lagging
+ // This variable monitors how far behind the second ref update is lagging.
twopass->sr_update_lag = 1;
// Scan the first pass file and calculate an average Intra / Inter error score
// ratio for the sequence.
{
double sum_iiratio = 0.0;
- start_pos = twopass->stats_in; // Note the starting "file" position.
+ start_pos = twopass->stats_in;
while (input_stats(twopass, &this_frame) != EOF) {
const double iiratio = this_frame.intra_error /
@@ -1042,7 +980,6 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
twopass->avg_iiratio = sum_iiratio /
DOUBLE_DIVIDE_CHECK((double)twopass->total_stats.count);
- // Reset file position
reset_fpf_position(twopass, start_pos);
}
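/* Editor's sketch, not part of this patch: the scan-and-rewind pattern used
 * here and again just below -- note the position, stream to EOF while
 * accumulating, then rewind so the next pass starts from the same place.
 * The reader type is a hypothetical stand-in for twopass_rc. */
typedef struct {
  const double *pos;
  const double *end;
} stats_reader_sketch;

static double average_stat_sketch(stats_reader_sketch *r) {
  const double *start = r->pos;  /* remember the starting "file" position */
  double sum = 0.0;
  long n = 0;
  while (r->pos < r->end) {
    sum += *r->pos++;
    ++n;
  }
  r->pos = start;                /* reset_fpf_position() equivalent */
  return n > 0 ? sum / n : 0.0;
}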
@@ -1052,7 +989,7 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
double av_error = twopass->total_stats.ssim_weighted_pred_err /
DOUBLE_DIVIDE_CHECK(twopass->total_stats.count);
- start_pos = twopass->stats_in; // Note starting "file" position
+ start_pos = twopass->stats_in;
twopass->modified_error_total = 0.0;
twopass->modified_error_min =
@@ -1073,8 +1010,8 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
void vp9_end_second_pass(VP9_COMP *cpi) {
}
-// This function gives and estimate of how badly we believe
-// the prediction quality is decaying from frame to frame.
+// This function gives an estimate of how badly we believe the prediction
+// quality is decaying from frame to frame.
static double get_prediction_decay_rate(const VP9_COMMON *cm,
const FIRSTPASS_STATS *next_frame) {
// Look at the observed drop in prediction quality between the last frame
@@ -1091,12 +1028,10 @@ static double get_prediction_decay_rate(const VP9_COMMON *cm,
// Function to test for a condition where a complex transition is followed
// by a static section. For example in slide shows where there is a fade
// between slides. This is to help with more optimal kf and gf positioning.
-static int detect_transition_to_still(
- VP9_COMP *cpi,
- int frame_interval,
- int still_interval,
- double loop_decay_rate,
- double last_decay_rate) {
+static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval,
+ int still_interval,
+ double loop_decay_rate,
+ double last_decay_rate) {
int trans_to_still = 0;
// Break clause to detect very still sections after motion
@@ -1109,9 +1044,8 @@ static int detect_transition_to_still(
FIRSTPASS_STATS *position = cpi->twopass.stats_in;
FIRSTPASS_STATS tmp_next_frame;
- // Look ahead a few frames to see if static condition
- // persists...
- for (j = 0; j < still_interval; j++) {
+ // Look ahead a few frames to see if static condition persists...
+ for (j = 0; j < still_interval; ++j) {
if (EOF == input_stats(&cpi->twopass, &tmp_next_frame))
break;
@@ -1121,7 +1055,7 @@ static int detect_transition_to_still(
reset_fpf_position(&cpi->twopass, position);
- // Only if it does do we signal a transition to still
+ // Only if it does do we signal a transition to still.
if (j == still_interval)
trans_to_still = 1;
}
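/* Editor's sketch, not part of this patch: the look-ahead above only
 * signals a transition to still when every one of the next still_interval
 * frames stays sufficiently static. The 0.999 threshold and the reader
 * callback are assumptions standing in for input_stats() combined with
 * get_prediction_decay_rate(). */
static int stillness_persists_sketch(int still_interval,
                                     double (*decay_rate_of)(int j)) {
  int j;
  for (j = 0; j < still_interval; ++j)
    if (decay_rate_of(j) < 0.999)  /* assumed "still enough" threshold */
      break;
  return j == still_interval;      /* true only if no frame broke out */
}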
@@ -1131,7 +1065,7 @@ static int detect_transition_to_still(
// This function detects a flash through the high relative pcnt_second_ref
// score in the frame following a flash frame. The offset passed in should
-// reflect this
+// reflect this.
static int detect_flash(const struct twopass_rc *twopass, int offset) {
FIRSTPASS_STATS next_frame;
@@ -1144,7 +1078,7 @@ static int detect_flash(const struct twopass_rc *twopass, int offset) {
// brief break in prediction (such as a flash) but subsequent frames
// are reasonably well predicted by an earlier (pre flash) frame.
// The recovery after a flash is indicated by a high pcnt_second_ref
- // comapred to pcnt_inter.
+ // compared to pcnt_inter.
if (next_frame.pcnt_second_ref > next_frame.pcnt_inter &&
next_frame.pcnt_second_ref >= 0.5)
flash_detected = 1;
@@ -1153,7 +1087,7 @@ static int detect_flash(const struct twopass_rc *twopass, int offset) {
return flash_detected;
}
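/* Editor's sketch, not part of this patch: the predicate used above,
 * isolated. Recovery after a flash shows up as the older (second)
 * reference predicting clearly better than the immediately preceding
 * (flash) frame. */
static int flash_recovery_sketch(double pcnt_second_ref, double pcnt_inter) {
  return pcnt_second_ref > pcnt_inter && pcnt_second_ref >= 0.5;
}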
-// Update the motion related elements to the GF arf boost calculation
+// Update the motion related elements to the GF arf boost calculation.
static void accumulate_frame_motion_stats(
FIRSTPASS_STATS *this_frame,
double *this_frame_mv_in_out,
@@ -1165,13 +1099,13 @@ static void accumulate_frame_motion_stats(
// Accumulate motion stats.
motion_pct = this_frame->pcnt_motion;
- // Accumulate Motion In/Out of frame stats
+ // Accumulate Motion In/Out of frame stats.
*this_frame_mv_in_out = this_frame->mv_in_out_count * motion_pct;
*mv_in_out_accumulator += this_frame->mv_in_out_count * motion_pct;
*abs_mv_in_out_accumulator += fabs(this_frame->mv_in_out_count * motion_pct);
// Accumulate a measure of how uniform (or conversely how random)
- // the motion field is. (A ratio of absmv / mv)
+ // the motion field is (a ratio of absmv / mv).
if (motion_pct > 0.05) {
const double this_frame_mvr_ratio = fabs(this_frame->mvr_abs) /
DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVr));
@@ -1194,7 +1128,7 @@ static double calc_frame_boost(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame,
double this_frame_mv_in_out) {
double frame_boost;
- // Underlying boost factor is based on inter intra error ratio
+ // Underlying boost factor is based on inter intra error ratio.
if (this_frame->intra_error > cpi->twopass.gf_intra_err_min)
frame_boost = (IIFACTOR * this_frame->intra_error /
DOUBLE_DIVIDE_CHECK(this_frame->coded_error));
@@ -1202,13 +1136,12 @@ static double calc_frame_boost(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame,
frame_boost = (IIFACTOR * cpi->twopass.gf_intra_err_min /
DOUBLE_DIVIDE_CHECK(this_frame->coded_error));
- // Increase boost for frames where new data coming into frame
- // (eg zoom out). Slightly reduce boost if there is a net balance
- // of motion out of the frame (zoom in).
- // The range for this_frame_mv_in_out is -1.0 to +1.0
+  // Increase boost for frames where new data is coming into the frame
+  // (e.g. zoom out). Slightly reduce boost if there is a net balance of
+  // motion out of the frame (zoom in). The range for this_frame_mv_in_out
+  // is -1.0 to +1.0.
if (this_frame_mv_in_out > 0.0)
frame_boost += frame_boost * (this_frame_mv_in_out * 2.0);
- // In extreme case boost is halved
+ // In the extreme case the boost is halved.
else
frame_boost += frame_boost * (this_frame_mv_in_out / 2.0);
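/* Editor's restatement, not part of this patch, of the two branches above
 * with this_frame_mv_in_out (mio) in [-1.0, +1.0]:
 *   mio > 0 (new content entering, zoom out): boost *= 1 + 2 * mio (<= 3x)
 *   mio <= 0 (content leaving, zoom in):      boost *= 1 + mio / 2, down to
 *   half at mio == -1. */
static double adjust_boost_sketch(double frame_boost, double mio) {
  return mio > 0.0 ? frame_boost * (1.0 + 2.0 * mio)
                   : frame_boost * (1.0 + mio / 2.0);
}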
@@ -1230,12 +1163,12 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
int arf_boost;
int flash_detected = 0;
- // Search forward from the proposed arf/next gf position
- for (i = 0; i < f_frames; i++) {
+ // Search forward from the proposed arf/next gf position.
+ for (i = 0; i < f_frames; ++i) {
if (read_frame_stats(twopass, &this_frame, (i + offset)) == EOF)
break;
- // Update the motion related elements to the boost calculation
+ // Update the motion related elements to the boost calculation.
accumulate_frame_motion_stats(&this_frame,
&this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator,
@@ -1246,7 +1179,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
flash_detected = detect_flash(twopass, i + offset) ||
detect_flash(twopass, i + offset + 1);
- // Cumulative effect of prediction quality decay
+ // Accumulate the effect of prediction quality decay.
if (!flash_detected) {
decay_accumulator *= get_prediction_decay_rate(&cpi->common, &this_frame);
decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
@@ -1259,7 +1192,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
*f_boost = (int)boost_score;
- // Reset for backward looking loop
+ // Reset for backward looking loop.
boost_score = 0.0;
mv_ratio_accumulator = 0.0;
decay_accumulator = 1.0;
@@ -1267,12 +1200,12 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
mv_in_out_accumulator = 0.0;
abs_mv_in_out_accumulator = 0.0;
- // Search backward towards last gf position
- for (i = -1; i >= -b_frames; i--) {
+ // Search backward towards last gf position.
+ for (i = -1; i >= -b_frames; --i) {
if (read_frame_stats(twopass, &this_frame, (i + offset)) == EOF)
break;
- // Update the motion related elements to the boost calculation
+ // Update the motion related elements to the boost calculation.
accumulate_frame_motion_stats(&this_frame,
&this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator,
@@ -1283,7 +1216,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
flash_detected = detect_flash(twopass, i + offset) ||
detect_flash(twopass, i + offset + 1);
- // Cumulative effect of prediction quality decay
+ // Cumulative effect of prediction quality decay.
if (!flash_detected) {
decay_accumulator *= get_prediction_decay_rate(&cpi->common, &this_frame);
decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
@@ -1333,8 +1266,7 @@ static void schedule_frames(VP9_COMP *cpi, const int start, const int end,
return;
}
- // ARF Group: work out the ARF schedule.
- // Mark ARF frames as negative.
+ // ARF Group: Work out the ARF schedule and mark ARF frames as negative.
if (end < 0) {
// printf("start:%d end:%d\n", -end, -end);
// ARF frame is at the end of the range.
@@ -1457,14 +1389,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double decay_accumulator = 1.0;
double zero_motion_accumulator = 1.0;
-  double loop_decay_rate = 1.00;  // Starting decay rate
+  double loop_decay_rate = 1.00;
+ double loop_decay_rate = 1.00;
double last_loop_decay_rate = 1.00;
double this_frame_mv_in_out = 0.0;
double mv_in_out_accumulator = 0.0;
double abs_mv_in_out_accumulator = 0.0;
double mv_ratio_accumulator_thresh;
- int max_bits = frame_max_bits(cpi); // Max for a single frame
+ const int max_bits = frame_max_bits(cpi); // Max bits for a single frame.
unsigned int allow_alt_ref = cpi->oxcf.play_alternate &&
cpi->oxcf.lag_in_frames;
@@ -1477,19 +1409,19 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->gf_group_bits = 0;
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
start_pos = twopass->stats_in;
// Load stats for the current frame.
mod_frame_err = calculate_modified_err(cpi, this_frame);
- // Note the error of the frame at the start of the group (this will be
- // the GF frame error if we code a normal gf
+ // Note the error of the frame at the start of the group. This will be
+ // the GF frame error if we code a normal gf.
gf_first_frame_err = mod_frame_err;
// If this is a key frame or the overlay from a previous arf then
- // The error score / cost of this frame has already been accounted for.
+ // the error score / cost of this frame has already been accounted for.
if (cpi->common.frame_type == KEY_FRAME || rc->source_alt_ref_active)
gf_group_err -= gf_first_frame_err;
@@ -1511,9 +1443,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
i = 0;
while (i < twopass->static_scene_max_gf_interval && i < rc->frames_to_key) {
- i++; // Increment the loop counter
+ ++i;
- // Accumulate error score of frames in this gf group
+ // Accumulate error score of frames in this gf group.
mod_frame_err = calculate_modified_err(cpi, this_frame);
gf_group_err += mod_frame_err;
@@ -1524,13 +1456,13 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// quality back to an earlier frame is then restored.
flash_detected = detect_flash(twopass, 0);
- // Update the motion related elements to the boost calculation
+ // Update the motion related elements to the boost calculation.
accumulate_frame_motion_stats(&next_frame,
&this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator,
&mv_ratio_accumulator);
- // Cumulative effect of prediction quality decay
+ // Accumulate the effect of prediction quality decay.
if (!flash_detected) {
last_loop_decay_rate = loop_decay_rate;
loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame);
@@ -1543,8 +1475,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
next_frame.pcnt_motion;
}
- // Break clause to detect very still sections after motion
- // (for example a static image after a fade or other transition).
+ // Break clause to detect very still sections after motion. For example,
+ // a static image after a fade or other transition.
if (detect_transition_to_still(cpi, i, 5, loop_decay_rate,
last_loop_decay_rate)) {
allow_alt_ref = 0;
@@ -1552,16 +1484,16 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
}
- // Calculate a boost number for this frame
+ // Calculate a boost number for this frame.
boost_score += (decay_accumulator *
calc_frame_boost(cpi, &next_frame, this_frame_mv_in_out));
// Break out conditions.
if (
- // Break at cpi->max_gf_interval unless almost totally static
+ // Break at cpi->max_gf_interval unless almost totally static.
(i >= active_max_gf_interval && (zero_motion_accumulator < 0.995)) ||
(
- // Don't break out with a very short interval
+ // Don't break out with a very short interval.
(i > MIN_GF_INTERVAL) &&
((boost_score > 125.0) || (next_frame.pcnt_inter < 0.75)) &&
(!flash_detected) &&
@@ -1580,10 +1512,10 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0);
- // Don't allow a gf too near the next kf
+ // Don't allow a gf too near the next kf.
if ((rc->frames_to_key - i) < MIN_GF_INTERVAL) {
while (i < (rc->frames_to_key + !rc->next_key_frame_forced)) {
- i++;
+ ++i;
if (EOF == input_stats(twopass, this_frame))
break;
@@ -1613,20 +1545,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
else
rc->baseline_gf_interval = i;
- // Should we use the alternate reference frame
+  // Should we use the alternate reference frame?
if (allow_alt_ref &&
(i < cpi->oxcf.lag_in_frames) &&
(i >= MIN_GF_INTERVAL) &&
- // for real scene cuts (not forced kfs) dont allow arf very near kf.
+ // For real scene cuts (not forced kfs) don't allow arf very near kf.
(rc->next_key_frame_forced ||
- (i <= (rc->frames_to_key - MIN_GF_INTERVAL))) &&
- ((next_frame.pcnt_inter > 0.75) ||
- (next_frame.pcnt_second_ref > 0.5)) &&
- ((mv_in_out_accumulator / (double)i > -0.2) ||
- (mv_in_out_accumulator > -2.0)) &&
- (boost_score > 100)) {
-
- // Alternative boost calculation for alt ref
+ (i <= (rc->frames_to_key - MIN_GF_INTERVAL)))) {
+ // Calculate the boost for alt ref.
rc->gfu_boost = calc_arf_boost(cpi, 0, (i - 1), (i - 1), &f_boost,
&b_boost);
rc->source_alt_ref_pending = 1;
@@ -1688,28 +1614,24 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
#endif
#endif
- // Calculate the bits to be allocated to the group as a whole
- if ((cpi->twopass.kf_group_bits > 0) &&
- (cpi->twopass.kf_group_error_left > 0)) {
- cpi->twopass.gf_group_bits =
- (int64_t)(cpi->twopass.kf_group_bits *
+ // Calculate the bits to be allocated to the group as a whole.
+ if (twopass->kf_group_bits > 0 && twopass->kf_group_error_left > 0) {
+ twopass->gf_group_bits = (int64_t)(cpi->twopass.kf_group_bits *
(gf_group_err / cpi->twopass.kf_group_error_left));
} else {
- cpi->twopass.gf_group_bits = 0;
+ twopass->gf_group_bits = 0;
}
- cpi->twopass.gf_group_bits =
- (cpi->twopass.gf_group_bits < 0)
- ? 0
- : (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits)
- ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits;
+ twopass->gf_group_bits = (twopass->gf_group_bits < 0) ?
+ 0 : (twopass->gf_group_bits > twopass->kf_group_bits) ?
+ twopass->kf_group_bits : twopass->gf_group_bits;
// Clip cpi->twopass.gf_group_bits based on user supplied data rate
- // variability limit (cpi->oxcf.two_pass_vbrmax_section)
- if (cpi->twopass.gf_group_bits > (int64_t)max_bits * rc->baseline_gf_interval)
- cpi->twopass.gf_group_bits = (int64_t)max_bits * rc->baseline_gf_interval;
+ // variability limit, cpi->oxcf.two_pass_vbrmax_section.
+ if (twopass->gf_group_bits > (int64_t)max_bits * rc->baseline_gf_interval)
+ twopass->gf_group_bits = (int64_t)max_bits * rc->baseline_gf_interval;
- // Reset the file position
- reset_fpf_position(&cpi->twopass, start_pos);
+ // Reset the file position.
+ reset_fpf_position(twopass, start_pos);
// Assign bits to the arf or gf.
for (i = 0; i <= (rc->source_alt_ref_pending &&
@@ -1720,7 +1642,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
int boost = (rc->gfu_boost * gfboost_qadjust(q)) / 100;
- // Set max and minimum boost and hence minimum allocation
+ // Set max and minimum boost and hence minimum allocation.
boost = clamp(boost, 125, (rc->baseline_gf_interval + 1) * 200);
if (rc->source_alt_ref_pending && i == 0)
@@ -1728,7 +1650,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
else
allocation_chunks = (rc->baseline_gf_interval * 100) + (boost - 100);
- // Prevent overflow
+ // Prevent overflow.
if (boost > 1023) {
int divisor = boost >> 10;
boost /= divisor;
@@ -1736,18 +1658,18 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
// Calculate the number of bits to be spent on the gf or arf based on
- // the boost number
- gf_bits = (int)((double)boost * (cpi->twopass.gf_group_bits /
- (double)allocation_chunks));
+ // the boost number.
+ gf_bits = (int)((double)boost * (twopass->gf_group_bits /
+ (double)allocation_chunks));
// If the frame that is to be boosted is simpler than the average for
// the gf/arf group then use an alternative calculation
- // based on the error score of the frame itself
+ // based on the error score of the frame itself.
if (rc->baseline_gf_interval < 1 ||
mod_frame_err < gf_group_err / (double)rc->baseline_gf_interval) {
- double alt_gf_grp_bits = (double)cpi->twopass.kf_group_bits *
+ double alt_gf_grp_bits = (double)twopass->kf_group_bits *
(mod_frame_err * (double)rc->baseline_gf_interval) /
- DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left);
+ DOUBLE_DIVIDE_CHECK(twopass->kf_group_error_left);
int alt_gf_bits = (int)((double)boost * (alt_gf_grp_bits /
(double)allocation_chunks));
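/* Editor's sketch, not part of this patch, of the allocation rule above for
 * the non-arf case shown: each ordinary frame in the group counts as 100
 * "chunks" and the boosted frame as boost chunks, so the gf share of the
 * group budget is boost / chunks (the boost > 1023 divisor above only
 * rescales both sides to avoid overflow). Example numbers are hypothetical. */
#include <stdint.h>

static int boosted_frame_bits_sketch(int boost, int interval,
                                     int64_t group_bits) {
  const int chunks = interval * 100 + (boost - 100);
  return (int)((double)boost * (double)group_bits / chunks);
}
/* boosted_frame_bits_sketch(300, 10, 500000):
 *   chunks == 1200, share == 300/1200, bits == 125000. */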
@@ -1758,70 +1680,68 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// If it is harder than other frames in the group make sure it at
// least receives an allocation in keeping with its relative error
// score, otherwise it may be worse off than an "un-boosted" frame.
- int alt_gf_bits = (int)((double)cpi->twopass.kf_group_bits *
+ int alt_gf_bits = (int)((double)twopass->kf_group_bits *
mod_frame_err /
- DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left));
+ DOUBLE_DIVIDE_CHECK(twopass->kf_group_error_left));
if (alt_gf_bits > gf_bits)
gf_bits = alt_gf_bits;
}
- // Dont allow a negative value for gf_bits
+ // Don't allow a negative value for gf_bits.
if (gf_bits < 0)
gf_bits = 0;
if (i == 0) {
- cpi->twopass.gf_bits = gf_bits;
+ twopass->gf_bits = gf_bits;
}
if (i == 1 ||
(!rc->source_alt_ref_pending &&
- (cpi->common.frame_type != KEY_FRAME))) {
- // Per frame bit target for this frame
- rc->per_frame_bandwidth = gf_bits;
+ cpi->common.frame_type != KEY_FRAME)) {
+ // Calculate the per frame bit target for this frame.
+ vp9_rc_set_frame_target(cpi, gf_bits);
}
}
{
- // Adjust KF group bits and error remaining
- cpi->twopass.kf_group_error_left -= (int64_t)gf_group_err;
- cpi->twopass.kf_group_bits -= cpi->twopass.gf_group_bits;
+ // Adjust KF group bits and error remaining.
+ twopass->kf_group_error_left -= (int64_t)gf_group_err;
+ twopass->kf_group_bits -= twopass->gf_group_bits;
- if (cpi->twopass.kf_group_bits < 0)
- cpi->twopass.kf_group_bits = 0;
+ if (twopass->kf_group_bits < 0)
+ twopass->kf_group_bits = 0;
- // If this is an arf update we want to remove the score for the
- // overlay frame at the end which will usually be very cheap to code.
- // The overlay frame has already in effect been coded so we want to spread
- // the remaining bits amoung the other frames/
+ // If this is an arf update we want to remove the score for the overlay
+ // frame at the end which will usually be very cheap to code.
+ // The overlay frame has already, in effect, been coded so we want to spread
+ // the remaining bits among the other frames.
// For normal GFs remove the score for the GF itself unless this is
// also a key frame in which case it has already been accounted for.
if (rc->source_alt_ref_pending) {
- cpi->twopass.gf_group_error_left = (int64_t)gf_group_err - mod_frame_err;
+ twopass->gf_group_error_left = (int64_t)(gf_group_err - mod_frame_err);
} else if (cpi->common.frame_type != KEY_FRAME) {
- cpi->twopass.gf_group_error_left = (int64_t)(gf_group_err
+ twopass->gf_group_error_left = (int64_t)(gf_group_err
- gf_first_frame_err);
} else {
- cpi->twopass.gf_group_error_left = (int64_t)gf_group_err;
+ twopass->gf_group_error_left = (int64_t)gf_group_err;
}
- cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits;
+ twopass->gf_group_bits -= twopass->gf_bits;
- if (cpi->twopass.gf_group_bits < 0)
- cpi->twopass.gf_group_bits = 0;
+ if (twopass->gf_group_bits < 0)
+ twopass->gf_group_bits = 0;
// This condition could fail if there are two kfs very close together
- // despite (MIN_GF_INTERVAL) and would cause a divide by 0 in the
+ // despite MIN_GF_INTERVAL and would cause a divide by 0 in the
// calculation of alt_extra_bits.
if (rc->baseline_gf_interval >= 3) {
const int boost = rc->source_alt_ref_pending ? b_boost : rc->gfu_boost;
if (boost >= 150) {
- int alt_extra_bits;
- int pct_extra = (boost - 100) / 50;
- pct_extra = (pct_extra > 20) ? 20 : pct_extra;
-
- alt_extra_bits = (int)((cpi->twopass.gf_group_bits * pct_extra) / 100);
- cpi->twopass.gf_group_bits -= alt_extra_bits;
+ const int pct_extra = MIN(20, (boost - 100) / 50);
+ const int alt_extra_bits = (int)((twopass->gf_group_bits * pct_extra) /
+ 100);
+ twopass->gf_group_bits -= alt_extra_bits;
}
}
}
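As a worked example (illustrative numbers only): boost = 400 gives pct_extra = MIN(20, (400 - 100) / 50) = 6, so alt_extra_bits here is 6% of twopass->gf_group_bits, deducted from the group total.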
@@ -1830,20 +1750,20 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
FIRSTPASS_STATS sectionstats;
zero_stats(&sectionstats);
- reset_fpf_position(&cpi->twopass, start_pos);
+ reset_fpf_position(twopass, start_pos);
- for (i = 0; i < rc->baseline_gf_interval; i++) {
- input_stats(&cpi->twopass, &next_frame);
+ for (i = 0; i < rc->baseline_gf_interval; ++i) {
+ input_stats(twopass, &next_frame);
accumulate_stats(&sectionstats, &next_frame);
}
avg_stats(&sectionstats);
- cpi->twopass.section_intra_rating = (int)
+ twopass->section_intra_rating = (int)
(sectionstats.intra_error /
DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));
- reset_fpf_position(&cpi->twopass, start_pos);
+ reset_fpf_position(twopass, start_pos);
}
}
@@ -1879,34 +1799,27 @@ static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
cpi->twopass.gf_group_bits = 0;
// Per frame bit target for this frame.
- cpi->rc.per_frame_bandwidth = target_frame_size;
-}
-
-static int test_for_kf_one_pass(VP9_COMP *cpi) {
- // Placeholder function for auto key frame
- return 0;
+ vp9_rc_set_frame_target(cpi, target_frame_size);
}
static int test_candidate_kf(VP9_COMP *cpi,
- FIRSTPASS_STATS *last_frame,
- FIRSTPASS_STATS *this_frame,
- FIRSTPASS_STATS *next_frame) {
+ const FIRSTPASS_STATS *last_frame,
+ const FIRSTPASS_STATS *this_frame,
+ const FIRSTPASS_STATS *next_frame) {
int is_viable_kf = 0;
- // Does the frame satisfy the primary criteria of a key frame
- // If so, then examine how well it predicts subsequent frames
+ // Does the frame satisfy the primary criteria of a key frame?
+ // If so, then examine how well it predicts subsequent frames.
if ((this_frame->pcnt_second_ref < 0.10) &&
(next_frame->pcnt_second_ref < 0.10) &&
((this_frame->pcnt_inter < 0.05) ||
- (((this_frame->pcnt_inter - this_frame->pcnt_neutral) < .35) &&
+ (((this_frame->pcnt_inter - this_frame->pcnt_neutral) < 0.35) &&
((this_frame->intra_error /
DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) < 2.5) &&
((fabs(last_frame->coded_error - this_frame->coded_error) /
- DOUBLE_DIVIDE_CHECK(this_frame->coded_error) >
- .40) ||
+ DOUBLE_DIVIDE_CHECK(this_frame->coded_error) > 0.40) ||
(fabs(last_frame->intra_error - this_frame->intra_error) /
- DOUBLE_DIVIDE_CHECK(this_frame->intra_error) >
- .40) ||
+ DOUBLE_DIVIDE_CHECK(this_frame->intra_error) > 0.40) ||
((next_frame->intra_error /
DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) > 3.5))))) {
int i;
@@ -1920,37 +1833,34 @@ static int test_candidate_kf(VP9_COMP *cpi,
local_next_frame = *next_frame;
- // Note the starting file position so we can reset to it
+ // Note the starting file position so we can reset to it.
start_pos = cpi->twopass.stats_in;
- // Examine how well the key frame predicts subsequent frames
- for (i = 0; i < 16; i++) {
+ // Examine how well the key frame predicts subsequent frames.
+ for (i = 0; i < 16; ++i) {
double next_iiratio = (IIKFACTOR1 * local_next_frame.intra_error /
DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error));
if (next_iiratio > RMAX)
next_iiratio = RMAX;
- // Cumulative effect of decay in prediction quality
+ // Cumulative effect of decay in prediction quality.
if (local_next_frame.pcnt_inter > 0.85)
decay_accumulator *= local_next_frame.pcnt_inter;
else
decay_accumulator *= (0.85 + local_next_frame.pcnt_inter) / 2.0;
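For example (illustrative numbers only): a frame with pcnt_inter = 0.60 multiplies decay_accumulator by (0.85 + 0.60) / 2 = 0.725, while a frame with pcnt_inter = 0.95 multiplies it by 0.95 directly, so strongly inter-predicted frames erode the boost more slowly.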
- // decay_accumulator = decay_accumulator * local_next_frame.pcnt_inter;
-
- // Keep a running total
+ // Keep a running total.
boost_score += (decay_accumulator * next_iiratio);
- // Test various breakout clauses
+ // Test various breakout clauses.
if ((local_next_frame.pcnt_inter < 0.05) ||
(next_iiratio < 1.5) ||
(((local_next_frame.pcnt_inter -
local_next_frame.pcnt_neutral) < 0.20) &&
(next_iiratio < 3.0)) ||
((boost_score - old_boost_score) < 3.0) ||
- (local_next_frame.intra_error < 200)
- ) {
+ (local_next_frame.intra_error < 200)) {
break;
}
@@ -1990,8 +1900,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double kf_mod_err = 0.0;
double kf_group_err = 0.0;
- double kf_group_intra_err = 0.0;
- double kf_group_coded_err = 0.0;
double recent_loop_decay[8] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
RATE_CONTROL *const rc = &cpi->rc;
@@ -1999,23 +1907,23 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
vp9_zero(next_frame);
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
start_position = twopass->stats_in;
cpi->common.frame_type = KEY_FRAME;
- // is this a forced key frame by interval
+ // Is this a forced key frame by interval?
rc->this_key_frame_forced = rc->next_key_frame_forced;
- // Clear the alt ref active flag as this can never be active on a key frame
+ // Clear the alt ref active flag as this can never be active on a key frame.
rc->source_alt_ref_active = 0;
- // Kf is always a gf so clear frames till next gf counter
+ // KF is always a GF so clear frames till next gf counter.
rc->frames_till_gf_update_due = 0;
rc->frames_to_key = 1;
- // Take a copy of the initial frame details
+ // Take a copy of the initial frame details.
first_frame = *this_frame;
twopass->kf_group_bits = 0; // Total bits available to kf group
@@ -2023,86 +1931,75 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
kf_mod_err = calculate_modified_err(cpi, this_frame);
- // find the next keyframe
+ // Find the next keyframe.
i = 0;
while (twopass->stats_in < twopass->stats_in_end) {
- // Accumulate kf group error
+ // Accumulate kf group error.
kf_group_err += calculate_modified_err(cpi, this_frame);
- // These figures keep intra and coded error counts for all frames including
- // key frames in the group. The effect of the key frame itself can be
- // subtracted out using the first_frame data collected above.
- kf_group_intra_err += this_frame->intra_error;
- kf_group_coded_err += this_frame->coded_error;
-
- // load a the next frame's stats
+ // Load the next frame's stats.
last_frame = *this_frame;
input_stats(twopass, this_frame);
// Provided that we are not at the end of the file...
if (cpi->oxcf.auto_key &&
lookup_next_frame_stats(twopass, &next_frame) != EOF) {
- // Normal scene cut check
+ // Check for a scene cut.
if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame))
break;
-
- // How fast is prediction quality decaying
+ // How fast is the prediction quality decaying?
loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame);
// We want to know something about the recent past... rather than
- // as used elsewhere where we are concened with decay in prediction
+ // as used elsewhere where we are concerned with decay in prediction
// quality since the last GF or KF.
recent_loop_decay[i % 8] = loop_decay_rate;
decay_accumulator = 1.0;
- for (j = 0; j < 8; j++)
+ for (j = 0; j < 8; ++j)
decay_accumulator *= recent_loop_decay[j];
// Special check for transition or high motion followed by a
- // to a static scene.
+ // static scene.
if (detect_transition_to_still(cpi, i, cpi->key_frame_frequency - i,
loop_decay_rate, decay_accumulator))
break;
- // Step on to the next frame
- rc->frames_to_key++;
+ // Step on to the next frame.
+ ++rc->frames_to_key;
// If we don't have a real key frame within the next two
- // forcekeyframeevery intervals then break out of the loop.
+ // key_frame_frequency intervals then break out of the loop.
if (rc->frames_to_key >= 2 * (int)cpi->key_frame_frequency)
break;
} else {
- rc->frames_to_key++;
+ ++rc->frames_to_key;
}
- i++;
+ ++i;
}
// If there is a max kf interval set by the user we must obey it.
// We already breakout of the loop above at 2x max.
- // This code centers the extra kf if the actual natural
- // interval is between 1x and 2x
+ // This code centers the extra kf if the actual natural interval
+ // is between 1x and 2x.
if (cpi->oxcf.auto_key &&
rc->frames_to_key > (int)cpi->key_frame_frequency) {
FIRSTPASS_STATS tmp_frame;
rc->frames_to_key /= 2;
- // Copy first frame details
+ // Copy first frame details.
tmp_frame = first_frame;
- // Reset to the start of the group
+ // Reset to the start of the group.
reset_fpf_position(twopass, start_position);
kf_group_err = 0;
- kf_group_intra_err = 0;
- kf_group_coded_err = 0;
- // Rescan to get the correct error data for the forced kf group
- for (i = 0; i < rc->frames_to_key; i++) {
- // Accumulate kf group errors
+ // Rescan to get the correct error data for the forced kf group.
+ for (i = 0; i < rc->frames_to_key; ++i) {
+ // Accumulate kf group errors.
kf_group_err += calculate_modified_err(cpi, &tmp_frame);
- kf_group_intra_err += tmp_frame.intra_error;
- kf_group_coded_err += tmp_frame.coded_error;
// Load the next frame's stats.
input_stats(twopass, &tmp_frame);
@@ -2114,28 +2011,22 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->next_key_frame_forced = 0;
}
- // Special case for the last key frame of the file
+ // Special case for the last key frame of the file.
if (twopass->stats_in >= twopass->stats_in_end) {
- // Accumulate kf group error
+ // Accumulate kf group error.
kf_group_err += calculate_modified_err(cpi, this_frame);
-
- // These figures keep intra and coded error counts for all frames including
- // key frames in the group. The effect of the key frame itself can be
- // subtracted out using the first_frame data collected above.
- kf_group_intra_err += this_frame->intra_error;
- kf_group_coded_err += this_frame->coded_error;
}
// Calculate the number of bits that should be assigned to the kf group.
if (twopass->bits_left > 0 && twopass->modified_error_left > 0.0) {
- // Max for a single normal frame (not key frame)
+ // Maximum number of bits for a single normal frame (not key frame).
int max_bits = frame_max_bits(cpi);
- // Maximum bits for the kf group
+ // Maximum number of bits allocated to the key frame group.
int64_t max_grp_bits;
// Default allocation based on bits left and relative
- // complexity of the section
+ // complexity of the section.
twopass->kf_group_bits = (int64_t)(twopass->bits_left *
(kf_group_err / twopass->modified_error_left));
@@ -2146,17 +2037,16 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
} else {
twopass->kf_group_bits = 0;
}
- // Reset the first pass file position
+ // Reset the first pass file position.
reset_fpf_position(twopass, start_position);
// Determine how big to make this keyframe based on how well the subsequent
// frames use inter blocks.
decay_accumulator = 1.0;
boost_score = 0.0;
- loop_decay_rate = 1.00; // Starting decay rate
// Scan through the kf group collating various stats.
- for (i = 0; i < rc->frames_to_key; i++) {
+ for (i = 0; i < rc->frames_to_key; ++i) {
double r;
if (EOF == input_stats(twopass, &next_frame))
@@ -2181,7 +2071,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
if (r > RMAX)
r = RMAX;
- // How fast is prediction quality decaying
+ // How fast is prediction quality decaying?
if (!detect_flash(twopass, 0)) {
loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame);
decay_accumulator *= loop_decay_rate;
@@ -2199,7 +2089,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
zero_stats(&sectionstats);
reset_fpf_position(twopass, start_position);
- for (i = 0; i < rc->frames_to_key; i++) {
+ for (i = 0; i < rc->frames_to_key; ++i) {
input_stats(twopass, &next_frame);
accumulate_stats(&sectionstats, &next_frame);
}
@@ -2210,10 +2100,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));
}
- // Reset the first pass file position
+ // Reset the first pass file position.
reset_fpf_position(twopass, start_position);
- // Work out how many bits to allocate for the key frame itself
+ // Work out how many bits to allocate for the key frame itself.
if (1) {
int kf_boost = (int)boost_score;
int allocation_chunks;
@@ -2222,33 +2112,34 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
if (kf_boost < (rc->frames_to_key * 3))
kf_boost = (rc->frames_to_key * 3);
- if (kf_boost < MIN_BOOST)
- kf_boost = MIN_BOOST;
+ if (kf_boost < MIN_KF_BOOST)
+ kf_boost = MIN_KF_BOOST;
// Make a note of baseline boost and the zero motion
// accumulator value for use elsewhere.
rc->kf_boost = kf_boost;
twopass->kf_zeromotion_pct = (int)(zero_motion_accumulator * 100.0);
- // We do three calculations for kf size.
- // The first is based on the error score for the whole kf group.
- // The second (optionally) on the key frames own error if this is
- // smaller than the average for the group.
- // The final one insures that the frame receives at least the
- // allocation it would have received based on its own error score vs
- // the error score remaining
- // Special case if the sequence appears almost totaly static
- // In this case we want to spend almost all of the bits on the
- // key frame.
- // cpi->rc.frames_to_key-1 because key frame itself is taken
- // care of by kf_boost.
+ // Key frame size depends on:
+ // (1) the error score for the whole key frame group,
+ // (2) the key frames' own error if this is smaller than the
+ // average for the group (optional),
+ // (3) ensuring that the frame receives at least the allocation it would
+ // have received based on its own error score vs the error score
+ // remaining.
+ // Special case:
+ // If the sequence appears almost totally static we want to spend almost
+ // all of the bits on the key frame.
+ //
+ // We use (cpi->rc.frames_to_key - 1) below because the key frame itself is
+ // taken care of by kf_boost.
if (zero_motion_accumulator >= 0.99) {
allocation_chunks = ((rc->frames_to_key - 1) * 10) + kf_boost;
} else {
allocation_chunks = ((rc->frames_to_key - 1) * 100) + kf_boost;
}
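As a worked example (illustrative numbers only): with rc->frames_to_key = 60 and kf_boost = 2000, the normal case gives allocation_chunks = 59 * 100 + 2000 = 7900, so the key frame is later assigned about 2000 / 7900 ≈ 25% of twopass->kf_group_bits; in the near-static case the same boost yields 59 * 10 + 2000 = 2590 chunks, i.e. roughly 77% of the group's bits going to the key frame.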
- // Prevent overflow
+ // Prevent overflow.
if (kf_boost > 1028) {
int divisor = kf_boost >> 10;
kf_boost /= divisor;
@@ -2258,7 +2149,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->kf_group_bits = (twopass->kf_group_bits < 0) ? 0
: twopass->kf_group_bits;
- // Calculate the number of bits to be spent on the key frame
+ // Calculate the number of bits to be spent on the key frame.
twopass->kf_bits = (int)((double)kf_boost *
((double)twopass->kf_group_bits / allocation_chunks));
@@ -2277,9 +2168,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
if (twopass->kf_bits > alt_kf_bits)
twopass->kf_bits = alt_kf_bits;
} else {
- // Else if it is much harder than other frames in the group make sure
- // it at least receives an allocation in keeping with its relative
- // error score
+ // Else if it is much harder than other frames in the group make sure
+ // it at least receives an allocation in keeping with its relative
+ // error score.
alt_kf_bits = (int)((double)twopass->bits_left * (kf_mod_err /
DOUBLE_DIVIDE_CHECK(twopass->modified_error_left)));
@@ -2287,16 +2178,12 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->kf_bits = alt_kf_bits;
}
}
-
twopass->kf_group_bits -= twopass->kf_bits;
-
- // Peer frame bit target for this frame
- rc->per_frame_bandwidth = twopass->kf_bits;
- // Convert to a per second bitrate
- cpi->target_bandwidth = (int)(twopass->kf_bits * cpi->output_framerate);
+ // Per frame bit target for this frame.
+ vp9_rc_set_frame_target(cpi, twopass->kf_bits);
}
- // Note the total error score of the kf group minus the key frame itself
+ // Note the total error score of the kf group minus the key frame itself.
twopass->kf_group_error_left = (int)(kf_group_err - kf_mod_err);
// Adjust the count of total modified error left.
@@ -2305,73 +2192,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->modified_error_left -= kf_group_err;
}
-void vp9_get_svc_params(VP9_COMP *cpi) {
- VP9_COMMON *const cm = &cpi->common;
- if ((cm->current_video_frame == 0) ||
- (cm->frame_flags & FRAMEFLAGS_KEY) ||
- (cpi->oxcf.auto_key && (cpi->rc.frames_since_key %
- cpi->key_frame_frequency == 0))) {
- cm->frame_type = KEY_FRAME;
- cpi->rc.source_alt_ref_active = 0;
- } else {
- cm->frame_type = INTER_FRAME;
- }
- cpi->rc.frames_till_gf_update_due = INT_MAX;
- cpi->rc.baseline_gf_interval = INT_MAX;
-}
-
-// Use this macro to turn on/off use of alt-refs in one-pass mode.
-#define USE_ALTREF_FOR_ONE_PASS 1
-
-void vp9_get_one_pass_params(VP9_COMP *cpi) {
- VP9_COMMON *const cm = &cpi->common;
- if (!cpi->refresh_alt_ref_frame &&
- (cm->current_video_frame == 0 ||
- cm->frame_flags & FRAMEFLAGS_KEY ||
- cpi->rc.frames_to_key == 0 ||
- (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
- cm->frame_type = KEY_FRAME;
- cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 &&
- cpi->rc.frames_to_key == 0;
- cpi->rc.frames_to_key = cpi->key_frame_frequency;
- cpi->rc.kf_boost = KEY_FRAME_BOOST;
- cpi->rc.source_alt_ref_active = 0;
- } else {
- cm->frame_type = INTER_FRAME;
- }
- if (cpi->rc.frames_till_gf_update_due == 0) {
- cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL;
- cpi->rc.frames_till_gf_update_due = cpi->rc.baseline_gf_interval;
- // NOTE: frames_till_gf_update_due must be <= frames_to_key.
- if (cpi->rc.frames_till_gf_update_due > cpi->rc.frames_to_key)
- cpi->rc.frames_till_gf_update_due = cpi->rc.frames_to_key;
- cpi->refresh_golden_frame = 1;
- cpi->rc.source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
- cpi->rc.gfu_boost = 1000;
- }
-}
-
-void vp9_get_one_pass_cbr_params(VP9_COMP *cpi) {
- VP9_COMMON *const cm = &cpi->common;
- if ((cm->current_video_frame == 0 ||
- cm->frame_flags & FRAMEFLAGS_KEY ||
- cpi->rc.frames_to_key == 0 ||
- (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
- cm->frame_type = KEY_FRAME;
- cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 &&
- cpi->rc.frames_to_key == 0;
- cpi->rc.frames_to_key = cpi->key_frame_frequency;
- cpi->rc.kf_boost = KEY_FRAME_BOOST;
- cpi->rc.source_alt_ref_active = 0;
- } else {
- cm->frame_type = INTER_FRAME;
- }
- // Don't use gf_update by default in CBR mode.
- cpi->rc.frames_till_gf_update_due = INT_MAX;
- cpi->rc.baseline_gf_interval = INT_MAX;
-}
-
-void vp9_get_first_pass_params(VP9_COMP *cpi) {
+void vp9_rc_get_first_pass_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
if (!cpi->refresh_alt_ref_frame &&
(cm->current_video_frame == 0 ||
@@ -2380,11 +2201,11 @@ void vp9_get_first_pass_params(VP9_COMP *cpi) {
} else {
cm->frame_type = INTER_FRAME;
}
- // Do not use periodic key frames
+ // Do not use periodic key frames.
cpi->rc.frames_to_key = INT_MAX;
}
-void vp9_get_second_pass_params(VP9_COMP *cpi) {
+void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
struct twopass_rc *const twopass = &cpi->twopass;
@@ -2395,37 +2216,30 @@ void vp9_get_second_pass_params(VP9_COMP *cpi) {
double this_frame_intra_error;
double this_frame_coded_error;
+ int target;
if (!twopass->stats_in)
return;
if (cpi->refresh_alt_ref_frame) {
cm->frame_type = INTER_FRAME;
- rc->per_frame_bandwidth = twopass->gf_bits;
+ vp9_rc_set_frame_target(cpi, twopass->gf_bits);
return;
}
vp9_clear_system_state();
if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
- rc->active_worst_quality = cpi->oxcf.cq_level;
+ twopass->active_worst_quality = cpi->oxcf.cq_level;
} else if (cm->current_video_frame == 0) {
// Special case code for first frame.
const int section_target_bandwidth = (int)(twopass->bits_left /
- frames_left);
- const int tmp_q = estimate_max_q(cpi, &twopass->total_left_stats,
- section_target_bandwidth);
-
- rc->active_worst_quality = tmp_q;
+ frames_left);
+ const int tmp_q = vp9_twopass_worst_quality(cpi, &twopass->total_left_stats,
+ section_target_bandwidth);
+ twopass->active_worst_quality = tmp_q;
rc->ni_av_qi = tmp_q;
rc->avg_q = vp9_convert_qindex_to_q(tmp_q);
-
- // Limit the maxq value returned subsequently.
- // This increases the risk of overspend or underspend if the initial
- // estimate for the clip is bad, but helps prevent excessive
- // variation in Q, especially near the end of a clip
- // where for example a small overspend may cause Q to crash
- // adjust_maxq_qrange(cpi);
}
vp9_zero(this_frame);
if (EOF == input_stats(twopass, &this_frame))
@@ -2434,19 +2248,19 @@ void vp9_get_second_pass_params(VP9_COMP *cpi) {
this_frame_intra_error = this_frame.intra_error;
this_frame_coded_error = this_frame.coded_error;
- // keyframe and section processing !
+ // Keyframe and section processing.
if (rc->frames_to_key == 0 ||
(cm->frame_flags & FRAMEFLAGS_KEY)) {
- // Define next KF group and assign bits to it
+ // Define next KF group and assign bits to it.
this_frame_copy = this_frame;
find_next_key_frame(cpi, &this_frame_copy);
} else {
cm->frame_type = INTER_FRAME;
}
- // Is this a GF / ARF (Note that a KF is always also a GF)
+ // Is this frame a GF / ARF? (Note: a key frame is always also a GF).
if (rc->frames_till_gf_update_due == 0) {
- // Define next gf group and assign bits to it
+ // Define next gf group and assign bits to it.
this_frame_copy = this_frame;
#if CONFIG_MULTIPLE_ARF
@@ -2461,18 +2275,19 @@ void vp9_get_second_pass_params(VP9_COMP *cpi) {
if (twopass->gf_zeromotion_pct > 995) {
// As long as max_thresh for encode breakout is small enough, it is ok
- // to enable it for no-show frame, i.e. set enable_encode_breakout to 2.
+ // to enable it for show frames, i.e. set allow_encode_breakout to
+ // ENCODE_BREAKOUT_LIMITED.
if (!cm->show_frame)
- cpi->enable_encode_breakout = 0;
+ cpi->allow_encode_breakout = ENCODE_BREAKOUT_DISABLED;
else
- cpi->enable_encode_breakout = 2;
+ cpi->allow_encode_breakout = ENCODE_BREAKOUT_LIMITED;
}
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
cpi->refresh_golden_frame = 1;
} else {
- // Otherwise this is an ordinary frame
- // Assign bits from those allocated to the GF group
+ // Otherwise this is an ordinary frame.
+ // Assign bits from those allocated to the GF group.
this_frame_copy = this_frame;
assign_std_frame_bits(cpi, &this_frame_copy);
}
@@ -2488,13 +2303,13 @@ void vp9_get_second_pass_params(VP9_COMP *cpi) {
}
}
- // Set nominal per second bandwidth for this frame
- cpi->target_bandwidth = (int)(rc->per_frame_bandwidth *
- cpi->output_framerate);
- if (cpi->target_bandwidth < 0)
- cpi->target_bandwidth = 0;
+ if (cpi->common.frame_type == KEY_FRAME)
+ target = vp9_rc_clamp_iframe_target_size(cpi, rc->this_frame_target);
+ else
+ target = vp9_rc_clamp_pframe_target_size(cpi, rc->this_frame_target);
+ vp9_rc_set_frame_target(cpi, target);
- // Update the total stats remaining structure
+ // Update the total stats remaining structure.
subtract_stats(&twopass->total_left_stats, &this_frame);
}
@@ -2503,5 +2318,18 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
cpi->twopass.bits_left -= cpi->rc.this_frame_target;
#else
cpi->twopass.bits_left -= 8 * bytes_used;
+ // Update the number of bits left in the kf and gf groups to account for
+ // overshoot or undershoot on these frames.
+ if (cm->frame_type == KEY_FRAME) {
+ cpi->twopass.kf_group_bits += cpi->rc.this_frame_target -
+ cpi->rc.projected_frame_size;
+
+ cpi->twopass.kf_group_bits = MAX(cpi->twopass.kf_group_bits, 0);
+ } else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) {
+ cpi->twopass.gf_group_bits += cpi->rc.this_frame_target -
+ cpi->rc.projected_frame_size;
+
+ cpi->twopass.gf_group_bits = MAX(cpi->twopass.gf_group_bits, 0);
+ }
#endif
}
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index ca5b10080..83e337b6d 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -10,25 +10,92 @@
#ifndef VP9_ENCODER_VP9_FIRSTPASS_H_
#define VP9_ENCODER_VP9_FIRSTPASS_H_
-#include "vp9/encoder/vp9_onyx_int.h"
#ifdef __cplusplus
extern "C" {
#endif
-void vp9_init_first_pass(VP9_COMP *cpi);
-void vp9_first_pass(VP9_COMP *cpi);
-void vp9_end_first_pass(VP9_COMP *cpi);
+typedef struct {
+ double frame;
+ double intra_error;
+ double coded_error;
+ double sr_coded_error;
+ double ssim_weighted_pred_err;
+ double pcnt_inter;
+ double pcnt_motion;
+ double pcnt_second_ref;
+ double pcnt_neutral;
+ double MVr;
+ double mvr_abs;
+ double MVc;
+ double mvc_abs;
+ double MVrv;
+ double MVcv;
+ double mv_in_out_count;
+ double new_mv_count;
+ double duration;
+ double count;
+} FIRSTPASS_STATS;
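These per-frame stats are combined over a section with the zero_stats()/accumulate_stats()/avg_stats() helpers that appear earlier in this patch; a minimal sketch of that pattern (assuming the obvious field-wise sum and average semantics):

static void section_average(const FIRSTPASS_STATS *frames, int n,
                            FIRSTPASS_STATS *out) {
  int i;
  zero_stats(out);                      // clear all accumulators
  for (i = 0; i < n; ++i)
    accumulate_stats(out, &frames[i]);  // field-wise sum
  avg_stats(out);                       // divide by the accumulated count
}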
-void vp9_init_second_pass(VP9_COMP *cpi);
-void vp9_get_second_pass_params(VP9_COMP *cpi);
-void vp9_end_second_pass(VP9_COMP *cpi);
+struct twopass_rc {
+ unsigned int section_intra_rating;
+ unsigned int next_iiratio;
+ unsigned int this_iiratio;
+ FIRSTPASS_STATS total_stats;
+ FIRSTPASS_STATS this_frame_stats;
+ FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start;
+ FIRSTPASS_STATS total_left_stats;
+ int first_pass_done;
+ int64_t bits_left;
+ int64_t clip_bits_total;
+ double avg_iiratio;
+ double modified_error_min;
+ double modified_error_max;
+ double modified_error_total;
+ double modified_error_left;
+ double kf_intra_err_min;
+ double gf_intra_err_min;
+ int static_scene_max_gf_interval;
+ int kf_bits;
+ // Remaining error from uncoded frames in a gf group. Two-pass use only.
+ int64_t gf_group_error_left;
-void vp9_get_first_pass_params(VP9_COMP *cpi);
-void vp9_get_one_pass_params(VP9_COMP *cpi);
-void vp9_get_one_pass_cbr_params(VP9_COMP *cpi);
-void vp9_get_svc_params(VP9_COMP *cpi);
+ // Projected total bits available for a key frame group of frames
+ int64_t kf_group_bits;
+ // Error score of frames still to be coded in kf group
+ int64_t kf_group_error_left;
+
+ // Projected bits available for a group of frames including 1 GF or ARF.
+ int64_t gf_group_bits;
+ // Bits for the golden frame or ARF - two-pass only.
+ int gf_bits;
+ int alt_extra_bits;
+
+ int sr_update_lag;
+
+ int kf_zeromotion_pct;
+ int gf_zeromotion_pct;
+
+ int active_worst_quality;
+};
+
+struct VP9_COMP;
+
+void vp9_init_first_pass(struct VP9_COMP *cpi);
+void vp9_rc_get_first_pass_params(struct VP9_COMP *cpi);
+void vp9_first_pass(struct VP9_COMP *cpi);
+void vp9_end_first_pass(struct VP9_COMP *cpi);
+
+void vp9_init_second_pass(struct VP9_COMP *cpi);
+void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi);
+void vp9_end_second_pass(struct VP9_COMP *cpi);
+int vp9_twopass_worst_quality(struct VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
+ int section_target_bandwidth);
+
+// Post-encode update of the rate control parameters for two-pass.
+void vp9_twopass_postencode_update(struct VP9_COMP *cpi,
+ uint64_t bytes_used);
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c
index e6e59c05a..4b642e2b6 100644
--- a/vp9/encoder/vp9_lookahead.c
+++ b/vp9/encoder/vp9_lookahead.c
@@ -11,9 +11,12 @@
#include <stdlib.h>
#include "./vpx_config.h"
+
#include "vp9/common/vp9_common.h"
+
#include "vp9/encoder/vp9_extend.h"
#include "vp9/encoder/vp9_lookahead.h"
+#include "vp9/encoder/vp9_onyx_int.h"
struct lookahead_ctx {
unsigned int max_sz; /* Absolute size of the queue */
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index c50098678..44c1f9078 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -29,7 +29,6 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
- unsigned int best_err;
const int tmp_col_min = x->mv_col_min;
const int tmp_col_max = x->mv_col_max;
@@ -48,27 +47,22 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
ref_full.row = ref_mv->row >> 3;
/*cpi->sf.search_method == HEX*/
- best_err = vp9_hex_search(x, &ref_full, step_param, x->errorperbit,
- 0, &v_fn_ptr, 0, ref_mv, dst_mv);
+ vp9_hex_search(x, &ref_full, step_param, x->errorperbit, 0, &v_fn_ptr, 0,
+ ref_mv, dst_mv);
// Try sub-pixel MC
// if (bestsme > error_thresh && bestsme < INT_MAX)
{
int distortion;
unsigned int sse;
- best_err = cpi->find_fractional_mv_step(
- x, dst_mv, ref_mv,
- cpi->common.allow_high_precision_mv,
- x->errorperbit, &v_fn_ptr,
- 0, cpi->sf.subpel_iters_per_step, NULL, NULL,
- & distortion, &sse);
+ cpi->find_fractional_mv_step(
+ x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
+ &v_fn_ptr, 0, cpi->sf.subpel_iters_per_step, NULL, NULL, &distortion,
+ &sse);
}
vp9_set_mbmode_and_mvs(xd, NEWMV, dst_mv);
vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16);
- best_err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
- xd->plane[0].dst.buf, xd->plane[0].dst.stride,
- INT_MAX);
/* restore UMV window */
x->mv_col_min = tmp_col_min;
@@ -76,7 +70,9 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
x->mv_row_min = tmp_row_min;
x->mv_row_max = tmp_row_max;
- return best_err;
+ return vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
+ xd->plane[0].dst.buf, xd->plane[0].dst.stride,
+ INT_MAX);
}
static int do_16x16_motion_search(VP9_COMP *cpi, const int_mv *ref_mv,
@@ -355,7 +351,7 @@ static void separate_arf_mbs(VP9_COMP *cpi) {
for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
// If any of the blocks in the sequence failed then the MB
// goes in segment 0
- if (arf_not_zz[mi_row/2*cm->mb_cols + mi_col/2]) {
+ if (arf_not_zz[mi_row / 2 * cm->mb_cols + mi_col / 2]) {
ncnt[0]++;
cpi->segmentation_map[mi_row * cm->mi_cols + mi_col] = 0;
} else {
@@ -423,7 +419,7 @@ void vp9_update_mbgraph_stats(VP9_COMP *cpi) {
golden_ref, cpi->Source);
}
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
separate_arf_mbs(cpi);
}
diff --git a/vp9/encoder/vp9_mbgraph.h b/vp9/encoder/vp9_mbgraph.h
index 79dd2bc95..bc2a7048f 100644
--- a/vp9/encoder/vp9_mbgraph.h
+++ b/vp9/encoder/vp9_mbgraph.h
@@ -15,7 +15,23 @@
extern "C" {
#endif
-void vp9_update_mbgraph_stats(VP9_COMP *cpi);
+typedef struct {
+ struct {
+ int err;
+ union {
+ int_mv mv;
+ MB_PREDICTION_MODE mode;
+ } m;
+ } ref[MAX_REF_FRAMES];
+} MBGRAPH_MB_STATS;
+
+typedef struct {
+ MBGRAPH_MB_STATS *mb_stats;
+} MBGRAPH_FRAME_STATS;
+
+struct VP9_COMP;
+
+void vp9_update_mbgraph_stats(struct VP9_COMP *cpi);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index ec9934a30..7d6fd3b99 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -349,6 +349,10 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
tr = br;
tc = bc;
}
+ // These lines ensure static analysis doesn't warn that
+ // tr and tc aren't used after the above point.
+ (void) tr;
+ (void) tc;
bestmv->row = br;
bestmv->col = bc;
@@ -452,6 +456,11 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x,
tr = br;
tc = bc;
}
+ // These lines ensure static analysis doesn't warn that
+ // tr and tc aren't used after the above point.
+ (void) tr;
+ (void) tc;
+
bestmv->row = br;
bestmv->col = bc;
@@ -466,7 +475,6 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x,
#undef PRE
#undef DIST
#undef CHECK_BETTER
-#undef SP
static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
int range) {
@@ -476,11 +484,9 @@ static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
((col + range) <= x->mv_col_max);
}
-static INLINE int check_point(const MACROBLOCK *x, const MV *mv) {
- return (mv->col < x->mv_col_min) |
- (mv->col > x->mv_col_max) |
- (mv->row < x->mv_row_min) |
- (mv->row > x->mv_row_max);
+static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) {
+ return (mv->col >= x->mv_col_min) && (mv->col <= x->mv_col_max) &&
+ (mv->row >= x->mv_row_min) && (mv->row <= x->mv_row_max);
}
#define CHECK_BETTER \
@@ -496,11 +502,6 @@ static INLINE int check_point(const MACROBLOCK *x, const MV *mv) {
}\
}
-#define get_next_chkpts(list, i, n) \
- list[0] = ((i) == 0 ? (n) - 1 : (i) - 1); \
- list[1] = (i); \
- list[2] = ((i) == (n) - 1 ? 0 : (i) + 1);
-
#define MAX_PATTERN_SCALES 11
#define MAX_PATTERN_CANDIDATES 8 // max number of candidates per scale
#define PATTERN_CANDIDATES_REF 3 // number of refinement candidates
@@ -578,7 +579,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
for (i = 0; i < num_candidates[t]; i++) {
this_mv.row = br + candidates[t][i].row;
this_mv.col = bc + candidates[t][i].col;
- if (check_point(x, &this_mv))
+ if (!is_mv_in(x, &this_mv))
continue;
this_offset = base_offset + (this_mv.row * in_what_stride) +
this_mv.col;
@@ -622,7 +623,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
for (i = 0; i < num_candidates[s]; i++) {
this_mv.row = br + candidates[s][i].row;
this_mv.col = bc + candidates[s][i].col;
- if (check_point(x, &this_mv))
+ if (!is_mv_in(x, &this_mv))
continue;
this_offset = base_offset + (this_mv.row * in_what_stride) +
this_mv.col;
@@ -644,7 +645,10 @@ static int vp9_pattern_search(const MACROBLOCK *x,
do {
int next_chkpts_indices[PATTERN_CANDIDATES_REF];
best_site = -1;
- get_next_chkpts(next_chkpts_indices, k, num_candidates[s]);
+ next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
+ next_chkpts_indices[1] = k;
+ next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
+
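For example, with num_candidates[s] = 8 and k = 0 the three indices checked are {7, 0, 1}, and with k = 7 they are {6, 7, 0}: the previous best candidate and its two ring neighbours, with wrap-around at the ends of the candidate list.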
if (check_bounds(x, br, bc, 1 << s)) {
for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
this_mv.row = br + candidates[s][next_chkpts_indices[i]].row;
@@ -659,7 +663,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
this_mv.row = br + candidates[s][next_chkpts_indices[i]].row;
this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col;
- if (check_point(x, &this_mv))
+ if (!is_mv_in(x, &this_mv))
continue;
this_offset = base_offset + (this_mv.row * (in_what_stride)) +
this_mv.col;
@@ -698,7 +702,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
for (i = 0; i < 4; i++) {
this_mv.row = br + neighbors[i].row;
this_mv.col = bc + neighbors[i].col;
- if (check_point(x, &this_mv))
+ if (!is_mv_in(x, &this_mv))
continue;
this_offset = base_offset + this_mv.row * in_what_stride +
this_mv.col;
@@ -851,12 +855,191 @@ int vp9_square_search(const MACROBLOCK *x,
square_num_candidates, square_candidates);
};
+// Number of candidates in first hex search
+#define FIRST_HEX_CANDIDATES 6
+// Index of previous hex search's best match
+#define PRE_BEST_CANDIDATE 6
+// Number of candidates in following hex search
+#define NEXT_HEX_CANDIDATES 3
+// Number of candidates in refining search
+#define REFINE_CANDIDATES 4
+
+int vp9_fast_hex_search(const MACROBLOCK *x,
+ MV *ref_mv,
+ int search_param,
+ int sad_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ int use_mvcost,
+ const MV *center_mv,
+ MV *best_mv) {
+ const MACROBLOCKD* const xd = &x->e_mbd;
+ static const MV hex[FIRST_HEX_CANDIDATES] = {
+ { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}
+ };
+ static const MV next_chkpts[PRE_BEST_CANDIDATE][NEXT_HEX_CANDIDATES] = {
+ {{ -2, 0}, { -1, -2}, {1, -2}},
+ {{ -1, -2}, {1, -2}, {2, 0}},
+ {{1, -2}, {2, 0}, {1, 2}},
+ {{2, 0}, {1, 2}, { -1, 2}},
+ {{1, 2}, { -1, 2}, { -2, 0}},
+ {{ -1, 2}, { -2, 0}, { -1, -2}}
+ };
+ static const MV neighbors[REFINE_CANDIDATES] = {
+ {0, -1}, { -1, 0}, {1, 0}, {0, 1}
+ };
+ int i, j;
+
+ const uint8_t *what = x->plane[0].src.buf;
+ const int what_stride = x->plane[0].src.stride;
+ const int in_what_stride = xd->plane[0].pre[0].stride;
+ int br, bc;
+ MV this_mv;
+ unsigned int bestsad = 0x7fffffff;
+ unsigned int thissad;
+ const uint8_t *base_offset;
+ const uint8_t *this_offset;
+ int k = -1;
+ int best_site = -1;
+ const int max_hex_search = 512;
+ const int max_dia_search = 32;
+
+ const int *mvjsadcost = x->nmvjointsadcost;
+ int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+
+ const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+
+ // Adjust ref_mv to make sure it is within MV range
+ clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+ br = ref_mv->row;
+ bc = ref_mv->col;
+
+ // Check the start point
+ base_offset = xd->plane[0].pre[0].buf;
+ this_offset = base_offset + (br * in_what_stride) + bc;
+ this_mv.row = br;
+ this_mv.col = bc;
+ bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff)
+ + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost,
+ sad_per_bit);
+
+ // Initial 6-point hex search
+ if (check_bounds(x, br, bc, 2)) {
+ for (i = 0; i < FIRST_HEX_CANDIDATES; i++) {
+ this_mv.row = br + hex[i].row;
+ this_mv.col = bc + hex[i].col;
+ this_offset = base_offset + (this_mv.row * in_what_stride) + this_mv.col;
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
+ bestsad);
+ CHECK_BETTER
+ }
+ } else {
+ for (i = 0; i < FIRST_HEX_CANDIDATES; i++) {
+ this_mv.row = br + hex[i].row;
+ this_mv.col = bc + hex[i].col;
+ if (!is_mv_in(x, &this_mv))
+ continue;
+ this_offset = base_offset + (this_mv.row * in_what_stride) + this_mv.col;
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
+ bestsad);
+ CHECK_BETTER
+ }
+ }
+
+ // Continue hex search if we find a better match in first round
+ if (best_site != -1) {
+ br += hex[best_site].row;
+ bc += hex[best_site].col;
+ k = best_site;
+
+ // Allow search covering maximum MV range
+ for (j = 1; j < max_hex_search; j++) {
+ best_site = -1;
+
+ if (check_bounds(x, br, bc, 2)) {
+ for (i = 0; i < 3; i++) {
+ this_mv.row = br + next_chkpts[k][i].row;
+ this_mv.col = bc + next_chkpts[k][i].col;
+ this_offset = base_offset + (this_mv.row * in_what_stride) +
+ this_mv.col;
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
+ bestsad);
+ CHECK_BETTER
+ }
+ } else {
+ for (i = 0; i < 3; i++) {
+ this_mv.row = br + next_chkpts[k][i].row;
+ this_mv.col = bc + next_chkpts[k][i].col;
+ if (!is_mv_in(x, &this_mv))
+ continue;
+ this_offset = base_offset + (this_mv.row * in_what_stride) +
+ this_mv.col;
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
+ bestsad);
+ CHECK_BETTER
+ }
+ }
+
+ if (best_site == -1) {
+ break;
+ } else {
+ br += next_chkpts[k][best_site].row;
+ bc += next_chkpts[k][best_site].col;
+ k += 5 + best_site;
+ if (k >= 12) k -= 12;
+ else if (k >= 6) k -= 6;
+ }
+ }
+ }
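A note on the index update above: k stays in [0, 6) and best_site is in [0, 3), so k += 5 + best_site followed by the two conditional subtractions is equivalent to k = (k + 5 + best_site) % 6; it records the hex direction just moved in, which next_chkpts then uses to pick the three forward candidates on the next iteration.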
+
+ // Check 4 1-away neighbors
+ for (j = 0; j < max_dia_search; j++) {
+ best_site = -1;
+
+ if (check_bounds(x, br, bc, 1)) {
+ for (i = 0; i < REFINE_CANDIDATES; i++) {
+ this_mv.row = br + neighbors[i].row;
+ this_mv.col = bc + neighbors[i].col;
+ this_offset = base_offset + (this_mv.row * in_what_stride) +
+ this_mv.col;
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
+ bestsad);
+ CHECK_BETTER
+ }
+ } else {
+ for (i = 0; i < REFINE_CANDIDATES; i++) {
+ this_mv.row = br + neighbors[i].row;
+ this_mv.col = bc + neighbors[i].col;
+ if (!is_mv_in(x, &this_mv))
+ continue;
+ this_offset = base_offset + (this_mv.row * in_what_stride) +
+ this_mv.col;
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
+ bestsad);
+ CHECK_BETTER
+ }
+ }
+
+ if (best_site == -1) {
+ break;
+ } else {
+ br += neighbors[best_site].row;
+ bc += neighbors[best_site].col;
+ }
+ }
+
+ best_mv->row = br;
+ best_mv->col = bc;
+
+ return bestsad;
+}
+
#undef CHECK_BETTER
int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
int search_param, int sad_per_bit, int *num00,
- vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
- int *mvcost[2], const MV *center_mv) {
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ int *mvjcost, int *mvcost[2],
+ const MV *center_mv) {
const MACROBLOCKD *const xd = &x->e_mbd;
const uint8_t *what = x->plane[0].src.buf;
const int what_stride = x->plane[0].src.stride;
@@ -866,10 +1049,10 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
MV this_mv;
- int bestsad = INT_MAX;
+ unsigned int bestsad = INT_MAX;
int ref_row, ref_col;
- int thissad;
+ unsigned int thissad;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
const int *mvjsadcost = x->nmvjointsadcost;
@@ -970,8 +1153,9 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
int vp9_diamond_search_sad_c(const MACROBLOCK *x,
MV *ref_mv, MV *best_mv,
int search_param, int sad_per_bit, int *num00,
- vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
- int *mvcost[2], const MV *center_mv) {
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ int *mvjcost, int *mvcost[2],
+ const MV *center_mv) {
int i, j, step;
const MACROBLOCKD *const xd = &x->e_mbd;
@@ -1104,7 +1288,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
int vp9_diamond_search_sadx4(const MACROBLOCK *x,
MV *ref_mv, MV *best_mv, int search_param,
int sad_per_bit, int *num00,
- vp9_variance_fn_ptr_t *fn_ptr,
+ const vp9_variance_fn_ptr_t *fn_ptr,
int *mvjcost, int *mvcost[2],
const MV *center_mv) {
int i, j, step;
@@ -1283,148 +1467,122 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x,
MV *mvp_full, int step_param,
- int sadpb, int further_steps,
- int do_refine, vp9_variance_fn_ptr_t *fn_ptr,
- const MV *ref_mv, int_mv *dst_mv) {
- int_mv temp_mv;
- int thissme, n, num00;
- int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv.as_mv,
- step_param, sadpb, &num00,
+ int sadpb, int further_steps, int do_refine,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const MV *ref_mv, MV *dst_mv) {
+ MV temp_mv;
+ int thissme, n, num00 = 0;
+ int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
+ step_param, sadpb, &n,
fn_ptr, x->nmvjointcost,
x->mvcost, ref_mv);
- dst_mv->as_int = temp_mv.as_int;
-
- n = num00;
- num00 = 0;
+ *dst_mv = temp_mv;
- /* If there won't be more n-step search, check to see if refining search is
- * needed. */
+ // If there won't be any more n-step searches, check to see if a refining
+ // search is needed.
if (n > further_steps)
do_refine = 0;
while (n < further_steps) {
- n++;
+ ++n;
if (num00) {
num00--;
} else {
- thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv.as_mv,
+ thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
step_param + n, sadpb, &num00,
fn_ptr, x->nmvjointcost, x->mvcost,
ref_mv);
- /* check to see if refining search is needed. */
- if (num00 > (further_steps - n))
+ // Check to see if a refining search is needed.
+ if (num00 > further_steps - n)
do_refine = 0;
if (thissme < bestsme) {
bestsme = thissme;
- dst_mv->as_int = temp_mv.as_int;
+ *dst_mv = temp_mv;
}
}
}
- /* final 1-away diamond refining search */
- if (do_refine == 1) {
- int search_range = 8;
- int_mv best_mv;
- best_mv.as_int = dst_mv->as_int;
- thissme = cpi->refining_search_sad(x, &best_mv.as_mv, sadpb, search_range,
+ // Final 1-away diamond refining search.
+ if (do_refine) {
+ const int search_range = 8;
+ MV best_mv = *dst_mv;
+ thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range,
fn_ptr, x->nmvjointcost, x->mvcost,
ref_mv);
-
if (thissme < bestsme) {
bestsme = thissme;
- dst_mv->as_int = best_mv.as_int;
+ *dst_mv = best_mv;
}
}
+
return bestsme;
}
-int vp9_full_search_sad_c(const MACROBLOCK *x, MV *ref_mv,
+int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
int sad_per_bit, int distance,
- vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
- int *mvcost[2],
- const MV *center_mv, int n) {
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ int *mvjcost, int *mvcost[2],
+ const MV *center_mv, MV *best_mv) {
+ int r, c;
const MACROBLOCKD *const xd = &x->e_mbd;
const uint8_t *const what = x->plane[0].src.buf;
const int what_stride = x->plane[0].src.stride;
const uint8_t *const in_what = xd->plane[0].pre[0].buf;
const int in_what_stride = xd->plane[0].pre[0].stride;
- MV *best_mv = &xd->mi_8x8[0]->bmi[n].as_mv[0].as_mv;
- MV this_mv;
- int bestsad = INT_MAX;
- int r, c;
- int thissad;
- int ref_row = ref_mv->row;
- int ref_col = ref_mv->col;
- // Apply further limits to prevent us looking using vectors that stretch
- // beyond the UMV border
- const int row_min = MAX(ref_row - distance, x->mv_row_min);
- const int row_max = MIN(ref_row + distance, x->mv_row_max);
- const int col_min = MAX(ref_col - distance, x->mv_col_min);
- const int col_max = MIN(ref_col + distance, x->mv_col_max);
- const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+ const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
+ const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
+ const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
+ const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
const int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
- // Work out the mid point for the search
- const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
-
- best_mv->row = ref_row;
- best_mv->col = ref_col;
-
- // Baseline value at the centre
- bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
- in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
-
- for (r = row_min; r < row_max; r++) {
- const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
- this_mv.row = r;
-
- for (c = col_min; c < col_max; c++) {
- thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
- bestsad);
-
- this_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
-
- if (thissad < bestsad) {
- bestsad = thissad;
- best_mv->row = r;
- best_mv->col = c;
- bestaddress = check_here;
+ const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+ const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride +
+ ref_mv->col];
+ int best_sad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride,
+ 0x7fffffff) +
+ mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit);
+ *best_mv = *ref_mv;
+
+ for (r = row_min; r < row_max; ++r) {
+ for (c = col_min; c < col_max; ++c) {
+ const MV this_mv = {r, c};
+ const uint8_t *check_here = &in_what[r * in_what_stride + c];
+ const int sad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
+ best_sad) +
+ mvsad_err_cost(&this_mv, &fcenter_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
+
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = this_mv;
+ best_address = check_here;
}
-
- check_here++;
}
}
- this_mv.row = best_mv->row * 8;
- this_mv.col = best_mv->col * 8;
-
- if (bestsad < INT_MAX)
- return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
- (unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv,
- mvjcost, mvcost, x->errorperbit);
- else
+ if (best_sad < INT_MAX) {
+ unsigned int unused;
+ const MV mv = {best_mv->row * 8, best_mv->col * 8};
+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &unused)
+ + mv_err_cost(&mv, center_mv, mvjcost, mvcost, x->errorperbit);
+ } else {
return INT_MAX;
+ }
}
-int vp9_full_search_sadx3(const MACROBLOCK *x, MV *ref_mv,
+int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
int sad_per_bit, int distance,
- vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
- int *mvcost[2], const MV *center_mv, int n) {
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ int *mvjcost, int *mvcost[2],
+ const MV *center_mv, MV *best_mv) {
const MACROBLOCKD *const xd = &x->e_mbd;
const uint8_t *const what = x->plane[0].src.buf;
const int what_stride = x->plane[0].src.stride;
const uint8_t *const in_what = xd->plane[0].pre[0].buf;
const int in_what_stride = xd->plane[0].pre[0].stride;
- MV *best_mv = &xd->mi_8x8[0]->bmi[n].as_mv[0].as_mv;
MV this_mv;
unsigned int bestsad = INT_MAX;
int r, c;
@@ -1520,17 +1678,16 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, MV *ref_mv,
return INT_MAX;
}
-int vp9_full_search_sadx8(const MACROBLOCK *x, MV *ref_mv,
+int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
int sad_per_bit, int distance,
- vp9_variance_fn_ptr_t *fn_ptr,
+ const vp9_variance_fn_ptr_t *fn_ptr,
int *mvjcost, int *mvcost[2],
- const MV *center_mv, int n) {
+ const MV *center_mv, MV *best_mv) {
const MACROBLOCKD *const xd = &x->e_mbd;
const uint8_t *const what = x->plane[0].src.buf;
const int what_stride = x->plane[0].src.stride;
const uint8_t *const in_what = xd->plane[0].pre[0].buf;
const int in_what_stride = xd->plane[0].pre[0].stride;
- MV *best_mv = &xd->mi_8x8[0]->bmi[n].as_mv[0].as_mv;
MV this_mv;
unsigned int bestsad = INT_MAX;
int r, c;
@@ -1656,7 +1813,8 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, MV *ref_mv,
int vp9_refining_search_sad_c(const MACROBLOCK *x,
MV *ref_mv, int error_per_bit,
- int search_range, vp9_variance_fn_ptr_t *fn_ptr,
+ int search_range,
+ const vp9_variance_fn_ptr_t *fn_ptr,
int *mvjcost, int *mvcost[2],
const MV *center_mv) {
const MACROBLOCKD *const xd = &x->e_mbd;
@@ -1669,11 +1827,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x,
const uint8_t *const in_what = xd->plane[0].pre[0].buf;
const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride +
ref_mv->col];
- unsigned int thissad;
-
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
- MV this_mv;
-
const int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
@@ -1685,18 +1839,13 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x,
int best_site = -1;
for (j = 0; j < 4; j++) {
- this_mv.row = ref_mv->row + neighbors[j].row;
- this_mv.col = ref_mv->col + neighbors[j].col;
-
- if ((this_mv.col > x->mv_col_min) &&
- (this_mv.col < x->mv_col_max) &&
- (this_mv.row > x->mv_row_min) &&
- (this_mv.row < x->mv_row_max)) {
+ const MV this_mv = {ref_mv->row + neighbors[j].row,
+ ref_mv->col + neighbors[j].col};
+ if (is_mv_in(x, &this_mv)) {
const uint8_t *check_here = &in_what[this_mv.row * in_what_stride +
this_mv.col];
- thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
- bestsad);
-
+ unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
+ in_what_stride, bestsad);
if (thissad < bestsad) {
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
mvjsadcost, mvsadcost, error_per_bit);
@@ -1718,20 +1867,21 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x,
}
}
- this_mv.row = ref_mv->row * 8;
- this_mv.col = ref_mv->col * 8;
-
- if (bestsad < INT_MAX)
+ if (bestsad < INT_MAX) {
+ unsigned int unused;
+ const MV mv = {ref_mv->row * 8, ref_mv->col * 8};
return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
- (unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
- else
+ &unused) +
+ mv_err_cost(&mv, center_mv, mvjcost, mvcost, x->errorperbit);
+ } else {
return INT_MAX;
+ }
}
int vp9_refining_search_sadx4(const MACROBLOCK *x,
MV *ref_mv, int error_per_bit,
- int search_range, vp9_variance_fn_ptr_t *fn_ptr,
+ int search_range,
+ const vp9_variance_fn_ptr_t *fn_ptr,
int *mvjcost, int *mvcost[2],
const MV *center_mv) {
const MACROBLOCKD *const xd = &x->e_mbd;
@@ -1844,8 +1994,10 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x,
// mode.
int vp9_refining_search_8p_c(const MACROBLOCK *x,
MV *ref_mv, int error_per_bit,
- int search_range, vp9_variance_fn_ptr_t *fn_ptr,
- int *mvjcost, int *mvcost[2], const MV *center_mv,
+ int search_range,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ int *mvjcost, int *mvcost[2],
+ const MV *center_mv,
const uint8_t *second_pred, int w, int h) {
const MACROBLOCKD *const xd = &x->e_mbd;
const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
@@ -1878,10 +2030,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
this_mv.row = ref_mv->row + neighbors[j].row;
this_mv.col = ref_mv->col + neighbors[j].col;
- if ((this_mv.col > x->mv_col_min) &&
- (this_mv.col < x->mv_col_max) &&
- (this_mv.row > x->mv_row_min) &&
- (this_mv.row < x->mv_row_max)) {
+ if (is_mv_in(x, &this_mv)) {
const uint8_t *check_here = &in_what[this_mv.row * in_what_stride +
this_mv.col];
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 28b46b503..586a74c9c 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -45,8 +45,8 @@ int vp9_init_search_range(struct VP9_COMP *cpi, int size);
int vp9_full_pixel_diamond(struct VP9_COMP *cpi, MACROBLOCK *x,
MV *mvp_full, int step_param,
int sadpb, int further_steps, int do_refine,
- vp9_variance_fn_ptr_t *fn_ptr,
- const MV *ref_mv, int_mv *dst_mv);
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const MV *ref_mv, MV *dst_mv);
int vp9_hex_search(const MACROBLOCK *x,
MV *ref_mv,
@@ -75,6 +75,14 @@ int vp9_square_search(const MACROBLOCK *x,
int use_mvcost,
const MV *center_mv,
MV *best_mv);
+int vp9_fast_hex_search(const MACROBLOCK *x,
+ MV *ref_mv,
+ int search_param,
+ int sad_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ int use_mvcost,
+ const MV *center_mv,
+ MV *best_mv);
typedef int (fractional_mv_step_fp) (
const MACROBLOCK *x,
@@ -107,15 +115,16 @@ typedef int (fractional_mv_step_comp_fp) (
extern fractional_mv_step_comp_fp vp9_find_best_sub_pixel_comp_tree;
typedef int (*vp9_full_search_fn_t)(const MACROBLOCK *x,
- MV *ref_mv, int sad_per_bit,
- int distance, vp9_variance_fn_ptr_t *fn_ptr,
+ const MV *ref_mv, int sad_per_bit,
+ int distance,
+ const vp9_variance_fn_ptr_t *fn_ptr,
int *mvjcost, int *mvcost[2],
- const MV *center_mv, int n);
+ const MV *center_mv, MV *best_mv);
typedef int (*vp9_refining_search_fn_t)(const MACROBLOCK *x,
MV *ref_mv, int sad_per_bit,
int distance,
- vp9_variance_fn_ptr_t *fn_ptr,
+ const vp9_variance_fn_ptr_t *fn_ptr,
int *mvjcost, int *mvcost[2],
const MV *center_mv);
@@ -123,13 +132,14 @@ typedef int (*vp9_diamond_search_fn_t)(const MACROBLOCK *x,
MV *ref_mv, MV *best_mv,
int search_param, int sad_per_bit,
int *num00,
- vp9_variance_fn_ptr_t *fn_ptr,
+ const vp9_variance_fn_ptr_t *fn_ptr,
int *mvjcost, int *mvcost[2],
const MV *center_mv);
int vp9_refining_search_8p_c(const MACROBLOCK *x,
MV *ref_mv, int error_per_bit,
- int search_range, vp9_variance_fn_ptr_t *fn_ptr,
+ int search_range,
+ const vp9_variance_fn_ptr_t *fn_ptr,
int *mvjcost, int *mvcost[2],
const MV *center_mv, const uint8_t *second_pred,
int w, int h);
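
The constification and the new MV *best_mv out-parameter run through every search-function typedef in this header, so any implementation can be swapped in behind the same pointer. A hedged sketch of the dispatch idiom these typedefs enable (signatures simplified; exhaustive_search is a hypothetical stub, not a vp9 function):

typedef struct { int row, col; } MV;

/* Simplified analogue of vp9_full_search_fn_t: the best cost comes back
 * as the return value, the winning vector through *best_mv. */
typedef int (*full_search_fn_t)(const MV *ref_mv, int sad_per_bit,
                                int distance, MV *best_mv);

/* Hypothetical stub implementation used only to show the wiring. */
static int exhaustive_search(const MV *ref_mv, int sad_per_bit,
                             int distance, MV *best_mv) {
  (void)sad_per_bit;
  (void)distance;
  *best_mv = *ref_mv;  /* a real search would scan the window */
  return 0;
}

/* Callers hold a pointer and never care which strategy is behind it. */
static int run_search(full_search_fn_t search, const MV *ref_mv, MV *out) {
  return search(ref_mv, 100 /* sad_per_bit */, 16 /* distance */, out);
}
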
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index a9b0718c8..95ebb0c6d 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -14,6 +14,8 @@
#include "./vpx_config.h"
#include "./vpx_scale_rtcd.h"
+#include "vpx/internal/vpx_psnr.h"
+#include "vpx_ports/vpx_timer.h"
#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_filter.h"
@@ -30,7 +32,6 @@
#include "vp9/encoder/vp9_mbgraph.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_picklpf.h"
-#include "vp9/encoder/vp9_psnr.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_segmentation.h"
@@ -38,8 +39,6 @@
#include "vp9/encoder/vp9_vaq.h"
#include "vp9/encoder/vp9_resize.h"
-#include "vpx_ports/vpx_timer.h"
-
void vp9_entropy_mode_init();
void vp9_coef_tree_initialize();
@@ -93,19 +92,10 @@ FILE *kf_list;
FILE *keyfile;
#endif
-#ifdef SPEEDSTATS
-unsigned int frames_at_speed[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0};
-#endif
-
-#if defined(SECTIONBITS_OUTPUT)
-extern unsigned __int64 Sectionbits[500];
-#endif
-
-extern void vp9_init_quantizer(VP9_COMP *cpi);
+void vp9_init_quantizer(VP9_COMP *cpi);
static const double in_frame_q_adj_ratio[MAX_SEGMENTS] =
- {1.0, 1.5, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+ {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
static INLINE void Scale2Ratio(int mode, int *hr, int *hs) {
switch (mode) {
@@ -163,20 +153,22 @@ void vp9_initialize_enc() {
}
static void dealloc_compressor_data(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+
  // Delete segmentation map
vpx_free(cpi->segmentation_map);
- cpi->segmentation_map = 0;
- vpx_free(cpi->common.last_frame_seg_map);
- cpi->common.last_frame_seg_map = 0;
+ cpi->segmentation_map = NULL;
+ vpx_free(cm->last_frame_seg_map);
+ cm->last_frame_seg_map = NULL;
vpx_free(cpi->coding_context.last_frame_seg_map_copy);
- cpi->coding_context.last_frame_seg_map_copy = 0;
+ cpi->coding_context.last_frame_seg_map_copy = NULL;
vpx_free(cpi->complexity_map);
cpi->complexity_map = 0;
vpx_free(cpi->active_map);
cpi->active_map = 0;
- vp9_free_frame_buffers(&cpi->common);
+ vp9_free_frame_buffers(cm);
vp9_free_frame_buffer(&cpi->last_frame_uf);
vp9_free_frame_buffer(&cpi->scaled_source);
@@ -203,19 +195,20 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
// to a target q value.
int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget) {
+ const RATE_CONTROL *const rc = &cpi->rc;
+ int start_index = rc->worst_quality;
+ int target_index = rc->worst_quality;
int i;
- int start_index = cpi->rc.worst_quality;
- int target_index = cpi->rc.worst_quality;
// Convert the average q value to an index.
- for (i = cpi->rc.best_quality; i < cpi->rc.worst_quality; i++) {
+ for (i = rc->best_quality; i < rc->worst_quality; ++i) {
start_index = i;
if (vp9_convert_qindex_to_q(i) >= qstart)
break;
}
// Convert the q target to an index
- for (i = cpi->rc.best_quality; i < cpi->rc.worst_quality; i++) {
+ for (i = rc->best_quality; i < rc->worst_quality; ++i) {
target_index = i;
if (vp9_convert_qindex_to_q(i) >= qtarget)
break;
@@ -227,28 +220,23 @@ int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget) {
// Computes a q delta (in "q index" terms) to get from a starting q value
// to a value that should equate to the given rate ratio.
-int vp9_compute_qdelta_by_rate(VP9_COMP *cpi,
- double base_q_index, double rate_target_ratio) {
+static int compute_qdelta_by_rate(VP9_COMP *cpi, int base_q_index,
+ double rate_target_ratio) {
int i;
- int base_bits_per_mb;
- int target_bits_per_mb;
int target_index = cpi->rc.worst_quality;
- // Make SURE use of floating point in this function is safe.
- vp9_clear_system_state();
-
// Look up the current projected bits per block for the base index
- base_bits_per_mb = vp9_rc_bits_per_mb(cpi->common.frame_type,
- base_q_index, 1.0);
+ const int base_bits_per_mb = vp9_rc_bits_per_mb(cpi->common.frame_type,
+ base_q_index, 1.0);
// Find the target bits per mb based on the base value and given ratio.
- target_bits_per_mb = rate_target_ratio * base_bits_per_mb;
+ const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb);
// Convert the q target to an index
- for (i = cpi->rc.best_quality; i < cpi->rc.worst_quality; i++) {
+ for (i = cpi->rc.best_quality; i < cpi->rc.worst_quality; ++i) {
target_index = i;
- if (vp9_rc_bits_per_mb(cpi->common.frame_type,
- i, 1.0) <= target_bits_per_mb )
+ if (vp9_rc_bits_per_mb(cpi->common.frame_type, i, 1.0) <=
+          target_bits_per_mb)
break;
}
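
The rate-based variant works in the bits-per-macroblock domain instead: look up the projected bits at the base index, scale by the requested ratio, then scan for the first index whose projection drops to that target. A sketch under a toy decaying bits model (the real vp9_rc_bits_per_mb depends on frame type and rate-correction state):

/* Illustrative bits-per-mb model: roughly halves every 32 index steps. */
static int bits_per_mb(int qindex) {
  return 4000 >> (qindex / 32);
}

static int qdelta_by_rate(int base_q_index, double rate_target_ratio,
                          int best, int worst) {
  const int base_bits = bits_per_mb(base_q_index);
  const int target_bits = (int)(rate_target_ratio * base_bits);
  int i, target_index = worst;
  for (i = best; i < worst; ++i) {
    target_index = i;
    if (bits_per_mb(i) <= target_bits)
      break;
  }
  return target_index - base_q_index;
}

With best 0 and base_q_index 64 (1000 bits/mb in this toy model), a ratio of 2.0 stops the scan at index 32, yielding a delta of -32, that is, a finer quantizer for the boosted segment.
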
@@ -258,11 +246,8 @@ int vp9_compute_qdelta_by_rate(VP9_COMP *cpi,
// This function sets up a set of segments with delta Q values around
// the baseline frame quantizer.
static void setup_in_frame_q_adj(VP9_COMP *cpi) {
- VP9_COMMON *cm = &cpi->common;
- struct segmentation *seg = &cm->seg;
- // double q_ratio;
- int segment;
- int qindex_delta;
+ VP9_COMMON *const cm = &cpi->common;
+ struct segmentation *const seg = &cm->seg;
  // Make sure the use of floating point in this function is safe.
vp9_clear_system_state();
@@ -270,13 +255,14 @@ static void setup_in_frame_q_adj(VP9_COMP *cpi) {
if (cm->frame_type == KEY_FRAME ||
cpi->refresh_alt_ref_frame ||
(cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
+ int segment;
+
// Clear down the segment map
vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
// Clear down the complexity map used for rd
vpx_memset(cpi->complexity_map, 0, cm->mi_rows * cm->mi_cols);
- // Enable segmentation
vp9_enable_segmentation((VP9_PTR)cpi);
vp9_clearall_segfeatures(seg);
@@ -287,9 +273,8 @@ static void setup_in_frame_q_adj(VP9_COMP *cpi) {
vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q);
// Use some of the segments for in frame Q adjustment
- for (segment = 1; segment < 3; segment++) {
- qindex_delta =
- vp9_compute_qdelta_by_rate(cpi, cm->base_qindex,
+ for (segment = 1; segment < 2; segment++) {
+ const int qindex_delta = compute_qdelta_by_rate(cpi, cm->base_qindex,
in_frame_q_adj_ratio[segment]);
vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q);
vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta);
@@ -297,8 +282,8 @@ static void setup_in_frame_q_adj(VP9_COMP *cpi) {
}
}
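
The loop above then attaches each delta to a segment through the SEG_LVL_ALT_Q feature. A schematic of that plumbing, with the vp9 segmentation struct reduced to just the fields the example needs (names mirror the calls above but the layout is illustrative):

#define MAX_SEGMENTS 8
enum { SEG_LVL_ALT_Q = 0 };

/* Reduced segmentation state: one alt-q delta and an enable bit per
 * segment (the real struct carries more features than this). */
typedef struct {
  int enabled;
  int alt_q_delta[MAX_SEGMENTS];
  unsigned int feature_mask[MAX_SEGMENTS];
} SEGMENTATION;

static void enable_alt_q(SEGMENTATION *seg, int segment, int qindex_delta) {
  seg->feature_mask[segment] |= 1u << SEG_LVL_ALT_Q;
  seg->alt_q_delta[segment] = qindex_delta;
}

/* Mirrors the loop above: segment 0 keeps the base q with the feature
 * disabled; segment 1 carries the rate-derived delta. */
static void setup_q_segments(SEGMENTATION *seg, int delta_for_segment1) {
  seg->enabled = 1;
  enable_alt_q(seg, 1, delta_for_segment1);
}
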
static void configure_static_seg_features(VP9_COMP *cpi) {
- VP9_COMMON *cm = &cpi->common;
- struct segmentation *seg = &cm->seg;
+ VP9_COMMON *const cm = &cpi->common;
+ struct segmentation *const seg = &cm->seg;
int high_q = (int)(cpi->rc.avg_q > 48.0);
int qi_delta;
@@ -442,13 +427,13 @@ static void print_seg_map(VP9_COMP *cpi) {
static void update_reference_segmentation_map(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
+ MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible;
+ uint8_t *cache_ptr = cm->last_frame_seg_map;
int row, col;
- MODE_INFO **mi_8x8, **mi_8x8_ptr = cm->mi_grid_visible;
- uint8_t *cache_ptr = cm->last_frame_seg_map, *cache;
for (row = 0; row < cm->mi_rows; row++) {
- mi_8x8 = mi_8x8_ptr;
- cache = cache_ptr;
+ MODE_INFO **mi_8x8 = mi_8x8_ptr;
+ uint8_t *cache = cache_ptr;
for (col = 0; col < cm->mi_cols; col++, mi_8x8++, cache++)
cache[0] = mi_8x8[0]->mbmi.segment_id;
mi_8x8_ptr += cm->mode_info_stride;
@@ -581,7 +566,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
int speed) {
int i;
sf->adaptive_rd_thresh = 1;
- sf->recode_loop = (speed < 1);
+ sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW);
if (speed == 1) {
sf->use_square_partition_only = !frame_is_intra_only(cm);
sf->less_rectangular_check = 1;
@@ -599,7 +584,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
sf->adaptive_pred_interp_filter = 1;
sf->auto_mv_step_size = 1;
sf->adaptive_rd_thresh = 2;
- sf->recode_loop = 2;
+ sf->recode_loop = ALLOW_RECODE_KFARFGF;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
@@ -635,7 +620,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
sf->last_partitioning_redo_frequency = 3;
sf->adaptive_rd_thresh = 2;
- sf->recode_loop = 2;
+ sf->recode_loop = ALLOW_RECODE_KFARFGF;
sf->use_lp32x32fdct = 1;
sf->mode_skip_start = 11;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
@@ -663,6 +648,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
sf->reference_masking = 1;
sf->auto_mv_step_size = 1;
+ sf->disable_split_var_thresh = 32;
sf->disable_filter_search_var_thresh = 100;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
@@ -698,6 +684,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
sf->reference_masking = 1;
sf->auto_mv_step_size = 1;
+ sf->disable_split_var_thresh = 64;
sf->disable_filter_search_var_thresh = 200;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
@@ -715,9 +702,9 @@ static void set_good_speed_feature(VP9_COMMON *cm,
sf->adaptive_rd_thresh = 4;
sf->mode_skip_start = 6;
}
- if (speed == 5) {
+ if (speed >= 5) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
- sf->use_one_partition_size_always = 1;
+ sf->partition_search_type = FIXED_PARTITION;
sf->always_this_block_size = BLOCK_16X16;
sf->tx_size_search_method = frame_is_intra_only(cm) ?
USE_FULL_RD : USE_LARGESTALL;
@@ -752,7 +739,9 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
int speed) {
sf->static_segmentation = 0;
sf->adaptive_rd_thresh = 1;
- sf->recode_loop = (speed < 1);
+ sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW);
+ sf->encode_breakout_thresh = 1;
+
if (speed == 1) {
sf->use_square_partition_only = !frame_is_intra_only(cm);
sf->less_rectangular_check = 1;
@@ -770,10 +759,11 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
sf->adaptive_pred_interp_filter = 1;
sf->auto_mv_step_size = 1;
sf->adaptive_rd_thresh = 2;
- sf->recode_loop = 2;
+ sf->recode_loop = ALLOW_RECODE_KFARFGF;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
+ sf->encode_breakout_thresh = 8;
}
if (speed >= 2) {
sf->use_square_partition_only = !frame_is_intra_only(cm);
@@ -806,13 +796,14 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
sf->last_partitioning_redo_frequency = 3;
sf->adaptive_rd_thresh = 2;
- sf->recode_loop = 2;
+ sf->recode_loop = ALLOW_RECODE_KFARFGF;
sf->use_lp32x32fdct = 1;
sf->mode_skip_start = 11;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
+ sf->encode_breakout_thresh = 200;
}
if (speed >= 3) {
sf->use_square_partition_only = 1;
@@ -835,25 +826,37 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
sf->use_fast_coef_updates = 2;
sf->adaptive_rd_thresh = 4;
sf->mode_skip_start = 6;
+ sf->encode_breakout_thresh = 400;
}
if (speed >= 4) {
sf->optimize_coefficients = 0;
+ sf->disable_split_mask = DISABLE_ALL_SPLIT;
+ sf->use_fast_lpf_pick = 2;
+ sf->encode_breakout_thresh = 700;
}
if (speed >= 5) {
int i;
- sf->disable_split_mask = DISABLE_ALL_SPLIT;
+ sf->adaptive_rd_thresh = 5;
sf->auto_min_max_partition_size = frame_is_intra_only(cm) ?
RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX;
+    sf->adjust_partitioning_from_last_frame =
+        cm->last_frame_type == KEY_FRAME ||
+        (cm->current_video_frame + 1) % sf->last_partitioning_redo_frequency == 0;
sf->subpel_force_stop = 1;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY;
}
- sf->use_fast_lpf_pick = 2;
- sf->RD = 0;
+ sf->frame_parameter_update = 0;
+ sf->encode_breakout_thresh = 1000;
+ sf->search_method = FAST_HEX;
}
if (speed >= 6) {
- sf->super_fast_rtc = 1;
+ sf->partition_search_type = VAR_BASED_FIXED_PARTITION;
+ }
+ if (speed >= 7) {
+ sf->partition_search_type = VAR_BASED_FIXED_PARTITION;
+ sf->use_nonrd_pick_mode = 1;
}
}
@@ -867,13 +870,15 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
if (speed < 0)
speed = -speed;
+#if CONFIG_INTERNAL_STATS
for (i = 0; i < MAX_MODES; ++i)
cpi->mode_chosen_counts[i] = 0;
+#endif
// best quality defaults
- sf->RD = 1;
+ sf->frame_parameter_update = 1;
sf->search_method = NSTEP;
- sf->recode_loop = 1;
+ sf->recode_loop = ALLOW_RECODE;
sf->subpel_search_method = SUBPEL_TREE;
sf->subpel_iters_per_step = 2;
sf->subpel_force_stop = 0;
@@ -889,7 +894,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->adaptive_motion_search = 0;
sf->adaptive_pred_interp_filter = 0;
sf->reference_masking = 0;
- sf->use_one_partition_size_always = 0;
+ sf->partition_search_type = SEARCH_PARTITION;
sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0;
sf->auto_min_max_partition_size = NOT_IN_USE;
@@ -910,9 +915,9 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->use_uv_intra_rd_estimate = 0;
sf->use_fast_lpf_pick = 0;
sf->use_fast_coef_updates = 0;
- sf->using_small_partition_info = 0;
sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set
- sf->super_fast_rtc = 0;
+ sf->use_nonrd_pick_mode = 0;
+ sf->encode_breakout_thresh = 0;
switch (cpi->oxcf.mode) {
case MODE_BESTQUALITY:
@@ -941,7 +946,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
// No recode for 1 pass.
if (cpi->pass == 0) {
- sf->recode_loop = 0;
+ sf->recode_loop = DISALLOW_RECODE;
sf->optimize_coefficients = 0;
}
@@ -957,25 +962,29 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1;
-#ifdef SPEEDSTATS
- frames_at_speed[cpi->speed]++;
-#endif
+ if (cpi->encode_breakout && cpi->oxcf.mode == MODE_REALTIME &&
+ sf->encode_breakout_thresh > cpi->encode_breakout)
+ cpi->encode_breakout = sf->encode_breakout_thresh;
+
+ if (sf->disable_split_mask == DISABLE_ALL_SPLIT)
+ sf->adaptive_pred_interp_filter = 0;
}
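
The new tail of vp9_set_speed_features() folds the per-speed breakout threshold into the user's setting, as sketched below: in real-time mode a nonzero user value is raised to the preset's floor, while a zero keeps the feature disabled; adaptive interpolation-filter prediction is dropped whenever split partitions are fully disabled. A one-function sketch of the clamp (plain ints stand in for the cpi/sf fields):

/* Merge the user's encode_breakout with the speed preset's threshold.
 * A user value of 0 keeps the feature off; otherwise real-time presets
 * may only raise it, never lower it. */
static int merged_encode_breakout(int user_breakout, int sf_thresh,
                                  int is_realtime) {
  if (user_breakout && is_realtime && sf_thresh > user_breakout)
    return sf_thresh;
  return user_breakout;
}
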
static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
+ const VP9_CONFIG *oxcf = &cpi->oxcf;
- cpi->lookahead = vp9_lookahead_init(cpi->oxcf.width, cpi->oxcf.height,
+ cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height,
cm->subsampling_x, cm->subsampling_y,
- cpi->oxcf.lag_in_frames);
+ oxcf->lag_in_frames);
if (!cpi->lookahead)
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate lag buffers");
if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer,
- cpi->oxcf.width, cpi->oxcf.height,
+ oxcf->width, oxcf->height,
cm->subsampling_x, cm->subsampling_y,
- VP9_ENC_BORDER_IN_PIXELS))
+ VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate altref buffer");
}
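
vp9_realloc_frame_buffer() has grown three trailing arguments for the external frame-buffer support introduced by the new vp9_frame_buffers.* files; the encoder passes NULL for all of them to keep internal allocation. A hedged sketch of that optional-callback shape (types and names are illustrative, not the vpx API):

#include <stddef.h>
#include <stdlib.h>

/* Illustrative external frame buffer and its acquisition callback. */
typedef struct { unsigned char *data; size_t size; void *priv; } ext_fb;
typedef int (*get_fb_cb)(void *user, size_t min_size, ext_fb *fb);

/* Allocate through the callback when one is supplied, otherwise fall
 * back to internal allocation: the shape of the NULL, NULL, NULL calls. */
static unsigned char *alloc_frame_mem(size_t size, ext_fb *fb,
                                      get_fb_cb cb, void *cb_priv) {
  if (cb != NULL && fb != NULL) {
    if (cb(cb_priv, size, fb) < 0 || fb->size < size)
      return NULL;       /* caller surfaces VPX_CODEC_MEM_ERROR */
    return fb->data;
  }
  return (unsigned char *)malloc(size);  /* internal path */
}
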
@@ -1043,14 +1052,14 @@ static void update_frame_size(VP9_COMP *cpi) {
if (vp9_realloc_frame_buffer(&cpi->last_frame_uf,
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
- VP9_ENC_BORDER_IN_PIXELS))
+ VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to reallocate last frame buffer");
if (vp9_realloc_frame_buffer(&cpi->scaled_source,
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
- VP9_ENC_BORDER_IN_PIXELS))
+ VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to reallocate scaled source buffer");
@@ -1100,15 +1109,13 @@ int vp9_reverse_trans(int x) {
void vp9_new_framerate(VP9_COMP *cpi, double framerate) {
VP9_COMMON *const cm = &cpi->common;
- int64_t vbr_max_bits;
+ int vbr_max_bits;
if (framerate < 0.1)
framerate = 30;
cpi->oxcf.framerate = framerate;
cpi->output_framerate = cpi->oxcf.framerate;
- cpi->rc.per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth
- / cpi->output_framerate);
cpi->rc.av_per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth
/ cpi->output_framerate);
cpi->rc.min_frame_bandwidth = (int)(cpi->rc.av_per_frame_bandwidth *
@@ -1126,10 +1133,10 @@ void vp9_new_framerate(VP9_COMP *cpi, double framerate) {
// be achieved because of a user specified max q (e.g. when the user
// specifies lossless encoding).
//
- vbr_max_bits = ((int64_t)cpi->rc.av_per_frame_bandwidth *
- (int64_t)cpi->oxcf.two_pass_vbrmax_section) / 100;
+ vbr_max_bits = (int)(((int64_t)cpi->rc.av_per_frame_bandwidth *
+ cpi->oxcf.two_pass_vbrmax_section) / 100);
cpi->rc.max_frame_bandwidth =
- MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits);
+ MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits);
// Set Maximum gf/arf interval
cpi->rc.max_gf_interval = 16;
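
The widened arithmetic matters here: av_per_frame_bandwidth times a percentage can overflow 32 bits at high bitrates, so the product is formed in int64_t and only the final quotient is narrowed. A worked sketch (the two rate constants are illustrative placeholders, not libvpx's values):

#include <stdint.h>

#define TOY_MAX_MB_RATE 3200       /* illustrative per-MB bit cap */
#define TOY_MAXRATE_1080P 2025000  /* illustrative absolute floor */
#define TOY_MAX(a, b) ((a) > (b) ? (a) : (b))

static int max_frame_bandwidth(int av_per_frame_bw, int vbrmax_section_pct,
                               int mbs) {
  /* The 64-bit product is the point: 20,000,000 * 150 already exceeds
   * INT32_MAX, so the multiply must not happen in 32 bits. */
  const int vbr_max_bits =
      (int)(((int64_t)av_per_frame_bw * vbrmax_section_pct) / 100);
  return TOY_MAX(TOY_MAX(mbs * TOY_MAX_MB_RATE, TOY_MAXRATE_1080P),
                 vbr_max_bits);
}
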
@@ -1150,7 +1157,7 @@ void vp9_new_framerate(VP9_COMP *cpi, double framerate) {
cpi->rc.max_gf_interval = cpi->twopass.static_scene_max_gf_interval;
}
-static int64_t rescale(int val, int64_t num, int denom) {
+static int64_t rescale(int64_t val, int64_t num, int denom) {
int64_t llnum = num;
int64_t llden = denom;
int64_t llval = val;
@@ -1158,6 +1165,124 @@ static int64_t rescale(int val, int64_t num, int denom) {
return (llval * llnum / llden);
}
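
rescale() now takes a 64-bit value so callers such as init_layer_context() below can scale millisecond buffer levels by bandwidth without truncating before the multiply. A minimal equivalent with a worked example (numbers illustrative):

#include <stdint.h>

/* Scale val by num/denom entirely in 64-bit arithmetic. */
static int64_t rescale64(int64_t val, int64_t num, int denom) {
  return val * num / denom;
}

/* Example: a 6000 ms starting buffer at 800 kbps becomes a bit budget:
 * 6000 * 800000 / 1000 = 4,800,000 bits. The intermediate product
 * (4.8e9) already exceeds INT32_MAX, hence the 64-bit parameter. */
int64_t starting_buffer_bits(void) {
  return rescale64(6000, 800000, 1000);
}
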
+// Initialize layer context data from init_config().
+static void init_layer_context(VP9_COMP *const cpi) {
+ const VP9_CONFIG *const oxcf = &cpi->oxcf;
+ int temporal_layer = 0;
+ cpi->svc.spatial_layer_id = 0;
+ cpi->svc.temporal_layer_id = 0;
+ for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers;
+ ++temporal_layer) {
+ LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ lrc->avg_frame_qindex[INTER_FRAME] = q_trans[oxcf->worst_allowed_q];
+ lrc->last_q[INTER_FRAME] = q_trans[oxcf->worst_allowed_q];
+ lrc->ni_av_qi = q_trans[oxcf->worst_allowed_q];
+ lrc->total_actual_bits = 0;
+ lrc->total_target_vs_actual = 0;
+ lrc->ni_tot_qi = 0;
+ lrc->tot_q = 0.0;
+ lrc->avg_q = 0.0;
+ lrc->ni_frames = 0;
+ lrc->decimation_count = 0;
+ lrc->decimation_factor = 0;
+ lrc->rate_correction_factor = 1.0;
+ lrc->key_frame_rate_correction_factor = 1.0;
+    lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * 1000;
+ lrc->buffer_level = rescale((int)(oxcf->starting_buffer_level),
+ lc->target_bandwidth, 1000);
+ lrc->bits_off_target = lrc->buffer_level;
+ }
+}
+
+// Update the layer context from a change_config() call.
+static void update_layer_context_change_config(VP9_COMP *const cpi,
+ const int target_bandwidth) {
+ const VP9_CONFIG *const oxcf = &cpi->oxcf;
+ const RATE_CONTROL *const rc = &cpi->rc;
+ int temporal_layer = 0;
+ float bitrate_alloc = 1.0;
+ for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers;
+ ++temporal_layer) {
+ LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * 1000;
+ bitrate_alloc = (float)lc->target_bandwidth / (float)target_bandwidth;
+ // Update buffer-related quantities.
+ lc->starting_buffer_level =
+ (int64_t)(oxcf->starting_buffer_level * bitrate_alloc);
+ lc->optimal_buffer_level =
+ (int64_t)(oxcf->optimal_buffer_level * bitrate_alloc);
+ lc->maximum_buffer_size =
+ (int64_t)(oxcf->maximum_buffer_size * bitrate_alloc);
+ lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size);
+ lrc->buffer_level = MIN(lrc->buffer_level, lc->maximum_buffer_size);
+ // Update framerate-related quantities.
+ lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer];
+ lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
+ lrc->max_frame_bandwidth = rc->max_frame_bandwidth;
+ // Update qp-related quantities.
+ lrc->worst_quality = rc->worst_quality;
+ lrc->best_quality = rc->best_quality;
+ }
+}
+
+// Prior to encoding the frame, update framerate-related quantities
+// for the current layer.
+static void update_layer_framerate(VP9_COMP *const cpi) {
+ int temporal_layer = cpi->svc.temporal_layer_id;
+ const VP9_CONFIG *const oxcf = &cpi->oxcf;
+ LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer];
+ lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
+ lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth;
+ // Update the average layer frame size (non-cumulative per-frame-bw).
+ if (temporal_layer == 0) {
+ lc->avg_frame_size = lrc->av_per_frame_bandwidth;
+ } else {
+ double prev_layer_framerate = oxcf->framerate /
+ oxcf->ts_rate_decimator[temporal_layer - 1];
+ int prev_layer_target_bandwidth =
+ oxcf->ts_target_bitrate[temporal_layer - 1] * 1000;
+ lc->avg_frame_size =
+ (int)((lc->target_bandwidth - prev_layer_target_bandwidth) /
+ (lc->framerate - prev_layer_framerate));
+ }
+}
+
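
Because ts_target_bitrate and the decimated framerates are cumulative across layers, an enhancement layer's average frame size is an incremental quotient, as the sketch below shows (rates illustrative):

/* Cumulative-layer convention: layer bitrates and framerates include all
 * lower layers, so the per-layer average frame size is a difference. */
static int enh_layer_avg_frame_size(int layer_bw, int prev_layer_bw,
                                    double layer_fps, double prev_layer_fps) {
  return (int)((layer_bw - prev_layer_bw) / (layer_fps - prev_layer_fps));
}

/* Example: base 400 kbps @ 15 fps, base+enh 800 kbps @ 30 fps gives
 * (800000 - 400000) / (30 - 15) = 26,666 bits per enhancement frame. */
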
+// Prior to encoding the frame, copy the layer context for the current
+// layer into the cpi struct.
+static void restore_layer_context(VP9_COMP *const cpi) {
+ int temporal_layer = cpi->svc.temporal_layer_id;
+ LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
+ int frame_since_key = cpi->rc.frames_since_key;
+ int frame_to_key = cpi->rc.frames_to_key;
+ cpi->rc = lc->rc;
+ cpi->oxcf.target_bandwidth = lc->target_bandwidth;
+ cpi->oxcf.starting_buffer_level = lc->starting_buffer_level;
+ cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level;
+ cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size;
+ cpi->output_framerate = lc->framerate;
+ // Reset the frames_since_key and frames_to_key counters to their values
+ // before the layer restore. Keep these defined for the stream (not layer).
+ cpi->rc.frames_since_key = frame_since_key;
+ cpi->rc.frames_to_key = frame_to_key;
+}
+
+// Save the layer context after encoding the frame.
+static void save_layer_context(VP9_COMP *const cpi) {
+ int temporal_layer = cpi->svc.temporal_layer_id;
+ LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
+ lc->rc = cpi->rc;
+ lc->target_bandwidth = (int)cpi->oxcf.target_bandwidth;
+ lc->starting_buffer_level = cpi->oxcf.starting_buffer_level;
+ lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level;
+ lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size;
+ lc->framerate = cpi->output_framerate;
+}
+
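
Taken together, these helpers implement a swap-in/swap-out discipline around each encoded frame: per-layer rate-control state is restored into the top-level struct before encoding and written back afterwards, with the keyframe counters deliberately kept stream-global. A schematic of that flow, with the structs reduced to the swapped fields:

typedef struct { int buffer_level; int frames_since_key; } RC;
typedef struct { RC rc; } LAYER_CONTEXT;
typedef struct { RC rc; LAYER_CONTEXT layers[4]; int layer_id; } ENC;

/* Swap the current layer's rate control state in, preserving the
 * stream-global keyframe counter, as restore_layer_context() does. */
static void restore_layer(ENC *e) {
  const int since_key = e->rc.frames_since_key;
  e->rc = e->layers[e->layer_id].rc;
  e->rc.frames_since_key = since_key;
}

/* Write the post-encode state back, as save_layer_context() does. */
static void save_layer(ENC *e) {
  e->layers[e->layer_id].rc = e->rc;
}

static void encode_one_layered_frame(ENC *e) {
  restore_layer(e);
  /* ... the actual frame encode would run here ... */
  save_layer(e);
}
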
static void set_tile_limits(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
@@ -1184,12 +1309,20 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cm->subsampling_y = 0;
vp9_alloc_compressor_data(cpi);
+ // Spatial scalability.
+ cpi->svc.number_spatial_layers = oxcf->ss_number_layers;
+ // Temporal scalability.
+ cpi->svc.number_temporal_layers = oxcf->ts_number_layers;
+
+ if (cpi->svc.number_temporal_layers > 1 &&
+ cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ init_layer_context(cpi);
+ }
+
// change includes all joint functionality
vp9_change_config(ptr, oxcf);
  // Initialize average q values.
- cpi->rc.active_worst_quality = cpi->oxcf.worst_allowed_q;
-
if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
cpi->rc.avg_frame_qindex[0] = cpi->oxcf.worst_allowed_q;
cpi->rc.avg_frame_qindex[1] = cpi->oxcf.worst_allowed_q;
@@ -1224,9 +1357,6 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cpi->gld_fb_idx = 1;
cpi->alt_fb_idx = 2;
- cpi->current_layer = 0;
- cpi->use_svc = 0;
-
set_tile_limits(cpi);
cpi->fixed_divide[0] = 0;
@@ -1234,7 +1364,6 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cpi->fixed_divide[i] = 0x80000 / i;
}
-
void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
VP9_COMP *cpi = (VP9_COMP *)(ptr);
VP9_COMMON *const cm = &cpi->common;
@@ -1248,6 +1377,9 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cpi->oxcf = *oxcf;
+ if (cpi->oxcf.cpu_used == -6)
+ cpi->oxcf.play_alternate = 0;
+
switch (cpi->oxcf.mode) {
// Real time and one pass deprecated in test code base
case MODE_GOODQUALITY:
@@ -1298,6 +1430,7 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
for (i = 0; i < MAX_SEGMENTS; i++)
cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout;
}
+ cpi->encode_breakout = cpi->oxcf.encode_breakout;
// local file playback mode == really big buffer
if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) {
@@ -1326,10 +1459,10 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cpi->oxcf.target_bandwidth, 1000);
// Under a configuration change, where maximum_buffer_size may change,
// keep buffer level clipped to the maximum allowed buffer size.
- if (cpi->rc.bits_off_target > cpi->oxcf.maximum_buffer_size) {
- cpi->rc.bits_off_target = cpi->oxcf.maximum_buffer_size;
- cpi->rc.buffer_level = cpi->rc.bits_off_target;
- }
+ cpi->rc.bits_off_target = MIN(cpi->rc.bits_off_target,
+ cpi->oxcf.maximum_buffer_size);
+ cpi->rc.buffer_level = MIN(cpi->rc.buffer_level,
+ cpi->oxcf.maximum_buffer_size);
// Set up frame rate and related parameters rate control values.
vp9_new_framerate(cpi, cpi->oxcf.framerate);
@@ -1339,16 +1472,11 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cpi->rc.best_quality = cpi->oxcf.best_allowed_q;
// active values should only be modified if out of new range
- cpi->rc.active_worst_quality = clamp(cpi->rc.active_worst_quality,
- cpi->rc.best_quality,
- cpi->rc.worst_quality);
cpi->cq_target_quality = cpi->oxcf.cq_level;
cm->interp_filter = DEFAULT_INTERP_FILTER;
- cpi->target_bandwidth = cpi->oxcf.target_bandwidth;
-
cm->display_width = cpi->oxcf.width;
cm->display_height = cpi->oxcf.height;
@@ -1359,24 +1487,24 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
if (cpi->initial_width) {
// Increasing the size of the frame beyond the first seen frame, or some
- // otherwise signalled maximum size, is not supported.
+ // otherwise signaled maximum size, is not supported.
// TODO(jkoleszar): exit gracefully.
assert(cm->width <= cpi->initial_width);
assert(cm->height <= cpi->initial_height);
}
update_frame_size(cpi);
- cpi->speed = cpi->oxcf.cpu_used;
+ if (cpi->svc.number_temporal_layers > 1 &&
+ cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ update_layer_context_change_config(cpi, (int)cpi->oxcf.target_bandwidth);
+ }
+
+ cpi->speed = abs(cpi->oxcf.cpu_used);
- if (cpi->oxcf.lag_in_frames == 0) {
- // force to allowlag to 0 if lag_in_frames is 0;
- cpi->oxcf.allow_lag = 0;
- } else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS) {
- // Limit on lag buffers as these are not currently dynamically allocated
+ // Limit on lag buffers as these are not currently dynamically allocated.
+ if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS)
cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
- }
- // YX Temp
#if CONFIG_MULTIPLE_ARF
vp9_zero(cpi->alt_ref_source);
#else
@@ -1441,6 +1569,7 @@ static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
int num_pix = num_4x4_blk << 4;
int i, k;
ctx->num_4x4_blk = num_4x4_blk;
+
CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
for (i = 0; i < MAX_MB_PLANE; ++i) {
@@ -1484,7 +1613,6 @@ static void init_pick_mode_context(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
-
for (i = 0; i < BLOCK_SIZES; ++i) {
const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
const int num_4x4_h = num_4x4_blocks_high_lookup[i];
@@ -1589,6 +1717,8 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
vp9_create_common(cm);
+ cpi->use_svc = 0;
+
init_config((VP9_PTR)cpi, oxcf);
init_pick_mode_context(cpi);
@@ -1604,9 +1734,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
cpi->alt_is_last = 0;
cpi->gold_is_alt = 0;
- // Spatial scalability
- cpi->number_spatial_layers = oxcf->ss_number_layers;
-
// Create the encoder segmentation map and set all entries to 0
CHECK_MEM_ERROR(cm, cpi->segmentation_map,
vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
@@ -1632,11 +1759,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
}
-#ifdef ENTROPY_STATS
- if (cpi->pass != 1)
- init_context_counters();
-#endif
-
/*Initialize the feed-forward activity masking.*/
cpi->activity_avg = 90 << 12;
cpi->key_frame_frequency = cpi->oxcf.key_freq;
@@ -1741,7 +1863,7 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
cpi->output_pkt_list = oxcf->output_pkt_list;
- cpi->enable_encode_breakout = 1;
+ cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
if (cpi->pass == 1) {
vp9_init_first_pass(cpi);
@@ -1908,10 +2030,12 @@ void vp9_remove_compressor(VP9_PTR *ptr) {
/ time_encoded;
if (cpi->b_calculate_psnr) {
- const double total_psnr = vp9_mse2psnr(cpi->total_samples, 255.0,
- cpi->total_sq_error);
- const double totalp_psnr = vp9_mse2psnr(cpi->totalp_samples, 255.0,
- cpi->totalp_sq_error);
+ const double total_psnr =
+ vpx_sse_to_psnr((double)cpi->total_samples, 255.0,
+ (double)cpi->total_sq_error);
+ const double totalp_psnr =
+ vpx_sse_to_psnr((double)cpi->totalp_samples, 255.0,
+ (double)cpi->totalp_sq_error);
const double total_ssim = 100 * pow(cpi->summed_quality /
cpi->summed_weights, 8.0);
const double totalp_ssim = 100 * pow(cpi->summedp_quality /
@@ -1967,21 +2091,6 @@ void vp9_remove_compressor(VP9_PTR *ptr) {
}
#endif
-#if defined(SECTIONBITS_OUTPUT)
-
- if (0) {
- int i;
- FILE *f = fopen("tokenbits.stt", "a");
-
- for (i = 0; i < 28; i++)
- fprintf(f, "%8d", (int)(Sectionbits[i] / 256));
-
- fprintf(f, "\n");
- fclose(f);
- }
-
-#endif
-
#if 0
{
printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000);
@@ -2102,12 +2211,12 @@ static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
const int w = widths[i];
const int h = heights[i];
const uint32_t samples = w * h;
- const double sse = calc_plane_error(a_planes[i], a_strides[i],
- b_planes[i], b_strides[i],
- w, h);
+ const uint64_t sse = calc_plane_error(a_planes[i], a_strides[i],
+ b_planes[i], b_strides[i],
+ w, h);
psnr->sse[1 + i] = sse;
psnr->samples[1 + i] = samples;
- psnr->psnr[1 + i] = vp9_mse2psnr(samples, 255.0, sse);
+ psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, 255.0, (double)sse);
total_sse += sse;
total_samples += samples;
@@ -2115,7 +2224,8 @@ static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
psnr->sse[0] = total_sse;
psnr->samples[0] = total_samples;
- psnr->psnr[0] = vp9_mse2psnr(total_samples, 255.0, total_sse);
+ psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, 255.0,
+ (double)total_sse);
}
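
vpx_sse_to_psnr() takes explicitly double-typed inputs, hence the casts above; the underlying relation is PSNR = 10 * log10(samples * peak^2 / sse). A self-contained version for reference (the 100 dB cap for the sse == 0 case is an assumption about the library's behavior):

#include <math.h>

/* PSNR from summed squared error over `samples` values with the given
 * peak (255.0 for 8-bit video). Capped for the sse == 0 case. */
static double sse_to_psnr(double samples, double peak, double sse) {
  if (sse > 0.0) {
    const double psnr = 10.0 * log10(samples * peak * peak / sse);
    return psnr > 100.0 ? 100.0 : psnr;  /* cap assumed at 100 dB */
  }
  return 100.0;
}
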
static void generate_psnr_packet(VP9_COMP *cpi) {
@@ -2448,34 +2558,33 @@ static double compute_edge_pixel_proportion(YV12_BUFFER_CONFIG *frame) {
// Function to test for conditions that indicate we should loop
// back and recode a frame.
-static int recode_loop_test(VP9_COMP *cpi,
+static int recode_loop_test(const VP9_COMP *cpi,
int high_limit, int low_limit,
int q, int maxq, int minq) {
+ const VP9_COMMON *const cm = &cpi->common;
+ const RATE_CONTROL *const rc = &cpi->rc;
int force_recode = 0;
- VP9_COMMON *cm = &cpi->common;
// Special case trap if maximum allowed frame size exceeded.
- if (cpi->rc.projected_frame_size > cpi->rc.max_frame_bandwidth) {
+ if (rc->projected_frame_size > rc->max_frame_bandwidth) {
force_recode = 1;
  // Is frame recode allowed?
  // Yes if ALLOW_RECODE is selected, or ALLOW_RECODE_KFARFGF is selected
  // and the frame is a key frame, golden frame or alt_ref_frame.
- } else if ((cpi->sf.recode_loop == 1) ||
- ((cpi->sf.recode_loop == 2) &&
- ((cm->frame_type == KEY_FRAME) ||
- cpi->refresh_golden_frame ||
- cpi->refresh_alt_ref_frame))) {
+ } else if ((cpi->sf.recode_loop == ALLOW_RECODE) ||
+ ((cpi->sf.recode_loop == ALLOW_RECODE_KFARFGF) &&
+ (cm->frame_type == KEY_FRAME ||
+ cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
// General over and under shoot tests
- if (((cpi->rc.projected_frame_size > high_limit) && (q < maxq)) ||
- ((cpi->rc.projected_frame_size < low_limit) && (q > minq))) {
+ if ((rc->projected_frame_size > high_limit && q < maxq) ||
+ (rc->projected_frame_size < low_limit && q > minq)) {
force_recode = 1;
} else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) {
// Deal with frame undershoot and whether or not we are
// below the automatically set cq level.
if (q > cpi->cq_target_quality &&
- cpi->rc.projected_frame_size <
- ((cpi->rc.this_frame_target * 7) >> 3)) {
+ rc->projected_frame_size < ((rc->this_frame_target * 7) >> 3)) {
force_recode = 1;
}
}
@@ -2583,7 +2692,7 @@ static void scale_references(VP9_COMP *cpi) {
vp9_realloc_frame_buffer(&cm->frame_bufs[new_fb].buf,
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
- VP9_ENC_BORDER_IN_PIXELS);
+ VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf);
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
} else {
@@ -2629,14 +2738,14 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
FILE *const f = fopen("tmp.stt", cm->current_video_frame ? "a" : "w");
int recon_err;
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
recon_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm));
if (cpi->twopass.total_left_stats.coded_error != 0.0)
fprintf(f, "%10u %10d %10d %10d %10d %10d "
"%10"PRId64" %10"PRId64" %10d "
- "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
+ "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
"%6d %6d %5d %5d %5d "
"%10"PRId64" %10.3lf"
"%10lf %8u %10d %10d %10d\n",
@@ -2649,7 +2758,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
cpi->rc.total_actual_bits, cm->base_qindex,
vp9_convert_qindex_to_q(cm->base_qindex),
(double)vp9_dc_quant(cm->base_qindex, 0) / 4.0,
- vp9_convert_qindex_to_q(cpi->rc.active_worst_quality), cpi->rc.avg_q,
+ cpi->rc.avg_q,
vp9_convert_qindex_to_q(cpi->rc.ni_av_qi),
vp9_convert_qindex_to_q(cpi->cq_target_quality),
cpi->refresh_last_frame, cpi->refresh_golden_frame,
@@ -2673,8 +2782,6 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
for (i = 0; i < MAX_MODES; ++i)
fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]);
- for (i = 0; i < MAX_REFS; ++i)
- fprintf(fmodes, "%5d ", cpi->sub8x8_mode_chosen_counts[i]);
fprintf(fmodes, "\n");
@@ -2683,25 +2790,68 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
}
#endif
+static void encode_without_recode_loop(VP9_COMP *cpi,
+ size_t *size,
+ uint8_t *dest,
+ int q) {
+ VP9_COMMON *const cm = &cpi->common;
+ vp9_clear_system_state();
+ vp9_set_quantizer(cpi, q);
+
+ // Set up entropy context depending on frame type. The decoder mandates
+ // the use of the default context, index 0, for keyframes and inter
+ // frames where the error_resilient_mode or intra_only flag is set. For
+ // other inter-frames the encoder currently uses only two contexts;
+ // context 1 for ALTREF frames and context 0 for the others.
+ if (cm->frame_type == KEY_FRAME) {
+ vp9_setup_key_frame(cpi);
+ } else {
+ if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) {
+ cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame;
+ }
+ vp9_setup_inter_frame(cpi);
+ }
+ // Variance adaptive and in frame q adjustment experiments are mutually
+ // exclusive.
+ if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
+ vp9_vaq_frame_setup(cpi);
+ } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
+ setup_in_frame_q_adj(cpi);
+ }
+ // transform / motion compensation build reconstruction frame
+ vp9_encode_frame(cpi);
+
+ // Update the skip mb flag probabilities based on the distribution
+ // seen in the last encoder iteration.
+ // update_base_skip_probs(cpi);
+ vp9_clear_system_state();
+}
+
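
The loop that follows is effectively a bounded bisection on q: after each trial pack, overshoot raises q_low, undershoot lowers q_high, and encoding repeats until the projected size fits the limits or q stops moving. A stripped-down model of that control flow (the size model stands in for a real trial encode plus vp9_pack_bitstream()):

/* Toy size model: larger q means a smaller frame. */
static int projected_size(int q) {
  return 4000000 / (q + 8);
}

/* Bisect q within [q_low, q_high] until size fits the shoot limits. */
static int recode_q(int q, int q_low, int q_high,
                    int under_limit, int over_limit) {
  for (;;) {
    const int size = projected_size(q);
    int next_q = q;
    if (size > over_limit && q < q_high) {
      q_low = q + 1;                       /* frame too big: raise q */
      next_q = (q_high + q_low + 1) / 2;
    } else if (size < under_limit && q > q_low) {
      q_high = q - 1;                      /* frame too small: lower q */
      next_q = (q_high + q_low) / 2;
    }
    if (next_q == q)
      return q;                            /* converged or within limits */
    q = next_q;
  }
}
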
static void encode_with_recode_loop(VP9_COMP *cpi,
size_t *size,
uint8_t *dest,
- int *q,
+ int q,
int bottom_index,
- int top_index,
- int frame_over_shoot_limit,
- int frame_under_shoot_limit) {
+ int top_index) {
VP9_COMMON *const cm = &cpi->common;
+ RATE_CONTROL *const rc = &cpi->rc;
int loop_count = 0;
int loop = 0;
int overshoot_seen = 0;
int undershoot_seen = 0;
int q_low = bottom_index, q_high = top_index;
+ int frame_over_shoot_limit;
+ int frame_under_shoot_limit;
+
+ // Decide frame size bounds
+ vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,
+ &frame_under_shoot_limit,
+ &frame_over_shoot_limit);
do {
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
- vp9_set_quantizer(cpi, *q);
+ vp9_set_quantizer(cpi, q);
if (loop_count == 0) {
// Set up entropy context depending on frame type. The decoder mandates
@@ -2712,7 +2862,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
if (cm->frame_type == KEY_FRAME) {
vp9_setup_key_frame(cpi);
} else {
- if (!cm->intra_only && !cm->error_resilient_mode) {
+ if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) {
cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame;
}
vp9_setup_inter_frame(cpi);
@@ -2728,25 +2878,24 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
}
// transform / motion compensation build reconstruction frame
-
vp9_encode_frame(cpi);
// Update the skip mb flag probabilities based on the distribution
// seen in the last encoder iteration.
// update_base_skip_probs(cpi);
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
// Dummy pack of the bitstream using up to date stats to get an
// accurate estimate of output frame size to determine if we need
// to recode.
- if (cpi->sf.recode_loop != 0) {
+ if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
vp9_save_coding_context(cpi);
cpi->dummy_packing = 1;
- if (!cpi->sf.super_fast_rtc)
+ if (!cpi->sf.use_nonrd_pick_mode)
vp9_pack_bitstream(cpi, dest, size);
- cpi->rc.projected_frame_size = (*size) << 3;
+ rc->projected_frame_size = (int)(*size) << 3;
vp9_restore_coding_context(cpi);
if (frame_over_shoot_limit == 0)
@@ -2757,9 +2906,9 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
loop = 0;
} else {
if ((cm->frame_type == KEY_FRAME) &&
- cpi->rc.this_key_frame_forced &&
- (cpi->rc.projected_frame_size < cpi->rc.max_frame_bandwidth)) {
- int last_q = *q;
+ rc->this_key_frame_forced &&
+ (rc->projected_frame_size < rc->max_frame_bandwidth)) {
+ int last_q = q;
int kf_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm));
int high_err_target = cpi->ambient_err;
@@ -2771,65 +2920,65 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
// The key frame is not good enough or we can afford
// to make it better without undue risk of popping.
if ((kf_err > high_err_target &&
- cpi->rc.projected_frame_size <= frame_over_shoot_limit) ||
+ rc->projected_frame_size <= frame_over_shoot_limit) ||
(kf_err > low_err_target &&
- cpi->rc.projected_frame_size <= frame_under_shoot_limit)) {
+ rc->projected_frame_size <= frame_under_shoot_limit)) {
// Lower q_high
- q_high = *q > q_low ? *q - 1 : q_low;
+ q_high = q > q_low ? q - 1 : q_low;
// Adjust Q
- *q = ((*q) * high_err_target) / kf_err;
- *q = MIN((*q), (q_high + q_low) >> 1);
+ q = (q * high_err_target) / kf_err;
+ q = MIN(q, (q_high + q_low) >> 1);
} else if (kf_err < low_err_target &&
- cpi->rc.projected_frame_size >= frame_under_shoot_limit) {
+ rc->projected_frame_size >= frame_under_shoot_limit) {
// The key frame is much better than the previous frame
// Raise q_low
- q_low = *q < q_high ? *q + 1 : q_high;
+ q_low = q < q_high ? q + 1 : q_high;
// Adjust Q
- *q = ((*q) * low_err_target) / kf_err;
- *q = MIN((*q), (q_high + q_low + 1) >> 1);
+ q = (q * low_err_target) / kf_err;
+ q = MIN(q, (q_high + q_low + 1) >> 1);
}
// Clamp Q to upper and lower limits:
- *q = clamp(*q, q_low, q_high);
+ q = clamp(q, q_low, q_high);
- loop = *q != last_q;
+ loop = q != last_q;
} else if (recode_loop_test(
cpi, frame_over_shoot_limit, frame_under_shoot_limit,
- *q, MAX(q_high, top_index), bottom_index)) {
+ q, MAX(q_high, top_index), bottom_index)) {
// Is the projected frame size out of range and are we allowed
// to attempt to recode.
- int last_q = *q;
+ int last_q = q;
int retries = 0;
// Frame size out of permitted range:
// Update correction factor & compute new Q to try...
// Frame is too large
- if (cpi->rc.projected_frame_size > cpi->rc.this_frame_target) {
+ if (rc->projected_frame_size > rc->this_frame_target) {
// Special case if the projected size is > the max allowed.
- if (cpi->rc.projected_frame_size >= cpi->rc.max_frame_bandwidth)
- q_high = cpi->rc.worst_quality;
+ if (rc->projected_frame_size >= rc->max_frame_bandwidth)
+ q_high = rc->worst_quality;
          // Raise q_low to at least the current value.
- q_low = *q < q_high ? *q + 1 : q_high;
+ q_low = q < q_high ? q + 1 : q_high;
if (undershoot_seen || loop_count > 1) {
            // Update rate_correction_factor.
vp9_rc_update_rate_correction_factors(cpi, 1);
- *q = (q_high + q_low + 1) / 2;
+ q = (q_high + q_low + 1) / 2;
} else {
            // Update rate_correction_factor.
vp9_rc_update_rate_correction_factors(cpi, 0);
- *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+ q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
bottom_index, MAX(q_high, top_index));
- while (*q < q_low && retries < 10) {
+ while (q < q_low && retries < 10) {
vp9_rc_update_rate_correction_factors(cpi, 0);
- *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+ q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
bottom_index, MAX(q_high, top_index));
retries++;
}
@@ -2838,27 +2987,27 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
overshoot_seen = 1;
} else {
// Frame is too small
- q_high = *q > q_low ? *q - 1 : q_low;
+ q_high = q > q_low ? q - 1 : q_low;
if (overshoot_seen || loop_count > 1) {
vp9_rc_update_rate_correction_factors(cpi, 1);
- *q = (q_high + q_low) / 2;
+ q = (q_high + q_low) / 2;
} else {
vp9_rc_update_rate_correction_factors(cpi, 0);
- *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+ q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
bottom_index, top_index);
// Special case reset for qlow for constrained quality.
// This should only trigger where there is very substantial
// undershoot on a frame and the auto cq level is above
            // the user passed in value.
if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY &&
- *q < q_low) {
- q_low = *q;
+ q < q_low) {
+ q_low = q;
}
- while (*q > q_high && retries < 10) {
+ while (q > q_high && retries < 10) {
vp9_rc_update_rate_correction_factors(cpi, 0);
- *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+ q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
bottom_index, top_index);
retries++;
}
@@ -2868,17 +3017,17 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
}
// Clamp Q to upper and lower limits:
- *q = clamp(*q, q_low, q_high);
+ q = clamp(q, q_low, q_high);
- loop = *q != last_q;
+ loop = q != last_q;
} else {
loop = 0;
}
}
// Special case for overlay frame.
- if (cpi->rc.is_src_frame_alt_ref &&
- (cpi->rc.projected_frame_size < cpi->rc.max_frame_bandwidth))
+ if (rc->is_src_frame_alt_ref &&
+ rc->projected_frame_size < rc->max_frame_bandwidth)
loop = 0;
if (loop) {
@@ -2912,6 +3061,9 @@ static void get_ref_frame_flags(VP9_COMP *cpi) {
if (cpi->gold_is_last)
cpi->ref_frame_flags &= ~VP9_GOLD_FLAG;
+ if (cpi->rc.frames_till_gf_update_due == INT_MAX)
+ cpi->ref_frame_flags &= ~VP9_GOLD_FLAG;
+
if (cpi->alt_is_last)
cpi->ref_frame_flags &= ~VP9_ALT_FLAG;
@@ -2943,20 +3095,18 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
VP9_COMMON *const cm = &cpi->common;
TX_SIZE t;
int q;
- int frame_over_shoot_limit;
- int frame_under_shoot_limit;
int top_index;
int bottom_index;
- SPEED_FEATURES *const sf = &cpi->sf;
- unsigned int max_mv_def = MIN(cpi->common.width, cpi->common.height);
+ const SPEED_FEATURES *const sf = &cpi->sf;
+ const unsigned int max_mv_def = MIN(cm->width, cm->height);
struct segmentation *const seg = &cm->seg;
set_ext_overrides(cpi);
/* Scale the source buffer, if required. */
- if (cm->mi_cols * 8 != cpi->un_scaled_source->y_width ||
- cm->mi_rows * 8 != cpi->un_scaled_source->y_height) {
+ if (cm->mi_cols * MI_SIZE != cpi->un_scaled_source->y_width ||
+ cm->mi_rows * MI_SIZE != cpi->un_scaled_source->y_height) {
scale_and_extend_frame_nonnormative(cpi->un_scaled_source,
&cpi->scaled_source);
cpi->Source = &cpi->scaled_source;
@@ -2965,12 +3115,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
scale_references(cpi);
- // Clear down mmx registers to allow floating point in what follows.
vp9_clear_system_state();
- // Clear zbin over-quant value and mode boost values.
- cpi->zbin_mode_boost = 0;
-
// Enable or disable mode based tweaking of the zbin.
// For 2 pass only used where GF/ARF prediction quality
// is above a threshold.
@@ -2978,7 +3124,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
cpi->zbin_mode_boost_enabled = 0;
// Current default encoder behavior for the altref sign bias.
- cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = cpi->rc.source_alt_ref_active;
+ cm->ref_frame_sign_bias[ALTREF_FRAME] = cpi->rc.source_alt_ref_active;
// Set default state for segment based loop filter update flags.
cm->lf.mode_ref_delta_update = 0;
@@ -2987,7 +3133,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
cpi->mv_step_param = vp9_init_search_range(cpi, max_mv_def);
// Initialize cpi->max_mv_magnitude and cpi->mv_step_param if appropriate.
if (sf->auto_mv_step_size) {
- if (frame_is_intra_only(&cpi->common)) {
+ if (frame_is_intra_only(cm)) {
// Initialize max_mv_magnitude for use in the first INTER frame
// after a key/intra-only frame.
cpi->max_mv_magnitude = max_mv_def;
@@ -2996,8 +3142,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// Allow mv_steps to correspond to twice the max mv magnitude found
// in the previous frame, capped by the default max_mv_magnitude based
// on resolution.
- cpi->mv_step_param = vp9_init_search_range(
- cpi, MIN(max_mv_def, 2 * cpi->max_mv_magnitude));
+ cpi->mv_step_param = vp9_init_search_range(cpi, MIN(max_mv_def, 2 *
+ cpi->max_mv_magnitude));
cpi->max_mv_magnitude = 0;
}
}
@@ -3020,7 +3166,11 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
cm->error_resilient_mode = (cpi->oxcf.error_resilient_mode != 0);
cm->frame_parallel_decoding_mode =
(cpi->oxcf.frame_parallel_decoding_mode != 0);
+
+  // By default, the encoder assumes the decoder can use prev_mi.
+ cm->coding_use_prev_mi = 1;
if (cm->error_resilient_mode) {
+ cm->coding_use_prev_mi = 0;
cm->frame_parallel_decoding_mode = 1;
cm->reset_frame_context = 0;
cm->refresh_frame_context = 0;
@@ -3034,21 +3184,17 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// static regions if indicated.
// Only allowed in second pass of two pass (as requires lagged coding)
// and if the relevant speed feature flag is set.
- if ((cpi->pass == 2) && (cpi->sf.static_segmentation)) {
+ if (cpi->pass == 2 && cpi->sf.static_segmentation)
configure_static_seg_features(cpi);
- }
// For 1 pass CBR, check if we are dropping this frame.
// Never drop on key frame.
if (cpi->pass == 0 &&
cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER &&
cm->frame_type != KEY_FRAME) {
- if (vp9_drop_frame(cpi)) {
- // Update buffer level with zero size, update frame counters, and return.
- vp9_update_buffer_level(cpi, 0);
- cm->last_frame_type = cm->frame_type;
+ if (vp9_rc_drop_frame(cpi)) {
vp9_rc_postencode_update_drop_frame(cpi);
- cm->current_video_frame++;
+ ++cm->current_video_frame;
return;
}
}
@@ -3086,44 +3232,20 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
vp9_write_yuv_frame(cpi->Source);
#endif
- // Decide how big to make the frame.
- vp9_rc_pick_frame_size_target(cpi);
-
- // Decide frame size bounds
- vp9_rc_compute_frame_size_bounds(cpi, cpi->rc.this_frame_target,
- &frame_under_shoot_limit,
- &frame_over_shoot_limit);
-
// Decide q and q bounds.
- q = vp9_rc_pick_q_and_adjust_q_bounds(cpi,
- &bottom_index,
- &top_index);
-
- // JBB : This is realtime mode. In real time mode the first frame
- // should be larger. Q of 0 is disabled because we force tx size to be
- // 16x16...
- if (cpi->sf.super_fast_rtc) {
- if (cpi->common.current_video_frame == 0)
- q /= 3;
-
- if (q == 0)
- q++;
- }
+ q = vp9_rc_pick_q_and_bounds(cpi, &bottom_index, &top_index);
if (!frame_is_intra_only(cm)) {
cm->interp_filter = DEFAULT_INTERP_FILTER;
/* TODO: Decide this more intelligently */
- set_high_precision_mv(cpi, (q < HIGH_PRECISION_MV_QTHRESH));
+ set_high_precision_mv(cpi, q < HIGH_PRECISION_MV_QTHRESH);
}
- encode_with_recode_loop(cpi,
- size,
- dest,
- &q,
- bottom_index,
- top_index,
- frame_over_shoot_limit,
- frame_under_shoot_limit);
+ if (cpi->sf.recode_loop == DISALLOW_RECODE) {
+ encode_without_recode_loop(cpi, size, dest, q);
+ } else {
+ encode_with_recode_loop(cpi, size, dest, q, bottom_index, top_index);
+ }
// Special case code to reduce pulsing when key frames are forced at a
// fixed interval. Note the reconstruction error if it is the frame before
@@ -3170,41 +3292,30 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
update_reference_frames(cpi);
for (t = TX_4X4; t <= TX_32X32; t++)
- full_to_model_counts(cpi->common.counts.coef[t],
- cpi->coef_counts[t]);
- if (!cpi->common.error_resilient_mode &&
- !cpi->common.frame_parallel_decoding_mode) {
- vp9_adapt_coef_probs(&cpi->common);
- }
-
- if (!frame_is_intra_only(&cpi->common)) {
- if (!cpi->common.error_resilient_mode &&
- !cpi->common.frame_parallel_decoding_mode) {
- vp9_adapt_mode_probs(&cpi->common);
- vp9_adapt_mv_probs(&cpi->common, cpi->common.allow_high_precision_mv);
- }
- }
+ full_to_model_counts(cm->counts.coef[t], cpi->coef_counts[t]);
-#ifdef ENTROPY_STATS
- vp9_update_mode_context_stats(cpi);
-#endif
+ if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode)
+ vp9_adapt_coef_probs(cm);
- /* Move storing frame_type out of the above loop since it is also
- * needed in motion search besides loopfilter */
- cm->last_frame_type = cm->frame_type;
+ if (!frame_is_intra_only(cm)) {
+ if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
+ vp9_adapt_mode_probs(cm);
+ vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
+ }
+ }
#if 0
output_frame_level_debug_stats(cpi);
#endif
if (cpi->refresh_golden_frame == 1)
- cm->frame_flags = cm->frame_flags | FRAMEFLAGS_GOLDEN;
+ cm->frame_flags |= FRAMEFLAGS_GOLDEN;
else
- cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_GOLDEN;
+ cm->frame_flags &= ~FRAMEFLAGS_GOLDEN;
if (cpi->refresh_alt_ref_frame == 1)
- cm->frame_flags = cm->frame_flags | FRAMEFLAGS_ALTREF;
+ cm->frame_flags |= FRAMEFLAGS_ALTREF;
else
- cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_ALTREF;
+ cm->frame_flags &= ~FRAMEFLAGS_ALTREF;
get_ref_frame_flags(cpi);
@@ -3253,6 +3364,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// reset to normal state now that we are done.
if (!cm->show_existing_frame)
cm->last_show_frame = cm->show_frame;
+
if (cm->show_frame) {
// current mip will be the prev_mip for the next frame
MODE_INFO *temp = cm->prev_mip;
@@ -3273,6 +3385,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// update not a real frame
++cm->current_video_frame;
}
+
// restore prev_mi
cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1;
cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1;
@@ -3280,16 +3393,16 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
unsigned int *frame_flags) {
- vp9_get_svc_params(cpi);
+ vp9_rc_get_svc_params(cpi);
encode_frame_to_data_rate(cpi, size, dest, frame_flags);
}
static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
unsigned int *frame_flags) {
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
- vp9_get_one_pass_cbr_params(cpi);
+ vp9_rc_get_one_pass_cbr_params(cpi);
} else {
- vp9_get_one_pass_params(cpi);
+ vp9_rc_get_one_pass_vbr_params(cpi);
}
encode_frame_to_data_rate(cpi, size, dest, frame_flags);
}
@@ -3300,16 +3413,16 @@ static void Pass1Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
(void) dest;
(void) frame_flags;
- vp9_get_first_pass_params(cpi);
+ vp9_rc_get_first_pass_params(cpi);
vp9_set_quantizer(cpi, find_fp_qindex());
vp9_first_pass(cpi);
}
static void Pass2Encode(VP9_COMP *cpi, size_t *size,
uint8_t *dest, unsigned int *frame_flags) {
- cpi->enable_encode_breakout = 1;
+ cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
- vp9_get_second_pass_params(cpi);
+ vp9_rc_get_second_pass_params(cpi);
encode_frame_to_data_rate(cpi, size, dest, frame_flags);
vp9_twopass_postencode_update(cpi, *size);
@@ -3318,6 +3431,7 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size,
static void check_initial_width(VP9_COMP *cpi, int subsampling_x,
int subsampling_y) {
VP9_COMMON *const cm = &cpi->common;
+
if (!cpi->initial_width) {
cm->subsampling_x = subsampling_x;
cm->subsampling_y = subsampling_y;
@@ -3331,12 +3445,12 @@ static void check_initial_width(VP9_COMP *cpi, int subsampling_x,
int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags,
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
int64_t end_time) {
- VP9_COMP *cpi = (VP9_COMP *) ptr;
- VP9_COMMON *cm = &cpi->common;
- struct vpx_usec_timer timer;
- int res = 0;
- const int subsampling_x = sd->uv_width < sd->y_width;
- const int subsampling_y = sd->uv_height < sd->y_height;
+ VP9_COMP *cpi = (VP9_COMP *)ptr;
+ VP9_COMMON *cm = &cpi->common;
+ struct vpx_usec_timer timer;
+ int res = 0;
+ const int subsampling_x = sd->uv_width < sd->y_width;
+ const int subsampling_y = sd->uv_height < sd->y_height;
check_initial_width(cpi, subsampling_x, subsampling_y);
vpx_usec_timer_start(&timer);
@@ -3558,11 +3672,17 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
adjust_frame_rate(cpi);
}
+ if (cpi->svc.number_temporal_layers > 1 &&
+ cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ update_layer_framerate(cpi);
+ restore_layer_context(cpi);
+ }
+
// start with a 0 size frame
*size = 0;
// Clear down mmx registers
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
/* find a free buffer for the new frame, releasing the reference previously
* held.
@@ -3587,7 +3707,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
vp9_realloc_frame_buffer(get_frame_new_buffer(cm),
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
- VP9_ENC_BORDER_IN_PIXELS);
+ VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
@@ -3607,8 +3727,9 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
xd->interp_kernel = vp9_get_interp_kernel(
DEFAULT_INTERP_FILTER == SWITCHABLE ? EIGHTTAP : DEFAULT_INTERP_FILTER);
- if (cpi->oxcf.aq_mode == VARIANCE_AQ)
+ if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_vaq_init();
+ }
if (cpi->use_svc) {
SvcEncode(cpi, size, dest, frame_flags);
@@ -3633,6 +3754,12 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
cpi->droppable = !frame_is_reference(cpi);
}
+ // Save layer specific state.
+ if (cpi->svc.number_temporal_layers > 1 &&
+ cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ save_layer_context(cpi);
+ }
+
vpx_usec_timer_mark(&cmptimer);
cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
@@ -3642,7 +3769,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
#if CONFIG_INTERNAL_STATS
if (cpi->pass != 1) {
- cpi->bytes += *size;
+ cpi->bytes += (int)(*size);
if (cm->show_frame) {
cpi->count++;
@@ -3717,22 +3844,23 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest,
vp9_ppflags_t *flags) {
- VP9_COMP *cpi = (VP9_COMP *) comp;
+ VP9_COMP *cpi = (VP9_COMP *)comp;
+ VP9_COMMON *cm = &cpi->common;
- if (!cpi->common.show_frame) {
+ if (!cm->show_frame) {
return -1;
} else {
int ret;
#if CONFIG_VP9_POSTPROC
- ret = vp9_post_proc_frame(&cpi->common, dest, flags);
+ ret = vp9_post_proc_frame(cm, dest, flags);
#else
- if (cpi->common.frame_to_show) {
- *dest = *cpi->common.frame_to_show;
- dest->y_width = cpi->common.width;
- dest->y_height = cpi->common.height;
- dest->uv_width = cpi->common.width >> cpi->common.subsampling_x;
- dest->uv_height = cpi->common.height >> cpi->common.subsampling_y;
+ if (cm->frame_to_show) {
+ *dest = *cm->frame_to_show;
+ dest->y_width = cm->width;
+ dest->y_height = cm->height;
+ dest->uv_width = cm->width >> cm->subsampling_x;
+ dest->uv_height = cm->height >> cm->subsampling_y;
ret = 0;
} else {
ret = -1;
@@ -3846,11 +3974,11 @@ int vp9_set_size_literal(VP9_PTR comp, unsigned int width,
cm->width = width;
if (cm->width * 5 < cpi->initial_width) {
cm->width = cpi->initial_width / 5 + 1;
- printf("Warning: Desired width too small, changed to %d \n", cm->width);
+ printf("Warning: Desired width too small, changed to %d\n", cm->width);
}
if (cm->width > cpi->initial_width) {
cm->width = cpi->initial_width;
- printf("Warning: Desired width too large, changed to %d \n", cm->width);
+ printf("Warning: Desired width too large, changed to %d\n", cm->width);
}
}
@@ -3858,11 +3986,11 @@ int vp9_set_size_literal(VP9_PTR comp, unsigned int width,
cm->height = height;
if (cm->height * 5 < cpi->initial_height) {
cm->height = cpi->initial_height / 5 + 1;
- printf("Warning: Desired height too small, changed to %d \n", cm->height);
+ printf("Warning: Desired height too small, changed to %d\n", cm->height);
}
if (cm->height > cpi->initial_height) {
cm->height = cpi->initial_height;
- printf("Warning: Desired height too large, changed to %d \n", cm->height);
+ printf("Warning: Desired height too large, changed to %d\n", cm->height);
}
}
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index d928312b6..fd2356591 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -23,7 +23,9 @@
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/encoder/vp9_encodemb.h"
+#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_lookahead.h"
+#include "vp9/encoder/vp9_mbgraph.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
@@ -35,17 +37,17 @@
extern "C" {
#endif
-#define DISABLE_RC_LONG_TERM_MEM 0
// #define MODE_TEST_HIT_STATS
-// #define SPEEDSTATS 1
#if CONFIG_MULTIPLE_ARF
// Set MIN_GF_INTERVAL to 1 for the full decomposition.
#define MIN_GF_INTERVAL 2
#else
#define MIN_GF_INTERVAL 4
#endif
-#define DEFAULT_GF_INTERVAL 7
+#define DEFAULT_GF_INTERVAL 10
+#define DEFAULT_KF_BOOST 2000
+#define DEFAULT_GF_BOOST 2000
#define KEY_FRAME_CONTEXT 5
@@ -78,42 +80,6 @@ typedef struct {
FRAME_CONTEXT fc;
} CODING_CONTEXT;
-typedef struct {
- double frame;
- double intra_error;
- double coded_error;
- double sr_coded_error;
- double ssim_weighted_pred_err;
- double pcnt_inter;
- double pcnt_motion;
- double pcnt_second_ref;
- double pcnt_neutral;
- double MVr;
- double mvr_abs;
- double MVc;
- double mvc_abs;
- double MVrv;
- double MVcv;
- double mv_in_out_count;
- double new_mv_count;
- double duration;
- double count;
-} FIRSTPASS_STATS;
-
-typedef struct {
- struct {
- int err;
- union {
- int_mv mv;
- MB_PREDICTION_MODE mode;
- } m;
- } ref[MAX_REF_FRAMES];
-} MBGRAPH_MB_STATS;
-
-typedef struct {
- MBGRAPH_MB_STATS *mb_stats;
-} MBGRAPH_FRAME_STATS;
-
// This enumerator type needs to be kept aligned with the mode order in
// const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code.
typedef enum {
@@ -170,7 +136,8 @@ typedef enum {
NSTEP = 1,
HEX = 2,
BIGDIA = 3,
- SQUARE = 4
+ SQUARE = 4,
+ FAST_HEX = 5
} SEARCH_METHODS;
typedef enum {
@@ -231,18 +198,50 @@ typedef enum {
LAST_FRAME_PARTITION_ALL = 2
} LAST_FRAME_PARTITION_METHOD;
+typedef enum {
+ // No recode.
+ DISALLOW_RECODE = 0,
+ // Allow recode for KF and for frames exceeding the maximum frame bandwidth.
+ ALLOW_RECODE_KFMAXBW = 1,
+ // Allow recode only for KF/ARF/GF frames.
+ ALLOW_RECODE_KFARFGF = 2,
+ // Allow recode for all frames based on bitrate constraints.
+ ALLOW_RECODE = 3
+} RECODE_LOOP_TYPE;
+
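The ordered values let callers replace the old 0/1/2 magic numbers with
threshold compares; vp9_rc_postencode_update below already tests
cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF. A minimal sketch of the gating
this enables (the helper and its flag arguments are hypothetical, not part
of this patch):

    static int recode_allowed(const SPEED_FEATURES *sf, int is_key_frame,
                              int is_gf_arf, int over_max_bandwidth) {
      // Each level permits everything the previous level did.
      if (sf->recode_loop == DISALLOW_RECODE)
        return 0;
      if (sf->recode_loop == ALLOW_RECODE_KFMAXBW)
        return is_key_frame || over_max_bandwidth;
      if (sf->recode_loop == ALLOW_RECODE_KFARFGF)
        return is_key_frame || is_gf_arf;
      return 1;  // ALLOW_RECODE: any frame may be re-encoded.
    }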
+typedef enum {
+ // encode_breakout is disabled.
+ ENCODE_BREAKOUT_DISABLED = 0,
+ // encode_breakout is enabled.
+ ENCODE_BREAKOUT_ENABLED = 1,
+ // encode_breakout is enabled with small max_thresh limit.
+ ENCODE_BREAKOUT_LIMITED = 2
+} ENCODE_BREAKOUT_TYPE;
+
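ENCODE_BREAKOUT_TYPE replaces the old enable_encode_breakout int. Together
with the encode_breakout_thresh speed feature and the encode_breakout field
added to VP9_COMP further down, the effective threshold might be derived as
follows (a sketch under assumptions; this helper does not exist in the
patch, and MIN is the macro from vp9_common.h):

    static int effective_encode_breakout(const VP9_COMP *cpi) {
      switch (cpi->allow_encode_breakout) {
        case ENCODE_BREAKOUT_DISABLED:
          return 0;  // breakout fully off
        case ENCODE_BREAKOUT_LIMITED:
          // Cap the externally supplied threshold in the limited mode.
          return MIN(cpi->encode_breakout, cpi->sf.encode_breakout_thresh);
        default:  // ENCODE_BREAKOUT_ENABLED
          return cpi->encode_breakout;
      }
    }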
+typedef enum {
+ // Search partitions using RD/NONRD criterion
+ SEARCH_PARTITION = 0,
+
+ // Always use a fixed size partition
+ FIXED_PARTITION = 1,
+
+ // Use a fixed size partition in every 64X64 SB, where the size is
+ // determined based on source variance
+ VAR_BASED_FIXED_PARTITION = 2,
+
+ // Use an arbitrary partitioning scheme based on source variance within
+ // a 64X64 SB
+ VAR_BASED_PARTITION
+} PARTITION_SEARCH_TYPE;
+
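The four strategies give the encode loop a single dispatch point for
partitioning. Roughly (a hypothetical sketch; none of these helper names
are defined in this patch, they stand in for the paths in
vp9_encodeframe.c):

    static void pick_partitioning(VP9_COMP *cpi, BLOCK_SIZE *fixed_size) {
      const SPEED_FEATURES *sf = &cpi->sf;
      switch (sf->partition_search_type) {
        case FIXED_PARTITION:
          *fixed_size = sf->always_this_block_size;  // one size for every SB
          break;
        case VAR_BASED_FIXED_PARTITION:
          *fixed_size = var_based_fixed_size(cpi);   // hypothetical helper
          break;
        case VAR_BASED_PARTITION:
          var_based_partitioning(cpi);               // hypothetical helper
          break;
        default:                                     // SEARCH_PARTITION
          search_partitioning(cpi);                  // RD/non-RD search
          break;
      }
    }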
typedef struct {
- // This flag refers to whether or not to perform rd optimization.
- int RD;
+ // Frame level coding parameter update
+ int frame_parameter_update;
// Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
SEARCH_METHODS search_method;
- // Recode_loop can be:
- // 0 means we only encode a frame once
- // 1 means we can re-encode based on bitrate constraints on any frame
- // 2 means we can only recode gold, alt, and key frames.
- int recode_loop;
+ RECODE_LOOP_TYPE recode_loop;
// Subpel_search_method can only be subpel_tree which does a subpixel
// logarithmic search that keeps stepping at 1/2 pixel units until
@@ -321,16 +320,6 @@ typedef struct {
// TODO(JBB): remove this as it's no longer used.
- // If set partition size will always be always_this_block_size.
- int use_one_partition_size_always;
-
- // Skip rectangular partition test when partition type none gives better
- // rd than partition type split.
- int less_rectangular_check;
-
- // Disable testing non square partitions. (eg 16x32)
- int use_square_partition_only;
-
// After looking at the first set of modes (set by index here), skip
// checking modes for reference frames that don't match the reference frame
// of the best so far.
@@ -339,9 +328,18 @@ typedef struct {
// TODO(JBB): Remove this.
int reference_masking;
- // Used in conjunction with use_one_partition_size_always.
+ PARTITION_SEARCH_TYPE partition_search_type;
+
+ // Used if partition_search_type = FIXED_PARTITION
BLOCK_SIZE always_this_block_size;
+ // Skip rectangular partition test when partition type none gives better
+ // rd than partition type split.
+ int less_rectangular_check;
+
+ // Disable testing non-square partitions (e.g. 16x32).
+ int use_square_partition_only;
+
// Sets min and max partition sizes for this 64x64 region based on the
// same 64x64 in last encoded frame, and the left and above neighbor.
AUTO_MIN_MAX_MODE auto_min_max_partition_size;
@@ -364,11 +362,6 @@ typedef struct {
// inter modes or to enable it always.
int disable_split_mask;
- // TODO(jbb): Remove this and everything that uses it. It's only valid if
- // we were doing small to large partition checks. We currently do the
- // reverse.
- int using_small_partition_info;
-
// TODO(jingning): combine the related motion search speed features
// This allows us to use motion search at other sizes as a starting
// point for this motion search and limits the search range around it.
@@ -417,10 +410,24 @@ typedef struct {
// by only looking at counts from 1/2 the bands.
int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced
- // This flag control the use of the new super fast rtc mode
- int super_fast_rtc;
+ // This flag controls the use of non-RD mode decision.
+ int use_nonrd_pick_mode;
+
+ // This variable sets the encode_breakout threshold. Currently, it is only
+ // enabled in real time mode.
+ int encode_breakout_thresh;
} SPEED_FEATURES;
+typedef struct {
+ RATE_CONTROL rc;
+ int target_bandwidth;
+ int64_t starting_buffer_level;
+ int64_t optimal_buffer_level;
+ int64_t maximum_buffer_size;
+ double framerate;
+ int avg_frame_size;
+} LAYER_CONTEXT;
+
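One LAYER_CONTEXT per temporal layer snapshots the rate-control state that
must survive layer switches. Only the call site of save_layer_context()
appears in this patch (in vp9_get_compressed_data above); its body
plausibly reduces to the following, which is an assumption:

    static void save_layer_context(VP9_COMP *const cpi) {
      LAYER_CONTEXT *const lc =
          &cpi->svc.layer_context[cpi->svc.temporal_layer_id];
      lc->rc = cpi->rc;  // snapshot the whole RATE_CONTROL block
      lc->target_bandwidth = (int)cpi->oxcf.target_bandwidth;
      lc->starting_buffer_level = cpi->oxcf.starting_buffer_level;
      lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level;
      lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size;
      lc->framerate = cpi->oxcf.framerate;
    }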
typedef struct VP9_COMP {
DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]);
@@ -454,7 +461,7 @@ typedef struct VP9_COMP {
YV12_BUFFER_CONFIG *un_scaled_source;
YV12_BUFFER_CONFIG scaled_source;
- unsigned int key_frame_frequency;
+ int key_frame_frequency;
int gold_is_last; // gold same as last frame ( short circuit gold searches)
int alt_is_last; // Alt same as last ( short circuit altref search)
@@ -465,9 +472,6 @@ typedef struct VP9_COMP {
int gld_fb_idx;
int alt_fb_idx;
- int current_layer;
- int use_svc;
-
#if CONFIG_MULTIPLE_ARF
int alt_ref_fb_idx[REF_FRAMES - 3];
#endif
@@ -498,12 +502,6 @@ typedef struct VP9_COMP {
// Ambient reconstruction err target for force key frames
int ambient_err;
- unsigned int mode_chosen_counts[MAX_MODES];
- unsigned int sub8x8_mode_chosen_counts[MAX_REFS];
- int64_t mode_skip_mask;
- int ref_frame_mask;
- int set_ref_frame_mask;
-
int rd_threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES];
int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
int rd_thresh_sub8x8[MAX_SEGMENTS][BLOCK_SIZES][MAX_REFS];
@@ -543,7 +541,6 @@ typedef struct VP9_COMP {
vp9_coeff_probs_model frame_coef_probs[TX_SIZES][PLANE_TYPES];
vp9_coeff_stats frame_branch_ct[TX_SIZES][PLANE_TYPES];
- int64_t target_bandwidth;
struct vpx_codec_pkt_list *output_pkt_list;
MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS];
@@ -567,6 +564,13 @@ typedef struct VP9_COMP {
unsigned int max_mv_magnitude;
int mv_step_param;
+ // Default value is 1. From first pass stats, encode_breakout may be disabled.
+ ENCODE_BREAKOUT_TYPE allow_encode_breakout;
+
+ // Get threshold from external input. In real time mode, it can be
+ // overwritten according to encoding speed.
+ int encode_breakout;
+
unsigned char *segmentation_map;
// segment threshold for encode breakout
@@ -588,52 +592,15 @@ typedef struct VP9_COMP {
uint64_t time_pick_lpf;
uint64_t time_encode_sb_row;
- struct twopass_rc {
- unsigned int section_intra_rating;
- unsigned int next_iiratio;
- unsigned int this_iiratio;
- FIRSTPASS_STATS total_stats;
- FIRSTPASS_STATS this_frame_stats;
- FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start;
- FIRSTPASS_STATS total_left_stats;
- int first_pass_done;
- int64_t bits_left;
- int64_t clip_bits_total;
- double avg_iiratio;
- double modified_error_min;
- double modified_error_max;
- double modified_error_total;
- double modified_error_left;
- double kf_intra_err_min;
- double gf_intra_err_min;
- int static_scene_max_gf_interval;
- int kf_bits;
- // Remaining error from uncoded frames in a gf group. Two pass use only
- int64_t gf_group_error_left;
-
- // Projected total bits available for a key frame group of frames
- int64_t kf_group_bits;
-
- // Error score of frames still to be coded in kf group
- int64_t kf_group_error_left;
-
- // Projected Bits available for a group of frames including 1 GF or ARF
- int64_t gf_group_bits;
- // Bits for the golden frame or ARF - 2 pass only
- int gf_bits;
- int alt_extra_bits;
-
- int sr_update_lag;
-
- int kf_zeromotion_pct;
- int gf_zeromotion_pct;
- } twopass;
+ struct twopass_rc twopass;
YV12_BUFFER_CONFIG alt_ref_buffer;
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS];
int fixed_divide[512];
#if CONFIG_INTERNAL_STATS
+ unsigned int mode_chosen_counts[MAX_MODES];
+
int count;
double total_y;
double total_u;
@@ -684,9 +651,17 @@ typedef struct VP9_COMP {
int initial_width;
int initial_height;
- int number_spatial_layers;
- int enable_encode_breakout; // Default value is 1. From first pass stats,
- // encode_breakout may be disabled.
+ int use_svc;
+
+ struct svc {
+ int spatial_layer_id;
+ int temporal_layer_id;
+ int number_spatial_layers;
+ int number_temporal_layers;
+ // Layer context used for rate control in CBR mode, only defined for
+ // temporal layers for now.
+ LAYER_CONTEXT layer_context[VPX_TS_MAX_LAYERS];
+ } svc;
#if CONFIG_MULTIPLE_ARF
// ARF tracking variables.
@@ -741,8 +716,6 @@ void vp9_encode_frame(VP9_COMP *cpi);
void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size);
-void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x);
-
void vp9_set_speed_features(VP9_COMP *cpi);
int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source,
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 1aaa4162b..87f20fa1c 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -98,10 +98,14 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
mvp_full.col >>= 3;
mvp_full.row >>= 3;
- bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
- sadpb, further_steps, 1,
- &cpi->fn_ptr[bsize],
- &ref_mv.as_mv, tmp_mv);
+ if (cpi->sf.search_method == FAST_HEX) {
+ vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, &cpi->fn_ptr[bsize],
+ 1, &ref_mv.as_mv, &tmp_mv->as_mv);
+ } else {
+ vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, sadpb, further_steps,
+ 1, &cpi->fn_ptr[bsize], &ref_mv.as_mv,
+ &tmp_mv->as_mv);
+ }
x->mv_col_min = tmp_col_min;
x->mv_col_max = tmp_col_max;
@@ -130,9 +134,50 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
// calculate the bit cost on motion vector
*rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ return bestsme;
+}
+static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
+ const TileInfo *const tile,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
+ int_mv *tmp_mv) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
+ struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
+ int ref = mbmi->ref_frame[0];
+ int_mv ref_mv = mbmi->ref_mvs[ref][0];
+ int dis;
- return bestsme;
+ const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
+ ref);
+ if (scaled_ref_frame) {
+ int i;
+ // Swap out the reference frame for a version that's been scaled to
+ // match the resolution of the current frame, allowing the existing
+ // motion search code to be used without additional modifications.
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ backup_yv12[i] = xd->plane[i].pre[0];
+
+ setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
+ }
+
+ tmp_mv->as_mv.col >>= 3;
+ tmp_mv->as_mv.row >>= 3;
+
+ cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv.as_mv,
+ cpi->common.allow_high_precision_mv,
+ x->errorperbit,
+ &cpi->fn_ptr[bsize],
+ cpi->sf.subpel_force_stop,
+ cpi->sf.subpel_iters_per_step,
+ x->nmvjointcost, x->mvcost,
+ &dis, &x->pred_sse[ref]);
+
+ if (scaled_ref_frame) {
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[0] = backup_yv12[i];
+ }
}
// TODO(jingning) placeholder for inter-frame non-RD mode decision.
@@ -145,16 +190,21 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
+ struct macroblock_plane *const p = &x->plane[0];
+ struct macroblockd_plane *const pd = &xd->plane[0];
const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
- MB_PREDICTION_MODE this_mode;
- MV_REFERENCE_FRAME ref_frame;
+ MB_PREDICTION_MODE this_mode, best_mode = ZEROMV;
+ MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME;
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
struct buf_2d yv12_mb[4][MAX_MB_PLANE];
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
VP9_ALT_FLAG };
int64_t best_rd = INT64_MAX;
int64_t this_rd;
- int64_t cost[4]= { 0, 100, 150, 205 };
+ static const int cost[4] = { 0, 50, 75, 100 };
+
+ const int64_t inter_mode_thresh = 300;
+ const int64_t intra_mode_cost = 50;
x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
@@ -164,12 +214,17 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// initialize mode decisions
*returnrate = INT_MAX;
+ *returndistortion = INT64_MAX;
vpx_memset(mbmi, 0, sizeof(MB_MODE_INFO));
mbmi->sb_type = bsize;
mbmi->ref_frame[0] = NONE;
mbmi->ref_frame[1] = NONE;
mbmi->tx_size = MIN(max_txsize_lookup[bsize],
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
+ mbmi->interp_filter = cpi->common.interp_filter == SWITCHABLE ?
+ EIGHTTAP : cpi->common.interp_filter;
+ mbmi->skip = 0;
+ mbmi->segment_id = 0;
for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
@@ -194,12 +249,14 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
clamp_mv2(&frame_mv[NEARESTMV][ref_frame].as_mv, xd);
clamp_mv2(&frame_mv[NEARMV][ref_frame].as_mv, xd);
+ mbmi->ref_frame[0] = ref_frame;
+
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
- int rate = cost[this_mode - NEARESTMV];
+ int rate = cost[INTER_OFFSET(this_mode)];
int64_t dist;
if (this_mode == NEWMV) {
- if (this_rd < 300)
+ if (this_rd < 500)
continue;
x->mode_sad[ref_frame][INTER_OFFSET(NEWMV)] =
@@ -208,34 +265,55 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV)
continue;
+
+ sub_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame]);
}
- dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)];
+ mbmi->mode = this_mode;
+ mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
+ vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+
+ dist = cpi->fn_ptr[bsize].sdf(p->src.buf, p->src.stride,
+ pd->dst.buf, pd->dst.stride, INT_MAX);
this_rd = rate + dist;
if (this_rd < best_rd) {
best_rd = this_rd;
- mbmi->mode = this_mode;
- mbmi->ref_frame[0] = ref_frame;
- mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
- xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
- mbmi->interp_filter = cpi->common.interp_filter == SWITCHABLE ?
- EIGHTTAP : cpi->common.interp_filter;
-
- mbmi->ref_frame[1] = INTRA_FRAME;
- mbmi->tx_size = max_txsize_lookup[bsize];
- mbmi->uv_mode = this_mode;
- mbmi->skip_coeff = 0;
- mbmi->sb_type = bsize;
- mbmi->segment_id = 0;
+ best_mode = this_mode;
+ best_ref_frame = ref_frame;
}
}
}
- // TODO(jingning) sub-pixel motion search, if NEWMV is chosen
+ mbmi->mode = best_mode;
+ mbmi->ref_frame[0] = best_ref_frame;
+ mbmi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int;
+ xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
- // TODO(jingning) intra prediction search, if the best SAD is above a certain
+ // Perform intra prediction search, if the best SAD is above a certain
// threshold.
+ if (best_rd > inter_mode_thresh) {
+ for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) {
+ vp9_predict_intra_block(xd, 0, b_width_log2(bsize),
+ mbmi->tx_size, this_mode,
+ &p->src.buf[0], p->src.stride,
+ &pd->dst.buf[0], pd->dst.stride, 0, 0, 0);
+
+ this_rd = cpi->fn_ptr[bsize].sdf(p->src.buf,
+ p->src.stride,
+ pd->dst.buf,
+ pd->dst.stride, INT_MAX);
+
+ if (this_rd + intra_mode_cost < best_rd) {
+ best_rd = this_rd;
+ mbmi->mode = this_mode;
+ mbmi->ref_frame[0] = INTRA_FRAME;
+ mbmi->uv_mode = this_mode;
+ mbmi->mv[0].as_int = INVALID_MV;
+ }
+ }
+ }
return INT64_MAX;
}
diff --git a/vp9/encoder/vp9_psnr.c b/vp9/encoder/vp9_psnr.c
deleted file mode 100644
index 58294e15a..000000000
--- a/vp9/encoder/vp9_psnr.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <math.h>
-
-#include "vpx_scale/yv12config.h"
-
-#define MAX_PSNR 100
-
-double vp9_mse2psnr(double samples, double peak, double mse) {
- double psnr;
-
- if (mse > 0.0)
- psnr = 10.0 * log10(peak * peak * samples / mse);
- else
- psnr = MAX_PSNR; // Limit to prevent / 0
-
- if (psnr > MAX_PSNR)
- psnr = MAX_PSNR;
-
- return psnr;
-}
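Both psnr files disappear entirely. Judging by the deleted body, the third
parameter was the summed squared error rather than a mean, since
peak^2 * samples / sse == peak^2 / MSE, with the 100 dB cap standing in for
the sse == 0 case. A restatement for an 8-bit plane, assuming w, h and a
summed squared error sse are in scope:

    // 10 * log10(255^2 * N / SSE) == 10 * log10(255^2 / MSE),
    // capped at MAX_PSNR (100) to guard against division by zero.
    double psnr;
    if (sse > 0)
      psnr = 10.0 * log10(255.0 * 255.0 * (double)(w * h) / (double)sse);
    else
      psnr = 100.0;
    if (psnr > 100.0)
      psnr = 100.0;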
diff --git a/vp9/encoder/vp9_psnr.h b/vp9/encoder/vp9_psnr.h
deleted file mode 100644
index ffe00ed2c..000000000
--- a/vp9/encoder/vp9_psnr.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VP9_ENCODER_VP9_PSNR_H_
-#define VP9_ENCODER_VP9_PSNR_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-double vp9_mse2psnr(double samples, double peak, double mse);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // VP9_ENCODER_VP9_PSNR_H_
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index a2eea1cd7..372c36221 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -26,7 +26,7 @@ void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t count,
const int16_t *dequant_ptr,
int zbin_oq_value, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
- int i, non_zero_count = count, eob = -1;
+ int i, non_zero_count = (int)count, eob = -1;
const int zbins[2] = { zbin_ptr[0] + zbin_oq_value,
zbin_ptr[1] + zbin_oq_value };
const int nzbins[2] = { zbins[0] * -1,
@@ -37,7 +37,7 @@ void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t count,
if (!skip_block) {
// Pre-scan pass
- for (i = count - 1; i >= 0; i--) {
+ for (i = (int)count - 1; i >= 0; i--) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
@@ -79,55 +79,47 @@ void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *dequant_ptr,
int zbin_oq_value, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
- int i, rc, eob;
- int zbins[2], nzbins[2];
- int x, y, z, sz;
+ const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1),
+ ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1) };
+ const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
+
int idx = 0;
int idx_arr[1024];
+ int i, eob = -1;
- vpx_memset(qcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
- vpx_memset(dqcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
-
- eob = -1;
-
- // Base ZBIN
- zbins[0] = ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1);
- zbins[1] = ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1);
- nzbins[0] = zbins[0] * -1;
- nzbins[1] = zbins[1] * -1;
+ vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
+ vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
if (!skip_block) {
// Pre-scan pass
for (i = 0; i < n_coeffs; i++) {
- rc = scan[i];
- z = coeff_ptr[rc];
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
// If the coefficient is out of the base ZBIN range, keep it for
// quantization.
- if (z >= zbins[rc != 0] || z <= nzbins[rc != 0])
+ if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
idx_arr[idx++] = i;
}
// Quantization pass: only process the coefficients selected in
// pre-scan pass. Note: idx can be zero.
for (i = 0; i < idx; i++) {
- rc = scan[idx_arr[i]];
-
- z = coeff_ptr[rc];
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
-
- x += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
- x = clamp(x, INT16_MIN, INT16_MAX);
- y = ((((x * quant_ptr[rc != 0]) >> 16) + x) *
- quant_shift_ptr[rc != 0]) >> 15; // quantize (x)
-
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0] / 2; // dequantized value
-
- if (y)
- eob = idx_arr[i]; // last nonzero coeffs
+ const int rc = scan[idx_arr[i]];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ int tmp;
+ int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+ abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
+ tmp = ((((abs_coeff * quant_ptr[rc != 0]) >> 16) + abs_coeff) *
+ quant_shift_ptr[rc != 0]) >> 15;
+
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+
+ if (tmp)
+ eob = idx_arr[i];
}
}
*eob_ptr = eob + 1;
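The rewrite replaces the cryptic x/y/z/sz locals with self-describing
names, but the branch-free sign handling is unchanged: shift down to get an
all-ones mask for negatives, xor-subtract to take the absolute value, and
the same xor-subtract to restore the sign. In isolation (a standalone
illustration, not encoder code; it assumes arithmetic right shift of
negative ints, as the rest of the codebase does):

    static int abs_of(int coeff) {
      const int sign = coeff >> 31;       // 0 if coeff >= 0, else -1
      return (coeff ^ sign) - sign;       // e.g. -7 -> 7, 7 -> 7
    }

    static int signed_from_abs(int coeff, int abs_result) {
      const int sign = coeff >> 31;
      return (abs_result ^ sign) - sign;  // negate iff coeff was negative
    }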
@@ -136,8 +128,8 @@ void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
const int16_t *scan, const int16_t *iscan) {
MACROBLOCKD *const xd = &x->e_mbd;
- struct macroblock_plane* p = &x->plane[plane];
- struct macroblockd_plane* pd = &xd->plane[plane];
+ struct macroblock_plane *p = &x->plane[plane];
+ struct macroblockd_plane *pd = &xd->plane[plane];
vp9_quantize_b(BLOCK_OFFSET(p->coeff, block),
16, x->skip_block,
@@ -223,38 +215,30 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
}
void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
- int i;
- VP9_COMMON *const cm = &cpi->common;
+ const VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
- int zbin_extra;
- int segment_id = xd->mi_8x8[0]->mbmi.segment_id;
- const int qindex = vp9_get_qindex(&cpi->common.seg, segment_id,
- cpi->common.base_qindex);
-
- int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q);
+ const int segment_id = xd->mi_8x8[0]->mbmi.segment_id;
+ const int qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
+ const int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q);
+ const int zbin = cpi->zbin_mode_boost + x->act_zbin_adj;
+ int i;
// Y
- zbin_extra = (cpi->common.y_dequant[qindex][1] *
- (cpi->zbin_mode_boost + x->act_zbin_adj)) >> 7;
-
x->plane[0].quant = cpi->y_quant[qindex];
x->plane[0].quant_shift = cpi->y_quant_shift[qindex];
x->plane[0].zbin = cpi->y_zbin[qindex];
x->plane[0].round = cpi->y_round[qindex];
- x->plane[0].zbin_extra = (int16_t)zbin_extra;
- x->e_mbd.plane[0].dequant = cpi->common.y_dequant[qindex];
+ x->plane[0].zbin_extra = (int16_t)((cm->y_dequant[qindex][1] * zbin) >> 7);
+ xd->plane[0].dequant = cm->y_dequant[qindex];
// UV
- zbin_extra = (cpi->common.uv_dequant[qindex][1] *
- (cpi->zbin_mode_boost + x->act_zbin_adj)) >> 7;
-
for (i = 1; i < 3; i++) {
x->plane[i].quant = cpi->uv_quant[qindex];
x->plane[i].quant_shift = cpi->uv_quant_shift[qindex];
x->plane[i].zbin = cpi->uv_zbin[qindex];
x->plane[i].round = cpi->uv_round[qindex];
- x->plane[i].zbin_extra = (int16_t)zbin_extra;
- x->e_mbd.plane[i].dequant = cpi->common.uv_dequant[qindex];
+ x->plane[i].zbin_extra = (int16_t)((cm->uv_dequant[qindex][1] * zbin) >> 7);
+ xd->plane[i].dequant = cm->uv_dequant[qindex];
}
#if CONFIG_ALPHA
@@ -263,18 +247,14 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
x->plane[3].zbin = cpi->a_zbin[qindex];
x->plane[3].round = cpi->a_round[qindex];
x->plane[3].zbin_extra = (int16_t)zbin_extra;
- x->e_mbd.plane[3].dequant = cpi->common.a_dequant[qindex];
+ xd->plane[3].dequant = cm->a_dequant[qindex];
#endif
- x->skip_block = vp9_segfeature_active(&cpi->common.seg, segment_id,
- SEG_LVL_SKIP);
-
- /* save this macroblock QIndex for vp9_update_zbin_extra() */
+ x->skip_block = vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
x->q_index = qindex;
- /* R/D setup */
- cpi->mb.errorperbit = rdmult >> 6;
- cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
+ x->errorperbit = rdmult >> 6;
+ x->errorperbit += (x->errorperbit == 0);
vp9_initialize_me_consts(cpi, x->q_index);
}
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 701557238..89aa82140 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -8,23 +8,24 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <limits.h>
#include <assert.h>
+#include <limits.h>
#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_common.h"
-#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/common/vp9_entropymode.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vp9/common/vp9_systemdependent.h"
-#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_seg_common.h"
+#include "vp9/common/vp9_systemdependent.h"
+
+#include "vp9/encoder/vp9_encodemv.h"
+#include "vp9/encoder/vp9_ratectrl.h"
#define LIMIT_QRANGE_FOR_ALTREF_AND_KEY 1
@@ -209,51 +210,60 @@ static int estimate_bits_at_q(int frame_kind, int q, int mbs,
: (bpm * mbs) >> BPER_MB_NORMBITS;
}
-
-static void calc_iframe_target_size(VP9_COMP *cpi) {
- const VP9_CONFIG *oxcf = &cpi->oxcf;
- RATE_CONTROL *const rc = &cpi->rc;
- int target;
-
- vp9_clear_system_state(); // __asm emms;
-
- // For 1-pass.
- if (cpi->pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
- if (cpi->common.current_video_frame == 0) {
- target = oxcf->starting_buffer_level / 2;
- } else {
- // TODO(marpan): Add in adjustment based on Q.
- // If this keyframe was forced, use a more recent Q estimate.
- // int Q = (cpi->common.frame_flags & FRAMEFLAGS_KEY) ?
- // cpi->rc.avg_frame_qindex : cpi->rc.ni_av_qi;
- int initial_boost = 32;
- // Boost depends somewhat on frame rate.
- int kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16));
- // Adjustment up based on q: need to fix.
- // kf_boost = kf_boost * kfboost_qadjust(Q) / 100;
- // Frame separation adjustment (down).
- if (rc->frames_since_key < cpi->output_framerate / 2) {
- kf_boost = (int)(kf_boost * rc->frames_since_key /
- (cpi->output_framerate / 2));
- }
- kf_boost = (kf_boost < 16) ? 16 : kf_boost;
- target = ((16 + kf_boost) * rc->per_frame_bandwidth) >> 4;
- }
- rc->active_worst_quality = rc->worst_quality;
- } else {
- target = rc->per_frame_bandwidth;
+int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) {
+ const RATE_CONTROL *rc = &cpi->rc;
+ const int min_frame_target = MAX(rc->min_frame_bandwidth,
+ rc->av_per_frame_bandwidth >> 5);
+ if (target < min_frame_target)
+ target = min_frame_target;
+ if (cpi->refresh_golden_frame && rc->is_src_frame_alt_ref) {
+ // If there is an active ARF at this location use the minimum
+ // bits on this frame even if it is a constructed arf.
+ // The active maximum quantizer ensures that an appropriate
+ // number of bits will be spent if needed for constructed ARFs.
+ target = min_frame_target;
}
+ // Clip the frame target to the maximum allowed value.
+ if (target > rc->max_frame_bandwidth)
+ target = rc->max_frame_bandwidth;
+ return target;
+}
+int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) {
+ const RATE_CONTROL *rc = &cpi->rc;
+ const VP9_CONFIG *oxcf = &cpi->oxcf;
if (oxcf->rc_max_intra_bitrate_pct) {
- const int max_rate = rc->per_frame_bandwidth *
+ const int max_rate = rc->av_per_frame_bandwidth *
oxcf->rc_max_intra_bitrate_pct / 100;
target = MIN(target, max_rate);
}
- rc->this_frame_target = target;
+ if (target > rc->max_frame_bandwidth)
+ target = rc->max_frame_bandwidth;
+ return target;
+}
+
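The two clamps are split out so each target-size path can share them.
Combined with vp9_rc_set_frame_target, introduced near the end of this
file's changes, a one-pass inter-frame call site looks roughly like this
(a sketch; calc_pframe_target_size_one_pass is a hypothetical stand-in for
whatever computes the raw target):

    int target = calc_pframe_target_size_one_pass(cpi);  // hypothetical
    target = vp9_rc_clamp_pframe_target_size(cpi, target);
    vp9_rc_set_frame_target(cpi, target);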
+
+// Update the buffer level for higher layers, given the encoded current layer.
+static void update_layer_buffer_level(VP9_COMP *const cpi,
+ int encoded_frame_size) {
+ int temporal_layer = 0;
+ int current_temporal_layer = cpi->svc.temporal_layer_id;
+ for (temporal_layer = current_temporal_layer + 1;
+ temporal_layer < cpi->svc.number_temporal_layers; ++temporal_layer) {
+ LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
+ RATE_CONTROL *lrc = &lc->rc;
+ int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate -
+ encoded_frame_size);
+ lrc->bits_off_target += bits_off_for_this_layer;
+
+ // Clip buffer level to maximum buffer size for the layer.
+ lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size);
+ lrc->buffer_level = lrc->bits_off_target;
+ }
}
// Update the buffer level: leaky bucket model.
-void vp9_update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
+static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
const VP9_COMMON *const cm = &cpi->common;
const VP9_CONFIG *oxcf = &cpi->oxcf;
RATE_CONTROL *const rc = &cpi->rc;
@@ -266,14 +276,18 @@ void vp9_update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
}
// Clip the buffer level to the maximum specified buffer size.
- rc->buffer_level = MIN(rc->bits_off_target, oxcf->maximum_buffer_size);
+ rc->bits_off_target = MIN(rc->bits_off_target, oxcf->maximum_buffer_size);
+ rc->buffer_level = rc->bits_off_target;
+
+ if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ update_layer_buffer_level(cpi, encoded_frame_size);
+ }
}
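update_buffer_level is a leaky bucket: each encoded frame drains its actual
size from the buffer while the channel refills it at the per-frame average
rate. The update preceding the clipping shown above is, in essence, the
line below (an assumption about the elided part of the function; the
key-frame handling is not shown):

    // Credit one frame's worth of channel bits, debit what the frame
    // actually used. E.g. with a 200000-bit budget and a 150000-bit
    // frame, bits_off_target rises by 50000.
    rc->bits_off_target += rc->av_per_frame_bandwidth - encoded_frame_size;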
-int vp9_drop_frame(VP9_COMP *cpi) {
+int vp9_rc_drop_frame(VP9_COMP *cpi) {
const VP9_CONFIG *oxcf = &cpi->oxcf;
RATE_CONTROL *const rc = &cpi->rc;
-
if (!oxcf->drop_frames_water_mark) {
return 0;
} else {
@@ -284,7 +298,7 @@ int vp9_drop_frame(VP9_COMP *cpi) {
// If buffer is below drop_mark, for now just drop every other frame
// (starting with the next frame) until it increases back over drop_mark.
int drop_mark = (int)(oxcf->drop_frames_water_mark *
- oxcf->optimal_buffer_level / 100);
+ oxcf->optimal_buffer_level / 100);
if ((rc->buffer_level > drop_mark) &&
(rc->decimation_factor > 0)) {
--rc->decimation_factor;
@@ -308,127 +322,12 @@ int vp9_drop_frame(VP9_COMP *cpi) {
}
}
-// Adjust active_worst_quality level based on buffer level.
-static int adjust_active_worst_quality_from_buffer_level(const VP9_CONFIG *oxcf,
- const RATE_CONTROL *rc) {
- // Adjust active_worst_quality: If buffer is above the optimal/target level,
- // bring active_worst_quality down depending on fullness over buffer.
- // If buffer is below the optimal level, let the active_worst_quality go from
- // ambient Q (at buffer = optimal level) to worst_quality level
- // (at buffer = critical level).
-
- int active_worst_quality = rc->active_worst_quality;
- // Maximum limit for down adjustment, ~20%.
- int max_adjustment_down = active_worst_quality / 5;
- // Buffer level below which we push active_worst to worst_quality.
- int critical_level = oxcf->optimal_buffer_level >> 2;
- int adjustment = 0;
- int buff_lvl_step = 0;
- if (rc->buffer_level > oxcf->optimal_buffer_level) {
- // Adjust down.
- if (max_adjustment_down) {
- buff_lvl_step = (int)((oxcf->maximum_buffer_size -
- oxcf->optimal_buffer_level) / max_adjustment_down);
- if (buff_lvl_step)
- adjustment = (int)((rc->buffer_level - oxcf->optimal_buffer_level) /
- buff_lvl_step);
- active_worst_quality -= adjustment;
- }
- } else if (rc->buffer_level > critical_level) {
- // Adjust up from ambient Q.
- if (critical_level) {
- buff_lvl_step = (oxcf->optimal_buffer_level - critical_level);
- if (buff_lvl_step) {
- adjustment = (rc->worst_quality - rc->avg_frame_qindex[INTER_FRAME]) *
- (oxcf->optimal_buffer_level - rc->buffer_level) /
- buff_lvl_step;
- }
- active_worst_quality = rc->avg_frame_qindex[INTER_FRAME] + adjustment;
- }
- } else {
- // Set to worst_quality if buffer is below critical level.
- active_worst_quality = rc->worst_quality;
- }
- return active_worst_quality;
-}
-
-// Adjust target frame size with respect to the buffering constraints:
-static int target_size_from_buffer_level(const VP9_CONFIG *oxcf,
- const RATE_CONTROL *rc) {
- int target = rc->this_frame_target;
- const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level;
- const int one_pct_bits = 1 + oxcf->optimal_buffer_level / 100;
-
- if (diff > 0) {
- // Lower the target bandwidth for this frame.
- const int pct_low = MIN(diff / one_pct_bits, oxcf->under_shoot_pct);
- target -= (target * pct_low) / 200;
- } else if (diff < 0) {
- // Increase the target bandwidth for this frame.
- const int pct_high = MIN(-diff / one_pct_bits, oxcf->over_shoot_pct);
- target += (target * pct_high) / 200;
- }
-
- return target;
-}
-
-static void calc_pframe_target_size(VP9_COMP *const cpi) {
- RATE_CONTROL *const rc = &cpi->rc;
- const VP9_CONFIG *const oxcf = &cpi->oxcf;
- int min_frame_target;
- rc->this_frame_target = rc->per_frame_bandwidth;
-
- if (cpi->pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
- // Need to decide how low min_frame_target should be for 1-pass CBR.
- // For now, use: cpi->rc.av_per_frame_bandwidth / 16:
- min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4,
- FRAME_OVERHEAD_BITS);
- rc->this_frame_target = target_size_from_buffer_level(oxcf, rc);
- // Adjust qp-max based on buffer level.
- rc->active_worst_quality =
- adjust_active_worst_quality_from_buffer_level(oxcf, rc);
-
- if (rc->this_frame_target < min_frame_target)
- rc->this_frame_target = min_frame_target;
- return;
- }
-
- // Check that the total sum of adjustments is not above the maximum allowed.
- // That is, having allowed for the KF and GF penalties, we have not pushed
- // the current inter-frame target too low. If the adjustment we apply here is
- // not capable of recovering all the extra bits we have spent in the KF or GF,
- // then the remainder will have to be recovered over a longer time span via
- // other buffer / rate control mechanisms.
- min_frame_target = MAX(rc->min_frame_bandwidth,
- rc->av_per_frame_bandwidth >> 5);
-
- if (rc->this_frame_target < min_frame_target)
- rc->this_frame_target = min_frame_target;
-
- // Adjust target frame size for Golden Frames:
- if (cpi->refresh_golden_frame) {
- // If we are using alternate ref instead of gf then do not apply the boost
- // It will instead be applied to the altref update
- // Jims modified boost
- if (!rc->source_alt_ref_active) {
- // The spend on the GF is defined in the two pass code
- // for two pass encodes
- rc->this_frame_target = rc->per_frame_bandwidth;
- } else {
- // If there is an active ARF at this location use the minimum
- // bits on this frame even if it is a constructed arf.
- // The active maximum quantizer insures that an appropriate
- // number of bits will be spent if needed for constructed ARFs.
- rc->this_frame_target = 0;
- }
- }
-}
-
static double get_rate_correction_factor(const VP9_COMP *cpi) {
if (cpi->common.frame_type == KEY_FRAME) {
return cpi->rc.key_frame_rate_correction_factor;
} else {
- if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)
+ if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
+ !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER))
return cpi->rc.gf_rate_correction_factor;
else
return cpi->rc.rate_correction_factor;
@@ -439,7 +338,8 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) {
if (cpi->common.frame_type == KEY_FRAME) {
cpi->rc.key_frame_rate_correction_factor = factor;
} else {
- if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)
+ if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
+ !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER))
cpi->rc.gf_rate_correction_factor = factor;
else
cpi->rc.rate_correction_factor = factor;
@@ -455,7 +355,7 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) {
int projected_size_based_on_q = 0;
// Clear down mmx registers to allow floating point in what follows
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
// Work out how big we would have expected the frame to be at this Q given
// the current correction factor.
@@ -463,7 +363,6 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) {
projected_size_based_on_q = estimate_bits_at_q(cpi->common.frame_type, q,
cpi->common.MBs,
rate_correction_factor);
-
// Work out a size correction factor.
if (projected_size_based_on_q > 0)
correction_factor = (100 * cpi->rc.projected_frame_size) /
@@ -562,13 +461,206 @@ static int get_active_quality(int q, int gfu_boost, int low, int high,
}
}
-int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
- int *bottom_index, int *top_index) {
+static int calc_active_worst_quality_one_pass_vbr(const VP9_COMP *cpi) {
+ int active_worst_quality;
+ if (cpi->common.frame_type == KEY_FRAME) {
+ if (cpi->common.current_video_frame == 0) {
+ active_worst_quality = cpi->rc.worst_quality;
+ } else {
+ // Choose active worst quality twice as large as the last q.
+ active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2;
+ }
+ } else if (!cpi->rc.is_src_frame_alt_ref &&
+ (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+ if (cpi->common.current_video_frame == 1) {
+ active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 5 / 4;
+ } else {
+ // Use the last inter-frame q without further scaling.
+ active_worst_quality = cpi->rc.last_q[INTER_FRAME];
+ }
+ } else {
+ if (cpi->common.current_video_frame == 1) {
+ active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2;
+ } else {
+ // Choose active worst quality twice as large as the last q.
+ active_worst_quality = cpi->rc.last_q[INTER_FRAME] * 2;
+ }
+ }
+ if (active_worst_quality > cpi->rc.worst_quality)
+ active_worst_quality = cpi->rc.worst_quality;
+ return active_worst_quality;
+}
+
+// Adjust active_worst_quality level based on buffer level.
+static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
+ // Adjust active_worst_quality: If buffer is above the optimal/target level,
+ // bring active_worst_quality down depending on fullness of buffer.
+ // If buffer is below the optimal level, let the active_worst_quality go from
+ // ambient Q (at buffer = optimal level) to worst_quality level
+ // (at buffer = critical level).
+ const VP9_CONFIG *oxcf = &cpi->oxcf;
+ const RATE_CONTROL *rc = &cpi->rc;
+ // Buffer level below which we push active_worst to worst_quality.
+ int64_t critical_level = oxcf->optimal_buffer_level >> 2;
+ int64_t buff_lvl_step = 0;
+ int adjustment = 0;
+ int active_worst_quality;
+ if (cpi->common.frame_type == KEY_FRAME)
+ return rc->worst_quality;
+ if (cpi->common.current_video_frame > 1)
+ active_worst_quality = MIN(rc->worst_quality,
+ rc->avg_frame_qindex[INTER_FRAME] * 5 / 4);
+ else
+ active_worst_quality = MIN(rc->worst_quality,
+ rc->avg_frame_qindex[KEY_FRAME] * 3 / 2);
+ if (rc->buffer_level > oxcf->optimal_buffer_level) {
+ // Adjust down.
+ // Maximum limit for down adjustment, ~30%.
+ int max_adjustment_down = active_worst_quality / 3;
+ if (max_adjustment_down) {
+ buff_lvl_step = ((oxcf->maximum_buffer_size -
+ oxcf->optimal_buffer_level) / max_adjustment_down);
+ if (buff_lvl_step)
+ adjustment = (int)((rc->buffer_level - oxcf->optimal_buffer_level) /
+ buff_lvl_step);
+ active_worst_quality -= adjustment;
+ }
+ } else if (rc->buffer_level > critical_level) {
+ // Adjust up from ambient Q.
+ if (critical_level) {
+ buff_lvl_step = (oxcf->optimal_buffer_level - critical_level);
+ if (buff_lvl_step) {
+ adjustment =
+ (int)((rc->worst_quality - rc->avg_frame_qindex[INTER_FRAME]) *
+ (oxcf->optimal_buffer_level - rc->buffer_level) /
+ buff_lvl_step);
+ }
+ active_worst_quality = rc->avg_frame_qindex[INTER_FRAME] + adjustment;
+ }
+ } else {
+ // Set to worst_quality if buffer is below critical level.
+ active_worst_quality = rc->worst_quality;
+ }
+ return active_worst_quality;
+}
+
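The down-adjustment above interpolates linearly between the optimal level
and a full buffer. With illustrative numbers:

    // optimal = 4,000,000 bits, maximum = 8,000,000, buffer at 6,000,000,
    // starting active_worst_quality = 60:
    int active_worst_quality = 60;
    int max_adjustment_down = active_worst_quality / 3;             // 20
    int64_t buff_lvl_step = (8000000 - 4000000) / max_adjustment_down;
    int adjustment = (int)((6000000 - 4000000) / buff_lvl_step);    // 10
    active_worst_quality -= adjustment;                             // 50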
+static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
+ int *bottom_index,
+ int *top_index) {
+ const VP9_COMMON *const cm = &cpi->common;
+ const RATE_CONTROL *const rc = &cpi->rc;
+ int active_best_quality;
+ int active_worst_quality = calc_active_worst_quality_one_pass_cbr(cpi);
+ int q;
+
+ if (frame_is_intra_only(cm)) {
+ active_best_quality = rc->best_quality;
+ // Handle the special case for key frames forced when we have reached
+ // the maximum key frame interval. Here force the Q to a range
+ // based on the ambient Q to reduce the risk of popping.
+ if (rc->this_key_frame_forced) {
+ int qindex = rc->last_boosted_qindex;
+ double last_boosted_q = vp9_convert_qindex_to_q(qindex);
+ int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q,
+ (last_boosted_q * 0.75));
+ active_best_quality = MAX(qindex + delta_qindex, rc->best_quality);
+ } else if (cm->current_video_frame > 0) {
+ // not first frame of one pass and kf_boost is set
+ double q_adj_factor = 1.0;
+ double q_val;
+
+ active_best_quality = get_active_quality(rc->avg_frame_qindex[KEY_FRAME],
+ rc->kf_boost,
+ kf_low, kf_high,
+ kf_low_motion_minq,
+ kf_high_motion_minq);
+
+ // Allow somewhat lower kf minq with small image formats.
+ if ((cm->width * cm->height) <= (352 * 288)) {
+ q_adj_factor -= 0.25;
+ }
+
+ // Convert the adjustment factor to a qindex delta
+ // on active_best_quality.
+ q_val = vp9_convert_qindex_to_q(active_best_quality);
+ active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val *
+ q_adj_factor);
+ }
+ } else if (!rc->is_src_frame_alt_ref &&
+ (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+ // Use the lower of active_worst_quality and recent
+ // average Q as basis for GF/ARF best Q limit unless last frame was
+ // a key frame.
+ if (rc->frames_since_key > 1 &&
+ rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
+ q = rc->avg_frame_qindex[INTER_FRAME];
+ } else {
+ q = active_worst_quality;
+ }
+ active_best_quality = get_active_quality(
+ q, rc->gfu_boost, gf_low, gf_high,
+ gf_low_motion_minq, gf_high_motion_minq);
+ } else {
+ // Use the lower of active_worst_quality and recent/average Q.
+ if (cm->current_video_frame > 1) {
+ if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality)
+ active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
+ else
+ active_best_quality = inter_minq[active_worst_quality];
+ } else {
+ if (rc->avg_frame_qindex[KEY_FRAME] < active_worst_quality)
+ active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
+ else
+ active_best_quality = inter_minq[active_worst_quality];
+ }
+ }
+
+ // Clip the active best and worst quality values to limits
+ active_best_quality = clamp(active_best_quality,
+ rc->best_quality, rc->worst_quality);
+ active_worst_quality = clamp(active_worst_quality,
+ active_best_quality, rc->worst_quality);
+
+ *top_index = active_worst_quality;
+ *bottom_index = active_best_quality;
+
+#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
+ // Limit Q range for the adaptive loop.
+ if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
+ if (!(cm->current_video_frame == 0))
+ *top_index = (active_worst_quality + active_best_quality * 3) / 4;
+ }
+#endif
+ // Special case code to try and match quality with forced key frames.
+ if (cm->frame_type == KEY_FRAME && rc->this_key_frame_forced) {
+ q = rc->last_boosted_qindex;
+ } else {
+ q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
+ active_best_quality, active_worst_quality);
+ if (q > *top_index) {
+ // Special case when we are targeting the max allowed rate.
+ if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth)
+ *top_index = q;
+ else
+ q = *top_index;
+ }
+ }
+ assert(*top_index <= rc->worst_quality &&
+ *top_index >= rc->best_quality);
+ assert(*bottom_index <= rc->worst_quality &&
+ *bottom_index >= rc->best_quality);
+ assert(q <= rc->worst_quality && q >= rc->best_quality);
+ return q;
+}
+
+static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
+ int *bottom_index,
+ int *top_index) {
const VP9_COMMON *const cm = &cpi->common;
const RATE_CONTROL *const rc = &cpi->rc;
const VP9_CONFIG *const oxcf = &cpi->oxcf;
int active_best_quality;
- int active_worst_quality = rc->active_worst_quality;
+ int active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi);
int q;
if (frame_is_intra_only(cm)) {
@@ -583,13 +675,12 @@ int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q,
(last_boosted_q * 0.75));
active_best_quality = MAX(qindex + delta_qindex, rc->best_quality);
- } else if (!(cpi->pass == 0 && cm->current_video_frame == 0)) {
+ } else if (cm->current_video_frame > 0) {
// not first frame of one pass and kf_boost is set
double q_adj_factor = 1.0;
double q_val;
- // Baseline value derived from cpi->active_worst_quality and kf boost
- active_best_quality = get_active_quality(active_worst_quality,
+ active_best_quality = get_active_quality(rc->avg_frame_qindex[KEY_FRAME],
rc->kf_boost,
kf_low, kf_high,
kf_low_motion_minq,
@@ -600,9 +691,6 @@ int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
q_adj_factor -= 0.25;
}
- // Make a further adjustment based on the kf zero motion measure.
- q_adj_factor += 0.05 - (0.001 * (double)cpi->twopass.kf_zeromotion_pct);
-
// Convert the adjustment factor to a qindex delta
// on active_best_quality.
q_val = vp9_convert_qindex_to_q(active_best_quality);
@@ -618,7 +706,6 @@ int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
#endif
} else if (!rc->is_src_frame_alt_ref &&
(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
-
// Use the lower of active_worst_quality and recent
// average Q as basis for GF/ARF best Q limit unless last frame was
// a key frame.
@@ -626,7 +713,7 @@ int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
q = rc->avg_frame_qindex[INTER_FRAME];
} else {
- q = active_worst_quality;
+ q = rc->avg_frame_qindex[KEY_FRAME];
}
// For constrained quality don't allow Q less than the cq level
if (oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) {
@@ -669,14 +756,11 @@ int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
active_best_quality = cpi->cq_target_quality;
} else {
- if (cpi->pass == 0 &&
- rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality)
- // 1-pass: for now, use the average Q for the active_best, if its lower
- // than active_worst.
+ // Use the lower of active_worst_quality and recent/average Q.
+ if (cm->current_video_frame > 1)
active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
else
- active_best_quality = inter_minq[active_worst_quality];
-
+ active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
// For the constrained quality mode we don't want
// q to fall below the cq level.
if ((oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) &&
@@ -693,17 +777,192 @@ int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
}
// Clip the active best and worst quality values to limits
- if (active_worst_quality > rc->worst_quality)
- active_worst_quality = rc->worst_quality;
+ active_best_quality = clamp(active_best_quality,
+ rc->best_quality, rc->worst_quality);
+ active_worst_quality = clamp(active_worst_quality,
+ active_best_quality, rc->worst_quality);
- if (active_best_quality < rc->best_quality)
- active_best_quality = rc->best_quality;
+ *top_index = active_worst_quality;
+ *bottom_index = active_best_quality;
- if (active_best_quality > rc->worst_quality)
- active_best_quality = rc->worst_quality;
+#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
+ // Limit Q range for the adaptive loop.
+ if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
+ if (!(cm->current_video_frame == 0))
+ *top_index = (active_worst_quality + active_best_quality * 3) / 4;
+ } else if (!rc->is_src_frame_alt_ref &&
+ (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+ *top_index = (active_worst_quality + active_best_quality) / 2;
+ }
+#endif
+ if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
+ q = active_best_quality;
+ // Special case code to try and match quality with forced key frames.
+ } else if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced) {
+ q = rc->last_boosted_qindex;
+ } else {
+ q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
+ active_best_quality, active_worst_quality);
+ if (q > *top_index) {
+ // Special case when we are targeting the max allowed rate.
+ if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth)
+ *top_index = q;
+ else
+ q = *top_index;
+ }
+ }
+#if CONFIG_MULTIPLE_ARF
+ // Force the quantizer determined by the coding order pattern.
+ if (cpi->multi_arf_enabled && (cm->frame_type != KEY_FRAME) &&
+ cpi->oxcf.end_usage != USAGE_CONSTANT_QUALITY) {
+ double new_q;
+ double current_q = vp9_convert_qindex_to_q(active_worst_quality);
+ int level = cpi->this_frame_weight;
+ assert(level >= 0);
+ new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level)));
+ q = active_worst_quality +
+ vp9_compute_qdelta(cpi, current_q, new_q);
+
+ *bottom_index = q;
+ *top_index = q;
+ printf("frame:%d q:%d\n", cm->current_video_frame, q);
+ }
+#endif
+ assert(*top_index <= rc->worst_quality &&
+ *top_index >= rc->best_quality);
+ assert(*bottom_index <= rc->worst_quality &&
+ *bottom_index >= rc->best_quality);
+ assert(q <= rc->worst_quality && q >= rc->best_quality);
+ return q;
+}
+
+static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
+ int *bottom_index,
+ int *top_index) {
+ const VP9_COMMON *const cm = &cpi->common;
+ const RATE_CONTROL *const rc = &cpi->rc;
+ const VP9_CONFIG *const oxcf = &cpi->oxcf;
+ int active_best_quality;
+ int active_worst_quality = cpi->twopass.active_worst_quality;
+ int q;
+
+ if (frame_is_intra_only(cm)) {
+#if !CONFIG_MULTIPLE_ARF
+ // Handle the special case for key frames forced when we have reached
+ // the maximum key frame interval. Here force the Q to a range
+ // based on the ambient Q to reduce the risk of popping.
+ if (rc->this_key_frame_forced) {
+ int qindex = rc->last_boosted_qindex;
+ double last_boosted_q = vp9_convert_qindex_to_q(qindex);
+ int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q,
+ (last_boosted_q * 0.75));
+ active_best_quality = MAX(qindex + delta_qindex, rc->best_quality);
+ } else {
+ // Not forced keyframe.
+ double q_adj_factor = 1.0;
+ double q_val;
+ // Baseline value derived from cpi->active_worst_quality and kf boost.
+ active_best_quality = get_active_quality(active_worst_quality,
+ rc->kf_boost,
+ kf_low, kf_high,
+ kf_low_motion_minq,
+ kf_high_motion_minq);
+
+ // Allow somewhat lower kf minq with small image formats.
+ if ((cm->width * cm->height) <= (352 * 288)) {
+ q_adj_factor -= 0.25;
+ }
+
+ // Make a further adjustment based on the kf zero motion measure.
+ q_adj_factor += 0.05 - (0.001 * (double)cpi->twopass.kf_zeromotion_pct);
+
+ // Convert the adjustment factor to a qindex delta
+ // on active_best_quality.
+ q_val = vp9_convert_qindex_to_q(active_best_quality);
+ active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val *
+ q_adj_factor);
+ }
+#else
+ double current_q;
+ // Force the KF quantizer to be 30% of the active_worst_quality.
+ current_q = vp9_convert_qindex_to_q(active_worst_quality);
+ active_best_quality = active_worst_quality
+ + vp9_compute_qdelta(cpi, current_q, current_q * 0.3);
+#endif
+ } else if (!rc->is_src_frame_alt_ref &&
+ (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+ // Use the lower of active_worst_quality and recent
+ // average Q as basis for GF/ARF best Q limit unless last frame was
+ // a key frame.
+ if (rc->frames_since_key > 1 &&
+ rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
+ q = rc->avg_frame_qindex[INTER_FRAME];
+ } else {
+ q = active_worst_quality;
+ }
+ // For constrained quality don't allow Q less than the cq level
+ if (oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) {
+ if (q < cpi->cq_target_quality)
+ q = cpi->cq_target_quality;
+ if (rc->frames_since_key > 1) {
+ active_best_quality = get_active_quality(q, rc->gfu_boost,
+ gf_low, gf_high,
+ afq_low_motion_minq,
+ afq_high_motion_minq);
+ } else {
+ active_best_quality = get_active_quality(q, rc->gfu_boost,
+ gf_low, gf_high,
+ gf_low_motion_minq,
+ gf_high_motion_minq);
+ }
+ // Constrained quality uses a slightly lower active best.
+ active_best_quality = active_best_quality * 15 / 16;
- if (active_worst_quality < active_best_quality)
- active_worst_quality = active_best_quality;
+ } else if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
+ if (!cpi->refresh_alt_ref_frame) {
+ active_best_quality = cpi->cq_target_quality;
+ } else {
+ if (rc->frames_since_key > 1) {
+ active_best_quality = get_active_quality(
+ q, rc->gfu_boost, gf_low, gf_high,
+ afq_low_motion_minq, afq_high_motion_minq);
+ } else {
+ active_best_quality = get_active_quality(
+ q, rc->gfu_boost, gf_low, gf_high,
+ gf_low_motion_minq, gf_high_motion_minq);
+ }
+ }
+ } else {
+ active_best_quality = get_active_quality(
+ q, rc->gfu_boost, gf_low, gf_high,
+ gf_low_motion_minq, gf_high_motion_minq);
+ }
+ } else {
+ if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
+ active_best_quality = cpi->cq_target_quality;
+ } else {
+ active_best_quality = inter_minq[active_worst_quality];
+
+ // For the constrained quality mode we don't want
+ // q to fall below the cq level.
+ if ((oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) &&
+ (active_best_quality < cpi->cq_target_quality)) {
+ // If we are strongly undershooting the target rate in the last
+ // frames then use the user passed in cq value not the auto
+ // cq value.
+ if (rc->rolling_actual_bits < rc->min_frame_bandwidth)
+ active_best_quality = oxcf->cq_level;
+ else
+ active_best_quality = cpi->cq_target_quality;
+ }
+ }
+ }
+
+ // Clip the active best and worst quality values to limits.
+ active_best_quality = clamp(active_best_quality,
+ rc->best_quality, rc->worst_quality);
+ active_worst_quality = clamp(active_worst_quality,
+ active_best_quality, rc->worst_quality);
*top_index = active_worst_quality;
*bottom_index = active_best_quality;
@@ -711,8 +970,7 @@ int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
// Limit Q range for the adaptive loop.
if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
- if (!(cpi->pass == 0 && cm->current_video_frame == 0))
- *top_index = (active_worst_quality + active_best_quality * 3) / 4;
+ *top_index = (active_worst_quality + active_best_quality * 3) / 4;
} else if (!rc->is_src_frame_alt_ref &&
(oxcf->end_usage != USAGE_STREAM_FROM_SERVER) &&
(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
@@ -722,14 +980,14 @@ int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
q = active_best_quality;
- // Special case code to try and match quality with forced key frames
+ // Special case code to try and match quality with forced key frames.
} else if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced) {
q = rc->last_boosted_qindex;
} else {
q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
active_best_quality, active_worst_quality);
if (q > *top_index) {
- // Special case when we are targeting the max allowed rate
+ // Special case when we are targeting the max allowed rate.
if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth)
*top_index = q;
else
@@ -761,6 +1019,35 @@ int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
return q;
}
+int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi,
+ int *bottom_index,
+ int *top_index) {
+ int q;
+ if (cpi->pass == 0) {
+ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
+ q = rc_pick_q_and_bounds_one_pass_cbr(cpi, bottom_index, top_index);
+ else
+ q = rc_pick_q_and_bounds_one_pass_vbr(cpi, bottom_index, top_index);
+ } else {
+ q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index);
+ }
+
+ // JBB: This is realtime mode. In realtime mode the first frame
+ // should be larger. Q of 0 is disabled because we force the tx size
+ // to be 16x16...
+ if (cpi->sf.use_nonrd_pick_mode) {
+ if (cpi->common.current_video_frame == 0)
+ q /= 3;
+ if (q == 0)
+ q++;
+ if (q < *bottom_index)
+ *bottom_index = q;
+ else if (q > *top_index)
+ *top_index = q;
+ }
+ return q;
+}
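For illustration, the realtime adjustment above reduces to the following self-contained sketch; the VP9_COMP fields are replaced by plain parameters, so the names here are stand-ins rather than encoder API:

    /* Standalone sketch of the realtime q adjustment in
     * vp9_rc_pick_q_and_bounds(); all names are illustrative. */
    static int adjust_q_realtime(int q, int frame_number,
                                 int *bottom_index, int *top_index) {
      if (frame_number == 0)
        q /= 3;    /* spend extra bits on the very first frame */
      if (q == 0)
        q++;       /* q of 0 is disabled when tx size is forced to 16x16 */
      if (q < *bottom_index)
        *bottom_index = q;
      else if (q > *top_index)
        *top_index = q;
      return q;
    }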
+
void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi,
int this_frame_target,
int *frame_under_shoot_limit,
@@ -804,24 +1091,14 @@ void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi,
}
}
-// return of 0 means drop frame
-int vp9_rc_pick_frame_size_target(VP9_COMP *cpi) {
+void vp9_rc_set_frame_target(VP9_COMP *cpi, int target) {
const VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
- if (cm->frame_type == KEY_FRAME)
- calc_iframe_target_size(cpi);
- else
- calc_pframe_target_size(cpi);
-
- // Clip the frame target to the maximum allowed value.
- if (rc->this_frame_target > rc->max_frame_bandwidth)
- rc->this_frame_target = rc->max_frame_bandwidth;
-
+ rc->this_frame_target = target;
// Target rate per SB64 (including partial SB64s).
rc->sb64_target_rate = ((int64_t)rc->this_frame_target * 64 * 64) /
(cm->width * cm->height);
- return 1;
}
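A quick numeric check of the per-SB64 formula above, with illustrative values (not taken from any encoder defaults):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const int this_frame_target = 36000;          /* bits for this frame */
      const int width = 1280, height = 720;
      /* Same expression as in vp9_rc_set_frame_target() above. */
      const int sb64_target_rate =
          (int)(((int64_t)this_frame_target * 64 * 64) / (width * height));
      printf("%d bits per 64x64 superblock\n", sb64_target_rate);  /* 160 */
      return 0;
    }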
static void update_alt_ref_frame_stats(VP9_COMP *cpi) {
@@ -865,11 +1142,14 @@ static void update_golden_frame_stats(VP9_COMP *cpi) {
void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
+
+ cm->last_frame_type = cm->frame_type;
// Update rate control heuristics
- rc->projected_frame_size = (bytes_used << 3);
+ rc->projected_frame_size = (int)(bytes_used << 3);
// Post encode loop adjustment of Q prediction.
- vp9_rc_update_rate_correction_factors(cpi, (cpi->sf.recode_loop ||
+ vp9_rc_update_rate_correction_factors(
+ cpi, (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF ||
cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0);
// Keep a record of last Q and ambient average Q.
@@ -878,7 +1158,8 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
rc->avg_frame_qindex[KEY_FRAME] = ROUND_POWER_OF_TWO(
3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2);
} else if (!rc->is_src_frame_alt_ref &&
- (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+ (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) &&
+ !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) {
rc->last_q[2] = cm->base_qindex;
rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO(
3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2);
@@ -907,7 +1188,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
rc->last_boosted_qindex = cm->base_qindex;
}
- vp9_update_buffer_level(cpi, rc->projected_frame_size);
+ update_buffer_level(cpi, rc->projected_frame_size);
// Rolling monitors of whether we are over- or under-spending, used to help
// regulate min and max Q in two-pass mode.
@@ -929,22 +1210,6 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
rc->total_target_vs_actual += (rc->this_frame_target -
rc->projected_frame_size);
-#ifndef DISABLE_RC_LONG_TERM_MEM
- // Update bits left to the kf and gf groups to account for overshoot or
- // undershoot on these frames
- if (cm->frame_type == KEY_FRAME) {
- cpi->twopass.kf_group_bits += cpi->rc.this_frame_target -
- cpi->rc.projected_frame_size;
-
- cpi->twopass.kf_group_bits = MAX(cpi->twopass.kf_group_bits, 0);
- } else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) {
- cpi->twopass.gf_group_bits += cpi->rc.this_frame_target -
- cpi->rc.projected_frame_size;
-
- cpi->twopass.gf_group_bits = MAX(cpi->twopass.gf_group_bits, 0);
- }
-#endif
-
if (cpi->oxcf.play_alternate && cpi->refresh_alt_ref_frame &&
(cm->frame_type != KEY_FRAME))
// Update the alternate reference frame stats as appropriate.
@@ -962,6 +1227,172 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
}
void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
+ // Update buffer level with zero size, update frame counters, and return.
+ update_buffer_level(cpi, 0);
+ cpi->common.last_frame_type = cpi->common.frame_type;
cpi->rc.frames_since_key++;
cpi->rc.frames_to_key--;
}
+
+static int test_for_kf_one_pass(VP9_COMP *cpi) {
+ // Placeholder function for auto key frame detection.
+ return 0;
+}
+// Use this macro to turn on/off use of alt-refs in one-pass mode.
+#define USE_ALTREF_FOR_ONE_PASS 1
+
+static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
+ static const int af_ratio = 10;
+ const RATE_CONTROL *rc = &cpi->rc;
+ int target;
+#if USE_ALTREF_FOR_ONE_PASS
+ target = (!rc->is_src_frame_alt_ref &&
+ (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) ?
+ (rc->av_per_frame_bandwidth * cpi->rc.baseline_gf_interval * af_ratio) /
+ (cpi->rc.baseline_gf_interval + af_ratio - 1) :
+ (rc->av_per_frame_bandwidth * cpi->rc.baseline_gf_interval) /
+ (cpi->rc.baseline_gf_interval + af_ratio - 1);
+#else
+ target = rc->av_per_frame_bandwidth;
+#endif
+ return vp9_rc_clamp_pframe_target_size(cpi, target);
+}
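The af_ratio split keeps the GF group budget roughly unchanged: the boosted golden/alt-ref frame gets af_ratio times a normal frame's share, and one boosted frame plus (interval - 1) normal frames sums back to av_per_frame_bandwidth * interval, up to integer truncation. A small check with illustrative values:

    #include <stdio.h>

    int main(void) {
      const int af_ratio = 10, interval = 7, av_bw = 1000;  /* illustrative */
      const int boosted = (av_bw * interval * af_ratio) /
                          (interval + af_ratio - 1);        /* 4375 */
      const int normal = (av_bw * interval) /
                         (interval + af_ratio - 1);         /* 437 */
      /* One boosted frame plus (interval - 1) normal frames ~= group budget. */
      printf("boosted=%d normal=%d group=%d vs %d\n", boosted, normal,
             boosted + (interval - 1) * normal, av_bw * interval);
      return 0;
    }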
+
+static int calc_iframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
+ static const int kf_ratio = 25;
+ const RATE_CONTROL *rc = &cpi->rc;
+ int target = rc->av_per_frame_bandwidth * kf_ratio;
+ return vp9_rc_clamp_iframe_target_size(cpi, target);
+}
+
+void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ RATE_CONTROL *const rc = &cpi->rc;
+ int target;
+ if (!cpi->refresh_alt_ref_frame &&
+ (cm->current_video_frame == 0 ||
+ cm->frame_flags & FRAMEFLAGS_KEY ||
+ rc->frames_to_key == 0 ||
+ (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
+ cm->frame_type = KEY_FRAME;
+ rc->this_key_frame_forced = cm->current_video_frame != 0 &&
+ rc->frames_to_key == 0;
+ rc->frames_to_key = cpi->key_frame_frequency;
+ rc->kf_boost = DEFAULT_KF_BOOST;
+ rc->source_alt_ref_active = 0;
+ } else {
+ cm->frame_type = INTER_FRAME;
+ }
+ if (rc->frames_till_gf_update_due == 0) {
+ rc->baseline_gf_interval = DEFAULT_GF_INTERVAL;
+ rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+ // NOTE: frames_till_gf_update_due must be <= frames_to_key.
+ if (rc->frames_till_gf_update_due > rc->frames_to_key)
+ rc->frames_till_gf_update_due = rc->frames_to_key;
+ cpi->refresh_golden_frame = 1;
+ rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
+ rc->gfu_boost = DEFAULT_GF_BOOST;
+ }
+ if (cm->frame_type == KEY_FRAME)
+ target = calc_iframe_target_size_one_pass_vbr(cpi);
+ else
+ target = calc_pframe_target_size_one_pass_vbr(cpi);
+ vp9_rc_set_frame_target(cpi, target);
+}
+
+static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
+ const VP9_CONFIG *oxcf = &cpi->oxcf;
+ const RATE_CONTROL *rc = &cpi->rc;
+ const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level;
+ const int64_t one_pct_bits = 1 + oxcf->optimal_buffer_level / 100;
+ int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4,
+ FRAME_OVERHEAD_BITS);
+ int target = rc->av_per_frame_bandwidth;
+ if (cpi->svc.number_temporal_layers > 1 &&
+ cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ // Note that for layers, av_per_frame_bandwidth is the cumulative
+ // per-frame bandwidth. For the target size of this frame, use the
+ // layer average frame size (i.e., the non-cumulative per-frame bandwidth).
+ int current_temporal_layer = cpi->svc.temporal_layer_id;
+ const LAYER_CONTEXT *lc = &cpi->svc.layer_context[current_temporal_layer];
+ target = lc->avg_frame_size;
+ min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS);
+ }
+ if (diff > 0) {
+ // Lower the target bandwidth for this frame.
+ const int pct_low = (int)MIN(diff / one_pct_bits, oxcf->under_shoot_pct);
+ target -= (target * pct_low) / 200;
+ } else if (diff < 0) {
+ // Increase the target bandwidth for this frame.
+ const int pct_high = (int)MIN(-diff / one_pct_bits, oxcf->over_shoot_pct);
+ target += (target * pct_high) / 200;
+ }
+ return MAX(min_frame_target, target);
+}
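Note the division by 200 in the adjustment above: each percentage point of buffer deficit or surplus (capped by under_shoot_pct / over_shoot_pct) moves the target by half a percent. A worked example with illustrative values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      /* Illustrative values only, not encoder defaults. */
      const int64_t optimal_buffer_level = 600000;
      const int64_t buffer_level = 480000;          /* 20% below optimal */
      const int under_shoot_pct = 50;
      const int64_t diff = optimal_buffer_level - buffer_level;
      const int64_t one_pct_bits = 1 + optimal_buffer_level / 100;
      int target = 10000;                           /* av_per_frame_bandwidth */
      int pct_low = (int)(diff / one_pct_bits);
      if (pct_low > under_shoot_pct)
        pct_low = under_shoot_pct;
      target -= (target * pct_low) / 200;           /* half a percent per point */
      printf("pct_low=%d target=%d\n", pct_low, target);  /* 19, 9050 */
      return 0;
    }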
+
+static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
+ const RATE_CONTROL *rc = &cpi->rc;
+ int target;
+
+ if (cpi->common.current_video_frame == 0) {
+ target = ((cpi->oxcf.starting_buffer_level / 2) > INT_MAX)
+ ? INT_MAX : (int)(cpi->oxcf.starting_buffer_level / 2);
+ } else {
+ const int initial_boost = 32;
+ int kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16));
+ if (rc->frames_since_key < cpi->output_framerate / 2) {
+ kf_boost = (int)(kf_boost * rc->frames_since_key /
+ (cpi->output_framerate / 2));
+ }
+ target = ((16 + kf_boost) * rc->av_per_frame_bandwidth) >> 4;
+ }
+ return vp9_rc_clamp_iframe_target_size(cpi, target);
+}
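As a worked example of the boost formula above, with illustrative numbers: at 30 fps, kf_boost = MAX(32, 2 * 30 - 16) = 44, so a keyframe arriving well after the previous one targets ((16 + 44) * av_per_frame_bandwidth) >> 4, i.e. 3.75x the average frame size, before vp9_rc_clamp_iframe_target_size() is applied. A keyframe within the first half second after the last key (frames_since_key < output_framerate / 2) gets that boost scaled down proportionally.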
+
+void vp9_rc_get_svc_params(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ int target = cpi->rc.av_per_frame_bandwidth;
+ if ((cm->current_video_frame == 0) ||
+ (cm->frame_flags & FRAMEFLAGS_KEY) ||
+ (cpi->oxcf.auto_key && (cpi->rc.frames_since_key %
+ cpi->key_frame_frequency == 0))) {
+ cm->frame_type = KEY_FRAME;
+ cpi->rc.source_alt_ref_active = 0;
+ if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ target = calc_iframe_target_size_one_pass_cbr(cpi);
+ }
+ } else {
+ cm->frame_type = INTER_FRAME;
+ if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ target = calc_pframe_target_size_one_pass_cbr(cpi);
+ }
+ }
+ vp9_rc_set_frame_target(cpi, target);
+ cpi->rc.frames_till_gf_update_due = INT_MAX;
+ cpi->rc.baseline_gf_interval = INT_MAX;
+}
+
+void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ RATE_CONTROL *const rc = &cpi->rc;
+ int target;
+ if ((cm->current_video_frame == 0 ||
+ cm->frame_flags & FRAMEFLAGS_KEY ||
+ rc->frames_to_key == 0 ||
+ (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
+ cm->frame_type = KEY_FRAME;
+ rc->this_key_frame_forced = cm->current_video_frame != 0 &&
+ rc->frames_to_key == 0;
+ rc->frames_to_key = cpi->key_frame_frequency;
+ rc->kf_boost = DEFAULT_KF_BOOST;
+ rc->source_alt_ref_active = 0;
+ target = calc_iframe_target_size_one_pass_cbr(cpi);
+ } else {
+ cm->frame_type = INTER_FRAME;
+ target = calc_pframe_target_size_one_pass_cbr(cpi);
+ }
+ vp9_rc_set_frame_target(cpi, target);
+ // Don't use gf_update by default in CBR mode.
+ rc->frames_till_gf_update_due = INT_MAX;
+ rc->baseline_gf_interval = INT_MAX;
+}
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index eba4b7a92..5dbc7d138 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -34,19 +34,18 @@ typedef struct {
double key_frame_rate_correction_factor;
double gf_rate_correction_factor;
- unsigned int frames_since_golden;
- unsigned int frames_till_gf_update_due; // Count down till next GF
- unsigned int max_gf_interval;
- unsigned int baseline_gf_interval;
- unsigned int frames_to_key;
- unsigned int frames_since_key;
- unsigned int this_key_frame_forced;
- unsigned int next_key_frame_forced;
- unsigned int source_alt_ref_pending;
- unsigned int source_alt_ref_active;
- unsigned int is_src_frame_alt_ref;
-
- int per_frame_bandwidth; // Current section per frame bandwidth target
+ int frames_since_golden;
+ int frames_till_gf_update_due;
+ int max_gf_interval;
+ int baseline_gf_interval;
+ int frames_to_key;
+ int frames_since_key;
+ int this_key_frame_forced;
+ int next_key_frame_forced;
+ int source_alt_ref_pending;
+ int source_alt_ref_active;
+ int is_src_frame_alt_ref;
+
int av_per_frame_bandwidth; // Average frame size target for clip
int min_frame_bandwidth; // Minimum allocation used for any frame
int max_frame_bandwidth; // Maximum burst rate allowed for a frame.
@@ -58,8 +57,8 @@ typedef struct {
double tot_q;
double avg_q;
- int buffer_level;
- int bits_off_target;
+ int64_t buffer_level;
+ int64_t bits_off_target;
int decimation_factor;
int decimation_count;
@@ -74,7 +73,6 @@ typedef struct {
int total_target_vs_actual; // debug stats
int worst_quality;
- int active_worst_quality;
int best_quality;
// int active_best_quality;
} RATE_CONTROL;
@@ -89,50 +87,79 @@ void vp9_setup_inter_frame(struct VP9_COMP *cpi);
double vp9_convert_qindex_to_q(int qindex);
-// Updates rate correction factors
-void vp9_rc_update_rate_correction_factors(struct VP9_COMP *cpi, int damp_var);
-
// Initialize LUTs for minq.
void vp9_rc_init_minq_luts(void);
-// return of 0 means drop frame
-// Changes only rc.this_frame_target and rc.sb64_rate_target
-int vp9_rc_pick_frame_size_target(struct VP9_COMP *cpi);
+// At a high level, the following flow is expected to be enforced
+// for rate control.
+// First, call one of the following once per frame:
+// vp9_rc_get_one_pass_vbr_params()
+// vp9_rc_get_one_pass_cbr_params()
+// vp9_rc_get_svc_params()
+// vp9_rc_get_first_pass_params()
+// vp9_rc_get_second_pass_params()
+// depending on the usage, to set the desired rate control parameters
+// for the encode.
+//
+// Then, call encode_frame_to_data_rate() to perform the
+// actual encode. This function will in turn call encode_frame()
+// one or more times, followed by one of:
+// vp9_rc_postencode_update()
+// vp9_rc_postencode_update_drop_frame()
+//
+// The majority of rate control parameters are only expected
+// to be set in the vp9_rc_get_..._params() functions and
+// updated during the vp9_rc_postencode_update...() functions.
+// The only exceptions are the vp9_rc_drop_frame() and
+// vp9_rc_update_rate_correction_factors() functions.
+
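A minimal sketch of the per-frame driver implied by the comment above; only the vp9_rc_* calls are declared in this header, while encode_one_frame() and do_encode() are hypothetical stand-ins for encode_frame_to_data_rate():

    #include <stdint.h>
    #include "vp9/encoder/vp9_ratectrl.h"

    /* Hypothetical stand-in for the actual encode step. */
    extern uint64_t do_encode(struct VP9_COMP *cpi);

    static void encode_one_frame(struct VP9_COMP *cpi, int one_pass_cbr) {
      /* 1. Set the rate control parameters for this frame. */
      if (one_pass_cbr)
        vp9_rc_get_one_pass_cbr_params(cpi);
      else
        vp9_rc_get_one_pass_vbr_params(cpi);

      /* 2. In 1-pass CBR the frame may be dropped to protect the buffer. */
      if (one_pass_cbr && vp9_rc_drop_frame(cpi)) {
        vp9_rc_postencode_update_drop_frame(cpi);
        return;
      }

      /* 3. Encode, then feed the actual size back into rate control. */
      vp9_rc_postencode_update(cpi, do_encode(cpi));
    }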
+// Functions to set parameters for encoding before the actual
+// encode_frame_to_data_rate() function.
+void vp9_rc_get_one_pass_vbr_params(struct VP9_COMP *cpi);
+void vp9_rc_get_one_pass_cbr_params(struct VP9_COMP *cpi);
+void vp9_rc_get_svc_params(struct VP9_COMP *cpi);
+
+// Post encode update of the rate control parameters based
+// on bytes used
+void vp9_rc_postencode_update(struct VP9_COMP *cpi,
+ uint64_t bytes_used);
+// Post encode update of the rate control parameters for dropped frames
+void vp9_rc_postencode_update_drop_frame(struct VP9_COMP *cpi);
+
+// Updates rate correction factors
+// Changes only the rate correction factors in the rate control structure.
+void vp9_rc_update_rate_correction_factors(struct VP9_COMP *cpi, int damp_var);
+
+// Decide if we should drop this frame (1-pass CBR only).
+// Changes only the decimation count in the rate control structure.
+int vp9_rc_drop_frame(struct VP9_COMP *cpi);
+// Computes frame size bounds.
void vp9_rc_compute_frame_size_bounds(const struct VP9_COMP *cpi,
int this_frame_target,
int *frame_under_shoot_limit,
int *frame_over_shoot_limit);
// Picks q and q bounds given the target for bits
-int vp9_rc_pick_q_and_adjust_q_bounds(const struct VP9_COMP *cpi,
- int *bottom_index,
- int *top_index);
+int vp9_rc_pick_q_and_bounds(const struct VP9_COMP *cpi,
+ int *bottom_index,
+ int *top_index);
// Estimates q to achieve a target bits per frame
int vp9_rc_regulate_q(const struct VP9_COMP *cpi, int target_bits_per_frame,
int active_best_quality, int active_worst_quality);
-// Post encode update of the rate control parameters based
-// on bytes used
-void vp9_rc_postencode_update(struct VP9_COMP *cpi,
- uint64_t bytes_used);
-// for dropped frames
-void vp9_rc_postencode_update_drop_frame(struct VP9_COMP *cpi);
-
-// estimates bits per mb for a given qindex and correction factor
+// Estimates bits per mb for a given qindex and correction factor.
int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex,
double correction_factor);
-// Post encode update of the rate control parameters for 2-pass
-void vp9_twopass_postencode_update(struct VP9_COMP *cpi,
- uint64_t bytes_used);
-
-// Decide if we should drop this frame: For 1-pass CBR.
-int vp9_drop_frame(struct VP9_COMP *cpi);
-
-// Update the buffer level.
-void vp9_update_buffer_level(struct VP9_COMP *cpi, int encoded_frame_size);
+// Clamping utilities for bitrate targets for iframes and pframes.
+int vp9_rc_clamp_iframe_target_size(const struct VP9_COMP *const cpi,
+ int target);
+int vp9_rc_clamp_pframe_target_size(const struct VP9_COMP *const cpi,
+ int target);
+// Utility to set frame_target in the RATE_CONTROL structure.
+// This function is called only from the vp9_rc_get_..._params() functions.
+void vp9_rc_set_frame_target(struct VP9_COMP *cpi, int target);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index e5230feb4..f7577e174 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -68,7 +68,7 @@ struct rdcost_block_args {
int64_t this_rd;
int64_t best_rd;
int skip;
- const int16_t *scan, *nb;
+ const scan_order *so;
};
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
@@ -274,7 +274,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
MACROBLOCK *x = &cpi->mb;
int qindex, i;
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
// Further tests are required to see if the optimum is different
// for key frames, golden frames, and arf frames.
@@ -285,7 +285,8 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
cpi->RDDIV = RDDIV_BITS; // in bits (to multiply D by 128)
cpi->RDMULT = vp9_compute_rd_mult(cpi, qindex);
- x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO + (x->errorperbit == 0);
+ x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
+ x->errorperbit += (x->errorperbit == 0);
vp9_set_speed_features(cpi);
@@ -294,21 +295,22 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
set_block_thresholds(cpi);
- fill_token_costs(x->token_costs, cm->fc.coef_probs);
+ if (!cpi->sf.use_nonrd_pick_mode) {
+ fill_token_costs(x->token_costs, cm->fc.coef_probs);
- if (!cpi->sf.super_fast_rtc) {
for (i = 0; i < PARTITION_CONTEXTS; i++)
vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
vp9_partition_tree);
+ }
+ if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1) {
fill_mode_costs(cpi);
if (!frame_is_intra_only(cm)) {
vp9_build_nmv_cost_table(x->nmvjointcost,
cm->allow_high_precision_mv ? x->nmvcost_hp
: x->nmvcost,
- &cm->fc.nmvc,
- cm->allow_high_precision_mv, 1, 1);
+ &cm->fc.nmvc, cm->allow_high_precision_mv);
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
vp9_cost_tokens((int *)x->inter_mode_cost[i],
@@ -414,9 +416,10 @@ static void model_rd_from_var_lapndz(unsigned int var, unsigned int n,
*dist = 0;
} else {
int d_q10, r_q10;
- uint64_t xsq_q10_64 =
+ const uint64_t xsq_q10_64 =
((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
- int xsq_q10 = xsq_q10_64 > MAX_XSQ_Q10 ? MAX_XSQ_Q10 : xsq_q10_64;
+ const int xsq_q10 = xsq_q10_64 > MAX_XSQ_Q10 ?
+ MAX_XSQ_Q10 : (int)xsq_q10_64;
model_rd_norm(xsq_q10, &r_q10, &d_q10);
*rate = (n * r_q10 + 2) >> 2;
*dist = (var * (int64_t)d_q10 + 512) >> 10;
@@ -429,7 +432,9 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
// Note that our transform coefficients are 8 times those of an orthogonal
// transform, so the quantizer step is also scaled by 8. To get the effective
// quantizer we need to divide by 8 before sending it to the modeling function.
- int i, rate_sum = 0, dist_sum = 0;
+ int i;
+ int64_t rate_sum = 0;
+ int64_t dist_sum = 0;
int ref = xd->mi_8x8[0]->mbmi.ref_frame[0];
unsigned int sse;
@@ -443,20 +448,33 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
if (i == 0)
x->pred_sse[ref] = sse;
- if (cpi->sf.super_fast_rtc) {
- dist_sum += (int)sse;
+
+ // Fast approximation of the modeling function.
+ if (cpi->speed > 4) {
+ int64_t rate;
+ int64_t dist;
+ int64_t square_error = sse;
+ int quantizer = (pd->dequant[1] >> 3);
+
+ if (quantizer < 120)
+ rate = (square_error * (280 - quantizer)) >> 8;
+ else
+ rate = 0;
+ dist = (square_error * quantizer) >> 8;
+ rate_sum += rate;
+ dist_sum += dist;
} else {
int rate;
int64_t dist;
model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
pd->dequant[1] >> 3, &rate, &dist);
rate_sum += rate;
- dist_sum += (int)dist;
+ dist_sum += dist;
}
}
- *out_rate_sum = rate_sum;
- *out_dist_sum = (int64_t)dist_sum << 4;
+ *out_rate_sum = (int)rate_sum;
+ *out_dist_sum = dist_sum << 4;
}
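Illustrative arithmetic for the speed > 4 approximation above (input values chosen arbitrarily):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      /* Arbitrary inputs for the linear rate/distortion model above. */
      const int64_t square_error = 1000;      /* sse for one plane */
      const int quantizer = 40;               /* pd->dequant[1] >> 3 */
      const int64_t rate =
          (quantizer < 120) ? (square_error * (280 - quantizer)) >> 8 : 0;
      const int64_t dist = (square_error * quantizer) >> 8;
      printf("rate=%lld dist=%lld\n", (long long)rate, (long long)dist);
      /* Prints rate=937 dist=156. */
      return 0;
    }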
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
@@ -546,18 +564,16 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
const PLANE_TYPE type = pd->plane_type;
const int16_t *band_count = &band_counts[tx_size][1];
const int eob = p->eobs[block];
- const int16_t *const qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block);
- const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
+ const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
- x->token_costs[tx_size][type][ref];
- const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
+ x->token_costs[tx_size][type][is_inter_block(mbmi)];
uint8_t *p_tok = x->token_cache;
- int pt = combine_entropy_contexts(above_ec, left_ec);
+ int pt = combine_entropy_contexts(*A, *L);
int c, cost;
// Check for consistency of tx_size with mode info
assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
- : get_uv_tx_size(mbmi) == tx_size);
+ : get_uv_tx_size(mbmi) == tx_size);
if (eob == 0) {
// single eob token
@@ -567,7 +583,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
int band_left = *band_count++;
// dc token
- int v = qcoeff_ptr[0];
+ int v = qcoeff[0];
int prev_t = vp9_dct_value_tokens_ptr[v].token;
cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
p_tok[0] = vp9_pt_energy_class[prev_t];
@@ -578,7 +594,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
const int rc = scan[c];
int t;
- v = qcoeff_ptr[rc];
+ v = qcoeff[rc];
t = vp9_dct_value_tokens_ptr[v].token;
pt = get_coef_context(nb, p_tok, c);
cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
@@ -634,7 +650,7 @@ static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
args->t_left + y_idx, tx_size,
- args->scan, args->nb);
+ args->so->scan, args->so->neighbors);
}
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -643,17 +659,15 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
- struct encode_b_args encode_args = {x, NULL, &mbmi->skip_coeff};
-
int64_t rd1, rd2, rd;
if (args->skip)
return;
- if (!is_inter_block(&xd->mi_8x8[0]->mbmi))
- vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);
+ if (!is_inter_block(mbmi))
+ vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
else
- vp9_xform_quant(plane, block, plane_bsize, tx_size, &encode_args);
+ vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
dist_block(plane, block, tx_size, args);
rate_block(plane, block, plane_bsize, tx_size, args);
@@ -677,10 +691,16 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
}
}
-void vp9_get_entropy_contexts(TX_SIZE tx_size,
- ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
- const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
- int num_4x4_w, int num_4x4_h) {
+void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
+ const struct macroblockd_plane *pd,
+ ENTROPY_CONTEXT t_above[16],
+ ENTROPY_CONTEXT t_left[16]) {
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+ const ENTROPY_CONTEXT *const above = pd->above_context;
+ const ENTROPY_CONTEXT *const left = pd->left_context;
+
int i;
switch (tx_size) {
case TX_4X4:
@@ -710,49 +730,35 @@ void vp9_get_entropy_contexts(TX_SIZE tx_size,
}
}
-static void init_rdcost_stack(MACROBLOCK *x, const int64_t ref_rdcost,
- struct rdcost_block_args *arg) {
- vpx_memset(arg, 0, sizeof(struct rdcost_block_args));
- arg->x = x;
- arg->best_rd = ref_rdcost;
-}
-
static void txfm_rd_in_plane(MACROBLOCK *x,
int *rate, int64_t *distortion,
int *skippable, int64_t *sse,
int64_t ref_best_rd, int plane,
BLOCK_SIZE bsize, TX_SIZE tx_size) {
- struct rdcost_block_args rd_stack;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[plane];
- const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
- const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
- const int num_4x4_h = num_4x4_blocks_high_lookup[bs];
- const scan_order *so;
+ struct rdcost_block_args args = { 0 };
+ args.x = x;
+ args.best_rd = ref_best_rd;
- init_rdcost_stack(x, ref_best_rd, &rd_stack);
if (plane == 0)
xd->mi_8x8[0]->mbmi.tx_size = tx_size;
- vp9_get_entropy_contexts(tx_size, rd_stack.t_above, rd_stack.t_left,
- pd->above_context, pd->left_context,
- num_4x4_w, num_4x4_h);
+ vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
- so = get_scan(xd, tx_size, pd->plane_type, 0);
- rd_stack.scan = so->scan;
- rd_stack.nb = so->neighbors;
+ args.so = get_scan(xd, tx_size, pd->plane_type, 0);
vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
- block_rd_txfm, &rd_stack);
- if (rd_stack.skip) {
+ block_rd_txfm, &args);
+ if (args.skip) {
*rate = INT_MAX;
*distortion = INT64_MAX;
*sse = INT64_MAX;
*skippable = 0;
} else {
- *distortion = rd_stack.this_dist;
- *rate = rd_stack.this_rate;
- *sse = rd_stack.this_sse;
+ *distortion = args.this_dist;
+ *rate = args.this_rate;
+ *sse = args.this_sse;
*skippable = vp9_is_skippable_in_plane(x, bsize, plane);
}
}
@@ -787,7 +793,10 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
- int64_t rd[TX_SIZES][2];
+ int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
+ {INT64_MAX, INT64_MAX},
+ {INT64_MAX, INT64_MAX},
+ {INT64_MAX, INT64_MAX}};
int n, m;
int s0, s1;
const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
@@ -851,6 +860,11 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
}
}
+static int64_t scaled_rd_cost(int rdmult, int rddiv,
+ int rate, int64_t dist, double scale) {
+ return (int64_t) (RDCOST(rdmult, rddiv, rate, dist) * scale);
+}
+
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
int (*r)[2], int *rate,
int64_t *d, int64_t *distortion,
@@ -862,7 +876,10 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
- int64_t rd[TX_SIZES][2];
+ int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
+ {INT64_MAX, INT64_MAX},
+ {INT64_MAX, INT64_MAX},
+ {INT64_MAX, INT64_MAX}};
int n, m;
int s0, s1;
double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
@@ -885,10 +902,13 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
r[n][1] += vp9_cost_one(tx_probs[m]);
}
if (s[n]) {
- rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]) * scale;
+ rd[n][0] = rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, s1, d[n],
+ scale);
} else {
- rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]) * scale;
- rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]) * scale;
+ rd[n][0] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][0] + s0, d[n],
+ scale);
+ rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][1] + s0, d[n],
+ scale);
}
if (rd[n][1] < best_rd) {
best_rd = rd[n][1];
@@ -915,27 +935,23 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
}
}
-static void super_block_yrd(VP9_COMP *cpi,
- MACROBLOCK *x, int *rate, int64_t *distortion,
- int *skip, int64_t *psse, BLOCK_SIZE bs,
- int64_t txfm_cache[TX_MODES],
- int64_t ref_best_rd) {
+static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
+ int64_t *distortion, int *skip,
+ int64_t *psse, BLOCK_SIZE bs,
+ int64_t txfm_cache[TX_MODES],
+ int64_t ref_best_rd) {
int r[TX_SIZES][2], s[TX_SIZES];
int64_t d[TX_SIZES], sse[TX_SIZES];
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
- const int b_inter_mode = is_inter_block(mbmi);
const TX_SIZE max_tx_size = max_txsize_lookup[bs];
TX_SIZE tx_size;
-
assert(bs == mbmi->sb_type);
- if (b_inter_mode)
- vp9_subtract_sby(x, bs);
- if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
- (cpi->sf.tx_size_search_method != USE_FULL_RD &&
- !b_inter_mode)) {
+ vp9_subtract_plane(x, bs, 0);
+
+ if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
ref_best_rd, bs);
@@ -944,8 +960,7 @@ static void super_block_yrd(VP9_COMP *cpi,
return;
}
- if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER &&
- b_inter_mode) {
+ if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER) {
for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd,
&r[tx_size][0], &d[tx_size], &s[tx_size]);
@@ -963,6 +978,36 @@ static void super_block_yrd(VP9_COMP *cpi,
*psse = sse[mbmi->tx_size];
}
+static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
+ int64_t *distortion, int *skip,
+ int64_t *psse, BLOCK_SIZE bs,
+ int64_t txfm_cache[TX_MODES],
+ int64_t ref_best_rd) {
+ int64_t sse[TX_SIZES];
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
+
+ assert(bs == mbmi->sb_type);
+ if (cpi->sf.tx_size_search_method != USE_FULL_RD) {
+ vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
+ choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
+ ref_best_rd, bs);
+ } else {
+ int r[TX_SIZES][2], s[TX_SIZES];
+ int64_t d[TX_SIZES];
+ TX_SIZE tx_size;
+ for (tx_size = TX_4X4; tx_size <= max_txsize_lookup[bs]; ++tx_size)
+ txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
+ &s[tx_size], &sse[tx_size],
+ ref_best_rd, 0, bs, tx_size);
+ choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
+ skip, txfm_cache, bs);
+ }
+ if (psse)
+ *psse = sse[mbmi->tx_size];
+}
+
+
static int conditional_skipintra(MB_PREDICTION_MODE mode,
MB_PREDICTION_MODE best_intra_mode) {
if (mode == D117_PRED &&
@@ -1064,7 +1109,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
so = &vp9_scan_orders[TX_4X4][tx_type];
if (tx_type != DCT_DCT)
- vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
+ vp9_fht4x4(src_diff, coeff, 8, tx_type);
else
x->fwd_txm4x4(src_diff, coeff, 8);
@@ -1223,8 +1268,8 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
mic->mbmi.mode = mode;
- super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
- bsize, local_tx_cache, best_rd);
+ intra_super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+ &s, NULL, bsize, local_tx_cache, best_rd);
if (this_rate_tokenonly == INT_MAX)
continue;
@@ -1259,7 +1304,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
return best_rd;
}
-static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x,
+static void super_block_uvrd(MACROBLOCK *x,
int *rate, int64_t *distortion, int *skippable,
int64_t *sse, BLOCK_SIZE bsize,
int64_t ref_best_rd) {
@@ -1273,8 +1318,11 @@ static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x,
if (ref_best_rd < 0)
goto term;
- if (is_inter_block(mbmi))
- vp9_subtract_sbuv(x, bsize);
+ if (is_inter_block(mbmi)) {
+ int plane;
+ for (plane = 1; plane < MAX_MB_PLANE; ++plane)
+ vp9_subtract_plane(x, bsize, plane);
+ }
*rate = 0;
*distortion = 0;
@@ -1306,6 +1354,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
+ MACROBLOCKD *xd = &x->e_mbd;
MB_PREDICTION_MODE mode;
MB_PREDICTION_MODE mode_selected = DC_PRED;
int64_t best_rd = INT64_MAX, this_rd;
@@ -1316,9 +1365,9 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
continue;
- x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode;
+ xd->mi_8x8[0]->mbmi.uv_mode = mode;
- super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ super_block_uvrd(x, &this_rate_tokenonly,
&this_distortion, &s, &this_sse, bsize, best_rd);
if (this_rate_tokenonly == INT_MAX)
continue;
@@ -1336,7 +1385,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (!x->select_txfm_size) {
int i;
struct macroblock_plane *const p = x->plane;
- struct macroblockd_plane *const pd = x->e_mbd.plane;
+ struct macroblockd_plane *const pd = xd->plane;
for (i = 1; i < MAX_MB_PLANE; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][2];
p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
@@ -1357,25 +1406,21 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
}
- x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode_selected;
+ xd->mi_8x8[0]->mbmi.uv_mode = mode_selected;
return best_rd;
}
-static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
+static int64_t rd_sbuv_dcpred(const VP9_COMMON *cm, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize) {
- int64_t this_rd;
- int64_t this_sse;
+ int64_t unused;
x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED;
- super_block_uvrd(cpi, x, rate_tokenonly, distortion,
- skippable, &this_sse, bsize, INT64_MAX);
- *rate = *rate_tokenonly +
- x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED];
- this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
-
- return this_rd;
+ super_block_uvrd(x, rate_tokenonly, distortion,
+ skippable, &unused, bsize, INT64_MAX);
+ *rate = *rate_tokenonly + x->intra_uv_mode_cost[cm->frame_type][DC_PRED];
+ return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
@@ -1388,8 +1433,8 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
// Use an estimated rd for uv_intra based on DC_PRED if the
// appropriate speed flag is set.
if (cpi->sf.use_uv_intra_rd_estimate) {
- rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
- bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
+ rd_sbuv_dcpred(&cpi->common, x, rate_uv, rate_uv_tokenonly, dist_uv,
+ skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
// Else do a proper rd search for each possible transform size that may
// be considered in the main rd loop.
} else {
@@ -1403,8 +1448,7 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
int mode_context) {
MACROBLOCK *const x = &cpi->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- const int segment_id = xd->mi_8x8[0]->mbmi.segment_id;
+ const int segment_id = x->e_mbd.mi_8x8[0]->mbmi.segment_id;
// Don't account for mode here if segment skip is enabled.
if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
@@ -1429,7 +1473,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
int *rate_mv);
static int labels2mode(MACROBLOCK *x, int i,
- MB_PREDICTION_MODE this_mode,
+ MB_PREDICTION_MODE mode,
int_mv *this_mv, int_mv *this_second_mv,
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
int_mv seg_mvs[MAX_REF_FRAMES],
@@ -1439,23 +1483,18 @@ static int labels2mode(MACROBLOCK *x, int i,
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mic = xd->mi_8x8[0];
MB_MODE_INFO *mbmi = &mic->mbmi;
- int cost = 0, thismvcost = 0;
+ int thismvcost = 0;
int idx, idy;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
const int has_second_rf = has_second_ref(mbmi);
- /* We have to be careful retrieving previously-encoded motion vectors.
- Ones from this macroblock have to be pulled from the BLOCKD array
- as they have not yet made it to the bmi array in our MB_MODE_INFO. */
- MB_PREDICTION_MODE m;
-
// The only time we should do costing for a new motion vector or mode
// is when we are on a new label (jbb May 08, 2007).
- switch (m = this_mode) {
+ switch (mode) {
case NEWMV:
this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
- thismvcost = vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv,
+ thismvcost += vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv,
mvjcost, mvcost, MV_COST_WEIGHT_SUB);
if (has_second_rf) {
this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
@@ -1467,14 +1506,12 @@ static int labels2mode(MACROBLOCK *x, int i,
case NEARESTMV:
this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
if (has_second_rf)
- this_second_mv->as_int =
- frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
+ this_second_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
break;
case NEARMV:
this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
if (has_second_rf)
- this_second_mv->as_int =
- frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
+ this_second_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
break;
case ZEROMV:
this_mv->as_int = 0;
@@ -1485,22 +1522,19 @@ static int labels2mode(MACROBLOCK *x, int i,
break;
}
- cost = cost_mv_ref(cpi, this_mode,
- mbmi->mode_context[mbmi->ref_frame[0]]);
-
mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
if (has_second_rf)
mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
- mic->bmi[i].as_mode = m;
+ mic->bmi[i].as_mode = mode;
for (idy = 0; idy < num_4x4_blocks_high; ++idy)
for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
&mic->bmi[i], sizeof(mic->bmi[i]));
- cost += thismvcost;
- return cost;
+ return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) +
+ thismvcost;
}
static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
@@ -1604,13 +1638,11 @@ typedef struct {
int mvthresh;
} BEST_SEG_INFO;
-static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
- int r = 0;
- r |= (mv->as_mv.row >> 3) < x->mv_row_min;
- r |= (mv->as_mv.row >> 3) > x->mv_row_max;
- r |= (mv->as_mv.col >> 3) < x->mv_col_min;
- r |= (mv->as_mv.col >> 3) > x->mv_col_max;
- return r;
+static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
+ return (mv->row >> 3) < x->mv_row_min ||
+ (mv->row >> 3) > x->mv_row_max ||
+ (mv->col >> 3) < x->mv_col_min ||
+ (mv->col >> 3) > x->mv_col_max;
}
static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
@@ -1645,14 +1677,15 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
BEST_SEG_INFO *bsi_buf, int filter_idx,
int_mv seg_mvs[4][MAX_REF_FRAMES],
int mi_row, int mi_col) {
- int i, br = 0, idx, idy;
+ int k, br = 0, idx, idy;
int64_t bd = 0, block_sse = 0;
MB_PREDICTION_MODE this_mode;
+ MACROBLOCKD *xd = &x->e_mbd;
VP9_COMMON *cm = &cpi->common;
- MODE_INFO *mi = x->e_mbd.mi_8x8[0];
+ MODE_INFO *mi = xd->mi_8x8[0];
MB_MODE_INFO *const mbmi = &mi->mbmi;
struct macroblock_plane *const p = &x->plane[0];
- struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
+ struct macroblockd_plane *const pd = &xd->plane[0];
const int label_count = 4;
int64_t this_segment_rd = 0;
int label_mv_thresh;
@@ -1660,7 +1693,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
const BLOCK_SIZE bsize = mbmi->sb_type;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
- vp9_variance_fn_ptr_t *v_fn_ptr;
+ vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize];
ENTROPY_CONTEXT t_above[2], t_left[2];
BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
int mode_idx;
@@ -1670,8 +1703,6 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
- v_fn_ptr = &cpi->fn_ptr[bsize];
-
// 64 makes this threshold really big, effectively
// making it so that we very rarely check mvs on
// segments. Setting this to 1 would make mv thresh
@@ -1687,20 +1718,17 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
MB_PREDICTION_MODE mode_selected = ZEROMV;
int64_t best_rd = INT64_MAX;
- i = idy * 2 + idx;
-
- frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0;
- vp9_append_sub8x8_mvs_for_idx(cm, &x->e_mbd, tile,
- i, 0, mi_row, mi_col,
- &frame_mv[NEARESTMV][mbmi->ref_frame[0]],
- &frame_mv[NEARMV][mbmi->ref_frame[0]]);
- if (has_second_rf) {
- frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
- vp9_append_sub8x8_mvs_for_idx(cm, &x->e_mbd, tile,
- i, 1, mi_row, mi_col,
- &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
- &frame_mv[NEARMV][mbmi->ref_frame[1]]);
+ const int i = idy * 2 + idx;
+ int ref;
+
+ for (ref = 0; ref < 1 + has_second_rf; ++ref) {
+ const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
+ frame_mv[ZEROMV][frame].as_int = 0;
+ vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
+ &frame_mv[NEARESTMV][frame],
+ &frame_mv[NEARMV][frame]);
}
+
// Search for the best motion vector on this segment.
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
const struct buf_2d orig_src = x->plane[0].src;
@@ -1829,28 +1857,28 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
sadpb, further_steps, 0, v_fn_ptr,
&bsi->ref_mv->as_mv,
- new_mv);
+ &new_mv->as_mv);
}
// Should we do a full search (best quality only)
if (cpi->oxcf.mode == MODE_BESTQUALITY ||
cpi->oxcf.mode == MODE_SECONDPASS_BEST) {
+ int_mv *const best_mv = &mi->bmi[i].as_mv[0];
/* Check if mvp_full is within the range. */
clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
x->mv_row_min, x->mv_row_max);
-
thissme = cpi->full_search_sad(x, &mvp_full,
sadpb, 16, v_fn_ptr,
x->nmvjointcost, x->mvcost,
- &bsi->ref_mv->as_mv, i);
-
+ &bsi->ref_mv->as_mv,
+ &best_mv->as_mv);
if (thissme < bestsme) {
bestsme = thissme;
- new_mv->as_int = mi->bmi[i].as_mv[0].as_int;
+ new_mv->as_int = best_mv->as_int;
} else {
- /* The full search result is actually worse so re-instate the
- * previous best vector */
- mi->bmi[i].as_mv[0].as_int = new_mv->as_int;
+ // The full search result is actually worse, so reinstate the
+ // previous best vector.
+ best_mv->as_int = new_mv->as_int;
}
}
@@ -1928,10 +1956,9 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
}
// Trap vectors that reach beyond the UMV borders
- if (mv_check_bounds(x, &mode_mv[this_mode]))
- continue;
- if (has_second_rf &&
- mv_check_bounds(x, &second_mode_mv[this_mode]))
+ if (mv_check_bounds(x, &mode_mv[this_mode].as_mv) ||
+ (has_second_rf &&
+ mv_check_bounds(x, &second_mode_mv[this_mode].as_mv)))
continue;
if (filter_idx > 0) {
@@ -2042,8 +2069,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
bsi->sse = block_sse;
// update the coding decisions
- for (i = 0; i < 4; ++i)
- bsi->modes[i] = mi->bmi[i].as_mode;
+ for (k = 0; k < 4; ++k)
+ bsi->modes[k] = mi->bmi[k].as_mode;
}
static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
@@ -2356,7 +2383,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
int sadpb = x->sadperbit16;
MV mvp_full;
int ref = mbmi->ref_frame[0];
- int_mv ref_mv = mbmi->ref_mvs[ref][0];
+ MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
int tmp_col_min = x->mv_col_min;
int tmp_col_max = x->mv_col_max;
@@ -2366,10 +2393,10 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
ref);
- int_mv pred_mv[3];
- pred_mv[0] = mbmi->ref_mvs[ref][0];
- pred_mv[1] = mbmi->ref_mvs[ref][1];
- pred_mv[2] = x->pred_mv[ref];
+ MV pred_mv[3];
+ pred_mv[0] = mbmi->ref_mvs[ref][0].as_mv;
+ pred_mv[1] = mbmi->ref_mvs[ref][1].as_mv;
+ pred_mv[2] = x->pred_mv[ref].as_mv;
if (scaled_ref_frame) {
int i;
@@ -2382,26 +2409,18 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
}
- vp9_set_mv_search_range(x, &ref_mv.as_mv);
+ vp9_set_mv_search_range(x, &ref_mv);
- // Adjust search parameters based on small partitions' result.
- if (x->fast_ms) {
- // adjust search range
- step_param = 6;
- if (x->fast_ms > 1)
- step_param = 8;
+ // Work out the size of the first step in the mv step search.
+ // 0 here is the maximum-length first step; 1 is MAX >> 1, etc.
+ if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
+ // Take a weighted average of the step_params based on the last frame's
+ // max mv magnitude and the one based on the best ref mvs of the current
+ // block for the given reference.
+ step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
+ cpi->mv_step_param) >> 1;
} else {
- // Work out the size of the first step in the mv step search.
- // 0 here is maximum length first step. 1 is MAX >> 1 etc.
- if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
- // Take wtd average of the step_params based on the last frame's
- // max mv magnitude and that based on the best ref mvs of the current
- // block for the given reference.
- step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
- cpi->mv_step_param) >> 1;
- } else {
- step_param = cpi->mv_step_param;
- }
+ step_param = cpi->mv_step_param;
}
if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
@@ -2435,7 +2454,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
}
}
- mvp_full = pred_mv[x->mv_best_ref_index[ref]].as_mv;
+ mvp_full = pred_mv[x->mv_best_ref_index[ref]];
mvp_full.col >>= 3;
mvp_full.row >>= 3;
@@ -2443,23 +2462,27 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
// Further step/diamond searches as necessary
further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
- if (cpi->sf.search_method == HEX) {
+ if (cpi->sf.search_method == FAST_HEX) {
+ bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb,
+ &cpi->fn_ptr[bsize], 1,
+ &ref_mv, &tmp_mv->as_mv);
+ } else if (cpi->sf.search_method == HEX) {
bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1,
&cpi->fn_ptr[bsize], 1,
- &ref_mv.as_mv, &tmp_mv->as_mv);
+ &ref_mv, &tmp_mv->as_mv);
} else if (cpi->sf.search_method == SQUARE) {
bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1,
&cpi->fn_ptr[bsize], 1,
- &ref_mv.as_mv, &tmp_mv->as_mv);
+ &ref_mv, &tmp_mv->as_mv);
} else if (cpi->sf.search_method == BIGDIA) {
bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1,
&cpi->fn_ptr[bsize], 1,
- &ref_mv.as_mv, &tmp_mv->as_mv);
+ &ref_mv, &tmp_mv->as_mv);
} else {
bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
sadpb, further_steps, 1,
&cpi->fn_ptr[bsize],
- &ref_mv.as_mv, tmp_mv);
+ &ref_mv, &tmp_mv->as_mv);
}
x->mv_col_min = tmp_col_min;
@@ -2469,7 +2492,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
if (bestsme < INT_MAX) {
int dis; /* TODO: use dis in distortion calculation later. */
- cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv.as_mv,
+ cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
cm->allow_high_precision_mv,
x->errorperbit,
&cpi->fn_ptr[bsize],
@@ -2478,7 +2501,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
x->nmvjointcost, x->mvcost,
&dis, &x->pred_sse[ref]);
}
- *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv,
+ *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
if (cpi->sf.adaptive_motion_search && cpi->common.show_frame)
@@ -2705,6 +2728,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int_mv tmp_mv;
single_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
&tmp_mv, &rate_mv);
+ if (tmp_mv.as_int == INVALID_MV)
+ return INT64_MAX;
*rate2 += rate_mv;
frame_mv[refs[0]].as_int =
xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
@@ -2717,7 +2742,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
frame_mv[refs[0]].as_int == 0 &&
!vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
(num_refs == 1 || frame_mv[refs[1]].as_int == 0)) {
- int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
+ int rfc = mbmi->mode_context[refs[0]];
int c1 = cost_mv_ref(cpi, NEARMV, rfc);
int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
@@ -2732,17 +2757,17 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
assert(this_mode == ZEROMV);
if (num_refs == 1) {
if ((c3 >= c2 &&
- mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
+ mode_mv[NEARESTMV][refs[0]].as_int == 0) ||
(c3 >= c1 &&
- mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
+ mode_mv[NEARMV][refs[0]].as_int == 0))
return INT64_MAX;
} else {
if ((c3 >= c2 &&
- mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
- mode_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
+ mode_mv[NEARESTMV][refs[0]].as_int == 0 &&
+ mode_mv[NEARESTMV][refs[1]].as_int == 0) ||
(c3 >= c1 &&
- mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
- mode_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
+ mode_mv[NEARMV][refs[0]].as_int == 0 &&
+ mode_mv[NEARMV][refs[1]].as_int == 0))
return INT64_MAX;
}
}
@@ -2754,7 +2779,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (this_mode != NEWMV)
clamp_mv2(&cur_mv[i].as_mv, xd);
- if (mv_check_bounds(x, &cur_mv[i]))
+ if (mv_check_bounds(x, &cur_mv[i].as_mv))
return INT64_MAX;
mbmi->mv[i].as_int = cur_mv[i].as_int;
}
@@ -2773,8 +2798,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
* are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
* words if you present them in that order, the second one is always known
* if the first is known */
- *rate2 += cost_mv_ref(cpi, this_mode,
- mbmi->mode_context[mbmi->ref_frame[0]]);
+ *rate2 += cost_mv_ref(cpi, this_mode, mbmi->mode_context[refs[0]]);
if (!(*mode_excluded))
*mode_excluded = is_comp_pred ? cm->reference_mode == SINGLE_REFERENCE
@@ -2910,33 +2934,26 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (cm->interp_filter == SWITCHABLE)
*rate2 += get_switchable_rate(x);
- if (!is_comp_pred && cpi->enable_encode_breakout) {
+ if (!is_comp_pred) {
if (cpi->active_map_enabled && x->active_ptr[0] == 0)
x->skip = 1;
- else if (x->encode_breakout) {
+ else if (cpi->allow_encode_breakout && x->encode_breakout) {
const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
unsigned int var, sse;
// Skipping threshold for ac.
unsigned int thresh_ac;
- // The encode_breakout input
- unsigned int encode_breakout = x->encode_breakout << 4;
- unsigned int max_thresh = 36000;
-
+ // Set a maximum for the threshold to avoid a big PSNR loss in the
+ // low-bitrate case.
// Use extreme low threshold for static frames to limit skipping.
- if (cpi->enable_encode_breakout == 2)
- max_thresh = 128;
+ const unsigned int max_thresh = (cpi->allow_encode_breakout ==
+ ENCODE_BREAKOUT_LIMITED) ? 128 : 36000;
+ // The encode_breakout input
+ const unsigned int min_thresh =
+ MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
// Calculate threshold according to dequant value.
thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
-
- // Use encode_breakout input if it is bigger than internal threshold.
- if (thresh_ac < encode_breakout)
- thresh_ac = encode_breakout;
-
- // Set a maximum for threshold to avoid big PSNR loss in low bitrate case.
- if (thresh_ac > max_thresh)
- thresh_ac = max_thresh;
+ thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
xd->plane[0].dst.buf,
@@ -2999,8 +3016,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int64_t rdcosty = INT64_MAX;
// Y cost and distortion
- super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
- bsize, txfm_cache, ref_best_rd);
+ inter_super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
+ bsize, txfm_cache, ref_best_rd);
if (*rate_y == INT_MAX) {
*rate2 = INT_MAX;
@@ -3015,7 +3032,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
- super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
+ super_block_uvrd(x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
bsize, ref_best_rd - rdcosty);
if (*rate_uv == INT_MAX) {
*rate2 = INT_MAX;
@@ -3123,10 +3140,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far) {
- VP9_COMMON *cm = &cpi->common;
- MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
- const struct segmentation *seg = &cm->seg;
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
+ const struct segmentation *const seg = &cm->seg;
const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
MB_PREDICTION_MODE this_mode;
MV_REFERENCE_FRAME ref_frame, second_ref_frame;
@@ -3162,12 +3179,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
int best_skip2 = 0;
+ int mode_skip_mask = 0;
+ const int mode_skip_start = cpi->sf.mode_skip_start + 1;
+ const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize];
+ const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize];
+ const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
- // Everywhere the flag is set the error is much higher than its neighbors.
- ctx->modes_with_high_error = 0;
-
estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
&comp_mode_p);
@@ -3195,16 +3214,72 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
frame_mv[ZEROMV][ref_frame].as_int = 0;
}
- cpi->ref_frame_mask = 0;
- for (ref_frame = LAST_FRAME;
- ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) {
- int i;
- for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
- if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
- cpi->ref_frame_mask |= (1 << ref_frame);
- break;
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ // All modes from vp9_mode_order that use this frame as any ref
+ static const int ref_frame_mask_all[] = {
+ 0x0, 0x123291, 0x25c444, 0x39b722
+ };
+ // Fixed mv modes (NEARESTMV, NEARMV, ZEROMV) from vp9_mode_order that use
+ // this frame as their primary ref
+ static const int ref_frame_mask_fixedmv[] = {
+ 0x0, 0x121281, 0x24c404, 0x080102
+ };
+ if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
+ // Skip modes for missing references
+ mode_skip_mask |= ref_frame_mask_all[ref_frame];
+ } else if (cpi->sf.reference_masking) {
+ for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
+ // Skip fixed mv modes for poor references
+ if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
+ mode_skip_mask |= ref_frame_mask_fixedmv[ref_frame];
+ break;
+ }
}
}
+ // If the segment reference frame feature is enabled, skip all modes
+ // whose ref frame does not match the one allowed for this segment.
+ if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
+ vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
+ mode_skip_mask |= ref_frame_mask_all[ref_frame];
+ }
+ }
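Each bit of mode_skip_mask corresponds to one entry of vp9_mode_order, so the per-mode check later in the loop is a single bit test; a minimal sketch of that relationship (hypothetical helper, not part of the patch):

    /* Hypothetical helper: bit i of the mask gates vp9_mode_order[i]. */
    static int mode_is_skipped(int mode_skip_mask, int mode_index) {
      return (mode_skip_mask >> mode_index) & 1;
    }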
+
+ // If the segment skip feature is enabled, skip the inter modes
+ // with non-zero motion.
+ if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
+ const int inter_non_zero_mode_mask = 0x1F7F7;
+ mode_skip_mask |= inter_non_zero_mode_mask;
+ }
+
+ // Disable this drop-out case if the ref frame segment-level feature is
+ // enabled for this segment. This is to prevent the possibility that we
+ // end up unable to pick any mode.
+ if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
+ // Only consider ZEROMV/ALTREF_FRAME for the alt ref frame,
+ // unless ARNR filtering is enabled, in which case we want
+ // an unfiltered alternative. We allow near/nearest as well
+ // because they may result in zero-zero MVs but be cheaper.
+ if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
+ const int altref_zero_mask =
+ ~((1 << THR_NEARESTA) | (1 << THR_NEARA) | (1 << THR_ZEROA));
+ mode_skip_mask |= altref_zero_mask;
+ if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
+ mode_skip_mask |= (1 << THR_NEARA);
+ if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
+ mode_skip_mask |= (1 << THR_NEARESTA);
+ }
+ }
+
+ // TODO(JBB): This is to make up for the fact that we don't have sad
+ // functions that work when the block size reads outside the umv. We
+ // should fix this either by making the motion search just work on
+ // a representative block in the boundary (first) and then implementing a
+ // function that does sads when inside the border.
+ if ((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) {
+ const int new_modes_mask =
+ (1 << THR_NEWMV) | (1 << THR_NEWG) | (1 << THR_NEWA) |
+ (1 << THR_COMP_NEWLA) | (1 << THR_COMP_NEWGA);
+ mode_skip_mask |= new_modes_mask;
}
for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
@@ -3218,109 +3293,95 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int64_t tx_cache[TX_MODES];
int i;
int this_skip2 = 0;
- int64_t total_sse = INT_MAX;
+ int64_t total_sse = INT64_MAX;
int early_term = 0;
- for (i = 0; i < TX_MODES; ++i)
- tx_cache[i] = INT64_MAX;
-
- x->skip = 0;
- this_mode = vp9_mode_order[mode_index].mode;
- ref_frame = vp9_mode_order[mode_index].ref_frame[0];
- second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
-
// Look at the reference frame of the best mode so far and set the
// skip mask to look at a subset of the remaining modes.
- if (mode_index > cpi->sf.mode_skip_start) {
- if (mode_index == (cpi->sf.mode_skip_start + 1)) {
- switch (vp9_mode_order[best_mode_index].ref_frame[0]) {
- case INTRA_FRAME:
- cpi->mode_skip_mask = 0;
- break;
- case LAST_FRAME:
- cpi->mode_skip_mask = LAST_FRAME_MODE_MASK;
- break;
- case GOLDEN_FRAME:
- cpi->mode_skip_mask = GOLDEN_FRAME_MODE_MASK;
- break;
- case ALTREF_FRAME:
- cpi->mode_skip_mask = ALT_REF_MODE_MASK;
- break;
- case NONE:
- case MAX_REF_FRAMES:
- assert(0 && "Invalid Reference frame");
- }
+ if (mode_index == mode_skip_start) {
+ switch (vp9_mode_order[best_mode_index].ref_frame[0]) {
+ case INTRA_FRAME:
+ break;
+ case LAST_FRAME:
+ mode_skip_mask |= LAST_FRAME_MODE_MASK;
+ break;
+ case GOLDEN_FRAME:
+ mode_skip_mask |= GOLDEN_FRAME_MODE_MASK;
+ break;
+ case ALTREF_FRAME:
+ mode_skip_mask |= ALT_REF_MODE_MASK;
+ break;
+ case NONE:
+ case MAX_REF_FRAMES:
+ assert(0 && "Invalid Reference frame");
}
- if (cpi->mode_skip_mask & ((int64_t)1 << mode_index))
- continue;
}
-
- // Skip if the current reference frame has been masked off
- if (cpi->ref_frame_mask & (1 << ref_frame) && this_mode != NEWMV)
+ if (mode_skip_mask & (1 << mode_index))
continue;
// Test best rd so far against threshold for trying this mode.
- if ((best_rd < ((int64_t)cpi->rd_threshes[segment_id][bsize][mode_index] *
- cpi->rd_thresh_freq_fact[bsize][mode_index] >> 5)) ||
- cpi->rd_threshes[segment_id][bsize][mode_index] == INT_MAX)
- continue;
-
- // Do not allow compound prediction if the segment level reference
- // frame feature is in use as in this case there can only be one reference.
- if ((second_ref_frame > INTRA_FRAME) &&
- vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
- continue;
-
- // Skip some checking based on small partitions' result.
- if (x->fast_ms > 1 && !ref_frame)
- continue;
- if (x->fast_ms > 2 && ref_frame != x->subblock_ref)
- continue;
-
- mbmi->ref_frame[0] = ref_frame;
- mbmi->ref_frame[1] = second_ref_frame;
+ if (best_rd < ((int64_t)rd_threshes[mode_index] *
+ rd_thresh_freq_fact[mode_index] >> 5) ||
+ rd_threshes[mode_index] == INT_MAX)
+ continue;
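
For concreteness, the gate above scales the per-mode threshold by a frequency factor and shifts right by 5 (a divide by 32); with illustrative values of 1000 for the threshold and 64 for the factor, the mode is skipped once best_rd falls below 2000:

    /* Illustrative numbers only; mirrors the gate above. */
    const long long gate = (1000LL * 64) >> 5;  /* == 2000: skip once best_rd < gate */
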
- if (!(ref_frame == INTRA_FRAME
- || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
- continue;
- }
- if (!(second_ref_frame == NONE
- || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
- continue;
- }
+ this_mode = vp9_mode_order[mode_index].mode;
+ ref_frame = vp9_mode_order[mode_index].ref_frame[0];
+ second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
comp_pred = second_ref_frame > INTRA_FRAME;
if (comp_pred) {
- if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
- if (vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
- continue;
- if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
- if (ref_frame != best_inter_ref_frame &&
- second_ref_frame != best_inter_ref_frame)
+ if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
+ vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
+ continue;
+ if ((mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) &&
+ ref_frame != best_inter_ref_frame &&
+ second_ref_frame != best_inter_ref_frame)
+ continue;
+ mode_excluded = mode_excluded ?
+ mode_excluded : cm->reference_mode == SINGLE_REFERENCE;
+ } else {
+ if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME)
+ mode_excluded = mode_excluded ?
+ mode_excluded : cm->reference_mode == COMPOUND_REFERENCE;
+ }
+
+ if (ref_frame == INTRA_FRAME) {
+ // Disable intra modes other than DC_PRED for blocks with low variance
+ // Threshold for intra skipping based on source variance
+ // TODO(debargha): Specialize the threshold for super block sizes
+ static const unsigned int skip_intra_var_thresh[BLOCK_SIZES] = {
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ };
+ if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
+ this_mode != DC_PRED &&
+ x->source_variance < skip_intra_var_thresh[bsize])
+ continue;
+ // Only search the oblique modes if the best so far is
+ // one of the neighboring directional modes
+ if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
+ (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
+ if (vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME)
continue;
+ }
+ if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
+ if (conditional_skipintra(this_mode, best_intra_mode))
+ continue;
+ }
}
- set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
+ mbmi->mode = this_mode;
mbmi->uv_mode = DC_PRED;
-
+ mbmi->ref_frame[0] = ref_frame;
+ mbmi->ref_frame[1] = second_ref_frame;
// Evaluate all sub-pel filters irrespective of whether we can use
// them for this frame.
mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
: cm->interp_filter;
+ x->skip = 0;
+ set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
- if (comp_pred) {
- if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
- continue;
-
- mode_excluded = mode_excluded ? mode_excluded
- : cm->reference_mode == SINGLE_REFERENCE;
- } else {
- if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME)
- mode_excluded = mode_excluded ?
- mode_excluded : cm->reference_mode == COMPOUND_REFERENCE;
- }
-
// Select prediction reference frames.
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
@@ -3328,46 +3389,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
}
- // If the segment reference frame feature is enabled....
- // then do nothing if the current ref frame is not allowed..
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
- vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
- (int)ref_frame) {
- continue;
- // If the segment skip feature is enabled....
- // then do nothing if the current mode is not allowed..
- } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
- (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) {
- continue;
- // Disable this drop out case if the ref frame
- // segment level feature is enabled for this segment. This is to
- // prevent the possibility that we end up unable to pick any mode.
- } else if (!vp9_segfeature_active(seg, segment_id,
- SEG_LVL_REF_FRAME)) {
- // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
- // unless ARNR filtering is enabled in which case we want
- // an unfiltered alternative. We allow near/nearest as well
- // because they may result in zero-zero MVs but be cheaper.
- if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
- if ((this_mode != ZEROMV &&
- !(this_mode == NEARMV &&
- frame_mv[NEARMV][ALTREF_FRAME].as_int == 0) &&
- !(this_mode == NEARESTMV &&
- frame_mv[NEARESTMV][ALTREF_FRAME].as_int == 0)) ||
- ref_frame != ALTREF_FRAME) {
- continue;
- }
- }
- }
- // TODO(JBB): This is to make up for the fact that we don't have sad
- // functions that work when the block size reads outside the umv. We
- // should fix this either by making the motion search just work on
- // a representative block in the boundary ( first ) and then implement a
- // function that does sads when inside the border..
- if (((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) &&
- this_mode == NEWMV) {
- continue;
- }
+ for (i = 0; i < TX_MODES; ++i)
+ tx_cache[i] = INT64_MAX;
#ifdef MODE_TEST_HIT_STATS
// TEST/DEBUG CODE
@@ -3375,34 +3398,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
cpi->mode_test_hits[bsize]++;
#endif
-
if (ref_frame == INTRA_FRAME) {
TX_SIZE uv_tx;
- // Disable intra modes other than DC_PRED for blocks with low variance
- // Threshold for intra skipping based on source variance
- // TODO(debargha): Specialize the threshold for super block sizes
- static const unsigned int skip_intra_var_thresh[BLOCK_SIZES] = {
- 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
- };
- if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
- this_mode != DC_PRED &&
- x->source_variance < skip_intra_var_thresh[mbmi->sb_type])
- continue;
- // Only search the oblique modes if the best so far is
- // one of the neighboring directional modes
- if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
- (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
- if (vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME)
- continue;
- }
- mbmi->mode = this_mode;
- if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
- if (conditional_skipintra(mbmi->mode, best_intra_mode))
- continue;
- }
-
- super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
- bsize, tx_cache, best_rd);
+ intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
+ bsize, tx_cache, best_rd);
if (rate_y == INT_MAX)
continue;
@@ -3424,8 +3423,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
rate2 += intra_cost_penalty;
distortion2 = distortion_y + distortion_uv;
} else {
- mbmi->mode = this_mode;
- compmode_cost = vp9_cost_bit(comp_mode_p, second_ref_frame > INTRA_FRAME);
this_rd = handle_inter_mode(cpi, x, tile, bsize,
tx_cache,
&rate2, &distortion2, &skippable,
@@ -3437,14 +3434,16 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
single_newmv, &total_sse, best_rd);
if (this_rd == INT64_MAX)
continue;
- }
- if (cm->reference_mode == REFERENCE_MODE_SELECT)
- rate2 += compmode_cost;
+ compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
+
+ if (cm->reference_mode == REFERENCE_MODE_SELECT)
+ rate2 += compmode_cost;
+ }
// Estimate the reference frame signaling cost and add it
// to the rolling cost variable.
- if (second_ref_frame > INTRA_FRAME) {
+ if (comp_pred) {
rate2 += ref_costs_comp[ref_frame];
} else {
rate2 += ref_costs_single[ref_frame];
@@ -3552,7 +3551,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// TODO(debargha): enhance this test with a better distortion prediction
// based on qp, activity mask and history
- if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
+ if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
(mode_index > MIN_EARLY_TERM_INDEX)) {
const int qstep = xd->plane[0].dequant[1];
// TODO(debargha): Enhance this by specializing for each mode_index
@@ -3662,17 +3661,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
}
- // Flag all modes that have a distortion thats > 2x the best we found at
- // this level.
- for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
- if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV)
- continue;
-
- if (mode_distortions[mode_index] > 2 * *returndistortion) {
- ctx->modes_with_high_error |= (1 << mode_index);
- }
- }
-
assert((cm->interp_filter == SWITCHABLE) ||
(cm->interp_filter == best_mbmode.interp_filter) ||
!is_inter_block(&best_mbmode));
@@ -3787,6 +3775,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
int_mv seg_mvs[4][MAX_REF_FRAMES];
b_mode_info best_bmodes[4];
int best_skip2 = 0;
+ int ref_frame_mask = 0;
+ int mode_skip_mask = 0;
x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
@@ -3822,13 +3812,12 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
frame_mv[ZEROMV][ref_frame].as_int = 0;
}
- cpi->ref_frame_mask = 0;
for (ref_frame = LAST_FRAME;
ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) {
int i;
for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
if ((x->pred_mv_sad[ref_frame] >> 1) > x->pred_mv_sad[i]) {
- cpi->ref_frame_mask |= (1 << ref_frame);
+ ref_frame_mask |= (1 << ref_frame);
break;
}
}
@@ -3861,23 +3850,23 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
if (mode_index == 3) {
switch (vp9_ref_order[best_mode_index].ref_frame[0]) {
case INTRA_FRAME:
- cpi->mode_skip_mask = 0;
+ mode_skip_mask = 0;
break;
case LAST_FRAME:
- cpi->mode_skip_mask = 0x0010;
+ mode_skip_mask = 0x0010;
break;
case GOLDEN_FRAME:
- cpi->mode_skip_mask = 0x0008;
+ mode_skip_mask = 0x0008;
break;
case ALTREF_FRAME:
- cpi->mode_skip_mask = 0x0000;
+ mode_skip_mask = 0x0000;
break;
case NONE:
case MAX_REF_FRAMES:
assert(0 && "Invalid Reference frame");
}
}
- if (cpi->mode_skip_mask & ((int64_t)1 << mode_index))
+ if (mode_skip_mask & (1 << mode_index))
continue;
}
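
As in the superblock loop, bit i of mode_skip_mask is assumed to gate vp9_ref_order[i]; under that layout the switch above masks entry 4 once the best mode so far used LAST_FRAME, and entry 3 once it used GOLDEN_FRAME (a hypothetical decode, for illustration):

    #include <assert.h>
    static void check_sub8x8_masks(void) {
      assert(0x0010 == (1 << 4));  /* best-so-far LAST_FRAME masks entry 4 */
      assert(0x0008 == (1 << 3));  /* best-so-far GOLDEN_FRAME masks entry 3 */
    }
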
@@ -4137,11 +4126,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
if (tmp_rd == INT64_MAX)
continue;
} else {
- if (cm->interp_filter == SWITCHABLE) {
- int rs = get_switchable_rate(x);
- tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
- }
- tmp_rd = tmp_best_rdu;
total_sse = tmp_best_sse;
rate = tmp_best_rate;
rate_y = tmp_best_ratey;
@@ -4173,7 +4157,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
// then dont bother looking at UV
vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
BLOCK_8X8);
- super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
+ super_block_uvrd(x, &rate_uv, &distortion_uv, &uv_skippable,
&uv_sse, BLOCK_8X8, tmp_best_rdu);
if (rate_uv == INT_MAX)
continue;
@@ -4392,7 +4376,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
if (best_rd == INT64_MAX && bsize < BLOCK_8X8) {
*returnrate = INT_MAX;
- *returndistortion = INT_MAX;
+ *returndistortion = INT64_MAX;
return best_rd;
}
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index 96cea4216..6b85d67f8 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -80,10 +80,10 @@ void vp9_init_me_luts();
void vp9_set_mbmode_and_mvs(MACROBLOCKD *xd, MB_PREDICTION_MODE mode,
const MV *mv);
-void vp9_get_entropy_contexts(TX_SIZE tx_size,
- ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
- const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
- int num_4x4_w, int num_4x4_h);
+void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
+ const struct macroblockd_plane *pd,
+ ENTROPY_CONTEXT t_above[16],
+ ENTROPY_CONTEXT t_left[16]);
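
A sketch of a call under the new prototype, which derives the context extents from bsize and the plane's subsampling rather than taking explicit num_4x4_w/num_4x4_h counts (the surrounding xd, plane, bsize and tx_size variables are assumed, for illustration):

    ENTROPY_CONTEXT t_above[16], t_left[16];
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    vp9_get_entropy_contexts(bsize, tx_size, pd, t_above, t_left);
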
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/encoder/vp9_resize.c b/vp9/encoder/vp9_resize.c
index 0766b5107..4e6efaeb9 100644
--- a/vp9/encoder/vp9_resize.c
+++ b/vp9/encoder/vp9_resize.c
@@ -14,6 +14,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+
#include "vp9/common/vp9_common.h"
#include "vp9/encoder/vp9_resize.h"
@@ -24,9 +25,6 @@
#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)
#define INTERP_PRECISION_BITS 32
-#define ROUND_POWER_OF_TWO(value, n) \
- (((value) + (1 << ((n) - 1))) >> (n))
-
typedef int16_t interp_kernel[INTERP_TAPS];
// Filters for interpolation (0.5-band) - note this also filters integer pels.
diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad.c
index 58c5df47e..58c5df47e 100644
--- a/vp9/encoder/vp9_sad_c.c
+++ b/vp9/encoder/vp9_sad.c
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index e822e4c64..502e4b678 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -20,7 +20,6 @@
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_onyx_int.h"
-#include "vp9/encoder/vp9_psnr.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_segmentation.h"
@@ -29,7 +28,6 @@
#include "vpx_scale/vpx_scale.h"
#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering
-#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
uint8_t *y_mb_ptr,
@@ -134,17 +132,16 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
int sadpb = x->sadperbit16;
int bestsme = INT_MAX;
- int_mv best_ref_mv1;
- int_mv best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
- int_mv *ref_mv;
+ MV best_ref_mv1 = {0, 0};
+ MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
+ MV *ref_mv = &x->e_mbd.mi_8x8[0]->bmi[0].as_mv[0].as_mv;
// Save input state
struct buf_2d src = x->plane[0].src;
struct buf_2d pre = xd->plane[0].pre[0];
- best_ref_mv1.as_int = 0;
- best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >> 3;
- best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >> 3;
+ best_ref_mv1_full.col = best_ref_mv1.col >> 3;
+ best_ref_mv1_full.row = best_ref_mv1.row >> 3;
// Setup frame pointers
x->plane[0].src.buf = arf_frame_buf;
@@ -161,21 +158,17 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
/*cpi->sf.search_method == HEX*/
// Ignore mv costing by sending NULL pointer instead of cost arrays
- ref_mv = &x->e_mbd.mi_8x8[0]->bmi[0].as_mv[0];
- bestsme = vp9_hex_search(x, &best_ref_mv1_full.as_mv,
- step_param, sadpb, 1,
- &cpi->fn_ptr[BLOCK_16X16],
- 0, &best_ref_mv1.as_mv, &ref_mv->as_mv);
+ vp9_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1,
+ &cpi->fn_ptr[BLOCK_16X16], 0, &best_ref_mv1, ref_mv);
-#if ALT_REF_SUBPEL_ENABLED
// Try sub-pixel MC?
// if (bestsme > error_thresh && bestsme < INT_MAX)
{
int distortion;
unsigned int sse;
// Ignore mv costing by sending NULL pointer instead of cost array
- bestsme = cpi->find_fractional_mv_step(x, &ref_mv->as_mv,
- &best_ref_mv1.as_mv,
+ bestsme = cpi->find_fractional_mv_step(x, ref_mv,
+ &best_ref_mv1,
cpi->common.allow_high_precision_mv,
x->errorperbit,
&cpi->fn_ptr[BLOCK_16X16],
@@ -183,7 +176,6 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
NULL, NULL,
&distortion, &sse);
}
-#endif
// Restore input state
x->plane[0].src = src;
@@ -523,11 +515,16 @@ void vp9_configure_arnr_filter(VP9_COMP *cpi,
cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd;
// Adjust the strength based on active max q
- q = ((int)vp9_convert_qindex_to_q(cpi->rc.active_worst_quality) >> 1);
- if (q > 8) {
+ if (cpi->common.current_video_frame > 1)
+ q = ((int)vp9_convert_qindex_to_q(
+ cpi->rc.avg_frame_qindex[INTER_FRAME]));
+ else
+ q = ((int)vp9_convert_qindex_to_q(
+ cpi->rc.avg_frame_qindex[KEY_FRAME]));
+ if (q > 16) {
cpi->active_arnr_strength = cpi->oxcf.arnr_strength;
} else {
- cpi->active_arnr_strength = cpi->oxcf.arnr_strength - (8 - q);
+ cpi->active_arnr_strength = cpi->oxcf.arnr_strength - ((16 - q) / 2);
if (cpi->active_arnr_strength < 0)
cpi->active_arnr_strength = 0;
}
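
A worked example of the reworked strength adaptation: with oxcf.arnr_strength = 5 and an average inter-frame q of 10, the active strength becomes 5 - (16 - 10) / 2 = 2, while any q above 16 keeps the configured strength (the inputs are illustrative; the arithmetic is from the code above):

    int strength = 5 - (16 - 10) / 2;  /* == 2 */
    if (strength < 0) strength = 0;    /* same clamp as above */
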
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index ed1301a8a..7ae110707 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -23,8 +23,8 @@
static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2];
const TOKENVALUE *vp9_dct_value_tokens_ptr;
-static int dct_value_cost[DCT_MAX_VALUE * 2];
-const int *vp9_dct_value_cost_ptr;
+static int16_t dct_value_cost[DCT_MAX_VALUE * 2];
+const int16_t *vp9_dct_value_cost_ptr;
// Array indices are identical to previously-existing CONTEXT_NODE indices
const vp9_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
@@ -160,7 +160,6 @@ struct tokenize_b_args {
VP9_COMP *cpi;
MACROBLOCKD *xd;
TOKENEXTRA **tp;
- TX_SIZE tx_size;
uint8_t *token_cache;
};
@@ -188,6 +187,18 @@ static INLINE void add_token(TOKENEXTRA **t, const vp9_prob *context_tree,
++counts[token];
}
+static INLINE void add_token_no_extra(TOKENEXTRA **t,
+ const vp9_prob *context_tree,
+ uint8_t token,
+ uint8_t skip_eob_node,
+ unsigned int *counts) {
+ (*t)->token = token;
+ (*t)->context_tree = context_tree;
+ (*t)->skip_eob_node = skip_eob_node;
+ (*t)++;
+ ++counts[token];
+}
+
static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct tokenize_b_args* const args = arg;
@@ -199,17 +210,22 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
struct macroblockd_plane *pd = &xd->plane[plane];
MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
int pt; /* near block/prev token context index */
- int c = 0;
+ int c;
TOKENEXTRA *t = *tp; /* store tokens starting here */
int eob = p->eobs[block];
const PLANE_TYPE type = pd->plane_type;
- const int16_t *qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block);
+ const int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
const int segment_id = mbmi->segment_id;
const int16_t *scan, *nb;
const scan_order *so;
- vp9_coeff_count *const counts = cpi->coef_counts[tx_size];
- vp9_coeff_probs_model *const coef_probs = cpi->common.fc.coef_probs[tx_size];
const int ref = is_inter_block(mbmi);
+ unsigned int (*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
+ cpi->coef_counts[tx_size][type][ref];
+ vp9_prob (*const coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
+ cpi->common.fc.coef_probs[tx_size][type][ref];
+ unsigned int (*const eob_branch)[COEFF_CONTEXTS] =
+ cpi->common.counts.eob_branch[tx_size][type][ref];
+
const uint8_t *const band = get_band_translate(tx_size);
const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size);
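
The hoisted declarations above fix tx_size, type and ref once per block so the inner token loop indexes only [band][pt]; the pointer-to-array syntax keeps those trailing dimensions intact. A standalone sketch of the same idiom (table name and sizes are hypothetical):

    static void slice_demo(void) {
      unsigned int table[2][3][4][5] = {{{{0}}}};
      /* 'slice' fixes the first two indices: slice[i][j] == table[1][2][i][j] */
      unsigned int (*const slice)[5] = table[1][2];
      slice[0][0] += 1;
    }
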
@@ -225,27 +241,26 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
while (c < eob) {
int v = 0;
int skip_eob = 0;
- v = qcoeff_ptr[scan[c]];
+ v = qcoeff[scan[c]];
while (!v) {
- add_token(&t, coef_probs[type][ref][band[c]][pt], 0, ZERO_TOKEN, skip_eob,
- counts[type][ref][band[c]][pt]);
-
- cpi->common.counts.eob_branch[tx_size][type][ref][band[c]][pt] +=
- !skip_eob;
+ add_token_no_extra(&t, coef_probs[band[c]][pt], ZERO_TOKEN, skip_eob,
+ counts[band[c]][pt]);
+ eob_branch[band[c]][pt] += !skip_eob;
skip_eob = 1;
token_cache[scan[c]] = 0;
++c;
pt = get_coef_context(nb, token_cache, c);
- v = qcoeff_ptr[scan[c]];
+ v = qcoeff[scan[c]];
}
- add_token(&t, coef_probs[type][ref][band[c]][pt],
- vp9_dct_value_tokens_ptr[v].extra,
- vp9_dct_value_tokens_ptr[v].token, skip_eob,
- counts[type][ref][band[c]][pt]);
- cpi->common.counts.eob_branch[tx_size][type][ref][band[c]][pt] += !skip_eob;
+ add_token(&t, coef_probs[band[c]][pt],
+ vp9_dct_value_tokens_ptr[v].extra,
+ (uint8_t)vp9_dct_value_tokens_ptr[v].token,
+ (uint8_t)skip_eob,
+ counts[band[c]][pt]);
+ eob_branch[band[c]][pt] += !skip_eob;
token_cache[scan[c]] =
vp9_pt_energy_class[vp9_dct_value_tokens_ptr[v].token];
@@ -253,9 +268,9 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
pt = get_coef_context(nb, token_cache, c);
}
if (c < seg_eob) {
- add_token(&t, coef_probs[type][ref][band[c]][pt], 0, EOB_TOKEN, 0,
- counts[type][ref][band[c]][pt]);
- ++cpi->common.counts.eob_branch[tx_size][type][ref][band[c]][pt];
+ add_token_no_extra(&t, coef_probs[band[c]][pt], EOB_TOKEN, 0,
+ counts[band[c]][pt]);
+ ++eob_branch[band[c]][pt];
}
*tp = t;
@@ -299,8 +314,8 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run,
const int ctx = vp9_get_skip_context(xd);
const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id,
SEG_LVL_SKIP);
- struct tokenize_b_args arg = {cpi, xd, t, mbmi->tx_size, cpi->mb.token_cache};
- if (mbmi->skip_coeff) {
+ struct tokenize_b_args arg = {cpi, xd, t, cpi->mb.token_cache};
+ if (mbmi->skip) {
if (!dry_run)
cm->counts.skip[ctx][1] += skip_inc;
reset_skip_context(xd, bsize);
diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h
index ea86240be..063c0bafe 100644
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -47,7 +47,7 @@ struct VP9_COMP;
void vp9_tokenize_sb(struct VP9_COMP *cpi, TOKENEXTRA **t, int dry_run,
BLOCK_SIZE bsize);
-extern const int *vp9_dct_value_cost_ptr;
+extern const int16_t *vp9_dct_value_cost_ptr;
/* TODO: The Token field should be broken out into a separate char array to
* improve cache locality, since it's needed for costing when the rest of the
* fields are not.
diff --git a/vp9/encoder/vp9_vaq.c b/vp9/encoder/vp9_vaq.c
index 1f9cb8709..600029b19 100644
--- a/vp9/encoder/vp9_vaq.c
+++ b/vp9/encoder/vp9_vaq.c
@@ -19,8 +19,8 @@
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/common/vp9_systemdependent.h"
-#define ENERGY_MIN (-3)
-#define ENERGY_MAX (3)
+#define ENERGY_MIN (-1)
+#define ENERGY_MAX (1)
#define ENERGY_SPAN (ENERGY_MAX - ENERGY_MIN + 1)
#define ENERGY_IN_BOUNDS(energy)\
assert((energy) >= ENERGY_MIN && (energy) <= ENERGY_MAX)
@@ -44,7 +44,7 @@ unsigned int vp9_vaq_segment_id(int energy) {
double vp9_vaq_rdmult_ratio(int energy) {
ENERGY_IN_BOUNDS(energy);
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
return RDMULT_RATIO(energy);
}
@@ -52,7 +52,7 @@ double vp9_vaq_rdmult_ratio(int energy) {
double vp9_vaq_inv_q_ratio(int energy) {
ENERGY_IN_BOUNDS(energy);
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
return Q_RATIO(-energy);
}
@@ -63,9 +63,9 @@ void vp9_vaq_init() {
assert(ENERGY_SPAN <= MAX_SEGMENTS);
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
- base_ratio = 1.8;
+ base_ratio = 1.5;
for (i = ENERGY_MIN; i <= ENERGY_MAX; i++) {
Q_RATIO(i) = pow(base_ratio, i/3.0);
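
With the lowered base_ratio of 1.5 and the energy range narrowed to [-1, 1], the formula above yields per-segment q ratios of roughly 0.874, 1.0 and 1.145, i.e. about ±14% around the base q:

    #include <math.h>
    const double lo = pow(1.5, -1.0 / 3.0);  /* ~0.874 */
    const double hi = pow(1.5,  1.0 / 3.0);  /* ~1.145 */
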
@@ -75,35 +75,39 @@ void vp9_vaq_init() {
void vp9_vaq_frame_setup(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
struct segmentation *seg = &cm->seg;
- int base_q = vp9_convert_qindex_to_q(cm->base_qindex);
- int base_rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex +
- cm->y_dc_delta_q);
+ const double base_q = vp9_convert_qindex_to_q(cm->base_qindex);
+ const int base_rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex +
+ cm->y_dc_delta_q);
int i;
- vp9_enable_segmentation((VP9_PTR)cpi);
- vp9_clearall_segfeatures(seg);
+ if (cm->frame_type == KEY_FRAME ||
+ cpi->refresh_alt_ref_frame ||
+ (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
+ vp9_enable_segmentation((VP9_PTR)cpi);
+ vp9_clearall_segfeatures(seg);
- seg->abs_delta = SEGMENT_DELTADATA;
+ seg->abs_delta = SEGMENT_DELTADATA;
- vp9_clear_system_state(); // __asm emms;
+ vp9_clear_system_state();
- for (i = ENERGY_MIN; i <= ENERGY_MAX; i++) {
- int qindex_delta, segment_rdmult;
+ for (i = ENERGY_MIN; i <= ENERGY_MAX; i++) {
+ int qindex_delta, segment_rdmult;
- if (Q_RATIO(i) == 1) {
- // No need to enable SEG_LVL_ALT_Q for this segment
- RDMULT_RATIO(i) = 1;
- continue;
- }
+ if (Q_RATIO(i) == 1) {
+ // No need to enable SEG_LVL_ALT_Q for this segment
+ RDMULT_RATIO(i) = 1;
+ continue;
+ }
- qindex_delta = vp9_compute_qdelta(cpi, base_q, base_q * Q_RATIO(i));
- vp9_set_segdata(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q, qindex_delta);
- vp9_enable_segfeature(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q);
+ qindex_delta = vp9_compute_qdelta(cpi, base_q, base_q * Q_RATIO(i));
+ vp9_set_segdata(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q, qindex_delta);
+ vp9_enable_segfeature(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q);
- segment_rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex + qindex_delta +
- cm->y_dc_delta_q);
+ segment_rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex + qindex_delta +
+ cm->y_dc_delta_q);
- RDMULT_RATIO(i) = (double) segment_rdmult / base_rdmult;
+ RDMULT_RATIO(i) = (double) segment_rdmult / base_rdmult;
+ }
}
}
@@ -137,11 +141,8 @@ int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
double energy;
unsigned int var = block_variance(cpi, x, bs);
- vp9_clear_system_state(); // __asm emms;
-
- // if (var <= 1000)
- // return 0;
+ vp9_clear_system_state();
- energy = 0.9*(logf(var + 1) - 10.0);
- return clamp(round(energy), ENERGY_MIN, ENERGY_MAX);
+ energy = 0.9 * (log(var + 1.0) - 10.0);
+ return clamp((int)round(energy), ENERGY_MIN, ENERGY_MAX);
}
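
A worked example of the mapping above: var ≈ e^10 - 1 ≈ 22025 yields an energy of 0, while var = 1000 gives 0.9 * (log(1001) - 10) ≈ -2.78, which the narrowed [-1, 1] bounds clamp to -1:

    #include <math.h>
    const double e0 = 0.9 * (log(22026.0) - 10.0);  /* ~0.0 */
    const double e1 = 0.9 * (log(1001.0) - 10.0);   /* ~-2.78 -> clamped to -1 */
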
diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance.c
index 8bc385089..8bc385089 100644
--- a/vp9/encoder/vp9_variance_c.c
+++ b/vp9/encoder/vp9_variance.c
diff --git a/vp9/encoder/vp9_write_bit_buffer.h b/vp9/encoder/vp9_write_bit_buffer.h
index 5958b4806..1795e05e4 100644
--- a/vp9/encoder/vp9_write_bit_buffer.h
+++ b/vp9/encoder/vp9_write_bit_buffer.h
@@ -29,7 +29,7 @@ static size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb) {
}
static void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) {
- const int off = wb->bit_offset;
+ const int off = (int)wb->bit_offset;
const int p = off / CHAR_BIT;
const int q = CHAR_BIT - 1 - off % CHAR_BIT;
if (q == CHAR_BIT -1) {
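
The cast aside, vp9_wb_write_bit fills each byte MSB-first: for a given bit_offset off, the bit lands at position CHAR_BIT - 1 - off % CHAR_BIT of byte off / CHAR_BIT. A scalar sketch of the first three writes, assuming CHAR_BIT == 8 (buffer handling elided):

    unsigned char byte = 0;
    byte |= 1u << 7;  /* off = 0, q = 7, bit = 1 */
    /* off = 1, q = 6, bit = 0: nothing to set */
    byte |= 1u << 5;  /* off = 2, q = 5, bit = 1 */
    /* byte == 0xA0, i.e. bits are written MSB-first */
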
diff --git a/vp9/encoder/vp9_writer.c b/vp9/encoder/vp9_writer.c
index 3d13d07b6..fda1b390e 100644
--- a/vp9/encoder/vp9_writer.c
+++ b/vp9/encoder/vp9_writer.c
@@ -12,11 +12,6 @@
#include "vp9/encoder/vp9_writer.h"
#include "vp9/common/vp9_entropy.h"
-#if defined(SECTIONBITS_OUTPUT)
-unsigned __int64 Sectionbits[500];
-
-#endif
-
#ifdef ENTROPY_STATS
unsigned int active_section = 0;
#endif
diff --git a/vp9/encoder/vp9_writer.h b/vp9/encoder/vp9_writer.h
index 62f555c99..defeec377 100644
--- a/vp9/encoder/vp9_writer.h
+++ b/vp9/encoder/vp9_writer.h
@@ -44,17 +44,6 @@ static void vp9_write(vp9_writer *br, int bit, int probability) {
unsigned int lowvalue = br->lowvalue;
register unsigned int shift;
-#ifdef ENTROPY_STATS
-#if defined(SECTIONBITS_OUTPUT)
-
- if (bit)
- Sectionbits[active_section] += vp9_prob_cost[255 - probability];
- else
- Sectionbits[active_section] += vp9_prob_cost[probability];
-
-#endif
-#endif
-
split = 1 + (((range - 1) * probability) >> 8);
range = split;
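
A worked example of the split computation in the arithmetic coder: with range = 255 and probability = 128 (an even bit), split = 1 + ((254 * 128) >> 8) = 128, so a zero bit leaves a range of 128 and a one bit the remaining 127 (assuming the usual lowvalue += split, range -= split update, which falls below this excerpt):

    const unsigned int range = 255, probability = 128;
    const unsigned int split = 1 + (((range - 1) * probability) >> 8);  /* == 128 */
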
diff --git a/vp9/encoder/x86/vp9_dct_avx2.c b/vp9/encoder/x86/vp9_dct_avx2.c
index ea031fb07..b5269ed03 100644
--- a/vp9/encoder/x86/vp9_dct_avx2.c
+++ b/vp9/encoder/x86/vp9_dct_avx2.c
@@ -16,7 +16,7 @@ void vp9_fdct4x4_avx2(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
- // as the first pass results are transposed, we tranpose the columns (that
+ // as the first pass results are transposed, we transpose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int pass;
@@ -46,7 +46,7 @@ void vp9_fdct4x4_avx2(const int16_t *input, int16_t *output, int stride) {
in3 = _mm_slli_epi16(in3, 4);
// if (i == 0 && input[0]) input[0] += 1;
{
- // The mask will only contain wether the first value is zero, all
+ // The mask will only contain whether the first value is zero, all
// other comparison will fail as something shifted by 4 (above << 4)
// can never be equal to one. To increment in the non-zero case, we
// add the mask and one for the first element:
@@ -59,7 +59,7 @@ void vp9_fdct4x4_avx2(const int16_t *input, int16_t *output, int stride) {
}
// Do the two transform/transpose passes
for (pass = 0; pass < 2; ++pass) {
- // Transform 1/2: Add/substract
+ // Transform 1/2: Add/subtract
const __m128i r0 = _mm_add_epi16(in0, in3);
const __m128i r1 = _mm_add_epi16(in1, in2);
const __m128i r2 = _mm_sub_epi16(in1, in2);
@@ -244,32 +244,36 @@ void fadst4_avx2(__m128i *in) {
transpose_4x4_avx2(in);
}
-void vp9_short_fht4x4_avx2(const int16_t *input, int16_t *output,
- int stride, int tx_type) {
+void vp9_fht4x4_avx2(const int16_t *input, int16_t *output,
+ int stride, int tx_type) {
__m128i in[4];
- load_buffer_4x4_avx2(input, in, stride);
+
switch (tx_type) {
- case 0: // DCT_DCT
- fdct4_avx2(in);
- fdct4_avx2(in);
+ case DCT_DCT:
+ vp9_fdct4x4_avx2(input, output, stride);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
+ load_buffer_4x4_avx2(input, in, stride);
fadst4_avx2(in);
fdct4_avx2(in);
+ write_buffer_4x4_avx2(output, in);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
+ load_buffer_4x4_avx2(input, in, stride);
fdct4_avx2(in);
fadst4_avx2(in);
+ write_buffer_4x4_avx2(output, in);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
+ load_buffer_4x4_avx2(input, in, stride);
fadst4_avx2(in);
fadst4_avx2(in);
+ write_buffer_4x4_avx2(output, in);
break;
default:
assert(0);
break;
}
- write_buffer_4x4_avx2(output, in);
}
void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
@@ -313,7 +317,7 @@ void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
for (pass = 0; pass < 2; pass++) {
// To store results of each pass before the transpose.
__m128i res0, res1, res2, res3, res4, res5, res6, res7;
- // Add/substract
+ // Add/subtract
const __m128i q0 = _mm_add_epi16(in0, in7);
const __m128i q1 = _mm_add_epi16(in1, in6);
const __m128i q2 = _mm_add_epi16(in2, in5);
@@ -324,7 +328,7 @@ void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
const __m128i q7 = _mm_sub_epi16(in0, in7);
// Work on first four results
{
- // Add/substract
+ // Add/subtract
const __m128i r0 = _mm_add_epi16(q0, q3);
const __m128i r1 = _mm_add_epi16(q1, q2);
const __m128i r2 = _mm_sub_epi16(q1, q2);
@@ -386,7 +390,7 @@ void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
// Combine
const __m128i r0 = _mm_packs_epi32(s0, s1);
const __m128i r1 = _mm_packs_epi32(s2, s3);
- // Add/substract
+ // Add/subtract
const __m128i x0 = _mm_add_epi16(q4, r0);
const __m128i x1 = _mm_sub_epi16(q4, r0);
const __m128i x2 = _mm_sub_epi16(q7, r1);
@@ -1028,40 +1032,46 @@ void fadst8_avx2(__m128i *in) {
array_transpose_8x8_avx2(in, in);
}
-void vp9_short_fht8x8_avx2(const int16_t *input, int16_t *output,
- int stride, int tx_type) {
+void vp9_fht8x8_avx2(const int16_t *input, int16_t *output,
+ int stride, int tx_type) {
__m128i in[8];
- load_buffer_8x8_avx2(input, in, stride);
+
switch (tx_type) {
- case 0: // DCT_DCT
- fdct8_avx2(in);
- fdct8_avx2(in);
+ case DCT_DCT:
+ vp9_fdct8x8_avx2(input, output, stride);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
+ load_buffer_8x8_avx2(input, in, stride);
fadst8_avx2(in);
fdct8_avx2(in);
+ right_shift_8x8_avx2(in, 1);
+ write_buffer_8x8_avx2(output, in, 8);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
+ load_buffer_8x8_avx2(input, in, stride);
fdct8_avx2(in);
fadst8_avx2(in);
+ right_shift_8x8_avx2(in, 1);
+ write_buffer_8x8_avx2(output, in, 8);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
+ load_buffer_8x8_avx2(input, in, stride);
fadst8_avx2(in);
fadst8_avx2(in);
+ right_shift_8x8_avx2(in, 1);
+ write_buffer_8x8_avx2(output, in, 8);
break;
default:
assert(0);
break;
}
- right_shift_8x8_avx2(in, 1);
- write_buffer_8x8_avx2(output, in, 8);
}
void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
- // as the first pass results are transposed, we tranpose the columns (that
+ // as the first pass results are transposed, we transpose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int pass;
@@ -1218,7 +1228,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
}
// Work on the first eight values; fdct8(input, even_results);
{
- // Add/substract
+ // Add/subtract
const __m128i q0 = _mm_add_epi16(input0, input7);
const __m128i q1 = _mm_add_epi16(input1, input6);
const __m128i q2 = _mm_add_epi16(input2, input5);
@@ -1229,7 +1239,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
const __m128i q7 = _mm_sub_epi16(input0, input7);
// Work on first four results
{
- // Add/substract
+ // Add/subtract
const __m128i r0 = _mm_add_epi16(q0, q3);
const __m128i r1 = _mm_add_epi16(q1, q2);
const __m128i r2 = _mm_sub_epi16(q1, q2);
@@ -1293,7 +1303,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
// Combine
const __m128i r0 = _mm_packs_epi32(s0, s1);
const __m128i r1 = _mm_packs_epi32(s2, s3);
- // Add/substract
+ // Add/subtract
const __m128i x0 = _mm_add_epi16(q4, r0);
const __m128i x1 = _mm_sub_epi16(q4, r0);
const __m128i x2 = _mm_sub_epi16(q7, r1);
@@ -2534,36 +2544,39 @@ void fadst16_avx2(__m128i *in0, __m128i *in1) {
array_transpose_16x16_avx2(in0, in1);
}
-void vp9_short_fht16x16_avx2(const int16_t *input, int16_t *output,
- int stride, int tx_type) {
+void vp9_fht16x16_avx2(const int16_t *input, int16_t *output,
+ int stride, int tx_type) {
__m128i in0[16], in1[16];
- load_buffer_16x16_avx2(input, in0, in1, stride);
+
switch (tx_type) {
- case 0: // DCT_DCT
- fdct16_avx2(in0, in1);
- right_shift_16x16_avx2(in0, in1);
- fdct16_avx2(in0, in1);
+ case DCT_DCT:
+ vp9_fdct16x16_avx2(input, output, stride);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
+ load_buffer_16x16_avx2(input, in0, in1, stride);
fadst16_avx2(in0, in1);
right_shift_16x16_avx2(in0, in1);
fdct16_avx2(in0, in1);
+ write_buffer_16x16_avx2(output, in0, in1, 16);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
+ load_buffer_16x16_avx2(input, in0, in1, stride);
fdct16_avx2(in0, in1);
right_shift_16x16_avx2(in0, in1);
fadst16_avx2(in0, in1);
+ write_buffer_16x16_avx2(output, in0, in1, 16);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
+ load_buffer_16x16_avx2(input, in0, in1, stride);
fadst16_avx2(in0, in1);
right_shift_16x16_avx2(in0, in1);
fadst16_avx2(in0, in1);
+ write_buffer_16x16_avx2(output, in0, in1, 16);
break;
default:
assert(0);
break;
}
- write_buffer_16x16_avx2(output, in0, in1, 16);
}
#define FDCT32x32_2D_AVX2 vp9_fdct32x32_rd_avx2
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index c876cc273..f3735ebd3 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -16,7 +16,7 @@ void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
- // as the first pass results are transposed, we tranpose the columns (that
+ // as the first pass results are transposed, we transpose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int pass;
@@ -47,7 +47,7 @@ void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) {
in1 = _mm_slli_epi16(in1, 4);
// if (i == 0 && input[0]) input[0] += 1;
{
- // The mask will only contain wether the first value is zero, all
+ // The mask will only contain whether the first value is zero, all
// other comparison will fail as something shifted by 4 (above << 4)
// can never be equal to one. To increment in the non-zero case, we
// add the mask and one for the first element:
@@ -60,7 +60,7 @@ void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) {
}
// Do the two transform/transpose passes
for (pass = 0; pass < 2; ++pass) {
- // Transform 1/2: Add/substract
+ // Transform 1/2: Add/subtract
const __m128i r0 = _mm_add_epi16(in0, in1);
const __m128i r1 = _mm_sub_epi16(in0, in1);
const __m128i r2 = _mm_unpacklo_epi64(r0, r1);
@@ -242,32 +242,36 @@ void fadst4_sse2(__m128i *in) {
transpose_4x4(in);
}
-void vp9_short_fht4x4_sse2(const int16_t *input, int16_t *output,
- int stride, int tx_type) {
+void vp9_fht4x4_sse2(const int16_t *input, int16_t *output,
+ int stride, int tx_type) {
__m128i in[4];
- load_buffer_4x4(input, in, stride);
+
switch (tx_type) {
- case 0: // DCT_DCT
- fdct4_sse2(in);
- fdct4_sse2(in);
+ case DCT_DCT:
+ vp9_fdct4x4_sse2(input, output, stride);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
+ load_buffer_4x4(input, in, stride);
fadst4_sse2(in);
fdct4_sse2(in);
+ write_buffer_4x4(output, in);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
+ load_buffer_4x4(input, in, stride);
fdct4_sse2(in);
fadst4_sse2(in);
+ write_buffer_4x4(output, in);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
+ load_buffer_4x4(input, in, stride);
fadst4_sse2(in);
fadst4_sse2(in);
+ write_buffer_4x4(output, in);
break;
- default:
- assert(0);
- break;
+ default:
+ assert(0);
+ break;
}
- write_buffer_4x4(output, in);
}
void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
@@ -311,7 +315,7 @@ void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
for (pass = 0; pass < 2; pass++) {
// To store results of each pass before the transpose.
__m128i res0, res1, res2, res3, res4, res5, res6, res7;
- // Add/substract
+ // Add/subtract
const __m128i q0 = _mm_add_epi16(in0, in7);
const __m128i q1 = _mm_add_epi16(in1, in6);
const __m128i q2 = _mm_add_epi16(in2, in5);
@@ -322,7 +326,7 @@ void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
const __m128i q7 = _mm_sub_epi16(in0, in7);
// Work on first four results
{
- // Add/substract
+ // Add/subtract
const __m128i r0 = _mm_add_epi16(q0, q3);
const __m128i r1 = _mm_add_epi16(q1, q2);
const __m128i r2 = _mm_sub_epi16(q1, q2);
@@ -384,7 +388,7 @@ void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
// Combine
const __m128i r0 = _mm_packs_epi32(s0, s1);
const __m128i r1 = _mm_packs_epi32(s2, s3);
- // Add/substract
+ // Add/subtract
const __m128i x0 = _mm_add_epi16(q4, r0);
const __m128i x1 = _mm_sub_epi16(q4, r0);
const __m128i x2 = _mm_sub_epi16(q7, r1);
@@ -1026,40 +1030,46 @@ void fadst8_sse2(__m128i *in) {
array_transpose_8x8(in, in);
}
-void vp9_short_fht8x8_sse2(const int16_t *input, int16_t *output,
- int stride, int tx_type) {
+void vp9_fht8x8_sse2(const int16_t *input, int16_t *output,
+ int stride, int tx_type) {
__m128i in[8];
- load_buffer_8x8(input, in, stride);
+
switch (tx_type) {
- case 0: // DCT_DCT
- fdct8_sse2(in);
- fdct8_sse2(in);
+ case DCT_DCT:
+ vp9_fdct8x8_sse2(input, output, stride);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
+ load_buffer_8x8(input, in, stride);
fadst8_sse2(in);
fdct8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
+ load_buffer_8x8(input, in, stride);
fdct8_sse2(in);
fadst8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
+ load_buffer_8x8(input, in, stride);
fadst8_sse2(in);
fadst8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
break;
default:
assert(0);
break;
}
- right_shift_8x8(in, 1);
- write_buffer_8x8(output, in, 8);
}
void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
- // as the first pass results are transposed, we tranpose the columns (that
+ // as the first pass results are transposed, we transpose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int pass;
@@ -1216,7 +1226,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
}
// Work on the first eight values; fdct8(input, even_results);
{
- // Add/substract
+ // Add/subtract
const __m128i q0 = _mm_add_epi16(input0, input7);
const __m128i q1 = _mm_add_epi16(input1, input6);
const __m128i q2 = _mm_add_epi16(input2, input5);
@@ -1227,7 +1237,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
const __m128i q7 = _mm_sub_epi16(input0, input7);
// Work on first four results
{
- // Add/substract
+ // Add/subtract
const __m128i r0 = _mm_add_epi16(q0, q3);
const __m128i r1 = _mm_add_epi16(q1, q2);
const __m128i r2 = _mm_sub_epi16(q1, q2);
@@ -1291,7 +1301,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
// Combine
const __m128i r0 = _mm_packs_epi32(s0, s1);
const __m128i r1 = _mm_packs_epi32(s2, s3);
- // Add/substract
+ // Add/subtract
const __m128i x0 = _mm_add_epi16(q4, r0);
const __m128i x1 = _mm_sub_epi16(q4, r0);
const __m128i x2 = _mm_sub_epi16(q7, r1);
@@ -2532,36 +2542,39 @@ void fadst16_sse2(__m128i *in0, __m128i *in1) {
array_transpose_16x16(in0, in1);
}
-void vp9_short_fht16x16_sse2(const int16_t *input, int16_t *output,
- int stride, int tx_type) {
+void vp9_fht16x16_sse2(const int16_t *input, int16_t *output,
+ int stride, int tx_type) {
__m128i in0[16], in1[16];
- load_buffer_16x16(input, in0, in1, stride);
+
switch (tx_type) {
- case 0: // DCT_DCT
- fdct16_sse2(in0, in1);
- right_shift_16x16(in0, in1);
- fdct16_sse2(in0, in1);
+ case DCT_DCT:
+ vp9_fdct16x16_sse2(input, output, stride);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
+ load_buffer_16x16(input, in0, in1, stride);
fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fdct16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
+ load_buffer_16x16(input, in0, in1, stride);
fdct16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
+ load_buffer_16x16(input, in0, in1, stride);
fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
break;
default:
assert(0);
break;
}
- write_buffer_16x16(output, in0, in1, 16);
}
#define FDCT32x32_2D vp9_fdct32x32_rd_sse2
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm
index db306603b..48ccef8cc 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3.asm
@@ -188,7 +188,8 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
pmaxsw m8, m7
pshuflw m7, m8, 0x1
pmaxsw m8, m7
- pextrw [r2], m8, 0
+ pextrw r6, m8, 0
+ mov [r2], r6
RET
; skip-block, i.e. just write all zeroes
@@ -214,5 +215,5 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
%endmacro
INIT_XMM ssse3
-QUANTIZE_FN b, 6
+QUANTIZE_FN b, 7
QUANTIZE_FN b_32x32, 7
diff --git a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
new file mode 100644
index 000000000..b8bfa8900
--- /dev/null
+++ b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
@@ -0,0 +1,641 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <immintrin.h> // AVX2
+#include "vpx_ports/mem.h"
+#include "vp9/encoder/vp9_variance.h"
+
+DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = {
+ 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
+ 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
+ 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
+ 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
+ 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
+ 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
+ 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
+ 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
+ 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
+ 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
+ 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
+ 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
+ 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
+ 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
+ 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
+ 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
+ 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
+ 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
+ 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
+ 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
+ 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
+ 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
+ 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
+ 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
+ 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
+ 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
+ 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
+ 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15,
+ 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15
+};
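
Each 32-byte row of the table holds sixteen interleaved (16 - k, k) byte pairs ready for _mm256_maddubs_epi16, and a sub-pel offset k selects its row via k << 5, as the loads below show. The scalar equivalent of one filtered pixel, including the +8 rounding and >> 4 used throughout the file (a reference sketch, not code from the file):

    /* Scalar reference for one output pixel at sub-pel offset k (0..15). */
    static unsigned char bilinear_1d(unsigned char a, unsigned char b, int k) {
      return (unsigned char)((a * (16 - k) + b * k + 8) >> 4);
    }
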
+
+unsigned int vp9_sub_pixel_variance32xh_avx2(const uint8_t *src,
+ int src_stride,
+ int x_offset,
+ int y_offset,
+ const uint8_t *dst,
+ int dst_stride,
+ int height,
+ unsigned int *sse) {
+ __m256i src_reg, dst_reg, exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi;
+ __m256i sse_reg, sum_reg, sse_reg_hi, res_cmp, sum_reg_lo, sum_reg_hi;
+ __m256i zero_reg;
+ int i, sum;
+ sum_reg = _mm256_set1_epi16(0);
+ sse_reg = _mm256_set1_epi16(0);
+ zero_reg = _mm256_set1_epi16(0);
+
+ if (x_offset == 0) {
+ // x_offset = 0 and y_offset = 0
+ if (y_offset == 0) {
+ for (i = 0; i < height ; i++) {
+ // load source and destination
+ src_reg = _mm256_loadu_si256((__m256i const *) (src));
+ dst_reg = _mm256_load_si256((__m256i const *) (dst));
+
+      // expand each byte to 2 bytes
+ exp_src_lo = _mm256_unpacklo_epi8(src_reg, zero_reg);
+ exp_src_hi = _mm256_unpackhi_epi8(src_reg, zero_reg);
+
+ exp_dst_lo = _mm256_unpacklo_epi8(dst_reg, zero_reg);
+ exp_dst_hi = _mm256_unpackhi_epi8(dst_reg, zero_reg);
+
+ // source - dest
+ exp_src_lo = _mm256_sub_epi16(exp_src_lo, exp_dst_lo);
+ exp_src_hi = _mm256_sub_epi16(exp_src_hi, exp_dst_hi);
+
+ // calculate sum
+ sum_reg = _mm256_add_epi16(sum_reg, exp_src_lo);
+ exp_src_lo = _mm256_madd_epi16(exp_src_lo, exp_src_lo);
+ sum_reg = _mm256_add_epi16(sum_reg, exp_src_hi);
+ exp_src_hi = _mm256_madd_epi16(exp_src_hi, exp_src_hi);
+
+ // calculate sse
+ sse_reg = _mm256_add_epi32(sse_reg, exp_src_lo);
+ sse_reg = _mm256_add_epi32(sse_reg, exp_src_hi);
+
+ src+= src_stride;
+ dst+= dst_stride;
+ }
+ // x_offset = 0 and y_offset = 8
+ } else if (y_offset == 8) {
+ __m256i src_next_reg;
+ for (i = 0; i < height ; i++) {
+ // load source + next source + destination
+ src_reg = _mm256_loadu_si256((__m256i const *) (src));
+ src_next_reg = _mm256_loadu_si256((__m256i const *)
+ (src + src_stride));
+ dst_reg = _mm256_load_si256((__m256i const *) (dst));
+ // average between current and next stride source
+ src_reg = _mm256_avg_epu8(src_reg, src_next_reg);
+
+      // expand each byte to 2 bytes
+ exp_src_lo = _mm256_unpacklo_epi8(src_reg, zero_reg);
+ exp_src_hi = _mm256_unpackhi_epi8(src_reg, zero_reg);
+
+ exp_dst_lo = _mm256_unpacklo_epi8(dst_reg, zero_reg);
+ exp_dst_hi = _mm256_unpackhi_epi8(dst_reg, zero_reg);
+
+ // source - dest
+ exp_src_lo = _mm256_sub_epi16(exp_src_lo, exp_dst_lo);
+ exp_src_hi = _mm256_sub_epi16(exp_src_hi, exp_dst_hi);
+
+ // calculate sum
+ sum_reg = _mm256_add_epi16(sum_reg, exp_src_lo);
+ exp_src_lo = _mm256_madd_epi16(exp_src_lo, exp_src_lo);
+ sum_reg = _mm256_add_epi16(sum_reg, exp_src_hi);
+ exp_src_hi = _mm256_madd_epi16(exp_src_hi, exp_src_hi);
+ sse_reg = _mm256_add_epi32(sse_reg, exp_src_lo);
+
+ // calculate sse
+ sse_reg = _mm256_add_epi32(sse_reg, exp_src_hi);
+
+ src+= src_stride;
+ dst+= dst_stride;
+ }
+ // x_offset = 0 and y_offset = bilin interpolation
+ } else {
+ __m256i filter, pw8, src_next_reg;
+#if (ARCH_X86_64)
+ int64_t y_offset64;
+ y_offset64 = y_offset;
+ y_offset64 <<= 5;
+ filter = _mm256_load_si256(
+ (__m256i const *)(bilinear_filters_avx2 + y_offset64));
+#else
+ y_offset <<= 5;
+ filter = _mm256_load_si256(
+ (__m256i const *)(bilinear_filters_avx2 + y_offset));
+#endif
+ pw8 = _mm256_set1_epi16(8);
+ for (i = 0; i < height ; i++) {
+ // load current and next source + destination
+ src_reg = _mm256_loadu_si256((__m256i const *) (src));
+ src_next_reg = _mm256_loadu_si256((__m256i const *)
+ (src + src_stride));
+ dst_reg = _mm256_load_si256((__m256i const *) (dst));
+
+ // merge current and next source
+ exp_src_lo = _mm256_unpacklo_epi8(src_reg, src_next_reg);
+ exp_src_hi = _mm256_unpackhi_epi8(src_reg, src_next_reg);
+
+ // filter the source
+ exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, filter);
+ exp_src_hi = _mm256_maddubs_epi16(exp_src_hi, filter);
+
+ // add 8 to the source
+ exp_src_lo = _mm256_add_epi16(exp_src_lo, pw8);
+ exp_src_hi = _mm256_add_epi16(exp_src_hi, pw8);
+
+ // divide by 16
+ exp_src_lo = _mm256_srai_epi16(exp_src_lo, 4);
+ exp_src_hi = _mm256_srai_epi16(exp_src_hi, 4);
+
+      // expand each byte to 2 bytes in the destination
+ exp_dst_lo = _mm256_unpacklo_epi8(dst_reg, zero_reg);
+ exp_dst_hi = _mm256_unpackhi_epi8(dst_reg, zero_reg);
+
+ // source - dest
+ exp_src_lo = _mm256_sub_epi16(exp_src_lo, exp_dst_lo);
+ exp_src_hi = _mm256_sub_epi16(exp_src_hi, exp_dst_hi);
+
+ // calculate sum
+ sum_reg = _mm256_add_epi16(sum_reg, exp_src_lo);
+ exp_src_lo = _mm256_madd_epi16(exp_src_lo, exp_src_lo);
+ sum_reg = _mm256_add_epi16(sum_reg, exp_src_hi);
+ exp_src_hi = _mm256_madd_epi16(exp_src_hi, exp_src_hi);
+
+ // calculate sse
+ sse_reg = _mm256_add_epi32(sse_reg, exp_src_lo);
+ sse_reg = _mm256_add_epi32(sse_reg, exp_src_hi);
+
+ src+= src_stride;
+ dst+= dst_stride;
+ }
+ }
+ // x_offset = 8 and y_offset = 0
+ } else if (x_offset == 8) {
+ if (y_offset == 0) {
+ __m256i src_next_reg;
+ for (i = 0; i < height ; i++) {
+ // load source and another source starting from the next
+ // following byte + destination
+ src_reg = _mm256_loadu_si256((__m256i const *) (src));
+ src_next_reg = _mm256_loadu_si256((__m256i const *) (src + 1));
+ dst_reg = _mm256_load_si256((__m256i const *) (dst));
+
+ // average between source and the next byte following source
+ src_reg = _mm256_avg_epu8(src_reg, src_next_reg);
+
+ // expand each byte to 2 bytes
+ exp_src_lo = _mm256_unpacklo_epi8(src_reg, zero_reg);
+ exp_src_hi = _mm256_unpackhi_epi8(src_reg, zero_reg);
+
+ exp_dst_lo = _mm256_unpacklo_epi8(dst_reg, zero_reg);
+ exp_dst_hi = _mm256_unpackhi_epi8(dst_reg, zero_reg);
+
+ // source - dest
+ exp_src_lo = _mm256_sub_epi16(exp_src_lo, exp_dst_lo);
+ exp_src_hi = _mm256_sub_epi16(exp_src_hi, exp_dst_hi);
+
+ // calculate sum
+ sum_reg = _mm256_add_epi16(sum_reg, exp_src_lo);
+ exp_src_lo = _mm256_madd_epi16(exp_src_lo, exp_src_lo);
+ sum_reg = _mm256_add_epi16(sum_reg, exp_src_hi);
+ exp_src_hi = _mm256_madd_epi16(exp_src_hi, exp_src_hi);
+
+ // calculate sse
+ sse_reg = _mm256_add_epi32(sse_reg, exp_src_lo);
+ sse_reg = _mm256_add_epi32(sse_reg, exp_src_hi);
+
+ src+= src_stride;
+ dst+= dst_stride;
+ }
+ // x_offset = 8 and y_offset = 8
+ } else if (y_offset == 8) {
+ __m256i src_next_reg, src_avg;
+ // load source and another source starting from the next
+ // following byte
+ src_reg = _mm256_loadu_si256((__m256i const *) (src));
+ src_next_reg = _mm256_loadu_si256((__m256i const *) (src + 1));
+
+ // average between source and the next byte following source
+ src_avg = _mm256_avg_epu8(src_reg, src_next_reg);
+ for (i = 0; i < height ; i++) {
+ src+= src_stride;
+ // load source and another source starting from the next
+ // following byte + destination
+ src_reg = _mm256_loadu_si256((__m256i const *) (src));
+ src_next_reg = _mm256_loadu_si256((__m256i const *) (src + 1));
+ dst_reg = _mm256_load_si256((__m256i const *) (dst));
+ // average between source and the next byte following source
+ src_reg = _mm256_avg_epu8(src_reg, src_next_reg);
+
+ // expand each byte to 2 bytes
+ exp_dst_lo = _mm256_unpacklo_epi8(dst_reg, zero_reg);
+ exp_dst_hi = _mm256_unpackhi_epi8(dst_reg, zero_reg);
+
+      // average the previous average with the current one
+ src_avg = _mm256_avg_epu8(src_avg, src_reg);
+ // expand each byte to 2 bytes
+ exp_src_lo = _mm256_unpacklo_epi8(src_avg, zero_reg);
+ exp_src_hi = _mm256_unpackhi_epi8(src_avg, zero_reg);
+
+ // save current source average
+ src_avg = src_reg;
+ // source - dest
+ exp_src_lo = _mm256_sub_epi16(exp_src_lo, exp_dst_lo);
+ exp_src_hi = _mm256_sub_epi16(exp_src_hi, exp_dst_hi);
+
+ // calculate sum
+ sum_reg = _mm256_add_epi16(sum_reg, exp_src_lo);
+ exp_src_lo = _mm256_madd_epi16(exp_src_lo, exp_src_lo);
+ sum_reg = _mm256_add_epi16(sum_reg, exp_src_hi);
+ exp_src_hi = _mm256_madd_epi16(exp_src_hi, exp_src_hi);
+
+ // calculate sse
+ sse_reg = _mm256_add_epi32(sse_reg, exp_src_lo);
+ sse_reg = _mm256_add_epi32(sse_reg, exp_src_hi);
+
+ dst+= dst_stride;
+ }
+ // x_offset = 8 and y_offset = bilin interpolation
+ } else {
+ __m256i filter, pw8, src_next_reg, src_avg;
+#if (ARCH_X86_64)
+ int64_t y_offset64;
+ y_offset64 = y_offset;
+ y_offset64 <<= 5;
+ filter = _mm256_load_si256(
+ (__m256i const *)(bilinear_filters_avx2 + y_offset64));
+#else
+ y_offset <<= 5;
+ filter = _mm256_load_si256(
+ (__m256i const *)(bilinear_filters_avx2 + y_offset));
+#endif
+ pw8 = _mm256_set1_epi16(8);
+      // load the source and a second source
+      // starting one byte ahead
+ src_reg = _mm256_loadu_si256((__m256i const *) (src));
+ src_next_reg = _mm256_loadu_si256((__m256i const *) (src + 1));
+      // average the source with the source one byte ahead
+ src_avg = _mm256_avg_epu8(src_reg, src_next_reg);
+ for (i = 0; i < height ; i++) {
+ src+= src_stride;
+        // load the source, a second source starting one byte
+        // ahead, and the destination
+ src_reg = _mm256_loadu_si256((__m256i const *) (src));
+ src_next_reg = _mm256_loadu_si256((__m256i const *) (src + 1));
+ dst_reg = _mm256_load_si256((__m256i const *) (dst));
+        // average the source with the source one byte ahead
+ src_reg = _mm256_avg_epu8(src_reg, src_next_reg);
+
+ // merge previous average and current average
+ exp_src_lo = _mm256_unpacklo_epi8(src_avg, src_reg);
+ exp_src_hi = _mm256_unpackhi_epi8(src_avg, src_reg);
+
+ // filter the source
+ exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, filter);
+ exp_src_hi = _mm256_maddubs_epi16(exp_src_hi, filter);
+
+ // add 8 to the source
+ exp_src_lo = _mm256_add_epi16(exp_src_lo, pw8);
+ exp_src_hi = _mm256_add_epi16(exp_src_hi, pw8);
+
+ // divide the source by 16
+ exp_src_lo = _mm256_srai_epi16(exp_src_lo, 4);
+ exp_src_hi = _mm256_srai_epi16(exp_src_hi, 4);
+
+ // expand each byte to 2 bytes
+ exp_dst_lo = _mm256_unpacklo_epi8(dst_reg, zero_reg);
+ exp_dst_hi = _mm256_unpackhi_epi8(dst_reg, zero_reg);
+
+ // save current source average
+ src_avg = src_reg;
+ // source - dest
+ exp_src_lo = _mm256_sub_epi16(exp_src_lo, exp_dst_lo);
+ exp_src_hi = _mm256_sub_epi16(exp_src_hi, exp_dst_hi);
+
+ // calculate sum
+ sum_reg = _mm256_add_epi16(sum_reg, exp_src_lo);
+ exp_src_lo = _mm256_madd_epi16(exp_src_lo, exp_src_lo);
+ sum_reg = _mm256_add_epi16(sum_reg, exp_src_hi);
+ exp_src_hi = _mm256_madd_epi16(exp_src_hi, exp_src_hi);
+
+ // calculate sse
+ sse_reg = _mm256_add_epi32(sse_reg, exp_src_lo);
+ sse_reg = _mm256_add_epi32(sse_reg, exp_src_hi);
+
+ dst+= dst_stride;
+ }
+ }
+ // x_offset = bilin interpolation and y_offset = 0
+ } else {
+ if (y_offset == 0) {
+ __m256i filter, pw8, src_next_reg;
+#if (ARCH_X86_64)
+ int64_t x_offset64;
+ x_offset64 = x_offset;
+ x_offset64 <<= 5;
+ filter = _mm256_load_si256(
+ (__m256i const *)(bilinear_filters_avx2 + x_offset64));
+#else
+ x_offset <<= 5;
+ filter = _mm256_load_si256(
+ (__m256i const *)(bilinear_filters_avx2 + x_offset));
+#endif
+ pw8 = _mm256_set1_epi16(8);
+ for (i = 0; i < height ; i++) {
+        // load the source, a second source starting one byte
+        // ahead, and the destination
+ src_reg = _mm256_loadu_si256((__m256i const *) (src));
+ src_next_reg = _mm256_loadu_si256((__m256i const *) (src + 1));
+ dst_reg = _mm256_load_si256((__m256i const *) (dst));
+
+ // merge current and next source
+ exp_src_lo = _mm256_unpacklo_epi8(src_reg, src_next_reg);
+ exp_src_hi = _mm256_unpackhi_epi8(src_reg, src_next_reg);
+
+ // filter the source
+ exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, filter);
+ exp_src_hi = _mm256_maddubs_epi16(exp_src_hi, filter);
+
+ // add 8 to source
+ exp_src_lo = _mm256_add_epi16(exp_src_lo, pw8);
+ exp_src_hi = _mm256_add_epi16(exp_src_hi, pw8);
+
+ // divide the source by 16
+ exp_src_lo = _mm256_srai_epi16(exp_src_lo, 4);
+ exp_src_hi = _mm256_srai_epi16(exp_src_hi, 4);
+
+ // expand each byte to 2 bytes
+ exp_dst_lo = _mm256_unpacklo_epi8(dst_reg, zero_reg);
+ exp_dst_hi = _mm256_unpackhi_epi8(dst_reg, zero_reg);
+
+ // source - dest
+ exp_src_lo = _mm256_sub_epi16(exp_src_lo, exp_dst_lo);
+ exp_src_hi = _mm256_sub_epi16(exp_src_hi, exp_dst_hi);
+
+ // calculate sum
+ sum_reg = _mm256_add_epi16(sum_reg, exp_src_lo);
+ exp_src_lo = _mm256_madd_epi16(exp_src_lo, exp_src_lo);
+ sum_reg = _mm256_add_epi16(sum_reg, exp_src_hi);
+ exp_src_hi = _mm256_madd_epi16(exp_src_hi, exp_src_hi);
+
+ // calculate sse
+ sse_reg = _mm256_add_epi32(sse_reg, exp_src_lo);
+ sse_reg = _mm256_add_epi32(sse_reg, exp_src_hi);
+
+ src+= src_stride;
+ dst+= dst_stride;
+ }
+ // x_offset = bilin interpolation and y_offset = 8
+ } else if (y_offset == 8) {
+ __m256i filter, pw8, src_next_reg, src_pack;
+#if (ARCH_X86_64)
+ int64_t x_offset64;
+ x_offset64 = x_offset;
+ x_offset64 <<= 5;
+ filter = _mm256_load_si256(
+ (__m256i const *)(bilinear_filters_avx2 + x_offset64));
+#else
+ x_offset <<= 5;
+ filter = _mm256_load_si256(
+ (__m256i const *)(bilinear_filters_avx2 + x_offset));
+#endif
+ pw8 = _mm256_set1_epi16(8);
+      // load the source and a second source
+      // starting one byte ahead
+ src_reg = _mm256_loadu_si256((__m256i const *) (src));
+ src_next_reg = _mm256_loadu_si256((__m256i const *) (src + 1));
+
+      // interleave the source with the source one byte ahead
+ exp_src_lo = _mm256_unpacklo_epi8(src_reg, src_next_reg);
+ exp_src_hi = _mm256_unpackhi_epi8(src_reg, src_next_reg);
+
+ // filter the source
+ exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, filter);
+ exp_src_hi = _mm256_maddubs_epi16(exp_src_hi, filter);
+
+ // add 8 to source
+ exp_src_lo = _mm256_add_epi16(exp_src_lo, pw8);
+ exp_src_hi = _mm256_add_epi16(exp_src_hi, pw8);
+
+ // divide source by 16
+ exp_src_lo = _mm256_srai_epi16(exp_src_lo, 4);
+ exp_src_hi = _mm256_srai_epi16(exp_src_hi, 4);
+
+      // pack the 16-bit results back to 8 bits, per lane
+ src_pack = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
+ for (i = 0; i < height ; i++) {
+ src+= src_stride;
+
+        // load the source, a second source starting one byte
+        // ahead, and the destination
+ src_reg = _mm256_loadu_si256((__m256i const *) (src));
+ src_next_reg = _mm256_loadu_si256((__m256i const *) (src + 1));
+ dst_reg = _mm256_load_si256((__m256i const *) (dst));
+
+        // interleave the source with the source one byte ahead
+ exp_src_lo = _mm256_unpacklo_epi8(src_reg, src_next_reg);
+ exp_src_hi = _mm256_unpackhi_epi8(src_reg, src_next_reg);
+
+ // filter the source
+ exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, filter);
+ exp_src_hi = _mm256_maddubs_epi16(exp_src_hi, filter);
+
+ // add 8 to source
+ exp_src_lo = _mm256_add_epi16(exp_src_lo, pw8);
+ exp_src_hi = _mm256_add_epi16(exp_src_hi, pw8);
+
+ // divide source by 16
+ exp_src_lo = _mm256_srai_epi16(exp_src_lo, 4);
+ exp_src_hi = _mm256_srai_epi16(exp_src_hi, 4);
+
+        // pack the 16-bit results back to 8 bits, per lane
+ src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
+        // average the previous packed row with the current one
+ src_pack = _mm256_avg_epu8(src_pack, src_reg);
+
+ // expand each byte to 2 bytes
+ exp_dst_lo = _mm256_unpacklo_epi8(dst_reg, zero_reg);
+ exp_dst_hi = _mm256_unpackhi_epi8(dst_reg, zero_reg);
+
+ exp_src_lo = _mm256_unpacklo_epi8(src_pack, zero_reg);
+ exp_src_hi = _mm256_unpackhi_epi8(src_pack, zero_reg);
+
+ // source - dest
+ exp_src_lo = _mm256_sub_epi16(exp_src_lo, exp_dst_lo);
+ exp_src_hi = _mm256_sub_epi16(exp_src_hi, exp_dst_hi);
+
+ // calculate sum
+ sum_reg = _mm256_add_epi16(sum_reg, exp_src_lo);
+ exp_src_lo = _mm256_madd_epi16(exp_src_lo, exp_src_lo);
+ sum_reg = _mm256_add_epi16(sum_reg, exp_src_hi);
+ exp_src_hi = _mm256_madd_epi16(exp_src_hi, exp_src_hi);
+
+ // calculate sse
+ sse_reg = _mm256_add_epi32(sse_reg, exp_src_lo);
+ sse_reg = _mm256_add_epi32(sse_reg, exp_src_hi);
+
+ // save previous pack
+ src_pack = src_reg;
+ dst+= dst_stride;
+ }
+ // x_offset = bilin interpolation and y_offset = bilin interpolation
+ } else {
+ __m256i xfilter, yfilter, pw8, src_next_reg, src_pack;
+#if (ARCH_X86_64)
+ int64_t x_offset64, y_offset64;
+ x_offset64 = x_offset;
+ x_offset64 <<= 5;
+ y_offset64 = y_offset;
+ y_offset64 <<= 5;
+ xfilter = _mm256_load_si256(
+ (__m256i const *)(bilinear_filters_avx2 + x_offset64));
+ yfilter = _mm256_load_si256(
+ (__m256i const *)(bilinear_filters_avx2 + y_offset64));
+#else
+ x_offset <<= 5;
+ xfilter = _mm256_load_si256(
+ (__m256i const *)(bilinear_filters_avx2 + x_offset));
+ y_offset <<= 5;
+ yfilter = _mm256_load_si256(
+ (__m256i const *)(bilinear_filters_avx2 + y_offset));
+#endif
+ pw8 = _mm256_set1_epi16(8);
+      // load the source and a second source
+      // starting one byte ahead
+ src_reg = _mm256_loadu_si256((__m256i const *) (src));
+ src_next_reg = _mm256_loadu_si256((__m256i const *) (src + 1));
+      // interleave the source with the source one byte ahead
+ exp_src_lo = _mm256_unpacklo_epi8(src_reg, src_next_reg);
+ exp_src_hi = _mm256_unpackhi_epi8(src_reg, src_next_reg);
+
+ // filter the source
+ exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, xfilter);
+ exp_src_hi = _mm256_maddubs_epi16(exp_src_hi, xfilter);
+
+ // add 8 to the source
+ exp_src_lo = _mm256_add_epi16(exp_src_lo, pw8);
+ exp_src_hi = _mm256_add_epi16(exp_src_hi, pw8);
+
+ // divide the source by 16
+ exp_src_lo = _mm256_srai_epi16(exp_src_lo, 4);
+ exp_src_hi = _mm256_srai_epi16(exp_src_hi, 4);
+
+      // pack the 16-bit results back to 8 bits, per lane
+ src_pack = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
+ for (i = 0; i < height ; i++) {
+ src+= src_stride;
+        // load the source, a second source starting one byte
+        // ahead, and the destination
+ src_reg = _mm256_loadu_si256((__m256i const *) (src));
+ src_next_reg = _mm256_loadu_si256((__m256i const *) (src + 1));
+ dst_reg = _mm256_load_si256((__m256i const *) (dst));
+
+        // interleave the source with the source one byte ahead
+ exp_src_lo = _mm256_unpacklo_epi8(src_reg, src_next_reg);
+ exp_src_hi = _mm256_unpackhi_epi8(src_reg, src_next_reg);
+
+ // filter the source
+ exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, xfilter);
+ exp_src_hi = _mm256_maddubs_epi16(exp_src_hi, xfilter);
+
+ // add 8 to source
+ exp_src_lo = _mm256_add_epi16(exp_src_lo, pw8);
+ exp_src_hi = _mm256_add_epi16(exp_src_hi, pw8);
+
+ // divide source by 16
+ exp_src_lo = _mm256_srai_epi16(exp_src_lo, 4);
+ exp_src_hi = _mm256_srai_epi16(exp_src_hi, 4);
+
+        // pack the 16-bit results back to 8 bits, per lane
+ src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
+
+        // interleave the previous packed row with the current one
+ exp_src_lo = _mm256_unpacklo_epi8(src_pack, src_reg);
+ exp_src_hi = _mm256_unpackhi_epi8(src_pack, src_reg);
+
+ // filter the source
+ exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, yfilter);
+ exp_src_hi = _mm256_maddubs_epi16(exp_src_hi, yfilter);
+
+ // expand each byte to 2 bytes
+ exp_dst_lo = _mm256_unpacklo_epi8(dst_reg, zero_reg);
+ exp_dst_hi = _mm256_unpackhi_epi8(dst_reg, zero_reg);
+
+ // add 8 to source
+ exp_src_lo = _mm256_add_epi16(exp_src_lo, pw8);
+ exp_src_hi = _mm256_add_epi16(exp_src_hi, pw8);
+
+ // divide source by 16
+ exp_src_lo = _mm256_srai_epi16(exp_src_lo, 4);
+ exp_src_hi = _mm256_srai_epi16(exp_src_hi, 4);
+
+ // source - dest
+ exp_src_lo = _mm256_sub_epi16(exp_src_lo, exp_dst_lo);
+ exp_src_hi = _mm256_sub_epi16(exp_src_hi, exp_dst_hi);
+
+        // calculate sum
+ sum_reg = _mm256_add_epi16(sum_reg, exp_src_lo);
+ exp_src_lo = _mm256_madd_epi16(exp_src_lo, exp_src_lo);
+ sum_reg = _mm256_add_epi16(sum_reg, exp_src_hi);
+ exp_src_hi = _mm256_madd_epi16(exp_src_hi, exp_src_hi);
+
+ // calculate sse
+ sse_reg = _mm256_add_epi32(sse_reg, exp_src_lo);
+ sse_reg = _mm256_add_epi32(sse_reg, exp_src_hi);
+
+ src_pack = src_reg;
+ dst+= dst_stride;
+ }
+ }
+ }
+  // build a mask that is all ones in each 16-bit sum that is negative
+  res_cmp = _mm256_cmpgt_epi16(zero_reg, sum_reg);
+  // shift down the upper 8 bytes of each 128-bit lane of sse
+  sse_reg_hi = _mm256_srli_si256(sse_reg, 8);
+  // interleave sum with the sign mask to sign-extend each 16-bit sum to 32 bits
+  sum_reg_lo = _mm256_unpacklo_epi16(sum_reg, res_cmp);
+  sum_reg_hi = _mm256_unpackhi_epi16(sum_reg, res_cmp);
+  // fold each lane of sse: add the upper 8 bytes onto the lower 8 bytes
+  sse_reg = _mm256_add_epi32(sse_reg, sse_reg_hi);
+  // add the two sign-extended halves of sum together
+  sum_reg = _mm256_add_epi32(sum_reg_lo, sum_reg_hi);
+
+  // shift down the upper 4 bytes of each 128-bit lane of sse
+  sse_reg_hi = _mm256_srli_si256(sse_reg, 4);
+  // shift down the upper 8 bytes of each 128-bit lane of sum
+  sum_reg_hi = _mm256_srli_si256(sum_reg, 8);
+
+  // fold the remaining two 32-bit sse values in each lane into one
+  sse_reg = _mm256_add_epi32(sse_reg, sse_reg_hi);
+  // fold each lane of sum from four 32-bit values down to two
+  sum_reg = _mm256_add_epi32(sum_reg, sum_reg_hi);
+ // extract the low lane and the high lane and add the results
+ *((int*)sse)= _mm_cvtsi128_si32(_mm256_castsi256_si128(sse_reg)) +
+ _mm_cvtsi128_si32(_mm256_extractf128_si256(sse_reg, 1));
+ sum_reg_hi = _mm256_srli_si256(sum_reg, 4);
+ sum_reg = _mm256_add_epi32(sum_reg, sum_reg_hi);
+ sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_reg)) +
+ _mm_cvtsi128_si32(_mm256_extractf128_si256(sum_reg, 1));
+ return sum;
+}
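
For reference, the per-pixel arithmetic that the 32-wide AVX2 kernel above
vectorizes can be sketched in scalar C. This is an illustrative reduction, not
code from the patch: the function name is hypothetical, offsets are in
1/16-pel units, and the (16 - offset, offset) tap pairs with +8 >> 4 rounding
mirror what the bilinear_filters_avx2 table encodes (the kernel's offset-0 and
half-pel paths are special cases of the same formula).

    #include <stdint.h>

    static int scalar_subpel_variance32xh(const uint8_t *src, int src_stride,
                                          int x_offset, int y_offset,
                                          const uint8_t *dst, int dst_stride,
                                          int height, unsigned int *sse) {
      int i, j, sum = 0;
      unsigned int sq = 0;
      for (i = 0; i < height; ++i) {
        for (j = 0; j < 32; ++j) {
          // horizontal bilinear filter on this row
          const int a = (src[j] * (16 - x_offset) +
                         src[j + 1] * x_offset + 8) >> 4;
          // same filter on the row below (skipped when no vertical offset)
          const int b = (y_offset == 0)
              ? a
              : ((src[src_stride + j] * (16 - x_offset) +
                  src[src_stride + j + 1] * x_offset + 8) >> 4);
          // vertical bilinear filter between the two filtered rows
          const int p = (a * (16 - y_offset) + b * y_offset + 8) >> 4;
          const int diff = p - dst[j];
          sum += diff;
          sq += diff * diff;
        }
        src += src_stride;
        dst += dst_stride;
      }
      *sse = sq;
      return sum;  // the wrappers below turn (sum, sse) into a variance
    }
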
diff --git a/vp9/encoder/x86/vp9_variance_avx2.c b/vp9/encoder/x86/vp9_variance_avx2.c
index c9b90d52d..02007a3bd 100644
--- a/vp9/encoder/x86/vp9_variance_avx2.c
+++ b/vp9/encoder/x86/vp9_variance_avx2.c
@@ -42,6 +42,18 @@ void vp9_get32x32var_avx2
int *Sum
);
+unsigned int vp9_sub_pixel_variance32xh_avx2
+(
+ const uint8_t *src,
+ int src_stride,
+ int x_offset,
+ int y_offset,
+ const uint8_t *dst,
+ int dst_stride,
+ int height,
+ unsigned int *sse
+);
+
static void variance_avx2(const unsigned char *src_ptr, int source_stride,
const unsigned char *ref_ptr, int recon_stride,
int w, int h, unsigned int *sse, int *sum,
@@ -155,3 +167,43 @@ unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr,
*sse = var;
return (var - (((int64_t)avg * avg) >> 11));
}
+
+unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src,
+ int src_stride,
+ int x_offset,
+ int y_offset,
+ const uint8_t *dst,
+ int dst_stride,
+ unsigned int *sse_ptr) {
+ // processing 32 elements in parallel
+ unsigned int sse;
+ int se = vp9_sub_pixel_variance32xh_avx2(src, src_stride, x_offset,
+ y_offset, dst, dst_stride,
+ 64, &sse);
+ // processing the next 32 elements in parallel
+ unsigned int sse2;
+ int se2 = vp9_sub_pixel_variance32xh_avx2(src + 32, src_stride,
+ x_offset, y_offset,
+ dst + 32, dst_stride,
+ 64, &sse2);
+ se += se2;
+ sse += sse2;
+ *sse_ptr = sse;
+ return sse - (((int64_t)se * se) >> 12);
+}
+
+unsigned int vp9_sub_pixel_variance32x32_avx2(const uint8_t *src,
+ int src_stride,
+ int x_offset,
+ int y_offset,
+ const uint8_t *dst,
+ int dst_stride,
+ unsigned int *sse_ptr) {
+  // processing 32 elements in parallel
+ unsigned int sse;
+ int se = vp9_sub_pixel_variance32xh_avx2(src, src_stride, x_offset,
+ y_offset, dst, dst_stride,
+ 32, &sse);
+ *sse_ptr = sse;
+ return sse - (((int64_t)se * se) >> 10);
+}
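
Both wrappers finish with the integer form of the variance identity
var = sse - sum^2 / N over N pixels, with N a power of two so the division
becomes a shift: 64 * 64 = 4096 = 1 << 12 and 32 * 32 = 1024 = 1 << 10 (the
64x64 case first sums the (se, sse) pairs of its two 32-wide halves). As a
generic sketch, with an illustrative helper name:

    #include <stdint.h>

    static unsigned int variance_from_sums(unsigned int sse, int sum,
                                           int w, int h) {
      // var = E[diff^2] - E[diff]^2, scaled by the pixel count N = w * h;
      // when N is a power of two the division is the shift used above
      return sse - (unsigned int)(((int64_t)sum * sum) / (w * h));
    }
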
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index c691411bf..9fb611504 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -23,7 +23,8 @@ VP9_COMMON_SRCS-yes += common/vp9_entropymode.c
VP9_COMMON_SRCS-yes += common/vp9_entropymv.c
VP9_COMMON_SRCS-yes += common/vp9_filter.c
VP9_COMMON_SRCS-yes += common/vp9_filter.h
-VP9_COMMON_SRCS-yes += common/generic/vp9_systemdependent.c
+VP9_COMMON_SRCS-yes += common/vp9_frame_buffers.c
+VP9_COMMON_SRCS-yes += common/vp9_frame_buffers.h
VP9_COMMON_SRCS-yes += common/vp9_idct.c
VP9_COMMON_SRCS-yes += common/vp9_alloccommon.h
VP9_COMMON_SRCS-yes += common/vp9_blockd.h
@@ -76,12 +77,15 @@ VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_8t_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_bilinear_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
+VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_bilinear_ssse3.asm
+VP9_COMMON_SRCS-$(HAVE_AVX2) += common/x86/vp9_subpixel_8t_intrin_avx2.c
+VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_intrin_ssse3.c
ifeq ($(CONFIG_VP9_POSTPROC),yes)
VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm
endif
-ifeq ($(USE_X86INC),yes)
+ifeq ($(CONFIG_USE_X86INC),yes)
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_copy_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_intrapred_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_intrapred_ssse3.asm
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 6b181710e..d7713fd3f 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -175,6 +175,23 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK(cfg, ss_number_layers, 1,
VPX_SS_MAX_LAYERS); /*Spatial layers max */
+
+ RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS);
+ if (cfg->ts_number_layers > 1) {
+ unsigned int i;
+ for (i = 1; i < cfg->ts_number_layers; ++i) {
+ if (cfg->ts_target_bitrate[i] < cfg->ts_target_bitrate[i-1]) {
+ ERROR("ts_target_bitrate entries are not increasing");
+ }
+ }
+ RANGE_CHECK(cfg, ts_rate_decimator[cfg->ts_number_layers-1], 1, 1);
+ for (i = cfg->ts_number_layers-2; i > 0; --i) {
+ if (cfg->ts_rate_decimator[i-1] != 2*cfg->ts_rate_decimator[i]) {
+ ERROR("ts_rate_decimator factors are not powers of 2");
+ }
+ }
+ }
+
/* VP8 does not support a lower bound on the keyframe interval in
* automatic keyframe placement mode.
*/
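
For illustration, a temporal-layer setup that passes the checks added above:
decimators halve from layer to layer and end at 1, and the cumulative
per-layer bitrates never decrease. The field names are from
vpx_codec_enc_cfg_t; the helper itself is hypothetical.

    #include "vpx/vpx_encoder.h"

    static void config_three_temporal_layers(vpx_codec_enc_cfg_t *cfg) {
      cfg->ts_number_layers = 3;
      // base layer keeps every 4th frame, the middle layer every 2nd,
      // the top layer all frames
      cfg->ts_rate_decimator[0] = 4;
      cfg->ts_rate_decimator[1] = 2;
      cfg->ts_rate_decimator[2] = 1;
      // cumulative target bitrates in kbps, non-decreasing across layers
      cfg->ts_target_bitrate[0] = 200;
      cfg->ts_target_bitrate[1] = 300;
      cfg->ts_target_bitrate[2] = 450;
    }
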
@@ -205,7 +222,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
int n_packets = (int)(cfg->rc_twopass_stats_in.sz / packet_sz);
FIRSTPASS_STATS *stats;
- if (!cfg->rc_twopass_stats_in.buf)
+ if (cfg->rc_twopass_stats_in.buf == NULL)
ERROR("rc_twopass_stats_in.buf not set.");
if (cfg->rc_twopass_stats_in.sz % packet_sz)
@@ -247,7 +264,7 @@ static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx,
static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf,
vpx_codec_enc_cfg_t cfg,
- struct vp9_extracfg vp8_cfg) {
+ struct vp9_extracfg vp9_cfg) {
oxcf->version = cfg.g_profile;
oxcf->width = cfg.g_w;
oxcf->height = cfg.g_h;
@@ -272,30 +289,25 @@ static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf,
}
if (cfg.g_pass == VPX_RC_FIRST_PASS) {
- oxcf->allow_lag = 0;
oxcf->lag_in_frames = 0;
} else {
- oxcf->allow_lag = (cfg.g_lag_in_frames) > 0;
oxcf->lag_in_frames = cfg.g_lag_in_frames;
}
- // VBR only supported for now.
- // CBR code has been deprectated for experimental phase.
- // CQ mode not yet tested
- oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK;
+ oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK;
if (cfg.rc_end_usage == VPX_CQ)
- oxcf->end_usage = USAGE_CONSTRAINED_QUALITY;
+ oxcf->end_usage = USAGE_CONSTRAINED_QUALITY;
else if (cfg.rc_end_usage == VPX_Q)
- oxcf->end_usage = USAGE_CONSTANT_QUALITY;
+ oxcf->end_usage = USAGE_CONSTANT_QUALITY;
else if (cfg.rc_end_usage == VPX_CBR)
oxcf->end_usage = USAGE_STREAM_FROM_SERVER;
oxcf->target_bandwidth = cfg.rc_target_bitrate;
- oxcf->rc_max_intra_bitrate_pct = vp8_cfg.rc_max_intra_bitrate_pct;
+ oxcf->rc_max_intra_bitrate_pct = vp9_cfg.rc_max_intra_bitrate_pct;
oxcf->best_allowed_q = cfg.rc_min_quantizer;
oxcf->worst_allowed_q = cfg.rc_max_quantizer;
- oxcf->cq_level = vp8_cfg.cq_level;
+ oxcf->cq_level = vp9_cfg.cq_level;
oxcf->fixed_q = -1;
oxcf->under_shoot_pct = cfg.rc_undershoot_pct;
@@ -316,35 +328,52 @@ static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf,
// oxcf->kf_min_dist = cfg.kf_min_dis;
oxcf->key_freq = cfg.kf_max_dist;
- // oxcf->delete_first_pass_file = cfg.g_delete_firstpassfile;
- // strcpy(oxcf->first_pass_file, cfg.g_firstpass_file);
-
- oxcf->cpu_used = vp8_cfg.cpu_used;
- oxcf->encode_breakout = vp8_cfg.static_thresh;
- oxcf->play_alternate = vp8_cfg.enable_auto_alt_ref;
- oxcf->noise_sensitivity = vp8_cfg.noise_sensitivity;
- oxcf->sharpness = vp8_cfg.sharpness;
+ oxcf->cpu_used = vp9_cfg.cpu_used;
+ oxcf->encode_breakout = vp9_cfg.static_thresh;
+ oxcf->play_alternate = vp9_cfg.enable_auto_alt_ref;
+ oxcf->noise_sensitivity = vp9_cfg.noise_sensitivity;
+ oxcf->sharpness = vp9_cfg.sharpness;
oxcf->two_pass_stats_in = cfg.rc_twopass_stats_in;
- oxcf->output_pkt_list = vp8_cfg.pkt_list;
+ oxcf->output_pkt_list = vp9_cfg.pkt_list;
- oxcf->arnr_max_frames = vp8_cfg.arnr_max_frames;
- oxcf->arnr_strength = vp8_cfg.arnr_strength;
- oxcf->arnr_type = vp8_cfg.arnr_type;
+ oxcf->arnr_max_frames = vp9_cfg.arnr_max_frames;
+ oxcf->arnr_strength = vp9_cfg.arnr_strength;
+ oxcf->arnr_type = vp9_cfg.arnr_type;
- oxcf->tuning = vp8_cfg.tuning;
+ oxcf->tuning = vp9_cfg.tuning;
- oxcf->tile_columns = vp8_cfg.tile_columns;
- oxcf->tile_rows = vp8_cfg.tile_rows;
+ oxcf->tile_columns = vp9_cfg.tile_columns;
+ oxcf->tile_rows = vp9_cfg.tile_rows;
- oxcf->lossless = vp8_cfg.lossless;
+ oxcf->lossless = vp9_cfg.lossless;
oxcf->error_resilient_mode = cfg.g_error_resilient;
- oxcf->frame_parallel_decoding_mode = vp8_cfg.frame_parallel_decoding_mode;
+ oxcf->frame_parallel_decoding_mode = vp9_cfg.frame_parallel_decoding_mode;
- oxcf->aq_mode = vp8_cfg.aq_mode;
+ oxcf->aq_mode = vp9_cfg.aq_mode;
oxcf->ss_number_layers = cfg.ss_number_layers;
+
+ if (oxcf->ss_number_layers > 1) {
+ memcpy(oxcf->ss_target_bitrate, cfg.ss_target_bitrate,
+ sizeof(cfg.ss_target_bitrate));
+ } else if (oxcf->ss_number_layers == 1) {
+ oxcf->ss_target_bitrate[0] = oxcf->target_bandwidth;
+ }
+
+ oxcf->ts_number_layers = cfg.ts_number_layers;
+
+ if (oxcf->ts_number_layers > 1) {
+ memcpy(oxcf->ts_target_bitrate, cfg.ts_target_bitrate,
+ sizeof(cfg.ts_target_bitrate));
+ memcpy(oxcf->ts_rate_decimator, cfg.ts_rate_decimator,
+ sizeof(cfg.ts_rate_decimator));
+ } else if (oxcf->ts_number_layers == 1) {
+ oxcf->ts_target_bitrate[0] = (int)oxcf->target_bandwidth;
+ oxcf->ts_rate_decimator[0] = 1;
+ }
+
/*
printf("Current VP9 Settings: \n");
printf("target_bandwidth: %d\n", oxcf->target_bandwidth);
@@ -352,7 +381,6 @@ static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf,
printf("sharpness: %d\n", oxcf->sharpness);
printf("cpu_used: %d\n", oxcf->cpu_used);
printf("Mode: %d\n", oxcf->mode);
- // printf("delete_first_pass_file: %d\n", oxcf->delete_first_pass_file);
printf("auto_key: %d\n", oxcf->auto_key);
printf("key_freq: %d\n", oxcf->key_freq);
printf("end_usage: %d\n", oxcf->end_usage);
@@ -367,7 +395,6 @@ static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf,
printf("two_pass_vbrbias: %d\n", oxcf->two_pass_vbrbias);
printf("two_pass_vbrmin_section: %d\n", oxcf->two_pass_vbrmin_section);
printf("two_pass_vbrmax_section: %d\n", oxcf->two_pass_vbrmax_section);
- printf("allow_lag: %d\n", oxcf->allow_lag);
printf("lag_in_frames: %d\n", oxcf->lag_in_frames);
printf("play_alternate: %d\n", oxcf->play_alternate);
printf("Version: %d\n", oxcf->Version);
@@ -396,7 +423,7 @@ static vpx_codec_err_t vp9e_set_config(vpx_codec_alg_priv_t *ctx,
res = validate_config(ctx, cfg, &ctx->vp8_cfg);
- if (!res) {
+ if (res == VPX_CODEC_OK) {
ctx->cfg = *cfg;
set_vp9e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg);
vp9_change_config(ctx->cpi, &ctx->oxcf);
@@ -416,8 +443,7 @@ static vpx_codec_err_t get_param(vpx_codec_alg_priv_t *ctx,
#define MAP(id, var) case id: *(RECAST(id, arg)) = var; break
- if (!arg)
- return VPX_CODEC_INVALID_PARAM;
+ if (arg == NULL) return VPX_CODEC_INVALID_PARAM;
switch (ctrl_id) {
MAP(VP8E_GET_LAST_QUANTIZER, vp9_get_quantizer(ctx->cpi));
@@ -459,7 +485,7 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx,
res = validate_config(ctx, &ctx->cfg, &xcfg);
- if (!res) {
+ if (res == VPX_CODEC_OK) {
ctx->vp8_cfg = xcfg;
set_vp9e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg);
vp9_change_config(ctx->cpi, &ctx->oxcf);
@@ -478,12 +504,10 @@ static vpx_codec_err_t vp9e_common_init(vpx_codec_ctx_t *ctx) {
VP9_PTR optr;
- if (!ctx->priv) {
+ if (ctx->priv == NULL) {
priv = calloc(1, sizeof(struct vpx_codec_alg_priv));
- if (!priv) {
- return VPX_CODEC_MEM_ERROR;
- }
+ if (priv == NULL) return VPX_CODEC_MEM_ERROR;
ctx->priv = &priv->base;
ctx->priv->sz = sizeof(*ctx->priv);
@@ -520,21 +544,19 @@ static vpx_codec_err_t vp9e_common_init(vpx_codec_ctx_t *ctx) {
priv->cx_data = malloc(priv->cx_data_sz);
- if (!priv->cx_data) {
- return VPX_CODEC_MEM_ERROR;
- }
+ if (priv->cx_data == NULL) return VPX_CODEC_MEM_ERROR;
vp9_initialize_enc();
res = validate_config(priv, &priv->cfg, &priv->vp8_cfg);
- if (!res) {
+ if (res == VPX_CODEC_OK) {
set_vp9e_config(&ctx->priv->alg_priv->oxcf,
ctx->priv->alg_priv->cfg,
ctx->priv->alg_priv->vp8_cfg);
optr = vp9_create_compressor(&ctx->priv->alg_priv->oxcf);
- if (!optr)
+ if (optr == NULL)
res = VPX_CODEC_MEM_ERROR;
else
ctx->priv->alg_priv->cpi = optr;
@@ -621,7 +643,7 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) {
*x++ = marker;
for (i = 0; i < ctx->pending_frame_count; i++) {
- int this_sz = ctx->pending_frame_sizes[i];
+ unsigned int this_sz = (unsigned int)ctx->pending_frame_sizes[i];
for (j = 0; j <= mag; j++) {
*x++ = this_sz & 0xff;
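
The loop above writes each frame size into the superframe index
least-significant byte first, using mag + 1 bytes per size. In isolation
(write_size is an illustrative helper, not part of the patch):

    #include <stdint.h>

    static uint8_t *write_size(uint8_t *x, unsigned int sz, int mag) {
      int j;
      for (j = 0; j <= mag; ++j) {
        *x++ = (uint8_t)(sz & 0xff);  // little-endian byte order
        sz >>= 8;
      }
      return x;
    }
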
@@ -702,7 +724,7 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx,
}
/* Initialize the encoder instance on the first frame. */
- if (!res && ctx->cpi) {
+ if (res == VPX_CODEC_OK && ctx->cpi != NULL) {
unsigned int lib_flags;
YV12_BUFFER_CONFIG sd;
int64_t dst_time_stamp, dst_end_time_stamp;
@@ -762,8 +784,8 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx,
VP9_COMP *cpi = (VP9_COMP *)ctx->cpi;
/* Pack invisible frames with the next visible frame */
- if (!cpi->common.show_frame) {
- if (!ctx->pending_cx_data)
+ if (cpi->common.show_frame == 0) {
+ if (ctx->pending_cx_data == 0)
ctx->pending_cx_data = cx_data;
ctx->pending_cx_data_sz += size;
ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
@@ -788,7 +810,7 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx,
if (lib_flags & FRAMEFLAGS_KEY)
pkt.data.frame.flags |= VPX_FRAME_IS_KEY;
- if (!cpi->common.show_frame) {
+ if (cpi->common.show_frame == 0) {
pkt.data.frame.flags |= VPX_FRAME_IS_INVISIBLE;
// This timestamp should be as close as possible to the
@@ -862,10 +884,9 @@ static const vpx_codec_cx_pkt_t *vp9e_get_cxdata(vpx_codec_alg_priv_t *ctx,
static vpx_codec_err_t vp9e_set_reference(vpx_codec_alg_priv_t *ctx,
int ctr_id,
va_list args) {
- vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *);
+ vpx_ref_frame_t *frame = va_arg(args, vpx_ref_frame_t *);
- if (data) {
- vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data;
+ if (frame != NULL) {
YV12_BUFFER_CONFIG sd;
image2yuvconfig(&frame->img, &sd);
@@ -880,10 +901,9 @@ static vpx_codec_err_t vp9e_set_reference(vpx_codec_alg_priv_t *ctx,
static vpx_codec_err_t vp9e_copy_reference(vpx_codec_alg_priv_t *ctx,
int ctr_id,
va_list args) {
- vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *);
+ vpx_ref_frame_t *frame = va_arg(args, vpx_ref_frame_t *);
- if (data) {
- vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data;
+ if (frame != NULL) {
YV12_BUFFER_CONFIG sd;
image2yuvconfig(&frame->img, &sd);
@@ -898,13 +918,13 @@ static vpx_codec_err_t vp9e_copy_reference(vpx_codec_alg_priv_t *ctx,
static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx,
int ctr_id,
va_list args) {
- vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *);
+ vp9_ref_frame_t *frame = va_arg(args, vp9_ref_frame_t *);
- if (data) {
+ if (frame != NULL) {
YV12_BUFFER_CONFIG* fb;
- vp9_get_reference_enc(ctx->cpi, data->idx, &fb);
- yuvconfig2image(&data->img, fb, NULL);
+ vp9_get_reference_enc(ctx->cpi, frame->idx, &fb);
+ yuvconfig2image(&frame->img, fb, NULL);
return VPX_CODEC_OK;
} else {
return VPX_CODEC_INVALID_PARAM;
@@ -915,11 +935,11 @@ static vpx_codec_err_t vp9e_set_previewpp(vpx_codec_alg_priv_t *ctx,
int ctr_id,
va_list args) {
#if CONFIG_VP9_POSTPROC
- vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *);
+ vp8_postproc_cfg_t *config = va_arg(args, vp8_postproc_cfg_t *);
(void)ctr_id;
- if (data) {
- ctx->preview_ppcfg = *((vp8_postproc_cfg_t *)data);
+ if (config != NULL) {
+ ctx->preview_ppcfg = *config;
return VPX_CODEC_OK;
} else {
return VPX_CODEC_INVALID_PARAM;
@@ -993,20 +1013,14 @@ static vpx_codec_err_t vp9e_set_activemap(vpx_codec_alg_priv_t *ctx,
static vpx_codec_err_t vp9e_set_scalemode(vpx_codec_alg_priv_t *ctx,
int ctr_id,
va_list args) {
- vpx_scaling_mode_t *data = va_arg(args, vpx_scaling_mode_t *);
+ vpx_scaling_mode_t *scalemode = va_arg(args, vpx_scaling_mode_t *);
- if (data) {
+ if (scalemode != NULL) {
int res;
- vpx_scaling_mode_t scalemode = *(vpx_scaling_mode_t *)data;
res = vp9_set_internal_size(ctx->cpi,
- (VPX_SCALING)scalemode.h_scaling_mode,
- (VPX_SCALING)scalemode.v_scaling_mode);
-
- if (!res) {
- return VPX_CODEC_OK;
- } else {
- return VPX_CODEC_INVALID_PARAM;
- }
+ (VPX_SCALING)scalemode->h_scaling_mode,
+ (VPX_SCALING)scalemode->v_scaling_mode);
+ return (res == 0) ? VPX_CODEC_OK : VPX_CODEC_INVALID_PARAM;
} else {
return VPX_CODEC_INVALID_PARAM;
}
@@ -1016,32 +1030,54 @@ static vpx_codec_err_t vp9e_set_svc(vpx_codec_alg_priv_t *ctx, int ctr_id,
va_list args) {
int data = va_arg(args, int);
vp9_set_svc(ctx->cpi, data);
+ // CBR mode for SVC with both temporal and spatial layers not yet supported.
+ if (data == 1 &&
+ ctx->cfg.rc_end_usage == VPX_CBR &&
+ ctx->cfg.ss_number_layers > 1 &&
+ ctx->cfg.ts_number_layers > 1) {
+ return VPX_CODEC_INVALID_PARAM;
+ }
+ return VPX_CODEC_OK;
+}
+
+static vpx_codec_err_t vp9e_set_svc_layer_id(vpx_codec_alg_priv_t *ctx,
+ int ctr_id,
+ va_list args) {
+ vpx_svc_layer_id_t *data = va_arg(args, vpx_svc_layer_id_t *);
+ VP9_COMP *cpi = (VP9_COMP *)ctx->cpi;
+ cpi->svc.spatial_layer_id = data->spatial_layer_id;
+ cpi->svc.temporal_layer_id = data->temporal_layer_id;
+ // Checks on valid layer_id input.
+ if (cpi->svc.temporal_layer_id < 0 ||
+ cpi->svc.temporal_layer_id >= (int)ctx->cfg.ts_number_layers) {
+ return VPX_CODEC_INVALID_PARAM;
+ }
+ if (cpi->svc.spatial_layer_id < 0 ||
+ cpi->svc.spatial_layer_id >= (int)ctx->cfg.ss_number_layers) {
+ return VPX_CODEC_INVALID_PARAM;
+ }
return VPX_CODEC_OK;
}
static vpx_codec_err_t vp9e_set_svc_parameters(vpx_codec_alg_priv_t *ctx,
int ctr_id, va_list args) {
- vpx_svc_parameters_t *data = va_arg(args, vpx_svc_parameters_t *);
VP9_COMP *cpi = (VP9_COMP *)ctx->cpi;
- vpx_svc_parameters_t params;
+ vpx_svc_parameters_t *params = va_arg(args, vpx_svc_parameters_t *);
- if (data == NULL) {
- return VPX_CODEC_INVALID_PARAM;
- }
+ if (params == NULL) return VPX_CODEC_INVALID_PARAM;
- params = *(vpx_svc_parameters_t *)data;
+ cpi->svc.spatial_layer_id = params->spatial_layer;
+ cpi->svc.temporal_layer_id = params->temporal_layer;
- cpi->current_layer = params.layer;
- cpi->lst_fb_idx = params.lst_fb_idx;
- cpi->gld_fb_idx = params.gld_fb_idx;
- cpi->alt_fb_idx = params.alt_fb_idx;
+ cpi->lst_fb_idx = params->lst_fb_idx;
+ cpi->gld_fb_idx = params->gld_fb_idx;
+ cpi->alt_fb_idx = params->alt_fb_idx;
- if (vp9_set_size_literal(ctx->cpi, params.width, params.height) != 0) {
+ if (vp9_set_size_literal(ctx->cpi, params->width, params->height) != 0)
return VPX_CODEC_INVALID_PARAM;
- }
- ctx->cfg.rc_max_quantizer = params.max_quantizer;
- ctx->cfg.rc_min_quantizer = params.min_quantizer;
+ ctx->cfg.rc_max_quantizer = params->max_quantizer;
+ ctx->cfg.rc_min_quantizer = params->min_quantizer;
set_vp9e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg);
vp9_change_config(ctx->cpi, &ctx->oxcf);
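
From the application side, the new VP9E_SET_SVC_LAYER_ID control is driven
through vpx_codec_control() with a vpx_svc_layer_id_t. A sketch, assuming an
initialized encoder context:

    #include "vpx/vpx_encoder.h"
    #include "vpx/vp8cx.h"

    static vpx_codec_err_t select_svc_layers(vpx_codec_ctx_t *codec,
                                             int spatial, int temporal) {
      vpx_svc_layer_id_t id;
      id.spatial_layer_id = spatial;    // must be in [0, ss_number_layers)
      id.temporal_layer_id = temporal;  // must be in [0, ts_number_layers)
      return vpx_codec_control(codec, VP9E_SET_SVC_LAYER_ID, &id);
    }
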
@@ -1080,6 +1116,7 @@ static vpx_codec_ctrl_fn_map_t vp9e_ctf_maps[] = {
{VP9_GET_REFERENCE, get_reference},
{VP9E_SET_SVC, vp9e_set_svc},
{VP9E_SET_SVC_PARAMETERS, vp9e_set_svc_parameters},
+ {VP9E_SET_SVC_LAYER_ID, vp9e_set_svc_layer_id},
{ -1, NULL},
};
@@ -1130,9 +1167,13 @@ static vpx_codec_enc_cfg_map_t vp9e_usage_cfg_map[] = {
9999, /* kf_max_dist */
VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */
-
+ {0}, /* ss_target_bitrate */
+ 1, /* ts_number_layers */
+ {0}, /* ts_target_bitrate */
+ {0}, /* ts_rate_decimator */
+ 0, /* ts_periodicity */
+ {0}, /* ts_layer_id */
#if VPX_ENCODER_ABI_VERSION == (1 + VPX_CODEC_ABI_VERSION)
- 1, /* g_delete_first_pass_file */
"vp8.fpf" /* first pass filename */
#endif
}
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 92c6cd20c..b85e17237 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -15,6 +15,7 @@
#include "vpx/vp8dx.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "./vpx_version.h"
+#include "vp9/common/vp9_frame_buffers.h"
#include "vp9/decoder/vp9_onyxd.h"
#include "vp9/decoder/vp9_onyxd_int.h"
#include "vp9/decoder/vp9_read_bit_buffer.h"
@@ -59,6 +60,11 @@ struct vpx_codec_alg_priv {
int img_setup;
int img_avail;
int invert_tile_order;
+
+ // External frame buffer info to save for VP9 common.
+ void *ext_priv; // Private data associated with the external frame buffers.
+ vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb;
+ vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb;
};
static unsigned long priv_sz(const vpx_codec_dec_cfg_t *si,
@@ -148,14 +154,12 @@ static vpx_codec_err_t vp9_peek_si(const uint8_t *data, unsigned int data_sz,
{
struct vp9_read_bit_buffer rb = { data, data + data_sz, 0, NULL, NULL };
const int frame_marker = vp9_rb_read_literal(&rb, 2);
- const int version = vp9_rb_read_bit(&rb) | (vp9_rb_read_bit(&rb) << 1);
+ const int version = vp9_rb_read_bit(&rb);
+ (void) vp9_rb_read_bit(&rb); // unused version bit
+
if (frame_marker != VP9_FRAME_MARKER)
return VPX_CODEC_UNSUP_BITSTREAM;
-#if CONFIG_NON420
if (version > 1) return VPX_CODEC_UNSUP_BITSTREAM;
-#else
- if (version != 0) return VPX_CODEC_UNSUP_BITSTREAM;
-#endif
if (vp9_rb_read_bit(&rb)) { // show an existing frame
return VPX_CODEC_OK;
@@ -206,7 +210,7 @@ static vpx_codec_err_t vp9_get_si(vpx_codec_alg_priv_t *ctx,
? sizeof(vp9_stream_info_t)
: sizeof(vpx_codec_stream_info_t);
memcpy(si, &ctx->si, sz);
- si->sz = sz;
+ si->sz = (unsigned int)sz;
return VPX_CODEC_OK;
}
@@ -291,10 +295,31 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
ctx->postproc_cfg.noise_level = 0;
}
- if (!optr)
+ if (!optr) {
res = VPX_CODEC_ERROR;
- else
+ } else {
+ VP9D_COMP *const pbi = (VP9D_COMP*)optr;
+ VP9_COMMON *const cm = &pbi->common;
+
+    // Mark the new frame buffer index as not yet initialized.
+ cm->new_fb_idx = -1;
+
+ if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) {
+ cm->get_fb_cb = ctx->get_ext_fb_cb;
+ cm->release_fb_cb = ctx->release_ext_fb_cb;
+ cm->cb_priv = ctx->ext_priv;
+ } else {
+ cm->get_fb_cb = vp9_get_frame_buffer;
+ cm->release_fb_cb = vp9_release_frame_buffer;
+
+ if (vp9_alloc_internal_frame_buffers(&cm->int_frame_buffers))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to initialize internal frame buffers");
+ cm->cb_priv = &cm->int_frame_buffers;
+ }
+
ctx->pbi = optr;
+ }
}
ctx->decoder_init = 1;
@@ -332,7 +357,11 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
if (!res && 0 == vp9_get_raw_frame(ctx->pbi, &sd, &time_stamp,
&time_end_stamp, &flags)) {
+ VP9D_COMP *const pbi = (VP9D_COMP*)ctx->pbi;
+ VP9_COMMON *const cm = &pbi->common;
yuvconfig2image(&ctx->img, &sd, user_priv);
+
+ ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv;
ctx->img_avail = 1;
}
}
@@ -429,7 +458,7 @@ static vpx_codec_err_t vp9_decode(vpx_codec_alg_priv_t *ctx,
while (data_start < data_end && *data_start == 0)
data_start++;
- data_sz = data_end - data_start;
+ data_sz = (unsigned int)(data_end - data_start);
} while (data_start < data_end);
return res;
}
@@ -452,6 +481,24 @@ static vpx_image_t *vp9_get_frame(vpx_codec_alg_priv_t *ctx,
return img;
}
+static vpx_codec_err_t vp9_set_fb_fn(
+ vpx_codec_alg_priv_t *ctx,
+ vpx_get_frame_buffer_cb_fn_t cb_get,
+ vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) {
+ if (cb_get == NULL || cb_release == NULL) {
+ return VPX_CODEC_INVALID_PARAM;
+ } else if (ctx->pbi == NULL) {
+    // The decoder is not initialized yet, so accept the new frame buffer
+    // functions; after initialization, changes are rejected (below).
+ ctx->get_ext_fb_cb = cb_get;
+ ctx->release_ext_fb_cb = cb_release;
+ ctx->ext_priv = cb_priv;
+ return VPX_CODEC_OK;
+ }
+
+ return VPX_CODEC_ERROR;
+}
+
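
A minimal callback pair an application could hand to the new set_fb_fn entry
point (exposed at the vpx_decoder.h level as
vpx_codec_set_frame_buffer_functions()). This malloc-backed sketch is
illustrative; a real implementation would typically pool and reuse buffers:

    #include <stdlib.h>
    #include "vpx/vpx_frame_buffer.h"

    static int app_get_frame_buffer(void *priv, size_t min_size,
                                    vpx_codec_frame_buffer_t *fb) {
      (void)priv;
      fb->data = (uint8_t *)malloc(min_size);
      if (fb->data == NULL) return -1;
      fb->size = min_size;
      fb->priv = NULL;  // surfaced back to the application via img.fb_priv
      return 0;
    }

    static int app_release_frame_buffer(void *priv,
                                        vpx_codec_frame_buffer_t *fb) {
      (void)priv;
      free(fb->data);
      fb->data = NULL;
      return 0;
    }
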
static vpx_codec_err_t vp9_xma_get_mmap(const vpx_codec_ctx_t *ctx,
vpx_codec_mmap_t *mmap,
vpx_codec_iter_t *iter) {
@@ -685,7 +732,8 @@ static vpx_codec_ctrl_fn_map_t ctf_maps[] = {
CODEC_INTERFACE(vpx_codec_vp9_dx) = {
"WebM Project VP9 Decoder" VERSION_STRING,
VPX_CODEC_INTERNAL_ABI_VERSION,
- VPX_CODEC_CAP_DECODER | VP9_CAP_POSTPROC,
+ VPX_CODEC_CAP_DECODER | VP9_CAP_POSTPROC |
+ VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER,
/* vpx_codec_caps_t caps; */
vp9_init, /* vpx_codec_init_fn_t init; */
vp9_destroy, /* vpx_codec_destroy_fn_t destroy; */
@@ -697,6 +745,7 @@ CODEC_INTERFACE(vpx_codec_vp9_dx) = {
vp9_get_si, /* vpx_codec_get_si_fn_t get_si; */
vp9_decode, /* vpx_codec_decode_fn_t decode; */
vp9_get_frame, /* vpx_codec_frame_get_fn_t frame_get; */
+ vp9_set_fb_fn, /* vpx_codec_set_fb_fn_t set_fb_fn; */
},
{ // NOLINT
/* encoder functions */
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 63003b9c2..6679f89be 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -19,7 +19,6 @@ VP9_CX_SRCS-yes += vp9_cx_iface.c
VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
VP9_CX_SRCS-yes += encoder/vp9_dct.c
-VP9_CX_SRCS-yes += encoder/vp9_dct.h
VP9_CX_SRCS-yes += encoder/vp9_encodeframe.c
VP9_CX_SRCS-yes += encoder/vp9_encodeframe.h
VP9_CX_SRCS-yes += encoder/vp9_encodemb.c
@@ -39,7 +38,6 @@ VP9_CX_SRCS-yes += encoder/vp9_lookahead.c
VP9_CX_SRCS-yes += encoder/vp9_lookahead.h
VP9_CX_SRCS-yes += encoder/vp9_mcomp.h
VP9_CX_SRCS-yes += encoder/vp9_onyx_int.h
-VP9_CX_SRCS-yes += encoder/vp9_psnr.h
VP9_CX_SRCS-yes += encoder/vp9_quantize.h
VP9_CX_SRCS-yes += encoder/vp9_ratectrl.h
VP9_CX_SRCS-yes += encoder/vp9_rdopt.h
@@ -51,12 +49,11 @@ VP9_CX_SRCS-yes += encoder/vp9_mcomp.c
VP9_CX_SRCS-yes += encoder/vp9_onyx_if.c
VP9_CX_SRCS-yes += encoder/vp9_picklpf.c
VP9_CX_SRCS-yes += encoder/vp9_picklpf.h
-VP9_CX_SRCS-yes += encoder/vp9_psnr.c
VP9_CX_SRCS-yes += encoder/vp9_quantize.c
VP9_CX_SRCS-yes += encoder/vp9_ratectrl.c
VP9_CX_SRCS-yes += encoder/vp9_rdopt.c
VP9_CX_SRCS-yes += encoder/vp9_pickmode.c
-VP9_CX_SRCS-yes += encoder/vp9_sad_c.c
+VP9_CX_SRCS-yes += encoder/vp9_sad.c
VP9_CX_SRCS-yes += encoder/vp9_segmentation.c
VP9_CX_SRCS-yes += encoder/vp9_segmentation.h
VP9_CX_SRCS-yes += encoder/vp9_subexp.c
@@ -66,7 +63,7 @@ VP9_CX_SRCS-yes += encoder/vp9_resize.h
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c
VP9_CX_SRCS-yes += encoder/vp9_tokenize.c
VP9_CX_SRCS-yes += encoder/vp9_treewriter.c
-VP9_CX_SRCS-yes += encoder/vp9_variance_c.c
+VP9_CX_SRCS-yes += encoder/vp9_variance.c
VP9_CX_SRCS-yes += encoder/vp9_vaq.c
VP9_CX_SRCS-yes += encoder/vp9_vaq.h
ifeq ($(CONFIG_VP9_POSTPROC),yes)
@@ -87,10 +84,11 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance_impl_sse2.asm
+VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
-ifeq ($(USE_X86INC),yes)
+ifeq ($(CONFIG_USE_X86INC),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm