summary | refs | log | tree | commit | diff
path: root/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'vp9')
-rw-r--r-- vp9/common/arm/neon/vp9_loopfilter_neon.asm 356
-rw-r--r-- vp9/common/vp9_alloccommon.c 5
-rw-r--r-- vp9/common/vp9_common_data.c 43
-rw-r--r-- vp9/common/vp9_common_data.h 8
-rw-r--r-- vp9/common/vp9_enums.h 4
-rw-r--r-- vp9/common/vp9_loopfilter.c 2
-rw-r--r-- vp9/common/vp9_loopfilter.h 2
-rw-r--r-- vp9/common/vp9_onyx.h 8
-rw-r--r-- vp9/common/vp9_onyxc_int.h 3
-rw-r--r-- vp9/common/vp9_pred_common.c 2
-rw-r--r-- vp9/common/vp9_seg_common.h 12
-rw-r--r-- vp9/decoder/vp9_decodemv.c 2
-rw-r--r-- vp9/decoder/vp9_decodframe.c 4
-rw-r--r-- vp9/encoder/vp9_bitstream.c 44
-rw-r--r-- vp9/encoder/vp9_block.h 1
-rw-r--r-- vp9/encoder/vp9_encodeframe.c 195
-rw-r--r-- vp9/encoder/vp9_encodemb.c 12
-rw-r--r-- vp9/encoder/vp9_encodemv.c 18
-rw-r--r-- vp9/encoder/vp9_mcomp.c 75
-rw-r--r-- vp9/encoder/vp9_onyx_if.c 72
-rw-r--r-- vp9/encoder/vp9_onyx_int.h 3
-rw-r--r-- vp9/encoder/vp9_rdopt.c 125
-rw-r--r-- vp9/encoder/vp9_segmentation.c 8
23 files changed, 587 insertions, 417 deletions
diff --git a/vp9/common/arm/neon/vp9_loopfilter_neon.asm b/vp9/common/arm/neon/vp9_loopfilter_neon.asm
index 4fe1a6ac6..8b4fe5dcc 100644
--- a/vp9/common/arm/neon/vp9_loopfilter_neon.asm
+++ b/vp9/common/arm/neon/vp9_loopfilter_neon.asm
@@ -37,13 +37,14 @@
|vp9_loop_filter_horizontal_edge_neon| PROC
push {lr}
- ldr r12, [sp,#8] ; load count
+ vld1.8 {d0[]}, [r2] ; duplicate *blimit
+ ldr r12, [sp, #8] ; load count
+ ldr r2, [sp, #4] ; load thresh
add r1, r1, r1 ; double pitch
+
cmp r12, #0
beq end_vp9_lf_h_edge
- vld1.8 {d0[]}, [r2] ; duplicate *blimit
- ldr r2, [sp, #4] ; load thresh
vld1.8 {d1[]}, [r3] ; duplicate *limit
vld1.8 {d2[]}, [r2] ; duplicate *thresh
@@ -99,18 +100,18 @@ end_vp9_lf_h_edge
|vp9_loop_filter_vertical_edge_neon| PROC
push {lr}
- ldr r12, [sp,#8] ; load count
+ vld1.8 {d0[]}, [r2] ; duplicate *blimit
+ ldr r12, [sp, #8] ; load count
+ vld1.8 {d1[]}, [r3] ; duplicate *limit
+
+ ldr r3, [sp, #4] ; load thresh
+ sub r2, r0, #4 ; move s pointer down by 4 columns
cmp r12, #0
beq end_vp9_lf_v_edge
- vld1.8 {d0[]}, [r2] ; duplicate *blimit
- ldr r2, [sp, #4] ; load thresh
- vld1.8 {d1[]}, [r3] ; duplicate *limit
- vld1.8 {d2[]}, [r2] ; duplicate *thresh
+ vld1.8 {d2[]}, [r3] ; duplicate *thresh
count_lf_v_loop
- sub r2, r0, #4 ; move s pointer down by 4 columns
-
vld1.u8 {d3}, [r2], r1 ; load s data
vld1.u8 {d4}, [r2], r1
vld1.u8 {d5}, [r2], r1
@@ -152,6 +153,7 @@ count_lf_v_loop
add r0, r0, r1, lsl #3 ; s += pitch * 8
subs r12, r12, #1
+ subne r2, r0, #4 ; move s pointer down by 4 columns
bne count_lf_v_loop
end_vp9_lf_v_edge
@@ -163,6 +165,7 @@ end_vp9_lf_v_edge
; necessary load, transpose (if necessary) and store. The function does not use
; registers d8-d15.
;
+; Inputs:
; r0-r3, r12 PRESERVE
; d0 blimit
; d1 limit
@@ -175,39 +178,48 @@ end_vp9_lf_v_edge
; d16 q1
; d17 q2
; d18 q3
+;
+; Outputs:
+; d4 op1
+; d5 op0
+; d6 oq0
+; d7 oq1
|vp9_loop_filter_neon| PROC
; filter_mask
- vabd.u8 d19, d3, d4 ; abs(p3 - p2)
- vabd.u8 d20, d4, d5 ; abs(p2 - p1)
- vabd.u8 d21, d5, d6 ; abs(p1 - p0)
- vabd.u8 d22, d16, d7 ; abs(q1 - q0)
- vabd.u8 d3, d17, d16 ; abs(q2 - q1)
- vabd.u8 d4, d18, d17 ; abs(q3 - q2)
+ vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2)
+ vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1)
+ vabd.u8 d21, d5, d6 ; m3 = abs(p1 - p0)
+ vabd.u8 d22, d16, d7 ; m4 = abs(q1 - q0)
+ vabd.u8 d3, d17, d16 ; m5 = abs(q2 - q1)
+ vabd.u8 d4, d18, d17 ; m6 = abs(q3 - q2)
; only compare the largest value to limit
- vmax.u8 d19, d19, d20
- vmax.u8 d20, d21, d22
- vmax.u8 d3, d3, d4
- vmax.u8 d23, d19, d20
+ vmax.u8 d19, d19, d20 ; m1 = max(m1, m2)
+ vmax.u8 d20, d21, d22 ; m2 = max(m3, m4)
vabd.u8 d17, d6, d7 ; abs(p0 - q0)
+ vmax.u8 d3, d3, d4 ; m3 = max(m5, m6)
+
+ vmov.u8 d18, #0x80
+
+ vmax.u8 d23, d19, d20 ; m1 = max(m1, m2)
+
; hevmask
vcgt.u8 d21, d21, d2 ; (abs(p1 - p0) > thresh)*-1
vcgt.u8 d22, d22, d2 ; (abs(q1 - q0) > thresh)*-1
- vmax.u8 d23, d23, d3
-
- vmov.u8 d18, #0x80
+ vmax.u8 d23, d23, d3 ; m1 = max(m1, m3)
vabd.u8 d28, d5, d16 ; a = abs(p1 - q1)
vqadd.u8 d17, d17, d17 ; b = abs(p0 - q0) * 2
- ; abs () > limit
- vcge.u8 d23, d1, d23
+ veor d7, d7, d18 ; qs0
+
+ vcge.u8 d23, d1, d23 ; mask = (limit >= m1), i.e. !(m1 > limit)
; filter() function
; convert to signed
- veor d7, d7, d18 ; qs0
+
vshr.u8 d28, d28, #1 ; a = a / 2
veor d6, d6, d18 ; ps0
@@ -244,19 +256,20 @@ end_vp9_lf_v_edge
vshr.s8 d28, d28, #3 ; filter2 >>= 3
vshr.s8 d27, d27, #3 ; filter1 >>= 3
-
vqadd.s8 d19, d6, d28 ; u = clamp(ps0 + filter2)
vqsub.s8 d26, d7, d27 ; u = clamp(qs0 - filter1)
- ; outer tap adjustments: ++filter >> 1
- vrshr.s8 d27, d27, #1
+ ; outer tap adjustments
+ vrshr.s8 d27, d27, #1 ; filter = ++filter1 >> 1
+
+ veor d6, d26, d18 ; *oq0 = u^0x80
+
vbic d27, d27, d22 ; filter &= ~hev
vqadd.s8 d21, d5, d27 ; u = clamp(ps1 + filter)
vqsub.s8 d20, d16, d27 ; u = clamp(qs1 - filter)
veor d5, d19, d18 ; *op0 = u^0x80
- veor d6, d26, d18 ; *oq0 = u^0x80
veor d4, d21, d18 ; *op1 = u^0x80
veor d7, d20, d18 ; *oq1 = u^0x80
@@ -277,13 +290,14 @@ end_vp9_lf_v_edge
|vp9_mbloop_filter_horizontal_edge_neon| PROC
push {r4-r5, lr}
- ldr r12, [sp,#16] ; load count
+ vld1.8 {d0[]}, [r2] ; duplicate *blimit
+ ldr r12, [sp, #16] ; load count
+ ldr r2, [sp, #12] ; load thresh
add r1, r1, r1 ; double pitch
+
cmp r12, #0
beq end_vp9_mblf_h_edge
- vld1.8 {d0[]}, [r2] ; duplicate *blimit
- ldr r2, [sp, #12] ; load thresh
vld1.8 {d1[]}, [r3] ; duplicate *limit
vld1.8 {d2[]}, [r2] ; duplicate *thresh
@@ -305,12 +319,12 @@ count_mblf_h_loop
bl vp9_mbloop_filter_neon
- vst1.u8 {d2}, [r2@64], r1 ; store op2
- vst1.u8 {d3}, [r3@64], r1 ; store op1
- vst1.u8 {d4}, [r2@64], r1 ; store op0
- vst1.u8 {d5}, [r3@64], r1 ; store oq0
- vst1.u8 {d6}, [r2@64], r1 ; store oq1
- vst1.u8 {d7}, [r3@64], r1 ; store oq2
+ vst1.u8 {d0}, [r2@64], r1 ; store op2
+ vst1.u8 {d1}, [r3@64], r1 ; store op1
+ vst1.u8 {d2}, [r2@64], r1 ; store op0
+ vst1.u8 {d3}, [r3@64], r1 ; store oq0
+ vst1.u8 {d4}, [r2@64], r1 ; store oq1
+ vst1.u8 {d5}, [r3@64], r1 ; store oq2
add r0, r0, #8
subs r12, r12, #1
@@ -337,18 +351,18 @@ end_vp9_mblf_h_edge
|vp9_mbloop_filter_vertical_edge_neon| PROC
push {r4-r5, lr}
- ldr r12, [sp,#16] ; load count
+ vld1.8 {d0[]}, [r2] ; duplicate *blimit
+ ldr r12, [sp, #16] ; load count
+ vld1.8 {d1[]}, [r3] ; duplicate *limit
+
+ ldr r3, [sp, #12] ; load thresh
+ sub r2, r0, #4 ; move s pointer down by 4 columns
cmp r12, #0
beq end_vp9_mblf_v_edge
- vld1.8 {d0[]}, [r2] ; duplicate *blimit
- ldr r2, [sp, #12] ; load thresh
- vld1.8 {d1[]}, [r3] ; duplicate *limit
- vld1.8 {d2[]}, [r2] ; duplicate *thresh
+ vld1.8 {d2[]}, [r3] ; duplicate *thresh
count_mblf_v_loop
- sub r2, r0, #4 ; move s pointer down by 4 columns
-
vld1.u8 {d3}, [r2], r1 ; load s data
vld1.u8 {d4}, [r2], r1
vld1.u8 {d5}, [r2], r1
@@ -380,27 +394,28 @@ count_mblf_v_loop
bl vp9_mbloop_filter_neon
;store op2, op1, op0, oq0
- vst4.8 {d2[0], d3[0], d4[0], d5[0]}, [r2], r1
- vst4.8 {d2[1], d3[1], d4[1], d5[1]}, [r2], r1
- vst4.8 {d2[2], d3[2], d4[2], d5[2]}, [r2], r1
- vst4.8 {d2[3], d3[3], d4[3], d5[3]}, [r2], r1
- vst4.8 {d2[4], d3[4], d4[4], d5[4]}, [r2], r1
- vst4.8 {d2[5], d3[5], d4[5], d5[5]}, [r2], r1
- vst4.8 {d2[6], d3[6], d4[6], d5[6]}, [r2], r1
- vst4.8 {d2[7], d3[7], d4[7], d5[7]}, [r2]
+ vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r2], r1
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r2], r1
+ vst4.8 {d0[2], d1[2], d2[2], d3[2]}, [r2], r1
+ vst4.8 {d0[3], d1[3], d2[3], d3[3]}, [r2], r1
+ vst4.8 {d0[4], d1[4], d2[4], d3[4]}, [r2], r1
+ vst4.8 {d0[5], d1[5], d2[5], d3[5]}, [r2], r1
+ vst4.8 {d0[6], d1[6], d2[6], d3[6]}, [r2], r1
+ vst4.8 {d0[7], d1[7], d2[7], d3[7]}, [r2]
;store oq1, oq2
- vst2.8 {d6[0], d7[0]}, [r3], r1
- vst2.8 {d6[1], d7[1]}, [r3], r1
- vst2.8 {d6[2], d7[2]}, [r3], r1
- vst2.8 {d6[3], d7[3]}, [r3], r1
- vst2.8 {d6[4], d7[4]}, [r3], r1
- vst2.8 {d6[5], d7[5]}, [r3], r1
- vst2.8 {d6[6], d7[6]}, [r3], r1
- vst2.8 {d6[7], d7[7]}, [r3]
+ vst2.8 {d4[0], d5[0]}, [r3], r1
+ vst2.8 {d4[1], d5[1]}, [r3], r1
+ vst2.8 {d4[2], d5[2]}, [r3], r1
+ vst2.8 {d4[3], d5[3]}, [r3], r1
+ vst2.8 {d4[4], d5[4]}, [r3], r1
+ vst2.8 {d4[5], d5[5]}, [r3], r1
+ vst2.8 {d4[6], d5[6]}, [r3], r1
+ vst2.8 {d4[7], d5[7]}, [r3]
add r0, r0, r1, lsl #3 ; s += pitch * 8
subs r12, r12, #1
+ subne r2, r0, #4 ; move s pointer down by 4 columns
bne count_mblf_v_loop
end_vp9_mblf_v_edge
@@ -412,6 +427,7 @@ end_vp9_mblf_v_edge
; necessary load, transpose (if necessary) and store. The function does not use
; registers d8-d15.
;
+; Inputs:
; r0-r3, r12 PRESERVE
; d0 blimit
; d1 limit
@@ -424,22 +440,38 @@ end_vp9_mblf_v_edge
; d16 q1
; d17 q2
; d18 q3
+;
+; Outputs:
+; d0 op2
+; d1 op1
+; d2 op0
+; d3 oq0
+; d4 oq1
+; d5 oq2
|vp9_mbloop_filter_neon| PROC
; filter_mask
- vabd.u8 d19, d3, d4 ; abs(p3 - p2)
- vabd.u8 d20, d4, d5 ; abs(p2 - p1)
- vabd.u8 d21, d5, d6 ; abs(p1 - p0)
- vabd.u8 d22, d16, d7 ; abs(q1 - q0)
- vabd.u8 d23, d17, d16 ; abs(q2 - q1)
- vabd.u8 d24, d18, d17 ; abs(q3 - q2)
+ vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2)
+ vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1)
+ vabd.u8 d21, d5, d6 ; m3 = abs(p1 - p0)
+ vabd.u8 d22, d16, d7 ; m4 = abs(q1 - q0)
+ vabd.u8 d23, d17, d16 ; m5 = abs(q2 - q1)
+ vabd.u8 d24, d18, d17 ; m6 = abs(q3 - q2)
; only compare the largest value to limit
- vmax.u8 d19, d19, d20 ; max(abs(p3 - p2), abs(p2 - p1))
- vmax.u8 d20, d21, d22 ; max(abs(p1 - p0), abs(q1 - q0))
- vmax.u8 d23, d23, d24 ; max(abs(q2 - q1), abs(q3 - q2))
+ vmax.u8 d19, d19, d20 ; m1 = max(m1, m2)
+ vmax.u8 d20, d21, d22 ; m2 = max(m3, m4)
+
+ vabd.u8 d25, d6, d4 ; m7 = abs(p0 - p2)
+
+ vmax.u8 d23, d23, d24 ; m3 = max(m5, m6)
+
+ vabd.u8 d26, d7, d17 ; m8 = abs(q0 - q2)
+
vmax.u8 d19, d19, d20
- vabd.u8 d24, d6, d7 ; abs(p0 - q0)
+ vabd.u8 d24, d6, d7 ; m9 = abs(p0 - q0)
+ vabd.u8 d27, d3, d6 ; m10 = abs(p3 - p0)
+ vabd.u8 d28, d18, d7 ; m11 = abs(q3 - q0)
vmax.u8 d19, d19, d23
@@ -449,30 +481,35 @@ end_vp9_mblf_v_edge
; abs () > limit
vcge.u8 d19, d1, d19
- ; flatmask4
- vabd.u8 d25, d6, d4 ; abs(p0 - p2)
- vabd.u8 d26, d7, d17 ; abs(q0 - q2)
- vabd.u8 d27, d3, d6 ; abs(p3 - p0)
- vabd.u8 d28, d18, d7 ; abs(q3 - q0)
-
; only compare the largest value to thresh
- vmax.u8 d25, d25, d26 ; max(abs(p0 - p2), abs(q0 - q2))
- vmax.u8 d26, d27, d28 ; max(abs(p3 - p0), abs(q3 - q0))
- vmax.u8 d25, d25, d26
- vmax.u8 d20, d20, d25
+ vmax.u8 d25, d25, d26 ; m4 = max(m7, m8)
+ vmax.u8 d26, d27, d28 ; m5 = max(m10, m11)
vshr.u8 d23, d23, #1 ; a = a / 2
+
+ vmax.u8 d25, d25, d26 ; m4 = max(m4, m5)
+
vqadd.u8 d24, d24, d23 ; a = b + a
+ vmax.u8 d20, d20, d25 ; m2 = max(m2, m4)
+
vmov.u8 d23, #1
vcge.u8 d24, d0, d24 ; a > blimit
+ vcgt.u8 d21, d21, d2 ; (abs(p1 - p0) > thresh)*-1
+
vcge.u8 d20, d23, d20 ; flat
vand d19, d19, d24 ; mask
+ vcgt.u8 d23, d22, d2 ; (abs(q1 - q0) > thresh)*-1
+
vand d20, d20, d19 ; flat & mask
+ vmov.u8 d22, #0x80
+
+ vorr d23, d21, d23 ; hev
+
; This instruction will truncate the "flat & mask" masks down to 4 bits
; each to fit into one 32 bit arm register. The values are stored in
; q10.64[0].
@@ -480,35 +517,30 @@ end_vp9_mblf_v_edge
vmov.u32 r4, d30[0] ; flat & mask 4bits
adds r5, r4, #1 ; Check for all 1's
+
+ ; If mask and flat are 1's for all vectors, then we only need to execute
+ ; the power branch for all vectors.
beq power_branch_only
cmp r4, #0 ; Check for 0, set flag for later
- ; hevmask
- vcgt.u8 d21, d21, d2 ; (abs(p1 - p0) > thresh)*-1
- vcgt.u8 d22, d22, d2 ; (abs(q1 - q0) > thresh)*-1
- vorr d21, d21, d22 ; hev
-
- vmov.u8 d22, #0x80
-
; mbfilter() function
-
; filter() function
; convert to signed
- veor d23, d7, d22 ; qs0
+ veor d21, d7, d22 ; qs0
veor d24, d6, d22 ; ps0
veor d25, d5, d22 ; ps1
veor d26, d16, d22 ; qs1
vmov.u8 d27, #3
- vsub.s8 d28, d23, d24 ; ( qs0 - ps0)
+ vsub.s8 d28, d21, d24 ; ( qs0 - ps0)
vqsub.s8 d29, d25, d26 ; filter = clamp(ps1-qs1)
vmull.s8 q15, d28, d27 ; 3 * ( qs0 - ps0)
- vand d29, d29, d21 ; filter &= hev
+ vand d29, d29, d23 ; filter &= hev
vaddw.s8 q15, q15, d29 ; filter + 3 * (qs0 - ps0)
@@ -525,80 +557,96 @@ end_vp9_mblf_v_edge
vshr.s8 d29, d29, #3 ; filter1 >>= 3
vqadd.s8 d24, d24, d30 ; op0 = clamp(ps0 + filter2)
- vqsub.s8 d23, d23, d29 ; oq0 = clamp(qs0 - filter1)
+ vqsub.s8 d21, d21, d29 ; oq0 = clamp(qs0 - filter1)
; outer tap adjustments: ++filter1 >> 1
vrshr.s8 d29, d29, #1
- vbic d29, d29, d21 ; filter &= ~hev
+ vbic d29, d29, d23 ; filter &= ~hev
vqadd.s8 d25, d25, d29 ; op1 = clamp(ps1 + filter)
vqsub.s8 d26, d26, d29 ; oq1 = clamp(qs1 - filter)
+ ; If mask and flat are 0's for all vectors, then we only need to execute
+ ; the filter branch for all vectors.
beq filter_branch_only
+ ; If mask and flat are mixed then we must perform both branches and
+ ; combine the data.
veor d24, d24, d22 ; *f_op0 = u^0x80
- veor d23, d23, d22 ; *f_oq0 = u^0x80
+ veor d21, d21, d22 ; *f_oq0 = u^0x80
veor d25, d25, d22 ; *f_op1 = u^0x80
veor d26, d26, d22 ; *f_oq1 = u^0x80
- ; mbfilter flat && mask branch
- ; TODO(fgalligan): Can I decrease the cycles shifting to consective d's
- ; and using vibt on the q's?
- vmov.u8 d21, #2
- vaddl.u8 q14, d6, d7 ; op2 = p0 + q0
- vmlal.u8 q14, d3, d27 ; op2 += p3 * 3
- vmlal.u8 q14, d4, d21 ; op2 += p2 * 2
- vaddw.u8 q14, d5 ; op2 += p1
+ ; At this point we have already executed the filter branch. The filter
+ ; branch does not set op2 or oq2, so use p2 and q2. Execute the power
+ ; branch and combine the data.
+ vmov.u8 d23, #2
+ vaddl.u8 q14, d6, d7 ; r_op2 = p0 + q0
+ vmlal.u8 q14, d3, d27 ; r_op2 += p3 * 3
+ vmlal.u8 q14, d4, d23 ; r_op2 += p2 * 2
+
+ vbif d0, d4, d20 ; op2 |= p2 & ~(flat & mask)
+
+ vaddw.u8 q14, d5 ; r_op2 += p1
+
+ vbif d1, d25, d20 ; op1 |= f_op1 & ~(flat & mask)
+
vqrshrn.u16 d30, q14, #3 ; r_op2
- vsubw.u8 q14, d3 ; op1 = op2 - p3
- vsubw.u8 q14, d4 ; op1 -= p2
- vaddw.u8 q14, d5 ; op1 += p1
- vaddw.u8 q14, d16 ; op1 += q1
+ vsubw.u8 q14, d3 ; r_op1 = r_op2 - p3
+ vsubw.u8 q14, d4 ; r_op1 -= p2
+ vaddw.u8 q14, d5 ; r_op1 += p1
+ vaddw.u8 q14, d16 ; r_op1 += q1
+
+ vbif d2, d24, d20 ; op0 |= f_op0 & ~(flat & mask)
+
vqrshrn.u16 d31, q14, #3 ; r_op1
- vsubw.u8 q14, d3 ; op0 = op1 - p3
- vsubw.u8 q14, d5 ; op0 -= p1
- vaddw.u8 q14, d6 ; op0 += p0
- vaddw.u8 q14, d17 ; op0 += q2
- vqrshrn.u16 d21, q14, #3 ; r_op0
+ vsubw.u8 q14, d3 ; r_op0 = r_op1 - p3
+ vsubw.u8 q14, d5 ; r_op0 -= p1
+ vaddw.u8 q14, d6 ; r_op0 += p0
+ vaddw.u8 q14, d17 ; r_op0 += q2
+
+ vbit d0, d30, d20 ; op2 |= r_op2 & (flat & mask)
+
+ vqrshrn.u16 d23, q14, #3 ; r_op0
+
+ vsubw.u8 q14, d3 ; r_oq0 = r_op0 - p3
+ vsubw.u8 q14, d6 ; r_oq0 -= p0
+ vaddw.u8 q14, d7 ; r_oq0 += q0
+
+ vbit d1, d31, d20 ; op1 |= r_op1 & (flat & mask)
- vsubw.u8 q14, d3 ; oq0 = op0 - p3
- vsubw.u8 q14, d6 ; oq0 -= p0
- vaddw.u8 q14, d7 ; oq0 += q0
vaddw.u8 q14, d18 ; oq0 += q3
+
+ vbit d2, d23, d20 ; op0 |= r_op0 & (flat & mask)
+
vqrshrn.u16 d22, q14, #3 ; r_oq0
- vsubw.u8 q14, d4 ; oq1 = oq0 - p2
- vsubw.u8 q14, d7 ; oq1 -= q0
- vaddw.u8 q14, d16 ; oq1 += q1
- vaddw.u8 q14, d18 ; oq1 += q3
- vqrshrn.u16 d0, q14, #3 ; r_oq1
+ vsubw.u8 q14, d4 ; r_oq1 = r_oq0 - p2
+ vsubw.u8 q14, d7 ; r_oq1 -= q0
+ vaddw.u8 q14, d16 ; r_oq1 += q1
- vsubw.u8 q14, d5 ; oq2 = oq0 - p1
- vsubw.u8 q14, d16 ; oq2 -= q1
- vaddw.u8 q14, d17 ; oq2 += q2
- vaddw.u8 q14, d18 ; oq2 += q3
- vqrshrn.u16 d1, q14, #3 ; r_oq2
+ vbif d3, d21, d20 ; oq0 |= f_oq0 & ~(flat & mask)
+
+ vaddw.u8 q14, d18 ; r_oq1 += q3
- ; Filter does not set op2 or oq2, so use p2 and q2.
- vbit d2, d30, d20 ; op2 |= r_op2 & (flat & mask)
- vbif d2, d4, d20 ; op2 |= op2 & ~(flat & mask)
+ vbif d4, d26, d20 ; oq1 |= f_oq1 & ~(flat & mask)
- vbit d3, d31, d20 ; op1 |= r_op1 & (flat & mask)
- vbif d3, d25, d20 ; op1 |= f_op1 & ~(flat & mask)
+ vqrshrn.u16 d6, q14, #3 ; r_oq1
- vbit d4, d21, d20 ; op0 |= r_op0 & (flat & mask)
- vbif d4, d24, d20 ; op0 |= f_op0 & ~(flat & mask)
+ vsubw.u8 q14, d5 ; r_oq2 = r_oq1 - p1
+ vsubw.u8 q14, d16 ; r_oq2 -= q1
+ vaddw.u8 q14, d17 ; r_oq2 += q2
+ vaddw.u8 q14, d18 ; r_oq2 += q3
- vbit d5, d22, d20 ; oq0 |= r_oq0 & (flat & mask)
- vbif d5, d23, d20 ; oq0 |= f_oq0 & ~(flat & mask)
+ vbif d5, d17, d20 ; oq2 |= q2 & ~(flat & mask)
- vbit d6, d0, d20 ; oq1 |= r_oq1 & (flat & mask)
- vbif d6, d26, d20 ; oq1 |= f_oq1 & ~(flat & mask)
+ vqrshrn.u16 d7, q14, #3 ; r_oq2
- vbit d7, d1, d20 ; oq2 |= r_oq2 & (flat & mask)
- vbif d7, d17, d20 ; oq2 |= oq2 & ~(flat & mask)
+ vbit d3, d22, d20 ; oq0 |= r_oq0 & (flat & mask)
+ vbit d4, d6, d20 ; oq1 |= r_oq1 & (flat & mask)
+ vbit d5, d7, d20 ; oq2 |= r_oq2 & (flat & mask)
bx lr
@@ -609,53 +657,49 @@ power_branch_only
vmlal.u8 q14, d3, d27 ; op2 += p3 * 3
vmlal.u8 q14, d4, d21 ; op2 += p2 * 2
vaddw.u8 q14, d5 ; op2 += p1
- vqrshrn.u16 d2, q14, #3 ; op2
+ vqrshrn.u16 d0, q14, #3 ; op2
vsubw.u8 q14, d3 ; op1 = op2 - p3
vsubw.u8 q14, d4 ; op1 -= p2
vaddw.u8 q14, d5 ; op1 += p1
vaddw.u8 q14, d16 ; op1 += q1
- vqrshrn.u16 d31, q14, #3 ; op1
+ vqrshrn.u16 d1, q14, #3 ; op1
vsubw.u8 q14, d3 ; op0 = op1 - p3
vsubw.u8 q14, d5 ; op0 -= p1
vaddw.u8 q14, d6 ; op0 += p0
vaddw.u8 q14, d17 ; op0 += q2
- vqrshrn.u16 d21, q14, #3 ; op0
+ vqrshrn.u16 d2, q14, #3 ; op0
vsubw.u8 q14, d3 ; oq0 = op0 - p3
vsubw.u8 q14, d6 ; oq0 -= p0
vaddw.u8 q14, d7 ; oq0 += q0
vaddw.u8 q14, d18 ; oq0 += q3
- vqrshrn.u16 d22, q14, #3 ; oq0
+ vqrshrn.u16 d3, q14, #3 ; oq0
vsubw.u8 q14, d4 ; oq1 = oq0 - p2
vsubw.u8 q14, d7 ; oq1 -= q0
vaddw.u8 q14, d16 ; oq1 += q1
vaddw.u8 q14, d18 ; oq1 += q3
- vqrshrn.u16 d6, q14, #3 ; oq1
+ vqrshrn.u16 d4, q14, #3 ; oq1
- vsubw.u8 q14, d5 ; oq2 = oq0 - p1
+ vsubw.u8 q14, d5 ; oq2 = oq1 - p1
vsubw.u8 q14, d16 ; oq2 -= q1
vaddw.u8 q14, d17 ; oq2 += q2
vaddw.u8 q14, d18 ; oq2 += q3
- vqrshrn.u16 d7, q14, #3 ; oq2
-
- vswp d3, d31
- vswp d4, d21
- vswp d5, d22
+ vqrshrn.u16 d5, q14, #3 ; oq2
bx lr
filter_branch_only
; TODO(fgalligan): See if we can rearange registers so we do not need to
; do the 2 vswp.
- vswp d2, d4 ; op2
- vswp d7, d17 ; oq2
- veor d4, d24, d22 ; *op0 = u^0x80
- veor d5, d23, d22 ; *oq0 = u^0x80
- veor d3, d25, d22 ; *op1 = u^0x80
- veor d6, d26, d22 ; *oq1 = u^0x80
+ vswp d0, d4 ; op2
+ vswp d5, d17 ; oq2
+ veor d2, d24, d22 ; *op0 = u^0x80
+ veor d3, d21, d22 ; *oq0 = u^0x80
+ veor d1, d25, d22 ; *op1 = u^0x80
+ veor d4, d26, d22 ; *oq1 = u^0x80
bx lr
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 0a4f921c2..554a31730 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -53,7 +53,6 @@ void vp9_free_frame_buffers(VP9_COMMON *oci) {
for (i = 0; i < NUM_YV12_BUFFERS; i++)
vp9_free_frame_buffer(&oci->yv12_fb[i]);
- vp9_free_frame_buffer(&oci->temp_scale_frame);
vp9_free_frame_buffer(&oci->post_proc_buffer);
vpx_free(oci->mip);
@@ -121,10 +120,6 @@ int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
oci->fb_idx_ref_cnt[i] = 1;
}
- if (vp9_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, ss_x, ss_y,
- VP9BORDERINPIXELS) < 0)
- goto fail;
-
if (vp9_alloc_frame_buffer(&oci->post_proc_buffer, width, height, ss_x, ss_y,
VP9BORDERINPIXELS) < 0)
goto fail;
diff --git a/vp9/common/vp9_common_data.c b/vp9/common/vp9_common_data.c
index d5b51e89d..dee44ec63 100644
--- a/vp9/common/vp9_common_data.c
+++ b/vp9/common/vp9_common_data.c
@@ -17,11 +17,54 @@ const int b_width_log2_lookup[BLOCK_SIZE_TYPES] =
{0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4};
const int b_height_log2_lookup[BLOCK_SIZE_TYPES] =
{0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4};
+const int num_4x4_blocks_wide_lookup[BLOCK_SIZE_TYPES] =
+ {1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16};
+const int num_4x4_blocks_high_lookup[BLOCK_SIZE_TYPES] =
+ {1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16};
// Log 2 conversion lookup tables for modeinfo width and height
const int mi_width_log2_lookup[BLOCK_SIZE_TYPES] =
{0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3};
+const int num_8x8_blocks_wide_lookup[BLOCK_SIZE_TYPES] =
+ {1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8};
const int mi_height_log2_lookup[BLOCK_SIZE_TYPES] =
{0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3};
+const int num_8x8_blocks_high_lookup[BLOCK_SIZE_TYPES] =
+ {1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8};
+
+const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES] = {
+ { // 4X4
+ // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
+ PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID,
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+ PARTITION_INVALID
+ }, { // 8X8
+ // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
+ PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID
+ }, { // 16X16
+ // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
+ PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
+ PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID,
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+ PARTITION_INVALID, PARTITION_INVALID
+ }, { // 32X32
+ // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
+ PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
+ PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT,
+ PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID,
+ PARTITION_INVALID, PARTITION_INVALID
+ }, { // 64X64
+ // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
+ PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
+ PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
+ PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ,
+ PARTITION_NONE
+ }
+};
const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES] = {
{ // PARTITION_NONE
diff --git a/vp9/common/vp9_common_data.h b/vp9/common/vp9_common_data.h
index 52c314897..8b0f8a500 100644
--- a/vp9/common/vp9_common_data.h
+++ b/vp9/common/vp9_common_data.h
@@ -17,6 +17,14 @@ extern const int b_width_log2_lookup[BLOCK_SIZE_TYPES];
extern const int b_height_log2_lookup[BLOCK_SIZE_TYPES];
extern const int mi_width_log2_lookup[BLOCK_SIZE_TYPES];
extern const int mi_height_log2_lookup[BLOCK_SIZE_TYPES];
+extern const int num_8x8_blocks_wide_lookup[BLOCK_SIZE_TYPES];
+extern const int num_8x8_blocks_high_lookup[BLOCK_SIZE_TYPES];
+extern const int num_4x4_blocks_high_lookup[BLOCK_SIZE_TYPES];
+extern const int num_4x4_blocks_wide_lookup[BLOCK_SIZE_TYPES];
+extern const PARTITION_TYPE
+ partition_lookup[][BLOCK_SIZE_TYPES];
+
+
extern const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES];
extern const TX_SIZE max_txsize_lookup[BLOCK_SIZE_TYPES];
extern const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZE_TYPES];
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
index 855c5e3de..86f0d0bfd 100644
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -35,7 +35,7 @@ typedef enum BLOCK_SIZE_TYPE {
BLOCK_SIZE_SB32X64, BLOCK_32X64 = BLOCK_SIZE_SB32X64,
BLOCK_SIZE_SB64X32, BLOCK_64X32 = BLOCK_SIZE_SB64X32,
BLOCK_SIZE_SB64X64, BLOCK_64X64 = BLOCK_SIZE_SB64X64,
- BLOCK_SIZE_TYPES, BLOCK_MAX_SB_SEGMENTS = BLOCK_SIZE_TYPES,
+ BLOCK_SIZE_TYPES, BLOCK_MAX_SB_SEGMENTS = BLOCK_SIZE_TYPES
} BLOCK_SIZE_TYPE;
typedef enum PARTITION_TYPE {
@@ -43,7 +43,7 @@ typedef enum PARTITION_TYPE {
PARTITION_HORZ,
PARTITION_VERT,
PARTITION_SPLIT,
- PARTITION_TYPES
+ PARTITION_TYPES, PARTITION_INVALID = PARTITION_TYPES
} PARTITION_TYPE;
#define PARTITION_PLOFFSET 4 // number of probability models per block size
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index d8be8765a..5498b1717 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -87,7 +87,7 @@ void vp9_loop_filter_frame_init(VP9_COMMON *const cm, MACROBLOCKD *const xd,
lf->last_sharpness_level = lf->sharpness_level;
}
- for (seg = 0; seg < MAX_MB_SEGMENTS; seg++) {
+ for (seg = 0; seg < MAX_SEGMENTS; seg++) {
int lvl_seg = default_filt_lvl, ref, mode, intra_lvl;
// Set the baseline filter values for each segment
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index fddf2ce82..e59cc6485 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -31,7 +31,7 @@ typedef struct {
lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
DECLARE_ALIGNED(SIMD_WIDTH, uint8_t,
hev_thr[4][SIMD_WIDTH]);
- uint8_t lvl[MAX_MB_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS];
+ uint8_t lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS];
uint8_t mode_lf_lut[MB_MODE_COUNT];
} loop_filter_info_n;
diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h
index fe8122b46..152046f6f 100644
--- a/vp9/common/vp9_onyx.h
+++ b/vp9/common/vp9_onyx.h
@@ -22,7 +22,7 @@ extern "C"
#include "vpx_scale/yv12config.h"
#include "vp9/common/vp9_ppflags.h"
-#define MAX_MB_SEGMENTS 8
+#define MAX_SEGMENTS 8
typedef int *VP9_PTR;
@@ -200,9 +200,9 @@ extern "C"
int vp9_set_roimap(VP9_PTR comp, unsigned char *map,
unsigned int rows, unsigned int cols,
- int delta_q[MAX_MB_SEGMENTS],
- int delta_lf[MAX_MB_SEGMENTS],
- unsigned int threshold[MAX_MB_SEGMENTS]);
+ int delta_q[MAX_SEGMENTS],
+ int delta_lf[MAX_SEGMENTS],
+ unsigned int threshold[MAX_SEGMENTS]);
int vp9_set_active_map(VP9_PTR comp, unsigned char *map,
unsigned int rows, unsigned int cols);
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 2efdf8fa3..8b76ac711 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -130,10 +130,7 @@ typedef struct VP9Common {
struct scale_factors active_ref_scale[ALLOWED_REFS_PER_FRAME];
int new_fb_idx;
-
YV12_BUFFER_CONFIG post_proc_buffer;
- YV12_BUFFER_CONFIG temp_scale_frame;
-
FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */
FRAME_TYPE frame_type;
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index ea2b0f418..71fca4cb9 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -445,6 +445,6 @@ int vp9_get_segment_id(VP9_COMMON *cm, const uint8_t *segment_ids,
segment_id = MIN(segment_id,
segment_ids[mi_offset + y * cm->mi_cols + x]);
- assert(segment_id >= 0 && segment_id < MAX_MB_SEGMENTS);
+ assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
return segment_id;
}
diff --git a/vp9/common/vp9_seg_common.h b/vp9/common/vp9_seg_common.h
index f072a518d..f22239b92 100644
--- a/vp9/common/vp9_seg_common.h
+++ b/vp9/common/vp9_seg_common.h
@@ -16,8 +16,8 @@
#define SEGMENT_DELTADATA 0
#define SEGMENT_ABSDATA 1
-#define MAX_MB_SEGMENTS 8
-#define MB_SEG_TREE_PROBS (MAX_MB_SEGMENTS-1)
+#define MAX_SEGMENTS 8
+#define SEG_TREE_PROBS (MAX_SEGMENTS-1)
#define PREDICTION_PROBS 3
@@ -27,7 +27,7 @@ typedef enum {
SEG_LVL_ALT_LF = 1, // Use alternate loop filter value...
SEG_LVL_REF_FRAME = 2, // Optional Segment reference frame
SEG_LVL_SKIP = 3, // Optional Segment (0,0) + skip mode
- SEG_LVL_MAX = 4 // Number of MB level features supported
+ SEG_LVL_MAX = 4 // Number of features supported
} SEG_LVL_FEATURES;
@@ -38,11 +38,11 @@ struct segmentation {
uint8_t abs_delta;
uint8_t temporal_update;
- vp9_prob tree_probs[MB_SEG_TREE_PROBS];
+ vp9_prob tree_probs[SEG_TREE_PROBS];
vp9_prob pred_probs[PREDICTION_PROBS];
- int16_t feature_data[MAX_MB_SEGMENTS][SEG_LVL_MAX];
- unsigned int feature_mask[MAX_MB_SEGMENTS];
+ int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX];
+ unsigned int feature_mask[MAX_SEGMENTS];
};
int vp9_segfeature_active(const struct segmentation *seg,
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 6660f5b8e..0fdba805d 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -86,7 +86,7 @@ static void set_segment_id(VP9_COMMON *cm, BLOCK_SIZE_TYPE bsize,
const int ymis = MIN(cm->mi_rows - mi_row, bh);
int x, y;
- assert(segment_id >= 0 && segment_id < MAX_MB_SEGMENTS);
+ assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
for (y = 0; y < ymis; y++)
for (x = 0; x < xmis; x++)
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 61c14b8dc..6f7908ffc 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -400,7 +400,7 @@ static void setup_segmentation(struct segmentation *seg,
// Segmentation map update
seg->update_map = vp9_rb_read_bit(rb);
if (seg->update_map) {
- for (i = 0; i < MB_SEG_TREE_PROBS; i++)
+ for (i = 0; i < SEG_TREE_PROBS; i++)
seg->tree_probs[i] = vp9_rb_read_bit(rb) ? vp9_rb_read_literal(rb, 8)
: MAX_PROB;
@@ -422,7 +422,7 @@ static void setup_segmentation(struct segmentation *seg,
vp9_clearall_segfeatures(seg);
- for (i = 0; i < MAX_MB_SEGMENTS; i++) {
+ for (i = 0; i < MAX_SEGMENTS; i++) {
for (j = 0; j < SEG_LVL_MAX; j++) {
int data = 0;
const int feature_enabled = vp9_rb_read_bit(rb);
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 07cb2b83e..2fede1580 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -459,10 +459,10 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
write_intra_mode(bc, mode, pc->fc.y_mode_prob[MIN(3, bsl)]);
} else {
int idx, idy;
- int bw = 1 << b_width_log2(mi->sb_type);
- int bh = 1 << b_height_log2(mi->sb_type);
- for (idy = 0; idy < 2; idy += bh)
- for (idx = 0; idx < 2; idx += bw) {
+ int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mi->sb_type];
+ int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mi->sb_type];
+ for (idy = 0; idy < 2; idy += num_4x4_blocks_high)
+ for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
const MB_PREDICTION_MODE bm = m->bmi[idy * 2 + idx].as_mode;
write_intra_mode(bc, bm, pc->fc.y_mode_prob[0]);
}
@@ -498,11 +498,11 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
int j;
MB_PREDICTION_MODE blockmode;
int_mv blockmv;
- int bwl = b_width_log2(mi->sb_type), bw = 1 << bwl;
- int bhl = b_height_log2(mi->sb_type), bh = 1 << bhl;
+ int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mi->sb_type];
+ int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mi->sb_type];
int idx, idy;
- for (idy = 0; idy < 2; idy += bh) {
- for (idx = 0; idx < 2; idx += bw) {
+ for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
+ for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
j = idy * 2 + idx;
blockmode = cpi->mb.partition_info->bmi[j].mode;
blockmv = m->bmi[j].as_mv[0];
@@ -563,10 +563,10 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
write_intra_mode(bc, ym, vp9_kf_y_mode_prob[A][L]);
} else {
int idx, idy;
- int bw = 1 << b_width_log2(m->mbmi.sb_type);
- int bh = 1 << b_height_log2(m->mbmi.sb_type);
- for (idy = 0; idy < 2; idy += bh) {
- for (idx = 0; idx < 2; idx += bw) {
+ int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[m->mbmi.sb_type];
+ int num_4x4_blocks_high = num_4x4_blocks_high_lookup[m->mbmi.sb_type];
+ for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
+ for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
int i = idy * 2 + idx;
const MB_PREDICTION_MODE A = above_block_mode(m, i, mis);
const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
@@ -619,7 +619,6 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *xd = &cpi->mb.e_mbd;
const int mis = cm->mode_info_stride;
- int bwl, bhl;
int bsl = b_width_log2(bsize);
int bs = (1 << bsl) / 4; // mode_info step for subsize
int n;
@@ -629,20 +628,7 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
- bwl = b_width_log2(m->mbmi.sb_type);
- bhl = b_height_log2(m->mbmi.sb_type);
-
- // parse the partition type
- if ((bwl == bsl) && (bhl == bsl))
- partition = PARTITION_NONE;
- else if ((bwl == bsl) && (bhl < bsl))
- partition = PARTITION_HORZ;
- else if ((bwl < bsl) && (bhl == bsl))
- partition = PARTITION_VERT;
- else if ((bwl < bsl) && (bhl < bsl))
- partition = PARTITION_SPLIT;
- else
- assert(0);
+ partition = partition_lookup[bsl][m->mbmi.sb_type];
if (bsize < BLOCK_SIZE_SB8X8)
if (xd->ab_index > 0)
@@ -1011,7 +997,7 @@ static void encode_segmentation(VP9_COMP *cpi,
// Select the coding strategy (temporal or spatial)
vp9_choose_segmap_coding_method(cpi);
// Write out probabilities used to decode unpredicted macro-block segments
- for (i = 0; i < MB_SEG_TREE_PROBS; i++) {
+ for (i = 0; i < SEG_TREE_PROBS; i++) {
const int prob = seg->tree_probs[i];
const int update = prob != MAX_PROB;
vp9_wb_write_bit(wb, update);
@@ -1037,7 +1023,7 @@ static void encode_segmentation(VP9_COMP *cpi,
if (seg->update_data) {
vp9_wb_write_bit(wb, seg->abs_delta);
- for (i = 0; i < MAX_MB_SEGMENTS; i++) {
+ for (i = 0; i < MAX_SEGMENTS; i++) {
for (j = 0; j < SEG_LVL_MAX; j++) {
const int active = vp9_segfeature_active(seg, i, j);
vp9_wb_write_bit(wb, active);
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index d6882d585..4b49b17a2 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -96,6 +96,7 @@ struct macroblock {
signed int act_zbin_adj;
int mv_best_ref_index[MAX_REF_FRAMES];
+ unsigned int max_mv_context[MAX_REF_FRAMES];
int nmvjointcost[MV_JOINTS];
int nmvcosts[2][MV_VALS];
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 2e7cb291d..502308766 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -323,7 +323,8 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
int mb_mode_index = ctx->best_mode_index;
const int mis = cpi->common.mode_info_stride;
- const int bh = 1 << mi_height_log2(bsize), bw = 1 << mi_width_log2(bsize);
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
assert(mi->mbmi.mode < MB_MODE_COUNT);
assert(mb_mode_index < MAX_MODES);
@@ -333,10 +334,10 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
// Restore the coding context of the MB to that that was in place
// when the mode was picked for it
- for (y = 0; y < bh; y++) {
- for (x_idx = 0; x_idx < bw; x_idx++) {
- if ((xd->mb_to_right_edge >> (3 + LOG2_MI_SIZE)) + bw > x_idx
- && (xd->mb_to_bottom_edge >> (3 + LOG2_MI_SIZE)) + bh > y) {
+ for (y = 0; y < mi_height; y++) {
+ for (x_idx = 0; x_idx < mi_width; x_idx++) {
+ if ((xd->mb_to_right_edge >> (3 + LOG2_MI_SIZE)) + mi_width > x_idx
+ && (xd->mb_to_bottom_edge >> (3 + LOG2_MI_SIZE)) + mi_height > y) {
MODE_INFO *mi_addr = xd->mode_info_context + x_idx + y * mis;
*mi_addr = *mi;
}
@@ -412,10 +413,10 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
if (bsize > BLOCK_SIZE_SB8X8 && mbmi->mode == NEWMV) {
int i, j;
- for (j = 0; j < bh; ++j)
- for (i = 0; i < bw; ++i)
- if ((xd->mb_to_right_edge >> (3 + LOG2_MI_SIZE)) + bw > i
- && (xd->mb_to_bottom_edge >> (3 + LOG2_MI_SIZE)) + bh > j)
+ for (j = 0; j < mi_height; ++j)
+ for (i = 0; i < mi_width; ++i)
+ if ((xd->mb_to_right_edge >> (3 + LOG2_MI_SIZE)) + mi_width > i
+ && (xd->mb_to_bottom_edge >> (3 + LOG2_MI_SIZE)) + mi_height > j)
xd->mode_info_context[mis * j + i].mbmi = *mbmi;
}
@@ -459,7 +460,8 @@ static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col,
MB_MODE_INFO *mbmi;
const int dst_fb_idx = cm->new_fb_idx;
const int idx_str = xd->mode_info_stride * mi_row + mi_col;
- const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize);
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
const int mb_row = mi_row >> 1;
const int mb_col = mi_col >> 1;
const int idx_map = mb_row * cm->mb_cols + mb_col;
@@ -496,13 +498,13 @@ static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col,
x->mv_row_min = -((mi_row * MI_SIZE)+ VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
x->mv_col_min = -((mi_col * MI_SIZE)+ VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
x->mv_row_max = ((cm->mi_rows - mi_row) * MI_SIZE
- + (VP9BORDERINPIXELS - MI_SIZE * bh - VP9_INTERP_EXTEND));
+ + (VP9BORDERINPIXELS - MI_SIZE * mi_height - VP9_INTERP_EXTEND));
x->mv_col_max = ((cm->mi_cols - mi_col) * MI_SIZE
- + (VP9BORDERINPIXELS - MI_SIZE * bw - VP9_INTERP_EXTEND));
+ + (VP9BORDERINPIXELS - MI_SIZE * mi_width - VP9_INTERP_EXTEND));
// Set up distance of MB to edge of frame in 1/8th pel units
- assert(!(mi_col & (bw - 1)) && !(mi_row & (bh - 1)));
- set_mi_row_col(cm, xd, mi_row, bh, mi_col, bw);
+ assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
+ set_mi_row_col(cm, xd, mi_row, mi_height, mi_col, mi_width);
/* set up source buffers */
vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
@@ -676,23 +678,27 @@ static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col,
MACROBLOCK * const x = &cpi->mb;
MACROBLOCKD * const xd = &x->e_mbd;
int p;
- int bwl = b_width_log2(bsize), bw = 1 << bwl;
- int bhl = b_height_log2(bsize), bh = 1 << bhl;
- int mwl = mi_width_log2(bsize), mw = 1 << mwl;
- int mhl = mi_height_log2(bsize), mh = 1 << mhl;
+ int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
+ int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+ int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ int mi_height = num_8x8_blocks_high_lookup[bsize];
for (p = 0; p < MAX_MB_PLANE; p++) {
vpx_memcpy(
cm->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
- a + bw * p, sizeof(ENTROPY_CONTEXT) * bw >> xd->plane[p].subsampling_x);
+ a + num_4x4_blocks_wide * p,
+ (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
+ xd->plane[p].subsampling_x);
vpx_memcpy(
cm->left_context[p]
- + ((mi_row & MI_MASK)* 2 >> xd->plane[p].subsampling_y),l + bh * p,
- sizeof(ENTROPY_CONTEXT) * bh >> xd->plane[p].subsampling_y);
- }
+ + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
+ l + num_4x4_blocks_high * p,
+ (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
+ xd->plane[p].subsampling_y);
+ }
vpx_memcpy(cm->above_seg_context + mi_col, sa,
- sizeof(PARTITION_CONTEXT) * mw);
+ sizeof(PARTITION_CONTEXT) * mi_width);
vpx_memcpy(cm->left_seg_context + (mi_row & MI_MASK), sl,
- sizeof(PARTITION_CONTEXT) * mh);
+ sizeof(PARTITION_CONTEXT) * mi_height);
}
static void save_context(VP9_COMP *cpi, int mi_row, int mi_col,
ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
@@ -703,27 +709,30 @@ static void save_context(VP9_COMP *cpi, int mi_row, int mi_col,
MACROBLOCK * const x = &cpi->mb;
MACROBLOCKD * const xd = &x->e_mbd;
int p;
- int bwl = b_width_log2(bsize), bw = 1 << bwl;
- int bhl = b_height_log2(bsize), bh = 1 << bhl;
- int mwl = mi_width_log2(bsize), mw = 1 << mwl;
- int mhl = mi_height_log2(bsize), mh = 1 << mhl;
+ int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
+ int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+ int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ int mi_height = num_8x8_blocks_high_lookup[bsize];
// buffer the above/left context information of the block in search.
for (p = 0; p < MAX_MB_PLANE; ++p) {
vpx_memcpy(
- a + bw * p,
+ a + num_4x4_blocks_wide * p,
cm->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
- sizeof(ENTROPY_CONTEXT) * bw >> xd->plane[p].subsampling_x);
+ (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
+ xd->plane[p].subsampling_x);
vpx_memcpy(
- l + bh * p,
+ l + num_4x4_blocks_high * p,
cm->left_context[p]
- + ((mi_row & MI_MASK)* 2 >> xd->plane[p].subsampling_y),sizeof(ENTROPY_CONTEXT) * bh >> xd->plane[p].subsampling_y);
- }
+ + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
+ (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
+ xd->plane[p].subsampling_y);
+ }
vpx_memcpy(sa, cm->above_seg_context + mi_col,
- sizeof(PARTITION_CONTEXT) * mw);
+ sizeof(PARTITION_CONTEXT) * mi_width);
vpx_memcpy(sl, cm->left_seg_context + (mi_row & MI_MASK),
- sizeof(PARTITION_CONTEXT) * mh)
- ;}
+ sizeof(PARTITION_CONTEXT) * mi_height);
+}
static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col,
int output_enabled, BLOCK_SIZE_TYPE bsize, int sub_index) {
@@ -759,8 +768,10 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col,
MACROBLOCKD * const xd = &x->e_mbd;
BLOCK_SIZE_TYPE c1 = BLOCK_SIZE_SB8X8;
const int bsl = b_width_log2(bsize), bs = (1 << bsl) / 4;
- int bwl, bhl;
int UNINITIALIZED_IS_SAFE(pl);
+ PARTITION_TYPE partition;
+ BLOCK_SIZE_TYPE subsize;
+ int i;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
@@ -771,44 +782,46 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col,
pl = partition_plane_context(xd, bsize);
c1 = *(get_sb_partitioning(x, bsize));
}
+ partition = partition_lookup[bsl][c1];
- bwl = b_width_log2(c1), bhl = b_height_log2(c1);
-
- if (bsl == bwl && bsl == bhl) {
- if (output_enabled && bsize >= BLOCK_SIZE_SB8X8)
- cpi->partition_count[pl][PARTITION_NONE]++;
- encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, -1);
- } else if (bsl == bhl && bsl > bwl) {
- if (output_enabled)
- cpi->partition_count[pl][PARTITION_VERT]++;
- encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, 0);
- encode_b(cpi, tp, mi_row, mi_col + bs, output_enabled, c1, 1);
- } else if (bsl == bwl && bsl > bhl) {
- if (output_enabled)
- cpi->partition_count[pl][PARTITION_HORZ]++;
- encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, 0);
- encode_b(cpi, tp, mi_row + bs, mi_col, output_enabled, c1, 1);
- } else {
- BLOCK_SIZE_TYPE subsize;
- int i;
-
- assert(bwl < bsl && bhl < bsl);
- subsize = get_subsize(bsize, PARTITION_SPLIT);
+ switch (partition) {
+ case PARTITION_NONE:
+ if (output_enabled && bsize >= BLOCK_SIZE_SB8X8)
+ cpi->partition_count[pl][PARTITION_NONE]++;
+ encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, -1);
+ break;
+ case PARTITION_VERT:
+ if (output_enabled)
+ cpi->partition_count[pl][PARTITION_VERT]++;
+ encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, 0);
+ encode_b(cpi, tp, mi_row, mi_col + bs, output_enabled, c1, 1);
+ break;
+ case PARTITION_HORZ:
+ if (output_enabled)
+ cpi->partition_count[pl][PARTITION_HORZ]++;
+ encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, 0);
+ encode_b(cpi, tp, mi_row + bs, mi_col, output_enabled, c1, 1);
+ break;
+ case PARTITION_SPLIT:
+ subsize = get_subsize(bsize, PARTITION_SPLIT);
- if (output_enabled)
- cpi->partition_count[pl][PARTITION_SPLIT]++;
+ if (output_enabled)
+ cpi->partition_count[pl][PARTITION_SPLIT]++;
- for (i = 0; i < 4; i++) {
- const int x_idx = i & 1, y_idx = i >> 1;
+ for (i = 0; i < 4; i++) {
+ const int x_idx = i & 1, y_idx = i >> 1;
- *(get_sb_index(xd, subsize)) = i;
- encode_sb(cpi, tp, mi_row + y_idx * bs, mi_col + x_idx * bs,
- output_enabled, subsize);
- }
+ *(get_sb_index(xd, subsize)) = i;
+ encode_sb(cpi, tp, mi_row + y_idx * bs, mi_col + x_idx * bs,
+ output_enabled, subsize);
+ }
+ break;
+ default:
+ assert(0);
+ break;
}
- if (bsize >= BLOCK_SIZE_SB8X8
- && (bsize == BLOCK_SIZE_SB8X8 || bsl == bwl || bsl == bhl)) {
+ if (partition != PARTITION_SPLIT || bsize == BLOCK_SIZE_SB8X8) {
set_partition_seg_context(cm, xd, mi_row, mi_col);
update_partition_context(xd, c1, bsize);
}
@@ -1159,13 +1172,11 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
MACROBLOCK * const x = &cpi->mb;
MACROBLOCKD *xd = &cpi->mb.e_mbd;
const int mis = cm->mode_info_stride;
- int bwl = b_width_log2(m->mbmi.sb_type);
- int bhl = b_height_log2(m->mbmi.sb_type);
int bsl = b_width_log2(bsize);
- int bs = (1 << bsl);
- int bh = (1 << bhl);
- int ms = bs / 2;
- int mh = bh / 2;
+ int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
+ int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+ int ms = num_4x4_blocks_wide / 2;
+ int mh = num_4x4_blocks_high / 2;
int bss = (1 << bsl) / 4;
int i, pl;
PARTITION_TYPE partition = PARTITION_NONE;
@@ -1187,17 +1198,7 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
- // parse the partition type
- if ((bwl == bsl) && (bhl == bsl))
- partition = PARTITION_NONE;
- else if ((bwl == bsl) && (bhl < bsl))
- partition = PARTITION_HORZ;
- else if ((bwl < bsl) && (bhl == bsl))
- partition = PARTITION_VERT;
- else if ((bwl < bsl) && (bhl < bsl))
- partition = PARTITION_SPLIT;
- else
- assert(0);
+ partition = partition_lookup[bsl][bs_type];
subsize = get_subsize(bsize, partition);
@@ -1340,8 +1341,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
// Split partition.
for (i = 0; i < 4; i++) {
- int x_idx = (i & 1) * (bs >> 2);
- int y_idx = (i >> 1) * (bs >> 2);
+ int x_idx = (i & 1) * (num_4x4_blocks_wide >> 2);
+ int y_idx = (i >> 1) * (num_4x4_blocks_wide >> 2);
int rt = 0;
int64_t dt = 0;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
@@ -2468,10 +2469,12 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) {
++cpi->y_mode_count[MIN(bsl, 3)][m];
} else {
int idx, idy;
- int bw = 1 << b_width_log2(xd->mode_info_context->mbmi.sb_type);
- int bh = 1 << b_height_log2(xd->mode_info_context->mbmi.sb_type);
- for (idy = 0; idy < 2; idy += bh) {
- for (idx = 0; idx < 2; idx += bw) {
+ int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[
+ xd->mode_info_context->mbmi.sb_type];
+ int num_4x4_blocks_high = num_4x4_blocks_high_lookup[
+ xd->mode_info_context->mbmi.sb_type];
+ for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
+ for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
int m = xd->mode_info_context->bmi[idy * 2 + idx].as_mode;
++cpi->y_mode_count[0][m];
}
@@ -2509,8 +2512,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
MB_MODE_INFO *mbmi = &mi->mbmi;
unsigned int segment_id = mbmi->segment_id;
const int mis = cm->mode_info_stride;
- const int bwl = mi_width_log2(bsize);
- const int bw = 1 << bwl, bh = 1 << mi_height_log2(bsize);
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
x->rd_search = 0;
x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame &&
xd->q_index < QIDX_SKIP_THRESH);
@@ -2635,8 +2638,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
sz = TX_4X4;
}
- for (y = 0; y < bh; y++) {
- for (x = 0; x < bw; x++) {
+ for (y = 0; y < mi_height; y++) {
+ for (x = 0; x < mi_width; x++) {
if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows) {
mi[mis * y + x].mbmi.txfm_size = sz;
}
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 6a918926d..710417948 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -441,7 +441,7 @@ void xform_quant(int plane, int block, BLOCK_SIZE_TYPE bsize,
TX_TYPE tx_type;
const int16_t *scan, *iscan;
uint16_t *eob = &pd->eobs[block];
- const int bwl = b_width_log2(bsize) - pd->subsampling_x, bw = 1 << bwl;
+ const int bwl = plane_block_width_log2by4(bsize, pd), bw = 1 << bwl;
const int twl = bwl - tx_size, twmask = (1 << twl) - 1;
int xoff, yoff;
int16_t *src_diff;
@@ -533,6 +533,8 @@ static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
if (x->skip_encode)
return;
+ if (pd->eobs[block] == 0)
+ return;
switch (ss_txfrm_size / 2) {
case TX_32X32:
@@ -657,7 +659,7 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
- if (!x->skip_encode)
+ if (!x->skip_encode && *eob)
vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride);
break;
case TX_16X16:
@@ -682,7 +684,7 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
- if (!x->skip_encode) {
+ if (!x->skip_encode && *eob) {
if (tx_type == DCT_DCT)
vp9_short_idct16x16_add(dqcoeff, dst, pd->dst.stride);
else
@@ -711,7 +713,7 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
- if (!x->skip_encode) {
+ if (!x->skip_encode && *eob) {
if (tx_type == DCT_DCT)
vp9_short_idct8x8_add(dqcoeff, dst, pd->dst.stride);
else
@@ -743,7 +745,7 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
- if (!x->skip_encode) {
+ if (!x->skip_encode && *eob) {
if (tx_type == DCT_DCT)
// this is like vp9_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index 500f57442..f0c34b373 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -486,11 +486,11 @@ void vp9_encode_mv(VP9_COMP* cpi, vp9_writer* w,
if (mv_joint_horizontal(j))
encode_mv_component(w, diff.col, &mvctx->comps[1], usehp);
- // If auto_mv_step_size is enabled and it is an arf/non shown frame
- // then keep track of the largest motion vector component used.
- if (cpi->sf.auto_mv_step_size && !cpi->common.show_frame) {
- cpi->max_mv_magnitude = MAX((MAX(abs(mv->row), abs(mv->col)) >> 3),
- cpi->max_mv_magnitude);
+ // If auto_mv_step_size is enabled then keep track of the largest
+ // motion vector component used.
+ if (!cpi->dummy_packing && cpi->sf.auto_mv_step_size) {
+ unsigned int maxv = MAX(abs(mv->row), abs(mv->col)) >> 3;
+ cpi->max_mv_magnitude = MAX(maxv, cpi->max_mv_magnitude);
}
}
@@ -513,14 +513,14 @@ void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x,
MODE_INFO *mi = x->e_mbd.mode_info_context;
MB_MODE_INFO *const mbmi = &mi->mbmi;
MV diff;
- const int bw = 1 << b_width_log2(mbmi->sb_type);
- const int bh = 1 << b_height_log2(mbmi->sb_type);
+ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
int idx, idy;
if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
PARTITION_INFO *pi = x->partition_info;
- for (idy = 0; idy < 2; idy += bh) {
- for (idx = 0; idx < 2; idx += bw) {
+ for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
+ for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
const int i = idy * 2 + idx;
if (pi->bmi[i].mode == NEWMV) {
diff.row = mi->bmi[i].as_mv[0].as_mv.row - best_ref_mv->as_mv.row;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 5b7bed463..0be98913e 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -19,11 +19,13 @@
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_common.h"
+// #define NEW_DIAMOND_SEARCH
+
void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv) {
int col_min = (ref_mv->as_mv.col >> 3) - MAX_FULL_PEL_VAL +
- ((ref_mv->as_mv.col & 7) ? 1 : 0);
+ ((ref_mv->as_mv.col & 7) ? 1 : 0);
int row_min = (ref_mv->as_mv.row >> 3) - MAX_FULL_PEL_VAL +
- ((ref_mv->as_mv.row & 7) ? 1 : 0);
+ ((ref_mv->as_mv.row & 7) ? 1 : 0);
int col_max = (ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL;
int row_max = (ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL;
@@ -1511,12 +1513,13 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x,
this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
- if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
- (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
-
- {
+ if ((this_col_offset > x->mv_col_min) &&
+ (this_col_offset < x->mv_col_max) &&
+ (this_row_offset > x->mv_row_min) &&
+ (this_row_offset < x->mv_row_max)) {
check_here = ss[i].offset + best_address;
- thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
+ bestsad);
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
@@ -1539,6 +1542,34 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x,
best_mv->as_mv.col += ss[best_site].mv.col;
best_address += ss[best_site].offset;
last_site = best_site;
+#if defined(NEW_DIAMOND_SEARCH)
+ while (1) {
+ this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row;
+ this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col;
+ if ((this_col_offset > x->mv_col_min) &&
+ (this_col_offset < x->mv_col_max) &&
+ (this_row_offset > x->mv_row_min) &&
+ (this_row_offset < x->mv_row_max)) {
+ check_here = ss[best_site].offset + best_address;
+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
+ bestsad);
+ if (thissad < bestsad) {
+ this_mv.as_mv.row = this_row_offset;
+ this_mv.as_mv.col = this_col_offset;
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ best_mv->as_mv.row += ss[best_site].mv.row;
+ best_mv->as_mv.col += ss[best_site].mv.col;
+ best_address += ss[best_site].offset;
+ continue;
+ }
+ }
+ }
+ break;
+ };
+#endif
} else if (best_address == in_what)
(*num00)++;
}
@@ -1680,12 +1711,39 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x,
i++;
}
}
-
if (best_site != last_site) {
best_mv->as_mv.row += ss[best_site].mv.row;
best_mv->as_mv.col += ss[best_site].mv.col;
best_address += ss[best_site].offset;
last_site = best_site;
+#if defined(NEW_DIAMOND_SEARCH)
+ while (1) {
+ this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row;
+ this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col;
+ if ((this_col_offset > x->mv_col_min) &&
+ (this_col_offset < x->mv_col_max) &&
+ (this_row_offset > x->mv_row_min) &&
+ (this_row_offset < x->mv_row_max)) {
+ check_here = ss[best_site].offset + best_address;
+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
+ bestsad);
+ if (thissad < bestsad) {
+ this_mv.as_mv.row = this_row_offset;
+ this_mv.as_mv.col = this_col_offset;
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ best_mv->as_mv.row += ss[best_site].mv.row;
+ best_mv->as_mv.col += ss[best_site].mv.col;
+ best_address += ss[best_site].offset;
+ continue;
+ }
+ }
+ }
+ break;
+ };
+#endif
} else if (best_address == in_what)
(*num00)++;
}
@@ -1706,6 +1764,7 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x,
/* do_refine: If last step (1-away) of n-step search doesn't pick the center
point as the best match, we will do a final 1-away diamond
refining search */
+
int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x,
int_mv *mvp_full, int step_param,
int sadpb, int further_steps,
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index fe276fa6b..7b50e076e 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -706,12 +706,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
cpi->mode_chosen_counts[i] = 0;
}
- // Initialize cpi->max_mv_magnitude if appropriate.
- if ((cpi->common.frame_type == KEY_FRAME) || cpi->common.intra_only ||
- (cpi->common.show_frame == 0)) {
- cpi->max_mv_magnitude = 0;
- }
-
// best quality defaults
sf->RD = 1;
sf->search_method = NSTEP;
@@ -773,7 +767,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
#else
sf->static_segmentation = 0;
#endif
- sf->auto_mv_step_size = 1;
sf->use_avoid_tested_higherror = 1;
sf->adaptive_rd_thresh = 1;
sf->last_chroma_intra_mode = TM_PRED;
@@ -798,6 +791,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->last_chroma_intra_mode = H_PRED;
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
+ sf->auto_mv_step_size = 1;
}
if (speed == 2) {
sf->adjust_thresholds_by_speed = 1;
@@ -824,6 +818,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->using_small_partition_info = 1;
sf->disable_splitmv =
(MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0;
+ sf->auto_mv_step_size = 1;
}
if (speed == 3) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -840,6 +835,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
sf->disable_splitmv = 1;
+ sf->auto_mv_step_size = 1;
}
if (speed == 4) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -856,6 +852,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
FLAG_SKIP_COMP_REFMISMATCH;
sf->use_rd_breakout = 1;
sf->optimize_coefficients = 0;
+ sf->auto_mv_step_size = 1;
// sf->reduce_first_step_size = 1;
// sf->reference_masking = 1;
@@ -1222,7 +1219,7 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
{
int i;
- for (i = 0; i < MAX_MB_SEGMENTS; i++)
+ for (i = 0; i < MAX_SEGMENTS; i++)
cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout;
}
@@ -2515,6 +2512,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
int undershoot_seen = 0;
SPEED_FEATURES *sf = &cpi->sf;
+ unsigned int max_mv_def = MIN(cpi->common.width, cpi->common.height);
#if RESET_FOREACH_FILTER
int q_low0;
int q_high0;
@@ -2587,6 +2585,24 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// Set default state for segment based loop filter update flags
xd->lf.mode_ref_delta_update = 0;
+ // Initialize cpi->mv_step_param to default based on max resolution
+ cpi->mv_step_param = vp9_init_search_range(cpi, max_mv_def);
+ // Initialize cpi->max_mv_magnitude and cpi->mv_step_param if appropriate.
+ if (sf->auto_mv_step_size) {
+ if ((cpi->common.frame_type == KEY_FRAME) || cpi->common.intra_only) {
+ // initialize max_mv_magnitude for use in the first INTER frame
+ // after a key/intra-only frame
+ cpi->max_mv_magnitude = max_mv_def;
+ } else {
+ if (cm->show_frame)
+ // allow mv_steps to correspond to twice the max mv magnitude found
+ // in the previous frame, capped by the default max_mv_magnitude based
+ // on resolution
+ cpi->mv_step_param = vp9_init_search_range(
+ cpi, MIN(max_mv_def, 2 * cpi->max_mv_magnitude));
+ cpi->max_mv_magnitude = 0;
+ }
+ }
// Set various flags etc to special state if it is a key frame
if (cm->frame_type == KEY_FRAME) {
@@ -3444,15 +3460,24 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
cm->last_width = cm->width;
cm->last_height = cm->height;
- // Don't increment frame counters if this was an altref buffer
- // update not a real frame
+ // reset to normal state now that we are done.
cm->last_show_frame = cm->show_frame;
if (cm->show_frame) {
+ // current mip will be the prev_mip for the next frame
+ MODE_INFO *temp = cm->prev_mip;
+ cm->prev_mip = cm->mip;
+ cm->mip = temp;
+
+ // update the upper left visible macroblock ptrs
+ cm->mi = cm->mip + cm->mode_info_stride + 1;
+
+ // Don't increment frame counters if this was an altref buffer
+ // update not a real frame
++cm->current_video_frame;
++cpi->frames_since_key;
}
-
- // reset to normal state now that we are done.
+ // restore prev_mi
+ cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1;
#if 0
{
@@ -3470,17 +3495,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
vp9_write_yuv_rec_frame(cm);
#endif
- if (cm->show_frame) {
- vpx_memcpy(cm->prev_mip, cm->mip,
- cm->mode_info_stride * (cm->mi_rows + MI_BLOCK_SIZE) *
- sizeof(MODE_INFO));
- } else {
- vpx_memset(cm->prev_mip, 0,
- cm->mode_info_stride * (cm->mi_rows + MI_BLOCK_SIZE) *
- sizeof(MODE_INFO));
- }
- // restore prev_mi
- cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1;
}
static void Pass2Encode(VP9_COMP *cpi, unsigned long *size,
@@ -3973,11 +3987,11 @@ int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest,
}
int vp9_set_roimap(VP9_PTR comp, unsigned char *map, unsigned int rows,
- unsigned int cols, int delta_q[MAX_MB_SEGMENTS],
- int delta_lf[MAX_MB_SEGMENTS],
- unsigned int threshold[MAX_MB_SEGMENTS]) {
+ unsigned int cols, int delta_q[MAX_SEGMENTS],
+ int delta_lf[MAX_SEGMENTS],
+ unsigned int threshold[MAX_SEGMENTS]) {
VP9_COMP *cpi = (VP9_COMP *) comp;
- signed char feature_data[SEG_LVL_MAX][MAX_MB_SEGMENTS];
+ signed char feature_data[SEG_LVL_MAX][MAX_SEGMENTS];
MACROBLOCKD *xd = &cpi->mb.e_mbd;
int i;
@@ -3996,14 +4010,14 @@ int vp9_set_roimap(VP9_PTR comp, unsigned char *map, unsigned int rows,
vp9_enable_segmentation((VP9_PTR)cpi);
// Set up the quan, LF and breakout threshold segment data
- for (i = 0; i < MAX_MB_SEGMENTS; i++) {
+ for (i = 0; i < MAX_SEGMENTS; i++) {
feature_data[SEG_LVL_ALT_Q][i] = delta_q[i];
feature_data[SEG_LVL_ALT_LF][i] = delta_lf[i];
cpi->segment_encode_breakout[i] = threshold[i];
}
// Enable the loop and quant changes in the feature mask
- for (i = 0; i < MAX_MB_SEGMENTS; i++) {
+ for (i = 0; i < MAX_SEGMENTS; i++) {
if (delta_q[i])
vp9_enable_segfeature(&xd->seg, i, SEG_LVL_ALT_Q);
else
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 2c65fecd1..0798927bd 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -504,6 +504,7 @@ typedef struct VP9_COMP {
int error_bins[1024];
unsigned int max_mv_magnitude;
+ int mv_step_param;
// Data used for real time conferencing mode to help determine if it would be good to update the gf
int inter_zz_count;
@@ -513,7 +514,7 @@ typedef struct VP9_COMP {
unsigned char *segmentation_map;
// segment threashold for encode breakout
- int segment_encode_breakout[MAX_MB_SEGMENTS];
+ int segment_encode_breakout[MAX_SEGMENTS];
unsigned char *active_map;
unsigned int active_map_enabled;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index d52091c70..9c6f9f8db 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -463,10 +463,8 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
BLOCK_SIZE_TYPE bs = BLOCK_SIZE_AB4X4;
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &xd->plane[0];
- const int bwl = plane_block_width_log2by4(bsize, pd);
- const int bhl = plane_block_height_log2by4(bsize, pd);
- const int bw = 4 << bwl;
- const int bh = 4 << bhl;
+ const int width = plane_block_width(bsize, pd);
+ const int height = plane_block_height(bsize, pd);
int rate_sum = 0;
int64_t dist_sum = 0;
@@ -485,10 +483,9 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
} else {
assert(0);
}
- assert(bs <= get_block_size(bwl, bhl));
*out_skip = 1;
- for (j = 0; j < bh; j+=t) {
- for (k = 0; k < bw; k+=t) {
+ for (j = 0; j < height; j += t) {
+ for (k = 0; k < width; k += t) {
int rate;
int64_t dist;
unsigned int sse;
@@ -711,8 +708,8 @@ static void rate_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane,
BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
MACROBLOCKD * const xd = &x->e_mbd;
- const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
- const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
+ const int bwl = plane_block_width_log2by4(bsize, &xd->plane[plane]);
+ const int bhl = plane_block_height_log2by4(bsize, &xd->plane[plane]);
const int bw = 1 << bwl, bh = 1 << bhl;
struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh,
0, 0, 0, INT64_MAX, 0 };
@@ -802,8 +799,8 @@ static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[0];
- const int bwl = b_width_log2(bsize) - xd->plane[0].subsampling_x;
- const int bhl = b_height_log2(bsize) - xd->plane[0].subsampling_y;
+ const int bwl = plane_block_width_log2by4(bsize, pd);
+ const int bhl = plane_block_height_log2by4(bsize, pd);
const int bw = 1 << bwl, bh = 1 << bhl;
struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh,
0, 0, 0, ref_best_rd, 0 };
@@ -1185,8 +1182,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
ENTROPY_CONTEXT tl[2], templ[2];
TX_TYPE tx_type = DCT_DCT;
TX_TYPE best_tx_type = DCT_DCT;
- int bw = 1 << b_width_log2(bsize);
- int bh = 1 << b_height_log2(bsize);
+ int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
+ int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
int idx, idy, block;
DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);
@@ -1212,8 +1209,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
vpx_memcpy(tempa, ta, sizeof(ta));
vpx_memcpy(templ, tl, sizeof(tl));
- for (idy = 0; idy < bh; ++idy) {
- for (idx = 0; idx < bw; ++idx) {
+ for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
+ for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
int64_t ssz;
block = ib + idy * 2 + idx;
@@ -1270,8 +1267,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
best_tx_type = tx_type;
vpx_memcpy(a, tempa, sizeof(tempa));
vpx_memcpy(l, templ, sizeof(templ));
- for (idy = 0; idy < bh; ++idy) {
- for (idx = 0; idx < bw; ++idx) {
+ for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
+ for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
block = ib + idy * 2 + idx;
vpx_memcpy(best_dqcoeff[idy * 2 + idx],
BLOCK_OFFSET(pd->dqcoeff, block, 16),
@@ -1284,8 +1281,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
if (x->skip_encode)
return best_rd;
- for (idy = 0; idy < bh; ++idy) {
- for (idx = 0; idx < bw; ++idx) {
+ for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
+ for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
block = ib + idy * 2 + idx;
xd->mode_info_context->bmi[block].as_mode = *best_mode;
src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
@@ -1317,8 +1314,8 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
int i, j;
MACROBLOCKD *const xd = &mb->e_mbd;
BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
- int bw = 1 << b_width_log2(bsize);
- int bh = 1 << b_height_log2(bsize);
+ int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
+ int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
int cost = 0;
int64_t distortion = 0;
@@ -1333,8 +1330,8 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
bmode_costs = mb->mbmode_cost;
- for (idy = 0; idy < 2; idy += bh) {
- for (idx = 0; idx < 2; idx += bw) {
+ for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
+ for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
const int mis = xd->mode_info_stride;
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
@@ -1357,9 +1354,9 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
tot_rate_y += ry;
mic->bmi[i].as_mode = best_mode;
- for (j = 1; j < bh; ++j)
+ for (j = 1; j < num_4x4_blocks_high; ++j)
mic->bmi[i + j * 2].as_mode = best_mode;
- for (j = 1; j < bw; ++j)
+ for (j = 1; j < num_4x4_blocks_wide; ++j)
mic->bmi[i + j].as_mode = best_mode;
if (total_rd >= best_rd)
@@ -1599,8 +1596,8 @@ static int labels2mode(MACROBLOCK *x, int i,
MB_MODE_INFO * mbmi = &mic->mbmi;
int cost = 0, thismvcost = 0;
int idx, idy;
- int bw = 1 << b_width_log2(mbmi->sb_type);
- int bh = 1 << b_height_log2(mbmi->sb_type);
+ int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
/* We have to be careful retrieving previously-encoded motion vectors.
Ones from this macroblock have to be pulled from the BLOCKD array
@@ -1650,8 +1647,8 @@ static int labels2mode(MACROBLOCK *x, int i,
mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
x->partition_info->bmi[i].mode = m;
- for (idy = 0; idy < bh; ++idy)
- for (idx = 0; idx < bw; ++idx)
+ for (idy = 0; idy < num_4x4_blocks_high; ++idy)
+ for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
&mic->bmi[i], sizeof(mic->bmi[i]));
@@ -1671,10 +1668,8 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
- const int bwl = plane_block_width_log2by4(bsize, &xd->plane[0]);
- const int bhl = plane_block_height_log2by4(bsize, &xd->plane[0]);
- const int bw = 4 << bwl;
- const int bh = 4 << bhl;
+ const int width = plane_block_width(bsize, &xd->plane[0]);
+ const int height = plane_block_height(bsize, &xd->plane[0]);
int idx, idy;
const int src_stride = x->plane[0].src.stride;
uint8_t* const src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
@@ -1698,7 +1693,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
xd->plane[0].dst.stride,
&xd->mode_info_context->bmi[i].as_mv[0],
&xd->scale_factor[0],
- bw, bh, 0 /* no avg */, &xd->subpix,
+ width, height, 0, &xd->subpix,
MV_PRECISION_Q3);
if (xd->mode_info_context->mbmi.ref_frame[1] > 0) {
@@ -1709,17 +1704,18 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride,
dst, xd->plane[0].dst.stride,
&xd->mode_info_context->bmi[i].as_mv[1],
- &xd->scale_factor[1], bw, bh, 1,
+ &xd->scale_factor[1],
+ width, height, 1,
&xd->subpix, MV_PRECISION_Q3);
}
- vp9_subtract_block(bh, bw, src_diff, 8,
+ vp9_subtract_block(height, width, src_diff, 8,
src, src_stride,
dst, xd->plane[0].dst.stride);
k = i;
- for (idy = 0; idy < bh / 4; ++idy) {
- for (idx = 0; idx < bw / 4; ++idx) {
+ for (idy = 0; idy < height / 4; ++idy) {
+ for (idx = 0; idx < width / 4; ++idx) {
int64_t ssz, rd, rd1, rd2;
k += (idy * 2 + idx);
@@ -1825,8 +1821,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
int label_mv_thresh;
int segmentyrate = 0;
BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
- int bwl = b_width_log2(bsize), bw = 1 << bwl;
- int bhl = b_height_log2(bsize), bh = 1 << bhl;
+ int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
+ int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
vp9_variance_fn_ptr_t *v_fn_ptr;
ENTROPY_CONTEXT t_above[2], t_left[2];
BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
@@ -1836,7 +1832,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));
- v_fn_ptr = &cpi->fn_ptr[get_block_size(bwl, bhl)];
+ v_fn_ptr = &cpi->fn_ptr[bsize];
// 64 makes this threshold really big effectively
// making it so that we very rarely check mvs on
@@ -1845,8 +1841,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
label_mv_thresh = 1 * bsi->mvthresh / label_count;
// Segmentation method overheads
- for (idy = 0; idy < 2; idy += bh) {
- for (idx = 0; idx < 2; idx += bw) {
+ for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
+ for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
// TODO(jingning,rbultje): rewrite the rate-distortion optimization
// loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
@@ -1940,9 +1936,24 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
if (i == 2)
bsi->mvp.as_int =
x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int;
- step_param = 2;
}
}
+ if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
+ // Take the average of the step_params based on the last frame's
+ // max mv magnitude and the best ref mvs of the current block for
+ // the given reference.
+ if (i == 0)
+ step_param = (vp9_init_search_range(
+ cpi, x->max_mv_context[mbmi->ref_frame[0]]) +
+ cpi->mv_step_param) >> 1;
+ else
+ step_param = (vp9_init_search_range(
+ cpi, MAX(abs(bsi->mvp.as_mv.row),
+ abs(bsi->mvp.as_mv.col)) >> 3) +
+ cpi->mv_step_param) >> 1;
+ } else {
+ step_param = cpi->mv_step_param;
+ }
further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
@@ -2023,19 +2034,19 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
x->mvcost, cpi);
bsi->rdstat[i][mode_idx].mvs[0].as_int = mode_mv[this_mode].as_int;
- if (bw > 1)
+ if (num_4x4_blocks_wide > 1)
bsi->rdstat[i + 1][mode_idx].mvs[0].as_int =
mode_mv[this_mode].as_int;
- if (bh > 1)
+ if (num_4x4_blocks_high > 1)
bsi->rdstat[i + 2][mode_idx].mvs[0].as_int =
mode_mv[this_mode].as_int;
if (mbmi->ref_frame[1] > 0) {
bsi->rdstat[i][mode_idx].mvs[1].as_int =
second_mode_mv[this_mode].as_int;
- if (bw > 1)
+ if (num_4x4_blocks_wide > 1)
bsi->rdstat[i + 1][mode_idx].mvs[1].as_int =
second_mode_mv[this_mode].as_int;
- if (bh > 1)
+ if (num_4x4_blocks_high > 1)
bsi->rdstat[i + 2][mode_idx].mvs[1].as_int =
second_mode_mv[this_mode].as_int;
}
@@ -2136,11 +2147,11 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
return;
}
- for (j = 1; j < bh; ++j)
+ for (j = 1; j < num_4x4_blocks_high; ++j)
vpx_memcpy(&x->partition_info->bmi[i + j * 2],
&x->partition_info->bmi[i],
sizeof(x->partition_info->bmi[i]));
- for (j = 1; j < bw; ++j)
+ for (j = 1; j < num_4x4_blocks_wide; ++j)
vpx_memcpy(&x->partition_info->bmi[i + j],
&x->partition_info->bmi[i],
sizeof(x->partition_info->bmi[i]));
@@ -2227,6 +2238,7 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
int best_index = 0;
int best_sad = INT_MAX;
int this_sad = INT_MAX;
+ unsigned int max_mv = 0;
uint8_t *src_y_ptr = x->plane[0].src.buf;
uint8_t *ref_y_ptr;
@@ -2236,6 +2248,8 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < MAX_MV_REF_CANDIDATES; i++) {
this_mv.as_int = mbmi->ref_mvs[ref_frame][i].as_int;
+ max_mv = MAX(max_mv,
+ MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
// The list is at an end if we see 0 for a second time.
if (!this_mv.as_int && zero_seen)
break;
@@ -2259,6 +2273,7 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
// Note the index of the mv that worked best in the reference list.
x->mv_best_ref_index[ref_frame] = best_index;
+ x->max_mv_context[ref_frame] = max_mv;
}
static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
@@ -2505,12 +2520,14 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
// Work out the size of the first step in the mv step search.
// 0 here is maximum length first step. 1 is MAX >> 1 etc.
if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
- step_param = vp9_init_search_range(cpi, cpi->max_mv_magnitude);
+ // Take the average of the step_params based on the last frame's
+ // max mv magnitude and that based on the best ref mvs of the current
+ // block for the given reference.
+ step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
+ cpi->mv_step_param) >> 1;
} else {
- step_param = vp9_init_search_range(
- cpi, MIN(cpi->common.width, cpi->common.height));
+ step_param = cpi->mv_step_param;
}
-
// mvp_full.as_int = ref_mv[0].as_int;
mvp_full.as_int =
mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index 8d5b3860c..ef84cc5c0 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -219,11 +219,11 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
int i, tile_col, mi_row, mi_col;
int temporal_predictor_count[PREDICTION_PROBS][2];
- int no_pred_segcounts[MAX_MB_SEGMENTS];
- int t_unpred_seg_counts[MAX_MB_SEGMENTS];
+ int no_pred_segcounts[MAX_SEGMENTS];
+ int t_unpred_seg_counts[MAX_SEGMENTS];
- vp9_prob no_pred_tree[MB_SEG_TREE_PROBS];
- vp9_prob t_pred_tree[MB_SEG_TREE_PROBS];
+ vp9_prob no_pred_tree[SEG_TREE_PROBS];
+ vp9_prob t_pred_tree[SEG_TREE_PROBS];
vp9_prob t_nopred_prob[PREDICTION_PROBS];
const int mis = cm->mode_info_stride;