Diffstat (limited to 'vp8/encoder')
-rw-r--r--  vp8/encoder/arm/arm_csystemdependent.c                         |   8
-rw-r--r--  vp8/encoder/arm/armv6/vp8_subtract_armv6.asm                   | 265
-rw-r--r--  vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm              |  23
-rw-r--r--  vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm | 176
-rw-r--r--  vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm| 216
-rw-r--r--  vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm | 178
-rw-r--r--  vp8/encoder/arm/encodemb_arm.c                                 |  31
-rw-r--r--  vp8/encoder/arm/encodemb_arm.h                                 |  18
-rw-r--r--  vp8/encoder/arm/neon/subtract_neon.asm                         |  53
-rw-r--r--  vp8/encoder/arm/variance_arm.c                                 |  77
-rw-r--r--  vp8/encoder/asm_enc_offsets.c                                  |   8
-rw-r--r--  vp8/encoder/onyx_if.c                                          |   3
12 files changed, 944 insertions(+), 112 deletions(-)
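
The new ARMv6 subtract routines in this change process four pixels per step with the SIMD media instructions: uxtb16 unpacks even/odd bytes into 16-bit lanes, usub16 subtracts the lanes in parallel (no borrow between lanes), and pkhbt/pkhtb repack the lane differences into source order. A minimal C model of one four-pixel step, as a sketch only (the helper name and lane comments are mine, not part of the patch):

    #include <stdint.h>

    /* One four-pixel step of the ARMv6 subtract kernels below:
     * four src bytes minus four pred bytes -> four int16 differences. */
    static void subtract_4px(uint32_t src, uint32_t pred, int16_t diff[4])
    {
        uint32_t s_even = src & 0x00ff00ffu;           /* uxtb16:        [s2|s0] */
        uint32_t s_odd  = (src >> 8) & 0x00ff00ffu;    /* uxtb16 ror #8: [s3|s1] */
        uint32_t p_even = pred & 0x00ff00ffu;          /*                [p2|p0] */
        uint32_t p_odd  = (pred >> 8) & 0x00ff00ffu;   /*                [p3|p1] */

        /* usub16: independent 16-bit lane subtracts; each fits in -255..255 */
        diff[0] = (int16_t)((int)(s_even & 0xffffu) - (int)(p_even & 0xffffu));
        diff[1] = (int16_t)((int)(s_odd  & 0xffffu) - (int)(p_odd  & 0xffffu));
        /* pkhbt/pkhtb then repack [d1|d0] and [d3|d2] for the two stores */
        diff[2] = (int16_t)((int)(s_even >> 16) - (int)(p_even >> 16));
        diff[3] = (int16_t)((int)(s_odd  >> 16) - (int)(p_odd  >> 16));
    }

The mby and mbuv routines simply unroll this step across the 16x16 luma block and the two 8x8 chroma blocks.
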
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c index d4a88dc94..f148b8997 100644 --- a/vp8/encoder/arm/arm_csystemdependent.c +++ b/vp8/encoder/arm/arm_csystemdependent.c @@ -66,10 +66,10 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi) /*cpi->rtcd.encodemb.berr = vp8_block_error_c; cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c; - cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c; - cpi->rtcd.encodemb.subb = vp8_subtract_b_c; - cpi->rtcd.encodemb.submby = vp8_subtract_mby_c; - cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c;*/ + cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c;*/ + cpi->rtcd.encodemb.subb = vp8_subtract_b_armv6; + cpi->rtcd.encodemb.submby = vp8_subtract_mby_armv6; + cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_armv6; /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;*/ cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_armv6; diff --git a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm b/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm new file mode 100644 index 000000000..0ca74387b --- /dev/null +++ b/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm @@ -0,0 +1,265 @@ +; +; Copyright (c) 2011 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + + EXPORT |vp8_subtract_mby_armv6| + EXPORT |vp8_subtract_mbuv_armv6| + EXPORT |vp8_subtract_b_armv6| + + INCLUDE asm_enc_offsets.asm + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; r0 BLOCK *be +; r1 BLOCKD *bd +; r2 int pitch +|vp8_subtract_b_armv6| PROC + + stmfd sp!, {r4-r9} + + ldr r4, [r0, #vp8_block_base_src] + ldr r5, [r0, #vp8_block_src] + ldr r6, [r0, #vp8_block_src_diff] + + ldr r3, [r4] + ldr r7, [r0, #vp8_block_src_stride] + add r3, r3, r5 ; src = *base_src + src + ldr r8, [r1, #vp8_blockd_predictor] + + mov r9, #4 ; loop count + +loop_block + + ldr r0, [r3], r7 ; src + ldr r1, [r8], r2 ; pred + + uxtb16 r4, r0 ; [s2 | s0] + uxtb16 r5, r1 ; [p2 | p0] + uxtb16 r0, r0, ror #8 ; [s3 | s1] + uxtb16 r1, r1, ror #8 ; [p3 | p1] + + usub16 r4, r4, r5 ; [d2 | d0] + usub16 r5, r0, r1 ; [d3 | d1] + + subs r9, r9, #1 ; decrement loop counter + + pkhbt r0, r4, r5, lsl #16 ; [d1 | d0] + pkhtb r1, r5, r4, asr #16 ; [d3 | d2] + + str r0, [r6, #0] ; diff + str r1, [r6, #4] ; diff + + add r6, r6, r2, lsl #1 ; update diff pointer + bne loop_block + + ldmfd sp!, {r4-r9} + mov pc, lr + + ENDP + + +; r0 short *diff +; r1 unsigned char *usrc +; r2 unsigned char *vsrc +; r3 unsigned char *pred +; stack int stride +|vp8_subtract_mbuv_armv6| PROC + + stmfd sp!, {r4-r12, lr} + + add r0, r0, #512 ; set *diff point to Cb + add r3, r3, #256 ; set *pred point to Cb + + mov r4, #8 ; loop count + ldr r5, [sp, #40] ; stride + + ; Subtract U block +loop_u + ldr r6, [r1] ; src (A) + ldr r7, [r3], #4 ; pred (A) + + uxtb16 r8, r6 ; [s2 | s0] (A) + uxtb16 r9, r7 ; [p2 | p0] (A) + uxtb16 r10, r6, ror #8 ; [s3 | s1] (A) + uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) + + usub16 r6, r8, r9 ; [d2 | d0] (A) + usub16 r7, r10, r11 ; [d3 | d1] (A) + + ldr r10, [r1, #4] ; src (B) + ldr r11, [r3], #4 ; pred (B) + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) + + str r8, [r0], #4 ; diff (A) + uxtb16 r8, r10 ; [s2 | s0] (B) + str r9, [r0], #4 ; 
diff (A) + + uxtb16 r9, r11 ; [p2 | p0] (B) + uxtb16 r10, r10, ror #8 ; [s3 | s1] (B) + uxtb16 r11, r11, ror #8 ; [p3 | p1] (B) + + usub16 r6, r8, r9 ; [d2 | d0] (B) + usub16 r7, r10, r11 ; [d3 | d1] (B) + + add r1, r1, r5 ; update usrc pointer + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) + + str r8, [r0], #4 ; diff (B) + subs r4, r4, #1 ; update loop counter + str r9, [r0], #4 ; diff (B) + + bne loop_u + + mov r4, #8 ; loop count + + ; Subtract V block +loop_v + ldr r6, [r2] ; src (A) + ldr r7, [r3], #4 ; pred (A) + + uxtb16 r8, r6 ; [s2 | s0] (A) + uxtb16 r9, r7 ; [p2 | p0] (A) + uxtb16 r10, r6, ror #8 ; [s3 | s1] (A) + uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) + + usub16 r6, r8, r9 ; [d2 | d0] (A) + usub16 r7, r10, r11 ; [d3 | d1] (A) + + ldr r10, [r2, #4] ; src (B) + ldr r11, [r3], #4 ; pred (B) + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) + + str r8, [r0], #4 ; diff (A) + uxtb16 r8, r10 ; [s2 | s0] (B) + str r9, [r0], #4 ; diff (A) + + uxtb16 r9, r11 ; [p2 | p0] (B) + uxtb16 r10, r10, ror #8 ; [s3 | s1] (B) + uxtb16 r11, r11, ror #8 ; [p3 | p1] (B) + + usub16 r6, r8, r9 ; [d2 | d0] (B) + usub16 r7, r10, r11 ; [d3 | d1] (B) + + add r2, r2, r5 ; update vsrc pointer + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) + + str r8, [r0], #4 ; diff (B) + subs r4, r4, #1 ; update loop counter + str r9, [r0], #4 ; diff (B) + + bne loop_v + + ldmfd sp!, {r4-r12, pc} + + ENDP + + +; r0 short *diff +; r1 unsigned char *src +; r2 unsigned char *pred +; r3 int stride +|vp8_subtract_mby_armv6| PROC + + stmfd sp!, {r4-r11} + + mov r4, #16 +loop + ldr r6, [r1] ; src (A) + ldr r7, [r2], #4 ; pred (A) + + uxtb16 r8, r6 ; [s2 | s0] (A) + uxtb16 r9, r7 ; [p2 | p0] (A) + uxtb16 r10, r6, ror #8 ; [s3 | s1] (A) + uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) + + usub16 r6, r8, r9 ; [d2 | d0] (A) + usub16 r7, r10, r11 ; [d3 | d1] (A) + + ldr r10, [r1, #4] ; src (B) + ldr r11, [r2], #4 ; pred (B) + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) + + str r8, [r0], #4 ; diff (A) + uxtb16 r8, r10 ; [s2 | s0] (B) + str r9, [r0], #4 ; diff (A) + + uxtb16 r9, r11 ; [p2 | p0] (B) + uxtb16 r10, r10, ror #8 ; [s3 | s1] (B) + uxtb16 r11, r11, ror #8 ; [p3 | p1] (B) + + usub16 r6, r8, r9 ; [d2 | d0] (B) + usub16 r7, r10, r11 ; [d3 | d1] (B) + + ldr r10, [r1, #8] ; src (C) + ldr r11, [r2], #4 ; pred (C) + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) + + str r8, [r0], #4 ; diff (B) + uxtb16 r8, r10 ; [s2 | s0] (C) + str r9, [r0], #4 ; diff (B) + + uxtb16 r9, r11 ; [p2 | p0] (C) + uxtb16 r10, r10, ror #8 ; [s3 | s1] (C) + uxtb16 r11, r11, ror #8 ; [p3 | p1] (C) + + usub16 r6, r8, r9 ; [d2 | d0] (C) + usub16 r7, r10, r11 ; [d3 | d1] (C) + + ldr r10, [r1, #12] ; src (D) + ldr r11, [r2], #4 ; pred (D) + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (C) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (C) + + str r8, [r0], #4 ; diff (C) + uxtb16 r8, r10 ; [s2 | s0] (D) + str r9, [r0], #4 ; diff (C) + + uxtb16 r9, r11 ; [p2 | p0] (D) + uxtb16 r10, r10, ror #8 ; [s3 | s1] (D) + uxtb16 r11, r11, ror #8 ; [p3 | p1] (D) + + usub16 r6, r8, r9 ; [d2 | d0] (D) + usub16 r7, r10, r11 ; [d3 | d1] (D) + + add r1, r1, r3 ; update src pointer + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (D) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (D) + + str r8, [r0], #4 ; diff (D) + subs r4, r4, #1 ; update loop counter + str r9, [r0], #4 ; diff (D) + + bne loop + + ldmfd sp!, 
{r4-r11} + mov pc, lr + + ENDP + + END + diff --git a/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm index 8d7258af7..988376390 100644 --- a/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm +++ b/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm @@ -25,14 +25,14 @@ |vp8_variance16x16_armv6| PROC stmfd sp!, {r4-r12, lr} - mov r12, #16 ; set loop counter to 16 (=block height) mov r8, #0 ; initialize sum = 0 mov r11, #0 ; initialize sse = 0 + mov r12, #16 ; set loop counter to 16 (=block height) loop ; 1st 4 pixels - ldr r4, [r0, #0x0] ; load 4 src pixels - ldr r5, [r2, #0x0] ; load 4 ref pixels + ldr r4, [r0, #0] ; load 4 src pixels + ldr r5, [r2, #0] ; load 4 ref pixels mov lr, #0 ; constant zero @@ -55,8 +55,8 @@ loop smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) ; 2nd 4 pixels - ldr r4, [r0, #0x4] ; load 4 src pixels - ldr r5, [r2, #0x4] ; load 4 ref pixels + ldr r4, [r0, #4] ; load 4 src pixels + ldr r5, [r2, #4] ; load 4 ref pixels smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) usub8 r6, r4, r5 ; calculate difference @@ -79,8 +79,8 @@ loop smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) ; 3rd 4 pixels - ldr r4, [r0, #0x8] ; load 4 src pixels - ldr r5, [r2, #0x8] ; load 4 ref pixels + ldr r4, [r0, #8] ; load 4 src pixels + ldr r5, [r2, #8] ; load 4 ref pixels smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) usub8 r6, r4, r5 ; calculate difference @@ -103,8 +103,8 @@ loop smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) ; 4th 4 pixels - ldr r4, [r0, #0xc] ; load 4 src pixels - ldr r5, [r2, #0xc] ; load 4 ref pixels + ldr r4, [r0, #12] ; load 4 src pixels + ldr r5, [r2, #12] ; load 4 ref pixels smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) usub8 r6, r4, r5 ; calculate difference @@ -135,13 +135,14 @@ loop bne loop ; return stuff - ldr r6, [sp, #0x28] ; get address of sse + ldr r6, [sp, #40] ; get address of sse mul r0, r8, r8 ; sum * sum str r11, [r6] ; store sse - sub r0, r11, r0, ASR #8 ; return (sse - ((sum * sum) >> 8)) + sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8)) ldmfd sp!, {r4-r12, pc} ENDP END + diff --git a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm new file mode 100644 index 000000000..2350f3e8b --- /dev/null +++ b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm @@ -0,0 +1,176 @@ +; +; Copyright (c) 2011 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + + + EXPORT |vp8_variance_halfpixvar16x16_h_armv6| + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; r0 unsigned char *src_ptr +; r1 int source_stride +; r2 unsigned char *ref_ptr +; r3 int recon_stride +; stack unsigned int *sse +|vp8_variance_halfpixvar16x16_h_armv6| PROC + + stmfd sp!, {r4-r12, lr} + mov r8, #0 ; initialize sum = 0 + ldr r10, c80808080 + mov r11, #0 ; initialize sse = 0 + mov r12, #16 ; set loop counter to 16 (=block height) + mov lr, #0 ; constant zero +loop + ; 1st 4 pixels + ldr r4, [r0, #0] ; load 4 src pixels + ldr r6, [r0, #1] ; load 4 src pixels with 1 byte offset + ldr r5, [r2, #0] ; load 4 ref pixels + + ; bilinear interpolation + mvn r6, r6 + uhsub8 r4, r4, r6 + eor r4, r4, r10 + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive difference + usub8 r6, r5, r4 ; calculate difference with reversed operands + sel r6, r6, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + ; calculate total sum + adds r8, r8, r4 ; add positive differences to sum + subs r8, r8, r5 ; substract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r7, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 2nd 4 pixels + ldr r4, [r0, #4] ; load 4 src pixels + ldr r6, [r0, #5] ; load 4 src pixels with 1 byte offset + ldr r5, [r2, #4] ; load 4 ref pixels + + ; bilinear interpolation + mvn r6, r6 + uhsub8 r4, r4, r6 + eor r4, r4, r10 + + smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive difference + usub8 r6, r5, r4 ; calculate difference with reversed operands + sel r6, r6, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; substract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r7, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 3rd 4 pixels + ldr r4, [r0, #8] ; load 4 src pixels + ldr r6, [r0, #9] ; load 4 src pixels with 1 byte offset + ldr r5, [r2, #8] ; load 4 ref pixels + + ; bilinear interpolation + mvn r6, r6 + uhsub8 r4, r4, r6 + eor r4, r4, r10 + + smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive difference + usub8 r6, r5, r4 ; calculate difference with reversed operands + sel r6, r6, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; substract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r7, r6, ror #8 ; another two pixels to halfwords + 
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 4th 4 pixels + ldr r4, [r0, #12] ; load 4 src pixels + ldr r6, [r0, #13] ; load 4 src pixels with 1 byte offset + ldr r5, [r2, #12] ; load 4 ref pixels + + ; bilinear interpolation + mvn r6, r6 + uhsub8 r4, r4, r6 + eor r4, r4, r10 + + smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) + + usub8 r6, r4, r5 ; calculate difference + add r0, r0, r1 ; set src_ptr to next row + sel r7, r6, lr ; select bytes with positive difference + usub8 r6, r5, r4 ; calculate difference with reversed operands + add r2, r2, r3 ; set dst_ptr to next row + sel r6, r6, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; substract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r7, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) + + subs r12, r12, #1 + + bne loop + + ; return stuff + ldr r6, [sp, #40] ; get address of sse + mul r0, r8, r8 ; sum * sum + str r11, [r6] ; store sse + sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8)) + + ldmfd sp!, {r4-r12, pc} + + ENDP + +c80808080 + DCD 0x80808080 + + END + diff --git a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm new file mode 100644 index 000000000..f9ae3b7e2 --- /dev/null +++ b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm @@ -0,0 +1,216 @@ +; +; Copyright (c) 2011 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + + + EXPORT |vp8_variance_halfpixvar16x16_hv_armv6| + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; r0 unsigned char *src_ptr +; r1 int source_stride +; r2 unsigned char *ref_ptr +; r3 int recon_stride +; stack unsigned int *sse +|vp8_variance_halfpixvar16x16_hv_armv6| PROC + + stmfd sp!, {r4-r12, lr} + mov r8, #0 ; initialize sum = 0 + ldr r10, c80808080 + mov r11, #0 ; initialize sse = 0 + mov r12, #16 ; set loop counter to 16 (=block height) + mov lr, #0 ; constant zero +loop + add r9, r0, r1 ; pointer to pixels on the next row + ; 1st 4 pixels + ldr r4, [r0, #0] ; load source pixels a, row N + ldr r6, [r0, #1] ; load source pixels b, row N + ldr r5, [r9, #0] ; load source pixels c, row N+1 + ldr r7, [r9, #1] ; load source pixels d, row N+1 + + ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N + mvn r6, r6 + uhsub8 r4, r4, r6 + eor r4, r4, r10 + ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1 + mvn r7, r7 + uhsub8 r5, r5, r7 + eor r5, r5, r10 + ; z = (x + y + 1) >> 1, interpolate half pixel values vertically + mvn r5, r5 + uhsub8 r4, r4, r5 + ldr r5, [r2, #0] ; load 4 ref pixels + eor r4, r4, r10 + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive difference + usub8 r6, r5, r4 ; calculate difference with reversed operands + sel r6, r6, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + ; calculate total sum + adds r8, r8, r4 ; add positive differences to sum + subs r8, r8, r5 ; substract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r7, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 2nd 4 pixels + ldr r4, [r0, #4] ; load source pixels a, row N + ldr r6, [r0, #5] ; load source pixels b, row N + ldr r5, [r9, #4] ; load source pixels c, row N+1 + + smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) + + ldr r7, [r9, #5] ; load source pixels d, row N+1 + + ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N + mvn r6, r6 + uhsub8 r4, r4, r6 + eor r4, r4, r10 + ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1 + mvn r7, r7 + uhsub8 r5, r5, r7 + eor r5, r5, r10 + ; z = (x + y + 1) >> 1, interpolate half pixel values vertically + mvn r5, r5 + uhsub8 r4, r4, r5 + ldr r5, [r2, #4] ; load 4 ref pixels + eor r4, r4, r10 + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive difference + usub8 r6, r5, r4 ; calculate difference with reversed operands + sel r6, r6, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; substract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r7, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 3rd 4 pixels + ldr r4, [r0, #8] ; load source pixels a, row N + ldr r6, [r0, #9] ; load source pixels b, row N + ldr r5, [r9, #8] ; load source pixels c, row N+1 + + 
smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) + + ldr r7, [r9, #9] ; load source pixels d, row N+1 + + ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N + mvn r6, r6 + uhsub8 r4, r4, r6 + eor r4, r4, r10 + ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1 + mvn r7, r7 + uhsub8 r5, r5, r7 + eor r5, r5, r10 + ; z = (x + y + 1) >> 1, interpolate half pixel values vertically + mvn r5, r5 + uhsub8 r4, r4, r5 + ldr r5, [r2, #8] ; load 4 ref pixels + eor r4, r4, r10 + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive difference + usub8 r6, r5, r4 ; calculate difference with reversed operands + sel r6, r6, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; substract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r7, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 4th 4 pixels + ldr r4, [r0, #12] ; load source pixels a, row N + ldr r6, [r0, #13] ; load source pixels b, row N + ldr r5, [r9, #12] ; load source pixels c, row N+1 + smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) + ldr r7, [r9, #13] ; load source pixels d, row N+1 + + ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N + mvn r6, r6 + uhsub8 r4, r4, r6 + eor r4, r4, r10 + ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1 + mvn r7, r7 + uhsub8 r5, r5, r7 + eor r5, r5, r10 + ; z = (x + y + 1) >> 1, interpolate half pixel values vertically + mvn r5, r5 + uhsub8 r4, r4, r5 + ldr r5, [r2, #12] ; load 4 ref pixels + eor r4, r4, r10 + + usub8 r6, r4, r5 ; calculate difference + add r0, r0, r1 ; set src_ptr to next row + sel r7, r6, lr ; select bytes with positive difference + usub8 r6, r5, r4 ; calculate difference with reversed operands + add r2, r2, r3 ; set dst_ptr to next row + sel r6, r6, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; substract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r7, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + subs r12, r12, #1 + smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) + + bne loop + + ; return stuff + ldr r6, [sp, #40] ; get address of sse + mul r0, r8, r8 ; sum * sum + str r11, [r6] ; store sse + sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8)) + + ldmfd sp!, {r4-r12, pc} + + ENDP + +c80808080 + DCD 0x80808080 + + END diff --git a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm new file mode 100644 index 000000000..9e0a03548 --- /dev/null +++ b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm @@ -0,0 +1,178 @@ +; +; Copyright (c) 2011 The WebM project authors. All Rights Reserved. 
+; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + + EXPORT |vp8_variance_halfpixvar16x16_v_armv6| + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; r0 unsigned char *src_ptr +; r1 int source_stride +; r2 unsigned char *ref_ptr +; r3 int recon_stride +; stack unsigned int *sse +|vp8_variance_halfpixvar16x16_v_armv6| PROC + + stmfd sp!, {r4-r12, lr} + mov r8, #0 ; initialize sum = 0 + ldr r10, c80808080 + mov r11, #0 ; initialize sse = 0 + mov r12, #16 ; set loop counter to 16 (=block height) + mov lr, #0 ; constant zero +loop + add r9, r0, r1 ; set src pointer to next row + ; 1st 4 pixels + ldr r4, [r0, #0] ; load 4 src pixels + ldr r6, [r9, #0] ; load 4 src pixels from next row + ldr r5, [r2, #0] ; load 4 ref pixels + + ; bilinear interpolation + mvn r6, r6 + uhsub8 r4, r4, r6 + eor r4, r4, r10 + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive difference + usub8 r6, r5, r4 ; calculate difference with reversed operands + sel r6, r6, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + ; calculate total sum + adds r8, r8, r4 ; add positive differences to sum + subs r8, r8, r5 ; substract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r7, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 2nd 4 pixels + ldr r4, [r0, #4] ; load 4 src pixels + ldr r6, [r9, #4] ; load 4 src pixels from next row + ldr r5, [r2, #4] ; load 4 ref pixels + + ; bilinear interpolation + mvn r6, r6 + uhsub8 r4, r4, r6 + eor r4, r4, r10 + + smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive difference + usub8 r6, r5, r4 ; calculate difference with reversed operands + sel r6, r6, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; substract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r7, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 3rd 4 pixels + ldr r4, [r0, #8] ; load 4 src pixels + ldr r6, [r9, #8] ; load 4 src pixels from next row + ldr r5, [r2, #8] ; load 4 ref pixels + + ; bilinear interpolation + mvn r6, r6 + uhsub8 r4, r4, r6 + eor r4, r4, r10 + + smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive difference + usub8 r6, r5, r4 ; calculate difference with reversed operands + sel r6, r6, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 
r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; substract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r7, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 4th 4 pixels + ldr r4, [r0, #12] ; load 4 src pixels + ldr r6, [r9, #12] ; load 4 src pixels from next row + ldr r5, [r2, #12] ; load 4 ref pixels + + ; bilinear interpolation + mvn r6, r6 + uhsub8 r4, r4, r6 + eor r4, r4, r10 + + smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) + + usub8 r6, r4, r5 ; calculate difference + add r0, r0, r1 ; set src_ptr to next row + sel r7, r6, lr ; select bytes with positive difference + usub8 r6, r5, r4 ; calculate difference with reversed operands + add r2, r2, r3 ; set dst_ptr to next row + sel r6, r6, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; substract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r7, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) + + + subs r12, r12, #1 + + bne loop + + ; return stuff + ldr r6, [sp, #40] ; get address of sse + mul r0, r8, r8 ; sum * sum + str r11, [r6] ; store sse + sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8)) + + ldmfd sp!, {r4-r12, pc} + + ENDP + +c80808080 + DCD 0x80808080 + + END + diff --git a/vp8/encoder/arm/encodemb_arm.c b/vp8/encoder/arm/encodemb_arm.c deleted file mode 100644 index 88ad3fcff..000000000 --- a/vp8/encoder/arm/encodemb_arm.c +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vp8/encoder/encodemb.h" -#include "vp8/common/reconinter.h" -#include "vp8/encoder/quantize.h" -#include "vp8/common/invtrans.h" -#include "vp8/common/recon.h" -#include "vp8/common/reconintra.h" -#include "vp8/encoder/dct.h" -#include "vpx_mem/vpx_mem.h" - -extern void vp8_subtract_b_neon_func(short *diff, unsigned char *src, unsigned char *pred, int stride, int pitch); - -void vp8_subtract_b_neon(BLOCK *be, BLOCKD *bd, int pitch) -{ - unsigned char *src_ptr = (*(be->base_src) + be->src); - short *diff_ptr = be->src_diff; - unsigned char *pred_ptr = bd->predictor; - int src_stride = be->src_stride; - - vp8_subtract_b_neon_func(diff_ptr, src_ptr, pred_ptr, src_stride, pitch); -} diff --git a/vp8/encoder/arm/encodemb_arm.h b/vp8/encoder/arm/encodemb_arm.h index 8fe453735..bf417fe1d 100644 --- a/vp8/encoder/arm/encodemb_arm.h +++ b/vp8/encoder/arm/encodemb_arm.h @@ -12,6 +12,24 @@ #ifndef ENCODEMB_ARM_H #define ENCODEMB_ARM_H +#if HAVE_ARMV6 +extern prototype_subb(vp8_subtract_b_armv6); +extern prototype_submby(vp8_subtract_mby_armv6); +extern prototype_submbuv(vp8_subtract_mbuv_armv6); + +#if !CONFIG_RUNTIME_CPU_DETECT +#undef vp8_encodemb_subb +#define vp8_encodemb_subb vp8_subtract_b_armv6 + +#undef vp8_encodemb_submby +#define vp8_encodemb_submby vp8_subtract_mby_armv6 + +#undef vp8_encodemb_submbuv +#define vp8_encodemb_submbuv vp8_subtract_mbuv_armv6 +#endif + +#endif /* HAVE_ARMV6 */ + #if HAVE_ARMV7 //extern prototype_berr(vp8_block_error_c); //extern prototype_mberr(vp8_mbblock_error_c); diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm index 3ea00f8b9..68c295062 100644 --- a/vp8/encoder/arm/neon/subtract_neon.asm +++ b/vp8/encoder/arm/neon/subtract_neon.asm @@ -8,45 +8,58 @@ ; be found in the AUTHORS file in the root of the source tree. 
; - - EXPORT |vp8_subtract_b_neon_func| + EXPORT |vp8_subtract_b_neon| EXPORT |vp8_subtract_mby_neon| EXPORT |vp8_subtract_mbuv_neon| + INCLUDE asm_enc_offsets.asm + ARM REQUIRE8 PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 -;========================================= -;void vp8_subtract_b_neon_func(short *diff, unsigned char *src, unsigned char *pred, int stride, int pitch); -|vp8_subtract_b_neon_func| PROC - ldr r12, [sp] ;load pitch - - vld1.8 {d0}, [r1], r3 ;load src - vld1.8 {d1}, [r2], r12 ;load pred - vld1.8 {d2}, [r1], r3 - vld1.8 {d3}, [r2], r12 - vld1.8 {d4}, [r1], r3 - vld1.8 {d5}, [r2], r12 - vld1.8 {d6}, [r1], r3 - vld1.8 {d7}, [r2], r12 + +;void vp8_subtract_b_neon(BLOCK *be, BLOCKD *bd, int pitch) +|vp8_subtract_b_neon| PROC + + stmfd sp!, {r4-r7} + + ldr r3, [r0, #vp8_block_base_src] + ldr r4, [r0, #vp8_block_src] + ldr r5, [r0, #vp8_block_src_diff] + ldr r3, [r3] + ldr r6, [r0, #vp8_block_src_stride] + add r3, r3, r4 ; src = *base_src + src + ldr r7, [r1, #vp8_blockd_predictor] + + vld1.8 {d0}, [r3], r6 ;load src + vld1.8 {d1}, [r7], r2 ;load pred + vld1.8 {d2}, [r3], r6 + vld1.8 {d3}, [r7], r2 + vld1.8 {d4}, [r3], r6 + vld1.8 {d5}, [r7], r2 + vld1.8 {d6}, [r3], r6 + vld1.8 {d7}, [r7], r2 vsubl.u8 q10, d0, d1 vsubl.u8 q11, d2, d3 vsubl.u8 q12, d4, d5 vsubl.u8 q13, d6, d7 - mov r12, r12, lsl #1 + mov r2, r2, lsl #1 - vst1.16 {d20}, [r0], r12 ;store diff - vst1.16 {d22}, [r0], r12 - vst1.16 {d24}, [r0], r12 - vst1.16 {d26}, [r0], r12 + vst1.16 {d20}, [r5], r2 ;store diff + vst1.16 {d22}, [r5], r2 + vst1.16 {d24}, [r5], r2 + vst1.16 {d26}, [r5], r2 + ldmfd sp!, {r4-r7} bx lr + ENDP + ;========================================== ;void vp8_subtract_mby_neon(short *diff, unsigned char *src, unsigned char *pred, int stride) |vp8_subtract_mby_neon| PROC diff --git a/vp8/encoder/arm/variance_arm.c b/vp8/encoder/arm/variance_arm.c index ed1fb16d5..e77be9f73 100644 --- a/vp8/encoder/arm/variance_arm.c +++ b/vp8/encoder/arm/variance_arm.c @@ -57,51 +57,38 @@ unsigned int vp8_sub_pixel_variance16x16_armv6 unsigned short first_pass[36*16]; unsigned char second_pass[20*16]; const short *HFilter, *VFilter; - - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; - - vp8_filter_block2d_bil_first_pass_armv6(src_ptr, first_pass, - src_pixels_per_line, - 17, 16, HFilter); - vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass, - 16, 16, 16, VFilter); - - return vp8_variance16x16_armv6(second_pass, 16, dst_ptr, - dst_pixels_per_line, sse); -} - -unsigned int vp8_variance_halfpixvar16x16_h_armv6( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - return vp8_sub_pixel_variance16x16_armv6(src_ptr, source_stride, 4, 0, - ref_ptr, recon_stride, sse); -} - -unsigned int vp8_variance_halfpixvar16x16_v_armv6( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - return vp8_sub_pixel_variance16x16_armv6(src_ptr, source_stride, 0, 4, - ref_ptr, recon_stride, sse); -} - -unsigned int vp8_variance_halfpixvar16x16_hv_armv6( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - return vp8_sub_pixel_variance16x16_armv6(src_ptr, source_stride, 4, 4, - ref_ptr, recon_stride, sse); + unsigned int var; + + if (xoffset == 4 && yoffset == 0) + { + var = vp8_variance_halfpixvar16x16_h_armv6(src_ptr, src_pixels_per_line, + dst_ptr, 
dst_pixels_per_line, sse); + } + else if (xoffset == 0 && yoffset == 4) + { + var = vp8_variance_halfpixvar16x16_v_armv6(src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, sse); + } + else if (xoffset == 4 && yoffset == 4) + { + var = vp8_variance_halfpixvar16x16_hv_armv6(src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, sse); + } + else + { + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; + + vp8_filter_block2d_bil_first_pass_armv6(src_ptr, first_pass, + src_pixels_per_line, + 17, 16, HFilter); + vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass, + 16, 16, 16, VFilter); + + var = vp8_variance16x16_armv6(second_pass, 16, dst_ptr, + dst_pixels_per_line, sse); + } + return var; } #endif /* HAVE_ARMV6 */ diff --git a/vp8/encoder/asm_enc_offsets.c b/vp8/encoder/asm_enc_offsets.c index c7983c1b0..9c81c8d0a 100644 --- a/vp8/encoder/asm_enc_offsets.c +++ b/vp8/encoder/asm_enc_offsets.c @@ -48,6 +48,14 @@ DEFINE(vp8_blockd_dequant, offsetof(BLOCKD, dequant)); DEFINE(vp8_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff)); DEFINE(vp8_blockd_eob, offsetof(BLOCKD, eob)); +// subtract +DEFINE(vp8_block_base_src, offsetof(BLOCK, base_src)); +DEFINE(vp8_block_src, offsetof(BLOCK, src)); +DEFINE(vp8_block_src_diff, offsetof(BLOCK, src_diff)); +DEFINE(vp8_block_src_stride, offsetof(BLOCK, src_stride)); + +DEFINE(vp8_blockd_predictor, offsetof(BLOCKD, predictor)); + //pack tokens DEFINE(vp8_writer_lowvalue, offsetof(vp8_writer, lowvalue)); DEFINE(vp8_writer_range, offsetof(vp8_writer, range)); diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index e5d2bd84c..a18447d51 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -3258,7 +3258,8 @@ void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) } #if CONFIG_MULTITHREAD - sem_post(&cpi->h_event_end_lpf); /* signal that we have set filter_level */ + if (cpi->b_multi_threaded) + sem_post(&cpi->h_event_end_lpf); /* signal that we have set filter_level */ #endif if (cm->filter_level > 0) |
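
All three new half-pixel variance routines lean on the same three-instruction idiom for the rounded byte average (a + b + 1) >> 1 across four packed bytes: complement one operand (mvn), unsigned halving subtract (uhsub8), then flip the sign bits (eor with 0x80808080). Since a - ~b = a + b - 255, and XOR with 0x80 adds 128 mod 256, each lane comes out as exactly the rounded average. A small C check of that identity on one byte lane, as a sketch (the function name is mine, for illustration):

    #include <assert.h>
    #include <stdint.h>

    /* Per-byte model of: mvn rB ; uhsub8 rA, rA, rB ; eor rA, rA, #0x80...
     * uhsub8 halves a 9-bit intermediate difference in each lane. */
    static uint8_t halfpix_avg(uint8_t a, uint8_t b)
    {
        uint32_t d9 = (uint32_t)(a - (uint8_t)~b) & 0x1ff; /* 9-bit a - ~b */
        uint8_t half = (uint8_t)(d9 >> 1);                 /* halving subtract */
        return half ^ 0x80;                                /* undo the -128 bias */
    }

    int main(void)
    {
        for (int a = 0; a < 256; a++)
            for (int b = 0; b < 256; b++)
                assert(halfpix_avg((uint8_t)a, (uint8_t)b) ==
                       (uint8_t)((a + b + 1) >> 1));
        return 0;
    }

The shared epilogue then computes the block variance with the usual identity for a 16x16 (256-pixel) block, variance = sse - sum*sum/256, which is the `sub r0, r11, r0, asr #8` at each routine's return.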