diff options
author    | Scott LaVarnway <slavarnway@google.com> | 2011-11-15 12:53:01 -0500 |
committer | Scott LaVarnway <slavarnway@google.com> | 2011-11-15 12:53:01 -0500 |
commit | edd98b7310e0338708abfbd0826688222e1e6f57 (patch) | |
tree | 340f14f39c1838ebe5c0072c982f7411eed14914 /vp8/encoder/arm/armv6 | |
parent | 2a6daa72f0f04ee674f14a7d5c7ee7693fc87aa2 (diff) | |
download | libvpx-edd98b7310e0338708abfbd0826688222e1e6f57.tar libvpx-edd98b7310e0338708abfbd0826688222e1e6f57.tar.gz libvpx-edd98b7310e0338708abfbd0826688222e1e6f57.tar.bz2 libvpx-edd98b7310e0338708abfbd0826688222e1e6f57.zip |
Added predictor stride argument(s) to subtract functions
Patch set 2: 64 bit build fix
Patch set 3: 64 bit crash fix
[Tero]
Patch set 4: Updated ARMv6 and NEON assembly.
Added also minor NEON optimizations to subtract
functions.
Patch set 5: x86 stride bug fix
Change-Id: I1fcca93e90c89b89ddc204e1c18f208682675c15
Diffstat (limited to 'vp8/encoder/arm/armv6')
-rw-r--r-- | vp8/encoder/arm/armv6/vp8_subtract_armv6.asm | 63 |
1 file changed, 35 insertions(+), 28 deletions(-)
diff --git a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm b/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm index 0ca74387b..f329f8f73 100644 --- a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm +++ b/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm @@ -72,22 +72,23 @@ loop_block ; r0 short *diff ; r1 unsigned char *usrc ; r2 unsigned char *vsrc -; r3 unsigned char *pred -; stack int stride +; r3 int src_stride +; sp unsigned char *upred +; sp unsigned char *vpred +; sp int pred_stride |vp8_subtract_mbuv_armv6| PROC - stmfd sp!, {r4-r12, lr} + stmfd sp!, {r4-r11} add r0, r0, #512 ; set *diff point to Cb - add r3, r3, #256 ; set *pred point to Cb - mov r4, #8 ; loop count - ldr r5, [sp, #40] ; stride + ldr r5, [sp, #32] ; upred + ldr r12, [sp, #40] ; pred_stride ; Subtract U block loop_u - ldr r6, [r1] ; src (A) - ldr r7, [r3], #4 ; pred (A) + ldr r6, [r1] ; usrc (A) + ldr r7, [r5] ; upred (A) uxtb16 r8, r6 ; [s2 | s0] (A) uxtb16 r9, r7 ; [p2 | p0] (A) @@ -97,8 +98,8 @@ loop_u usub16 r6, r8, r9 ; [d2 | d0] (A) usub16 r7, r10, r11 ; [d3 | d1] (A) - ldr r10, [r1, #4] ; src (B) - ldr r11, [r3], #4 ; pred (B) + ldr r10, [r1, #4] ; usrc (B) + ldr r11, [r5, #4] ; upred (B) pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) @@ -114,7 +115,8 @@ loop_u usub16 r6, r8, r9 ; [d2 | d0] (B) usub16 r7, r10, r11 ; [d3 | d1] (B) - add r1, r1, r5 ; update usrc pointer + add r1, r1, r3 ; update usrc pointer + add r5, r5, r12 ; update upred pointer pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) @@ -125,12 +127,13 @@ loop_u bne loop_u + ldr r5, [sp, #36] ; vpred mov r4, #8 ; loop count ; Subtract V block loop_v - ldr r6, [r2] ; src (A) - ldr r7, [r3], #4 ; pred (A) + ldr r6, [r2] ; vsrc (A) + ldr r7, [r5] ; vpred (A) uxtb16 r8, r6 ; [s2 | s0] (A) uxtb16 r9, r7 ; [p2 | p0] (A) @@ -140,8 +143,8 @@ loop_v usub16 r6, r8, r9 ; [d2 | d0] (A) usub16 r7, r10, r11 ; [d3 | d1] (A) - ldr r10, [r2, #4] ; src (B) - ldr r11, [r3], #4 ; pred 
(B) + ldr r10, [r2, #4] ; vsrc (B) + ldr r11, [r5, #4] ; vpred (B) pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) @@ -157,7 +160,8 @@ loop_v usub16 r6, r8, r9 ; [d2 | d0] (B) usub16 r7, r10, r11 ; [d3 | d1] (B) - add r2, r2, r5 ; update vsrc pointer + add r2, r2, r3 ; update vsrc pointer + add r5, r5, r12 ; update vpred pointer pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) @@ -168,23 +172,25 @@ loop_v bne loop_v - ldmfd sp!, {r4-r12, pc} + ldmfd sp!, {r4-r11} + bx lr ENDP ; r0 short *diff ; r1 unsigned char *src -; r2 unsigned char *pred -; r3 int stride +; r2 int src_stride +; r3 unsigned char *pred +; sp int pred_stride |vp8_subtract_mby_armv6| PROC stmfd sp!, {r4-r11} - + ldr r12, [sp, #32] ; pred_stride mov r4, #16 loop ldr r6, [r1] ; src (A) - ldr r7, [r2], #4 ; pred (A) + ldr r7, [r3] ; pred (A) uxtb16 r8, r6 ; [s2 | s0] (A) uxtb16 r9, r7 ; [p2 | p0] (A) @@ -195,7 +201,7 @@ loop usub16 r7, r10, r11 ; [d3 | d1] (A) ldr r10, [r1, #4] ; src (B) - ldr r11, [r2], #4 ; pred (B) + ldr r11, [r3, #4] ; pred (B) pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) @@ -212,7 +218,7 @@ loop usub16 r7, r10, r11 ; [d3 | d1] (B) ldr r10, [r1, #8] ; src (C) - ldr r11, [r2], #4 ; pred (C) + ldr r11, [r3, #8] ; pred (C) pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) @@ -229,10 +235,10 @@ loop usub16 r7, r10, r11 ; [d3 | d1] (C) ldr r10, [r1, #12] ; src (D) - ldr r11, [r2], #4 ; pred (D) + ldr r11, [r3, #12] ; pred (D) - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (C) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (C) + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (C) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (C) str r8, [r0], #4 ; diff (C) uxtb16 r8, r10 ; [s2 | s0] (D) @@ -245,7 +251,8 @@ loop usub16 r6, r8, r9 ; [d2 | d0] (D) usub16 r7, r10, r11 ; [d3 | d1] (D) - add r1, r1, r3 ; update src pointer + add r1, r1, r2 ; update src pointer + add r3, r3, r12 ; 
update pred pointer pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (D) pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (D) @@ -257,7 +264,7 @@ loop bne loop ldmfd sp!, {r4-r11} - mov pc, lr + bx lr ENDP |