summaryrefslogtreecommitdiff
path: root/vp8/encoder/arm/armv6
diff options
context:
space:
mode:
authorScott LaVarnway <slavarnway@google.com>2011-11-15 12:53:01 -0500
committerScott LaVarnway <slavarnway@google.com>2011-11-15 12:53:01 -0500
commitedd98b7310e0338708abfbd0826688222e1e6f57 (patch)
tree340f14f39c1838ebe5c0072c982f7411eed14914 /vp8/encoder/arm/armv6
parent2a6daa72f0f04ee674f14a7d5c7ee7693fc87aa2 (diff)
downloadlibvpx-edd98b7310e0338708abfbd0826688222e1e6f57.tar
libvpx-edd98b7310e0338708abfbd0826688222e1e6f57.tar.gz
libvpx-edd98b7310e0338708abfbd0826688222e1e6f57.tar.bz2
libvpx-edd98b7310e0338708abfbd0826688222e1e6f57.zip
Added predictor stride argument(s) to subtract functions
Patch set 2: 64-bit build fix. Patch set 3: 64-bit crash fix [Tero]. Patch set 4: Updated ARMv6 and NEON assembly; also added minor NEON optimizations to the subtract functions. Patch set 5: x86 stride bug fix. Change-Id: I1fcca93e90c89b89ddc204e1c18f208682675c15
Diffstat (limited to 'vp8/encoder/arm/armv6')
-rw-r--r--vp8/encoder/arm/armv6/vp8_subtract_armv6.asm63
1 file changed, 35 insertions(+), 28 deletions(-)
diff --git a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm b/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
index 0ca74387b..f329f8f73 100644
--- a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
@@ -72,22 +72,23 @@ loop_block
; r0 short *diff
; r1 unsigned char *usrc
; r2 unsigned char *vsrc
-; r3 unsigned char *pred
-; stack int stride
+; r3 int src_stride
+; sp unsigned char *upred
+; sp unsigned char *vpred
+; sp int pred_stride
|vp8_subtract_mbuv_armv6| PROC
- stmfd sp!, {r4-r12, lr}
+ stmfd sp!, {r4-r11}
add r0, r0, #512 ; set *diff point to Cb
- add r3, r3, #256 ; set *pred point to Cb
-
mov r4, #8 ; loop count
- ldr r5, [sp, #40] ; stride
+ ldr r5, [sp, #32] ; upred
+ ldr r12, [sp, #40] ; pred_stride
; Subtract U block
loop_u
- ldr r6, [r1] ; src (A)
- ldr r7, [r3], #4 ; pred (A)
+ ldr r6, [r1] ; usrc (A)
+ ldr r7, [r5] ; upred (A)
uxtb16 r8, r6 ; [s2 | s0] (A)
uxtb16 r9, r7 ; [p2 | p0] (A)
@@ -97,8 +98,8 @@ loop_u
usub16 r6, r8, r9 ; [d2 | d0] (A)
usub16 r7, r10, r11 ; [d3 | d1] (A)
- ldr r10, [r1, #4] ; src (B)
- ldr r11, [r3], #4 ; pred (B)
+ ldr r10, [r1, #4] ; usrc (B)
+ ldr r11, [r5, #4] ; upred (B)
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
@@ -114,7 +115,8 @@ loop_u
usub16 r6, r8, r9 ; [d2 | d0] (B)
usub16 r7, r10, r11 ; [d3 | d1] (B)
- add r1, r1, r5 ; update usrc pointer
+ add r1, r1, r3 ; update usrc pointer
+ add r5, r5, r12 ; update upred pointer
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
@@ -125,12 +127,13 @@ loop_u
bne loop_u
+ ldr r5, [sp, #36] ; vpred
mov r4, #8 ; loop count
; Subtract V block
loop_v
- ldr r6, [r2] ; src (A)
- ldr r7, [r3], #4 ; pred (A)
+ ldr r6, [r2] ; vsrc (A)
+ ldr r7, [r5] ; vpred (A)
uxtb16 r8, r6 ; [s2 | s0] (A)
uxtb16 r9, r7 ; [p2 | p0] (A)
@@ -140,8 +143,8 @@ loop_v
usub16 r6, r8, r9 ; [d2 | d0] (A)
usub16 r7, r10, r11 ; [d3 | d1] (A)
- ldr r10, [r2, #4] ; src (B)
- ldr r11, [r3], #4 ; pred (B)
+ ldr r10, [r2, #4] ; vsrc (B)
+ ldr r11, [r5, #4] ; vpred (B)
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
@@ -157,7 +160,8 @@ loop_v
usub16 r6, r8, r9 ; [d2 | d0] (B)
usub16 r7, r10, r11 ; [d3 | d1] (B)
- add r2, r2, r5 ; update vsrc pointer
+ add r2, r2, r3 ; update vsrc pointer
+ add r5, r5, r12 ; update vpred pointer
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
@@ -168,23 +172,25 @@ loop_v
bne loop_v
- ldmfd sp!, {r4-r12, pc}
+ ldmfd sp!, {r4-r11}
+ bx lr
ENDP
; r0 short *diff
; r1 unsigned char *src
-; r2 unsigned char *pred
-; r3 int stride
+; r2 int src_stride
+; r3 unsigned char *pred
+; sp int pred_stride
|vp8_subtract_mby_armv6| PROC
stmfd sp!, {r4-r11}
-
+ ldr r12, [sp, #32] ; pred_stride
mov r4, #16
loop
ldr r6, [r1] ; src (A)
- ldr r7, [r2], #4 ; pred (A)
+ ldr r7, [r3] ; pred (A)
uxtb16 r8, r6 ; [s2 | s0] (A)
uxtb16 r9, r7 ; [p2 | p0] (A)
@@ -195,7 +201,7 @@ loop
usub16 r7, r10, r11 ; [d3 | d1] (A)
ldr r10, [r1, #4] ; src (B)
- ldr r11, [r2], #4 ; pred (B)
+ ldr r11, [r3, #4] ; pred (B)
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
@@ -212,7 +218,7 @@ loop
usub16 r7, r10, r11 ; [d3 | d1] (B)
ldr r10, [r1, #8] ; src (C)
- ldr r11, [r2], #4 ; pred (C)
+ ldr r11, [r3, #8] ; pred (C)
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
@@ -229,10 +235,10 @@ loop
usub16 r7, r10, r11 ; [d3 | d1] (C)
ldr r10, [r1, #12] ; src (D)
- ldr r11, [r2], #4 ; pred (D)
+ ldr r11, [r3, #12] ; pred (D)
- pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (C)
- pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (C)
+ pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (C)
+ pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (C)
str r8, [r0], #4 ; diff (C)
uxtb16 r8, r10 ; [s2 | s0] (D)
@@ -245,7 +251,8 @@ loop
usub16 r6, r8, r9 ; [d2 | d0] (D)
usub16 r7, r10, r11 ; [d3 | d1] (D)
- add r1, r1, r3 ; update src pointer
+ add r1, r1, r2 ; update src pointer
+ add r3, r3, r12 ; update pred pointer
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (D)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (D)
@@ -257,7 +264,7 @@ loop
bne loop
ldmfd sp!, {r4-r11}
- mov pc, lr
+ bx lr
ENDP