summaryrefslogtreecommitdiff
path: root/vp8/encoder
diff options
context:
space:
mode:
Diffstat (limited to 'vp8/encoder')
-rw-r--r--vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm7
-rw-r--r--vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm12
-rw-r--r--vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm6
-rw-r--r--vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm6
-rw-r--r--vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm6
-rw-r--r--vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm6
-rw-r--r--vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm6
7 files changed, 48 insertions, 1 deletions
diff --git a/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm b/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
index a9060d76f..000805d4f 100644
--- a/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
@@ -27,8 +27,11 @@
|vp8_mse16x16_armv6| PROC
push {r4-r9, lr}
- mov r12, #16 ; set loop counter to 16 (=block height)
+ pld [r0, r1, lsl #0]
+ pld [r2, r3, lsl #0]
+
+ mov r12, #16 ; set loop counter to 16 (=block height)
mov r4, #0 ; initialize sse = 0
loop
@@ -39,8 +42,10 @@ loop
mov lr, #0 ; constant zero
usub8 r8, r5, r6 ; calculate difference
+ pld [r0, r1, lsl #1]
sel r7, r8, lr ; select bytes with positive difference
usub8 r9, r6, r5 ; calculate difference with reversed operands
+ pld [r2, r3, lsl #1]
sel r8, r9, lr ; select bytes with negative difference
; calculate partial sums
diff --git a/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm b/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm
index c759f7c65..1b4f5cf3b 100644
--- a/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm
@@ -24,6 +24,12 @@
; stack max_sad (not used)
|vp8_sad16x16_armv6| PROC
stmfd sp!, {r4-r12, lr}
+
+ pld [r0, r1, lsl #0]
+ pld [r2, r3, lsl #0]
+ pld [r0, r1, lsl #1]
+ pld [r2, r3, lsl #1]
+
mov r4, #0 ; sad = 0;
mov r5, #8 ; loop count
@@ -45,6 +51,9 @@ loop
add r0, r0, r1 ; set src pointer to next row
add r2, r2, r3 ; set dst pointer to next row
+ pld [r0, r1, lsl #1]
+ pld [r2, r3, lsl #1]
+
usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels
usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels
@@ -70,6 +79,9 @@ loop
usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels
usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels
+ pld [r0, r1, lsl #1]
+ pld [r2, r3, lsl #1]
+
subs r5, r5, #1 ; decrement loop counter
add r4, r4, r8 ; add partial sad values
diff --git a/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
index 988376390..5feaa8bc2 100644
--- a/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
@@ -25,6 +25,10 @@
|vp8_variance16x16_armv6| PROC
stmfd sp!, {r4-r12, lr}
+
+ pld [r0, r1, lsl #0]
+ pld [r2, r3, lsl #0]
+
mov r8, #0 ; initialize sum = 0
mov r11, #0 ; initialize sse = 0
mov r12, #16 ; set loop counter to 16 (=block height)
@@ -37,8 +41,10 @@ loop
mov lr, #0 ; constant zero
usub8 r6, r4, r5 ; calculate difference
+ pld [r0, r1, lsl #1]
sel r7, r6, lr ; select bytes with positive difference
usub8 r9, r5, r4 ; calculate difference with reversed operands
+ pld [r2, r3, lsl #1]
sel r6, r9, lr ; select bytes with negative difference
; calculate partial sums
diff --git a/vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm
index 7daecb925..adc353d20 100644
--- a/vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm
@@ -23,6 +23,10 @@
|vp8_variance8x8_armv6| PROC
push {r4-r10, lr}
+
+ pld [r0, r1, lsl #0]
+ pld [r2, r3, lsl #0]
+
mov r12, #8 ; set loop counter to 8 (=block height)
mov r4, #0 ; initialize sum = 0
mov r5, #0 ; initialize sse = 0
@@ -35,8 +39,10 @@ loop
mov lr, #0 ; constant zero
usub8 r8, r6, r7 ; calculate difference
+ pld [r0, r1, lsl #1]
sel r10, r8, lr ; select bytes with positive difference
usub8 r9, r7, r6 ; calculate difference with reversed operands
+ pld [r2, r3, lsl #1]
sel r8, r9, lr ; select bytes with negative difference
; calculate partial sums
diff --git a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
index 2350f3e8b..1b5489795 100644
--- a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
@@ -25,6 +25,10 @@
|vp8_variance_halfpixvar16x16_h_armv6| PROC
stmfd sp!, {r4-r12, lr}
+
+ pld [r0, r1, lsl #0]
+ pld [r2, r3, lsl #0]
+
mov r8, #0 ; initialize sum = 0
ldr r10, c80808080
mov r11, #0 ; initialize sse = 0
@@ -42,8 +46,10 @@ loop
eor r4, r4, r10
usub8 r6, r4, r5 ; calculate difference
+ pld [r0, r1, lsl #1]
sel r7, r6, lr ; select bytes with positive difference
usub8 r6, r5, r4 ; calculate difference with reversed operands
+ pld [r2, r3, lsl #1]
sel r6, r6, lr ; select bytes with negative difference
; calculate partial sums
diff --git a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
index f9ae3b7e2..38c55edf8 100644
--- a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
@@ -25,6 +25,10 @@
|vp8_variance_halfpixvar16x16_hv_armv6| PROC
stmfd sp!, {r4-r12, lr}
+
+ pld [r0, r1, lsl #0]
+ pld [r2, r3, lsl #0]
+
mov r8, #0 ; initialize sum = 0
ldr r10, c80808080
mov r11, #0 ; initialize sse = 0
@@ -53,8 +57,10 @@ loop
eor r4, r4, r10
usub8 r6, r4, r5 ; calculate difference
+ pld [r0, r1, lsl #1]
sel r7, r6, lr ; select bytes with positive difference
usub8 r6, r5, r4 ; calculate difference with reversed operands
+ pld [r2, r3, lsl #1]
sel r6, r6, lr ; select bytes with negative difference
; calculate partial sums
diff --git a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
index 9e0a03548..22a50eb00 100644
--- a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
@@ -25,6 +25,10 @@
|vp8_variance_halfpixvar16x16_v_armv6| PROC
stmfd sp!, {r4-r12, lr}
+
+ pld [r0, r1, lsl #0]
+ pld [r2, r3, lsl #0]
+
mov r8, #0 ; initialize sum = 0
ldr r10, c80808080
mov r11, #0 ; initialize sse = 0
@@ -43,8 +47,10 @@ loop
eor r4, r4, r10
usub8 r6, r4, r5 ; calculate difference
+ pld [r0, r1, lsl #1]
sel r7, r6, lr ; select bytes with positive difference
usub8 r6, r5, r4 ; calculate difference with reversed operands
+ pld [r2, r3, lsl #1]
sel r6, r6, lr ; select bytes with negative difference
; calculate partial sums