summary | refs | log | tree | commit | diff
path: root/vp8/common
diff options
context:
space:
mode:
author: Johann <johannkoenig@google.com>, 2011-05-05 06:16:21 -0700
committer: Code Review <code-review@webmproject.org>, 2011-05-05 06:16:21 -0700
commit: ca5c1b17a2f651639e5f165177acd0178985a761 (patch)
tree: 865534aefb1e0100fb50262ac43cd8b61b0f6976 /vp8/common
parent: aeb86d615c87d80cfd4127c915812e1299f80a33 (diff)
parent: a6aa389d2f2f3f341c533a1a8d49e12dd39614a4 (diff)
download: libvpx-ca5c1b17a2f651639e5f165177acd0178985a761.tar
libvpx-ca5c1b17a2f651639e5f165177acd0178985a761.tar.gz
libvpx-ca5c1b17a2f651639e5f165177acd0178985a761.tar.bz2
libvpx-ca5c1b17a2f651639e5f165177acd0178985a761.zip
Merge "Loopfilter NEON: Use VMOV for constant vectors instead of VLD."
Diffstat (limited to 'vp8/common')
-rw-r--r-- vp8/common/arm/neon/loopfilter_neon.asm | 15
-rw-r--r-- vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm | 16
-rw-r--r-- vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm | 16
-rw-r--r-- vp8/common/arm/neon/mbloopfilter_neon.asm | 27
4 files changed, 19 insertions, 55 deletions
diff --git a/vp8/common/arm/neon/loopfilter_neon.asm b/vp8/common/arm/neon/loopfilter_neon.asm
index d3a79f640..e73dd6401 100644
--- a/vp8/common/arm/neon/loopfilter_neon.asm
+++ b/vp8/common/arm/neon/loopfilter_neon.asm
@@ -308,7 +308,6 @@
; q9 q2
; q10 q3
|vp8_loop_filter_neon| PROC
- ldr r12, _lf_coeff_
; vp8_filter_mask
vabd.u8 q11, q3, q4 ; abs(p3 - p2)
@@ -339,7 +338,7 @@
vqadd.u8 q9, q9, q2 ; a = b + a
vcge.u8 q9, q0, q9 ; (a > flimit * 2 + limit) * -1
- vld1.u8 {q0}, [r12]!
+ vmov.u8 q0, #0x80 ; 0x80
; vp8_filter() function
; convert to signed
@@ -348,7 +347,7 @@
veor q5, q5, q0 ; ps1
veor q8, q8, q0 ; qs1
- vld1.u8 {q10}, [r12]!
+ vmov.u8 q10, #3 ; #3
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
vsubl.s8 q11, d15, d13
@@ -367,7 +366,7 @@
vaddw.s8 q2, q2, d2
vaddw.s8 q11, q11, d3
- vld1.u8 {q9}, [r12]!
+ vmov.u8 q9, #4 ; #4
; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
vqmovn.s16 d2, q2
@@ -399,12 +398,4 @@
;-----------------
-_lf_coeff_
- DCD lf_coeff
-lf_coeff
- DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
- DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
- DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
- DCD 0x01010101, 0x01010101, 0x01010101, 0x01010101
-
END
diff --git a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
index 5fe7e7e6d..7c5ea3644 100644
--- a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
+++ b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
@@ -22,20 +22,19 @@
; r1 int p, //pitch
; r2 const signed char *flimit,
; r3 const signed char *limit,
-; stack(r4) const signed char *thresh,
+; stack(r4) const signed char *thresh (unused)
; //stack(r5) int count --unused
|vp8_loop_filter_simple_horizontal_edge_neon| PROC
sub r0, r0, r1, lsl #1 ; move src pointer down by 2 lines
- ldr r12, _lfhy_coeff_
vld1.u8 {q5}, [r0], r1 ; p1
vld1.s8 {d2[], d3[]}, [r2] ; flimit
vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
vld1.u8 {q6}, [r0], r1 ; p0
- vld1.u8 {q0}, [r12]! ; 0x80
+ vmov.u8 q0, #0x80 ; 0x80
vld1.u8 {q7}, [r0], r1 ; q0
- vld1.u8 {q10}, [r12]! ; 0x03
+ vmov.u8 q10, #0x03 ; 0x03
vld1.u8 {q8}, [r0] ; q1
;vp8_filter_mask() function
@@ -66,7 +65,7 @@
vadd.s16 q11, q2, q2 ; 3 * ( qs0 - ps0)
vadd.s16 q12, q3, q3
- vld1.u8 {q9}, [r12]! ; 0x04
+ vmov.u8 q9, #0x04 ; 0x04
vadd.s16 q2, q2, q11
vadd.s16 q3, q3, q12
@@ -105,11 +104,4 @@
;-----------------
-_lfhy_coeff_
- DCD lfhy_coeff
-lfhy_coeff
- DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
- DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
- DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
-
END
diff --git a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
index c30378b9c..a7f7b690e 100644
--- a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
+++ b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
@@ -22,7 +22,7 @@
; r1 int p, //pitch
; r2 const signed char *flimit,
; r3 const signed char *limit,
-; stack(r4) const signed char *thresh,
+; stack(r4) const signed char *thresh (unused)
; //stack(r5) int count --unused
|vp8_loop_filter_simple_vertical_edge_neon| PROC
@@ -32,7 +32,6 @@
vld1.s8 {d2[], d3[]}, [r2] ; flimit
vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
vld4.8 {d6[1], d7[1], d8[1], d9[1]}, [r0], r1
- ldr r12, _vlfy_coeff_
vld4.8 {d6[2], d7[2], d8[2], d9[2]}, [r0], r1
vld4.8 {d6[3], d7[3], d8[3], d9[3]}, [r0], r1
vld4.8 {d6[4], d7[4], d8[4], d9[4]}, [r0], r1
@@ -41,11 +40,11 @@
vld4.8 {d6[7], d7[7], d8[7], d9[7]}, [r0], r1
vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
- vld1.u8 {q0}, [r12]! ; 0x80
+ vmov.u8 q0, #0x80 ; 0x80
vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
- vld1.u8 {q11}, [r12]! ; 0x03
+ vmov.u8 q11, #0x03 ; 0x03
vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
- vld1.u8 {q12}, [r12]! ; 0x04
+ vmov.u8 q12, #0x04 ; 0x04
vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
@@ -146,11 +145,4 @@
;-----------------
-_vlfy_coeff_
- DCD vlfy_coeff
-vlfy_coeff
- DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
- DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
- DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
-
END
diff --git a/vp8/common/arm/neon/mbloopfilter_neon.asm b/vp8/common/arm/neon/mbloopfilter_neon.asm
index 981adffd1..72f0f9271 100644
--- a/vp8/common/arm/neon/mbloopfilter_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilter_neon.asm
@@ -372,7 +372,6 @@
; q10 q3
|vp8_mbloop_filter_neon| PROC
- ldr r12, _mblf_coeff_
; vp8_filter_mask
vabd.u8 q11, q3, q4 ; abs(p3 - p2)
@@ -396,7 +395,7 @@
vld1.s8 {d4[], d5[]}, [r2] ; flimit
- vld1.u8 {q0}, [r12]!
+ vmov.u8 q0, #0x80 ; 0x80
vadd.u8 q2, q2, q2 ; flimit * 2
vadd.u8 q2, q2, q1 ; flimit * 2 + limit
@@ -431,12 +430,12 @@
vadd.s16 q2, q2, q10
vadd.s16 q13, q13, q11
- vld1.u8 {q12}, [r12]! ; #3
+ vmov.u8 q12, #3 ; #3
vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
vaddw.s8 q13, q13, d3
- vld1.u8 {q11}, [r12]! ; #4
+ vmov.u8 q11, #4 ; #4
; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
vqmovn.s16 d2, q2
@@ -444,16 +443,16 @@
vand q1, q1, q15 ; vp8_filter &= mask
- vld1.u8 {q15}, [r12]! ; #63
- ;
+ vmov.u16 q15, #63 ; #63
+
vand q13, q1, q14 ; Filter2 &= hev
- vld1.u8 {d7}, [r12]! ; #9
+ vmov.u8 d7, #9 ; #9
vqadd.s8 q2, q13, q11 ; Filter1 = clamp(Filter2+4)
vqadd.s8 q13, q13, q12 ; Filter2 = clamp(Filter2+3)
- vld1.u8 {d6}, [r12]! ; #18
+ vmov.u8 d6, #18 ; #18
vshr.s8 q2, q2, #3 ; Filter1 >>= 3
vshr.s8 q13, q13, #3 ; Filter2 >>= 3
@@ -463,7 +462,7 @@
vqsub.s8 q7, q7, q2 ; qs0 = clamp(qs0 - Filter1)
- vld1.u8 {d5}, [r12]! ; #27
+ vmov.u8 d5, #27 ; #27
vqadd.s8 q6, q6, q13 ; ps0 = clamp(ps0 + Filter2)
@@ -507,14 +506,4 @@
;-----------------
-_mblf_coeff_
- DCD mblf_coeff
-mblf_coeff
- DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
- DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
- DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
- DCD 0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
- DCD 0x09090909, 0x09090909, 0x12121212, 0x12121212
- DCD 0x1b1b1b1b, 0x1b1b1b1b
-
END