summaryrefslogtreecommitdiff
path: root/vp9/common/arm
diff options
context:
space:
mode:
Diffstat (limited to 'vp9/common/arm')
-rw-r--r--vp9/common/arm/neon/vp9_idct16x16_neon.c40
-rw-r--r--vp9/common/arm/neon/vp9_short_idct16x16_1_add_neon.asm8
-rw-r--r--vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm32
3 files changed, 40 insertions, 40 deletions
diff --git a/vp9/common/arm/neon/vp9_idct16x16_neon.c b/vp9/common/arm/neon/vp9_idct16x16_neon.c
index fb7b5cdc4..33aa4e001 100644
--- a/vp9/common/arm/neon/vp9_idct16x16_neon.c
+++ b/vp9/common/arm/neon/vp9_idct16x16_neon.c
@@ -11,19 +11,19 @@
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
-extern void vp9_short_idct16x16_add_neon_pass1(int16_t *input,
+extern void vp9_idct16x16_256_add_neon_pass1(int16_t *input,
int16_t *output,
int output_stride);
-extern void vp9_short_idct16x16_add_neon_pass2(int16_t *src,
+extern void vp9_idct16x16_256_add_neon_pass2(int16_t *src,
int16_t *output,
int16_t *pass1Output,
int16_t skip_adding,
uint8_t *dest,
int dest_stride);
-extern void vp9_short_idct16x16_10_add_neon_pass1(int16_t *input,
+extern void vp9_idct16x16_10_add_neon_pass1(int16_t *input,
int16_t *output,
int output_stride);
-extern void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
+extern void vp9_idct16x16_10_add_neon_pass2(int16_t *src,
int16_t *output,
int16_t *pass1Output,
int16_t skip_adding,
@@ -34,7 +34,7 @@ extern void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
extern void vp9_push_neon(int64_t *store);
extern void vp9_pop_neon(int64_t *store);
-void vp9_short_idct16x16_add_neon(int16_t *input,
+void vp9_idct16x16_256_add_neon(int16_t *input,
uint8_t *dest, int dest_stride) {
int64_t store_reg[8];
int16_t pass1_output[16*16] = {0};
@@ -46,12 +46,12 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
- vp9_short_idct16x16_add_neon_pass1(input, pass1_output, 8);
+ vp9_idct16x16_256_add_neon_pass1(input, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7
// which will be saved into row_idct_output.
- vp9_short_idct16x16_add_neon_pass2(input+1,
+ vp9_idct16x16_256_add_neon_pass2(input+1,
row_idct_output,
pass1_output,
0,
@@ -61,12 +61,12 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
/* Parallel idct on the lower 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
- vp9_short_idct16x16_add_neon_pass1(input+8*16, pass1_output, 8);
+ vp9_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7
// which will be saved into row_idct_output.
- vp9_short_idct16x16_add_neon_pass2(input+8*16+1,
+ vp9_idct16x16_256_add_neon_pass2(input+8*16+1,
row_idct_output+8,
pass1_output,
0,
@@ -76,12 +76,12 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
/* Parallel idct on the left 8 columns */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
- vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8);
+ vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7.
// Then add the result to the destination data.
- vp9_short_idct16x16_add_neon_pass2(row_idct_output+1,
+ vp9_idct16x16_256_add_neon_pass2(row_idct_output+1,
row_idct_output,
pass1_output,
1,
@@ -91,12 +91,12 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
/* Parallel idct on the right 8 columns */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
- vp9_short_idct16x16_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
+ vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7.
// Then add the result to the destination data.
- vp9_short_idct16x16_add_neon_pass2(row_idct_output+8*16+1,
+ vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,
row_idct_output+8,
pass1_output,
1,
@@ -109,7 +109,7 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
return;
}
-void vp9_short_idct16x16_10_add_neon(int16_t *input,
+void vp9_idct16x16_10_add_neon(int16_t *input,
uint8_t *dest, int dest_stride) {
int64_t store_reg[8];
int16_t pass1_output[16*16] = {0};
@@ -121,12 +121,12 @@ void vp9_short_idct16x16_10_add_neon(int16_t *input,
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
- vp9_short_idct16x16_10_add_neon_pass1(input, pass1_output, 8);
+ vp9_idct16x16_10_add_neon_pass1(input, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7
// which will be saved into row_idct_output.
- vp9_short_idct16x16_10_add_neon_pass2(input+1,
+ vp9_idct16x16_10_add_neon_pass2(input+1,
row_idct_output,
pass1_output,
0,
@@ -138,12 +138,12 @@ void vp9_short_idct16x16_10_add_neon(int16_t *input,
/* Parallel idct on the left 8 columns */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
- vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8);
+ vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7.
// Then add the result to the destination data.
- vp9_short_idct16x16_add_neon_pass2(row_idct_output+1,
+ vp9_idct16x16_256_add_neon_pass2(row_idct_output+1,
row_idct_output,
pass1_output,
1,
@@ -153,12 +153,12 @@ void vp9_short_idct16x16_10_add_neon(int16_t *input,
/* Parallel idct on the right 8 columns */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
- vp9_short_idct16x16_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
+ vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7.
// Then add the result to the destination data.
- vp9_short_idct16x16_add_neon_pass2(row_idct_output+8*16+1,
+ vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,
row_idct_output+8,
pass1_output,
1,
diff --git a/vp9/common/arm/neon/vp9_short_idct16x16_1_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct16x16_1_add_neon.asm
index cf5c8f7d0..b1fd21bb6 100644
--- a/vp9/common/arm/neon/vp9_short_idct16x16_1_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_short_idct16x16_1_add_neon.asm
@@ -8,21 +8,21 @@
;
- EXPORT |vp9_short_idct16x16_1_add_neon|
+ EXPORT |vp9_idct16x16_1_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp9_short_idct16x16_1_add_neon(int16_t *input, uint8_t *dest,
+;void vp9_idct16x16_1_add_neon(int16_t *input, uint8_t *dest,
; int dest_stride)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride)
-|vp9_short_idct16x16_1_add_neon| PROC
+|vp9_idct16x16_1_add_neon| PROC
ldrsh r0, [r0]
; generate cospi_16_64 = 11585
@@ -193,6 +193,6 @@
vst1.64 {d31}, [r12], r2
bx lr
- ENDP ; |vp9_short_idct16x16_1_add_neon|
+ ENDP ; |vp9_idct16x16_1_add_neon|
END
diff --git a/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
index df2a0526c..a13c0d04b 100644
--- a/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
@@ -8,10 +8,10 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_short_idct16x16_add_neon_pass1|
- EXPORT |vp9_short_idct16x16_add_neon_pass2|
- EXPORT |vp9_short_idct16x16_10_add_neon_pass1|
- EXPORT |vp9_short_idct16x16_10_add_neon_pass2|
+ EXPORT |vp9_idct16x16_256_add_neon_pass1|
+ EXPORT |vp9_idct16x16_256_add_neon_pass2|
+ EXPORT |vp9_idct16x16_10_add_neon_pass1|
+ EXPORT |vp9_idct16x16_10_add_neon_pass2|
ARM
REQUIRE8
PRESERVE8
@@ -36,7 +36,7 @@
MEND
AREA Block, CODE, READONLY ; name this block of code
-;void |vp9_short_idct16x16_add_neon_pass1|(int16_t *input,
+;void |vp9_idct16x16_256_add_neon_pass1|(int16_t *input,
; int16_t *output, int output_stride)
;
; r0 int16_t input
@@ -46,7 +46,7 @@
; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output
; will be stored back into q8-q15 registers. This function will touch q0-q7
; registers and use them as buffer during calculation.
-|vp9_short_idct16x16_add_neon_pass1| PROC
+|vp9_idct16x16_256_add_neon_pass1| PROC
; TODO(hkuang): Find a better way to load the elements.
; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15
@@ -273,9 +273,9 @@
vst1.64 {d31}, [r1], r2
bx lr
- ENDP ; |vp9_short_idct16x16_add_neon_pass1|
+ ENDP ; |vp9_idct16x16_256_add_neon_pass1|
-;void vp9_short_idct16x16_add_neon_pass2(int16_t *src,
+;void vp9_idct16x16_256_add_neon_pass2(int16_t *src,
; int16_t *output,
; int16_t *pass1Output,
; int16_t skip_adding,
@@ -292,7 +292,7 @@
; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output
; will be stored back into q8-q15 registers. This function will touch q0-q7
; registers and use them as buffer during calculation.
-|vp9_short_idct16x16_add_neon_pass2| PROC
+|vp9_idct16x16_256_add_neon_pass2| PROC
push {r3-r9}
; TODO(hkuang): Find a better way to load the elements.
@@ -784,9 +784,9 @@ skip_adding_dest
end_idct16x16_pass2
pop {r3-r9}
bx lr
- ENDP ; |vp9_short_idct16x16_add_neon_pass2|
+ ENDP ; |vp9_idct16x16_256_add_neon_pass2|
-;void |vp9_short_idct16x16_10_add_neon_pass1|(int16_t *input,
+;void |vp9_idct16x16_10_add_neon_pass1|(int16_t *input,
; int16_t *output, int output_stride)
;
; r0 int16_t input
@@ -796,7 +796,7 @@ end_idct16x16_pass2
; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output
; will be stored back into q8-q15 registers. This function will touch q0-q7
; registers and use them as buffer during calculation.
-|vp9_short_idct16x16_10_add_neon_pass1| PROC
+|vp9_idct16x16_10_add_neon_pass1| PROC
; TODO(hkuang): Find a better way to load the elements.
; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15
@@ -905,9 +905,9 @@ end_idct16x16_pass2
vst1.64 {d31}, [r1], r2
bx lr
- ENDP ; |vp9_short_idct16x16_10_add_neon_pass1|
+ ENDP ; |vp9_idct16x16_10_add_neon_pass1|
-;void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
+;void vp9_idct16x16_10_add_neon_pass2(int16_t *src,
; int16_t *output,
; int16_t *pass1Output,
; int16_t skip_adding,
@@ -924,7 +924,7 @@ end_idct16x16_pass2
; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output
; will be stored back into q8-q15 registers. This function will touch q0-q7
; registers and use them as buffer during calculation.
-|vp9_short_idct16x16_10_add_neon_pass2| PROC
+|vp9_idct16x16_10_add_neon_pass2| PROC
push {r3-r9}
; TODO(hkuang): Find a better way to load the elements.
@@ -1175,5 +1175,5 @@ end_idct16x16_pass2
end_idct10_16x16_pass2
pop {r3-r9}
bx lr
- ENDP ; |vp9_short_idct16x16_10_add_neon_pass2|
+ ENDP ; |vp9_idct16x16_10_add_neon_pass2|
END