summaryrefslogtreecommitdiff
path: root/vpx_scale/arm/neon
diff options
context:
space:
mode:
author John Koleszar <jkoleszar@google.com> 2012-11-01 17:53:44 -0700
committer John Koleszar <jkoleszar@google.com> 2012-11-02 08:44:54 -0700
commit 06f3e51da62dd7cdc4af129a9925cc85eda745e4 (patch)
tree a8327d40dda49c02ff5d5e6c19dded41946e7f1b /vpx_scale/arm/neon
parent 4b2c2b9aa4a273a23d90ddb3bbf6dfb3482e0b8f (diff)
downloadlibvpx-06f3e51da62dd7cdc4af129a9925cc85eda745e4.tar
libvpx-06f3e51da62dd7cdc4af129a9925cc85eda745e4.tar.gz
libvpx-06f3e51da62dd7cdc4af129a9925cc85eda745e4.tar.bz2
libvpx-06f3e51da62dd7cdc4af129a9925cc85eda745e4.zip
vpx_scale: sync from master
Update vpx_scale from current code in master, run style transform, fix lint warnings. Change-Id: I47eadeb5b6881d448ea3728537f9b8a5b5aac78e
Diffstat (limited to 'vpx_scale/arm/neon')
-rw-r--r-- vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm 122
-rw-r--r-- vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm 11
-rw-r--r-- vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm 500
-rw-r--r-- vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm 9
-rw-r--r-- vpx_scale/arm/neon/yv12extend_arm.c 15
5 files changed, 142 insertions, 515 deletions
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm
new file mode 100644
index 000000000..9189641b1
--- /dev/null
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm
@@ -0,0 +1,122 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_yv12_copy_y_neon|
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ INCLUDE asm_com_offsets.asm
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+;void vp8_yv12_copy_y_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc)
+|vp8_yv12_copy_y_neon| PROC
+    push            {r4 - r11, lr}
+    vpush           {d8-d15}                ; q4-q7 are overwritten below and must be restored
+    ; r0 = src_ybc, r1 = dst_ybc (see prototype above); only the Y-plane fields are read
+    ldr             r4, [r0, #yv12_buffer_config_y_height]
+    ldr             r5, [r0, #yv12_buffer_config_y_width]
+    ldr             r6, [r0, #yv12_buffer_config_y_stride]
+    ldr             r7, [r1, #yv12_buffer_config_y_stride]
+    ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
+    ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1
+    ; r4 = rows, r5 = width in bytes, r6 / r7 = src / dst stride, r2 / r3 = src / dst row base
+    ; copy two rows at one time: lr = y_height / 2 (an odd final row is not copied)
+    mov             lr, r4, lsr #1
+
+cp_src_to_dst_height_loop1
+    mov             r8, r2                  ; r8  = source row n
+    mov             r9, r3                  ; r9  = destination row n
+    add             r10, r2, r6             ; r10 = source row n + 1
+    add             r11, r3, r7             ; r11 = destination row n + 1
+    movs            r12, r5, lsr #7         ; r12 = number of 128-byte chunks per row
+    beq             extra_copy_needed       ; y_width < 128 (beq, not ble: shifted MOVS leaves V stale)
+    ; each pass moves 128 bytes of row n (r8 -> r9) and 128 bytes of row n + 1 (r10 -> r11)
+cp_src_to_dst_width_loop1
+    vld1.8          {q0, q1}, [r8]!
+    vld1.8          {q8, q9}, [r10]!
+    vld1.8          {q2, q3}, [r8]!
+    vld1.8          {q10, q11}, [r10]!
+    vld1.8          {q4, q5}, [r8]!
+    vld1.8          {q12, q13}, [r10]!
+    vld1.8          {q6, q7}, [r8]!
+    vld1.8          {q14, q15}, [r10]!
+
+    subs            r12, r12, #1            ; flags are consumed by the bne after the stores
+
+    vst1.8          {q0, q1}, [r9]!
+    vst1.8          {q8, q9}, [r11]!
+    vst1.8          {q2, q3}, [r9]!
+    vst1.8          {q10, q11}, [r11]!
+    vst1.8          {q4, q5}, [r9]!
+    vst1.8          {q12, q13}, [r11]!
+    vst1.8          {q6, q7}, [r9]!
+    vst1.8          {q14, q15}, [r11]!
+
+    bne             cp_src_to_dst_width_loop1
+
+    subs            lr, lr, #1
+    add             r2, r2, r6, lsl #1      ; advance both row bases by two strides
+    add             r3, r3, r7, lsl #1
+
+    bne             cp_src_to_dst_height_loop1
+
+extra_copy_needed
+    ands            r10, r5, #0x7f          ;check to see if extra copy is needed (r10 = y_width % 128)
+    sub             r11, r5, r10            ; r11 = bytes per row already handled by the 128-byte loop
+    ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
+    ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1
+    bne             extra_cp_src_to_dst_width1                   ; Z still reflects the ands above
+end_of_cp_src_to_dst1
+
+    vpop            {d8 - d15}
+    pop             {r4-r11, pc}
+
+;=============================
+extra_cp_src_to_dst_width1
+    add             r2, r2, r11             ; start at the first column not yet copied
+    add             r3, r3, r11
+    ; Tail copy: 16 bytes per pass from each of two rows. The inner loop exits only on
+    ; an exact zero count, so y_width % 128 has to be a multiple of 16.
+
+    mov             lr, r4, lsr #1          ; restart the row-pair counter
+extra_cp_src_to_dst_height_loop1
+    mov             r8, r2
+    mov             r9, r3
+    add             r0, r8, r6              ; r0  = second source row (src_ybc is no longer needed)
+    add             r11, r9, r7             ; r11 = second destination row
+
+    mov             r12, r10                ; r12 = tail bytes remaining in this row pair
+
+extra_cp_src_to_dst_width_loop1
+    vld1.8          {q0}, [r8]!
+    vld1.8          {q1}, [r0]!
+
+    subs            r12, r12, #16
+
+    vst1.8          {q0}, [r9]!
+    vst1.8          {q1}, [r11]!
+    bne             extra_cp_src_to_dst_width_loop1
+
+    subs            lr, lr, #1
+
+    add             r2, r2, r6, lsl #1
+    add             r3, r3, r7, lsl #1
+
+    bne             extra_cp_src_to_dst_height_loop1
+
+    b               end_of_cp_src_to_dst1
+
+    ENDP
+
+ END
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
index e6bb486c8..e55d076d9 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
@@ -18,7 +18,8 @@
AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
+;void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc,
+; YV12_BUFFER_CONFIG *dst_ybc);
|vp8_yv12_copy_frame_func_neon| PROC
push {r4 - r11, lr}
@@ -52,7 +53,8 @@ cp_src_to_dst_height_loop
mov r9, r3
add r10, r2, r6
add r11, r3, r7
- mov r12, r5, lsr #7
+ movs r12, r5, lsr #7
+ ble extra_cp_needed ; y_width < 128
cp_src_to_dst_width_loop
vld1.8 {q0, q1}, [r8]!
@@ -83,6 +85,7 @@ cp_src_to_dst_width_loop
bne cp_src_to_dst_height_loop
+extra_cp_needed
ands r10, r5, #0x7f ;check to see if extra copy is needed
sub r11, r5, r10
ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
@@ -110,7 +113,8 @@ cp_src_to_dst_height_uv_loop
mov r9, r3
add r10, r2, r6
add r11, r3, r7
- mov r12, r5, lsr #6
+ movs r12, r5, lsr #6
+ ble extra_uv_cp_needed
cp_src_to_dst_width_uv_loop
vld1.8 {q0, q1}, [r8]!
@@ -133,6 +137,7 @@ cp_src_to_dst_width_uv_loop
bne cp_src_to_dst_height_uv_loop
+extra_uv_cp_needed
ands r10, r5, #0x3f ;check to see if extra copy is needed
sub r11, r5, r10
ldr r2, [sp] ;srcptr1
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
deleted file mode 100644
index febccc2d8..000000000
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
+++ /dev/null
@@ -1,500 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_yv12_copy_frame_yonly_neon|
- EXPORT |vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon|
-
- ARM
- REQUIRE8
- PRESERVE8
-
- INCLUDE asm_com_offsets.asm
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vpxyv12_copy_frame_yonly(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
-; Note: this is VP8 function, which has border=32 and 16. Internal y_width and y_height
-; are always multiples of 16.
-
-|vp8_yv12_copy_frame_yonly_neon| PROC
- push {r4 - r11, lr}
- vpush {d8 - d15}
-
- ldr r4, [r0, #yv12_buffer_config_y_height]
- ldr r5, [r0, #yv12_buffer_config_y_width]
- ldr r6, [r0, #yv12_buffer_config_y_stride]
- ldr r7, [r1, #yv12_buffer_config_y_stride]
- ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
- ldr r3, [r1, #yv12_buffer_config_y_buffer] ;dstptr1
-
- ; copy two rows at one time
- mov lr, r4, lsr #1
-
-cp_src_to_dst_height_loop
- mov r8, r2
- mov r9, r3
- add r10, r2, r6
- add r11, r3, r7
- mov r12, r5, lsr #7
-
-cp_src_to_dst_width_loop
- vld1.8 {q0, q1}, [r8]!
- vld1.8 {q8, q9}, [r10]!
- vld1.8 {q2, q3}, [r8]!
- vld1.8 {q10, q11}, [r10]!
- vld1.8 {q4, q5}, [r8]!
- vld1.8 {q12, q13}, [r10]!
- vld1.8 {q6, q7}, [r8]!
- vld1.8 {q14, q15}, [r10]!
-
- subs r12, r12, #1
-
- vst1.8 {q0, q1}, [r9]!
- vst1.8 {q8, q9}, [r11]!
- vst1.8 {q2, q3}, [r9]!
- vst1.8 {q10, q11}, [r11]!
- vst1.8 {q4, q5}, [r9]!
- vst1.8 {q12, q13}, [r11]!
- vst1.8 {q6, q7}, [r9]!
- vst1.8 {q14, q15}, [r11]!
-
- bne cp_src_to_dst_width_loop
-
- subs lr, lr, #1
- add r2, r2, r6, lsl #1
- add r3, r3, r7, lsl #1
-
- bne cp_src_to_dst_height_loop
-
- ands r10, r5, #0x7f ;check to see if extra copy is needed
- sub r11, r5, r10
- ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
- ldr r3, [r1, #yv12_buffer_config_y_buffer] ;dstptr1
- bne extra_cp_src_to_dst_width
-end_of_cp_src_to_dst
-
-
- ;vpxyv12_extend_frame_borders_yonly
- mov r0, r1
- ;Not need to load y_width, since: y_width = y_stride - 2*border
- ldr r3, [r0, #yv12_buffer_config_border]
- ldr r1, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
- ldr r4, [r0, #yv12_buffer_config_y_height]
- ldr lr, [r0, #yv12_buffer_config_y_stride]
-
- cmp r3, #16
- beq b16_extend_frame_borders
-
-;=======================
-b32_extend_frame_borders
-;border = 32
-;=======================
-;Border copy for Y plane
-;copy the left and right most columns out
- sub r5, r1, r3 ;destptr1
- add r6, r1, lr
- sub r6, r6, r3, lsl #1 ;destptr2
- sub r2, r6, #1 ;srcptr2
-
- ;Do four rows at one time
- mov r12, r4, lsr #2
-
-copy_left_right_y
- vld1.8 {d0[], d1[]}, [r1], lr
- vld1.8 {d4[], d5[]}, [r2], lr
- vld1.8 {d8[], d9[]}, [r1], lr
- vld1.8 {d12[], d13[]}, [r2], lr
- vld1.8 {d16[], d17[]}, [r1], lr
- vld1.8 {d20[], d21[]}, [r2], lr
- vld1.8 {d24[], d25[]}, [r1], lr
- vld1.8 {d28[], d29[]}, [r2], lr
-
- vmov q1, q0
- vmov q3, q2
- vmov q5, q4
- vmov q7, q6
- vmov q9, q8
- vmov q11, q10
- vmov q13, q12
- vmov q15, q14
-
- subs r12, r12, #1
-
- vst1.8 {q0, q1}, [r5], lr
- vst1.8 {q2, q3}, [r6], lr
- vst1.8 {q4, q5}, [r5], lr
- vst1.8 {q6, q7}, [r6], lr
- vst1.8 {q8, q9}, [r5], lr
- vst1.8 {q10, q11}, [r6], lr
- vst1.8 {q12, q13}, [r5], lr
- vst1.8 {q14, q15}, [r6], lr
-
- bne copy_left_right_y
-
-;Now copy the top and bottom source lines into each line of the respective borders
- ldr r7, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
- mul r8, r3, lr
-
- mov r12, lr, lsr #7
-
- sub r6, r1, r3 ;destptr2
- sub r2, r6, lr ;srcptr2
- sub r1, r7, r3 ;srcptr1
- sub r5, r1, r8 ;destptr1
-
-copy_top_bottom_y
- vld1.8 {q0, q1}, [r1]!
- vld1.8 {q8, q9}, [r2]!
- vld1.8 {q2, q3}, [r1]!
- vld1.8 {q10, q11}, [r2]!
- vld1.8 {q4, q5}, [r1]!
- vld1.8 {q12, q13}, [r2]!
- vld1.8 {q6, q7}, [r1]!
- vld1.8 {q14, q15}, [r2]!
-
- mov r7, r3
-
-top_bottom_32
- subs r7, r7, #1
-
- vst1.8 {q0, q1}, [r5]!
- vst1.8 {q8, q9}, [r6]!
- vst1.8 {q2, q3}, [r5]!
- vst1.8 {q10, q11}, [r6]!
- vst1.8 {q4, q5}, [r5]!
- vst1.8 {q12, q13}, [r6]!
- vst1.8 {q6, q7}, [r5]!
- vst1.8 {q14, q15}, [r6]!
-
- add r5, r5, lr
- sub r5, r5, #128
- add r6, r6, lr
- sub r6, r6, #128
-
- bne top_bottom_32
-
- sub r5, r1, r8
- add r6, r2, lr
-
- subs r12, r12, #1
- bne copy_top_bottom_y
-
- mov r7, lr, lsr #4 ;check to see if extra copy is needed
- ands r7, r7, #0x7
- bne extra_top_bottom_y
-end_of_border_copy_y
-
- vpop {d8 - d15}
- pop {r4 - r11, pc}
-
-;=====================
-;extra copy part for Y
-extra_top_bottom_y
- vld1.8 {q0}, [r1]!
- vld1.8 {q2}, [r2]!
-
- mov r9, r3, lsr #3
-
-extra_top_bottom_32
- subs r9, r9, #1
-
- vst1.8 {q0}, [r5], lr
- vst1.8 {q2}, [r6], lr
- vst1.8 {q0}, [r5], lr
- vst1.8 {q2}, [r6], lr
- vst1.8 {q0}, [r5], lr
- vst1.8 {q2}, [r6], lr
- vst1.8 {q0}, [r5], lr
- vst1.8 {q2}, [r6], lr
- vst1.8 {q0}, [r5], lr
- vst1.8 {q2}, [r6], lr
- vst1.8 {q0}, [r5], lr
- vst1.8 {q2}, [r6], lr
- vst1.8 {q0}, [r5], lr
- vst1.8 {q2}, [r6], lr
- vst1.8 {q0}, [r5], lr
- vst1.8 {q2}, [r6], lr
- bne extra_top_bottom_32
-
- sub r5, r1, r8
- add r6, r2, lr
- subs r7, r7, #1
- bne extra_top_bottom_y
-
- b end_of_border_copy_y
-
-
-;=======================
-b16_extend_frame_borders
-;border = 16
-;=======================
-;Border copy for Y plane
-;copy the left and right most columns out
- sub r5, r1, r3 ;destptr1
- add r6, r1, lr
- sub r6, r6, r3, lsl #1 ;destptr2
- sub r2, r6, #1 ;srcptr2
-
- ;Do four rows at one time
- mov r12, r4, lsr #2
-
-copy_left_right_y_b16
- vld1.8 {d0[], d1[]}, [r1], lr
- vld1.8 {d4[], d5[]}, [r2], lr
- vld1.8 {d8[], d9[]}, [r1], lr
- vld1.8 {d12[], d13[]}, [r2], lr
- vld1.8 {d16[], d17[]}, [r1], lr
- vld1.8 {d20[], d21[]}, [r2], lr
- vld1.8 {d24[], d25[]}, [r1], lr
- vld1.8 {d28[], d29[]}, [r2], lr
-
- subs r12, r12, #1
-
- vst1.8 {q0}, [r5], lr
- vst1.8 {q2}, [r6], lr
- vst1.8 {q4}, [r5], lr
- vst1.8 {q6}, [r6], lr
- vst1.8 {q8}, [r5], lr
- vst1.8 {q10}, [r6], lr
- vst1.8 {q12}, [r5], lr
- vst1.8 {q14}, [r6], lr
-
- bne copy_left_right_y_b16
-
-;Now copy the top and bottom source lines into each line of the respective borders
- ldr r7, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
- mul r8, r3, lr
-
- mov r12, lr, lsr #7
-
- sub r6, r1, r3 ;destptr2
- sub r2, r6, lr ;srcptr2
- sub r1, r7, r3 ;srcptr1
- sub r5, r1, r8 ;destptr1
-
-copy_top_bottom_y_b16
- vld1.8 {q0, q1}, [r1]!
- vld1.8 {q8, q9}, [r2]!
- vld1.8 {q2, q3}, [r1]!
- vld1.8 {q10, q11}, [r2]!
- vld1.8 {q4, q5}, [r1]!
- vld1.8 {q12, q13}, [r2]!
- vld1.8 {q6, q7}, [r1]!
- vld1.8 {q14, q15}, [r2]!
-
- mov r7, r3
-
-top_bottom_16_b16
- subs r7, r7, #1
-
- vst1.8 {q0, q1}, [r5]!
- vst1.8 {q8, q9}, [r6]!
- vst1.8 {q2, q3}, [r5]!
- vst1.8 {q10, q11}, [r6]!
- vst1.8 {q4, q5}, [r5]!
- vst1.8 {q12, q13}, [r6]!
- vst1.8 {q6, q7}, [r5]!
- vst1.8 {q14, q15}, [r6]!
-
- add r5, r5, lr
- sub r5, r5, #128
- add r6, r6, lr
- sub r6, r6, #128
-
- bne top_bottom_16_b16
-
- sub r5, r1, r8
- add r6, r2, lr
-
- subs r12, r12, #1
- bne copy_top_bottom_y_b16
-
- mov r7, lr, lsr #4 ;check to see if extra copy is needed
- ands r7, r7, #0x7
- bne extra_top_bottom_y_b16
-end_of_border_copy_y_b16
-
- vpop {d8 - d15}
- pop {r4 - r11, pc}
-
-;=====================
-;extra copy part for Y
-extra_top_bottom_y_b16
- vld1.8 {q0}, [r1]!
- vld1.8 {q2}, [r2]!
-
- mov r9, r3, lsr #3
-
-extra_top_bottom_16_b16
- subs r9, r9, #1
-
- vst1.8 {q0}, [r5], lr
- vst1.8 {q2}, [r6], lr
- vst1.8 {q0}, [r5], lr
- vst1.8 {q2}, [r6], lr
- vst1.8 {q0}, [r5], lr
- vst1.8 {q2}, [r6], lr
- vst1.8 {q0}, [r5], lr
- vst1.8 {q2}, [r6], lr
- vst1.8 {q0}, [r5], lr
- vst1.8 {q2}, [r6], lr
- vst1.8 {q0}, [r5], lr
- vst1.8 {q2}, [r6], lr
- vst1.8 {q0}, [r5], lr
- vst1.8 {q2}, [r6], lr
- vst1.8 {q0}, [r5], lr
- vst1.8 {q2}, [r6], lr
- bne extra_top_bottom_16_b16
-
- sub r5, r1, r8
- add r6, r2, lr
- subs r7, r7, #1
- bne extra_top_bottom_y_b16
-
- b end_of_border_copy_y_b16
-
-;=============================
-extra_cp_src_to_dst_width
- add r2, r2, r11
- add r3, r3, r11
- add r0, r8, r6
- add r11, r9, r7
-
- mov lr, r4, lsr #1
-extra_cp_src_to_dst_height_loop
- mov r8, r2
- mov r9, r3
- add r0, r8, r6
- add r11, r9, r7
-
- mov r12, r10
-
-extra_cp_src_to_dst_width_loop
- vld1.8 {q0}, [r8]!
- vld1.8 {q1}, [r0]!
-
- subs r12, r12, #16
-
- vst1.8 {q0}, [r9]!
- vst1.8 {q1}, [r11]!
- bne extra_cp_src_to_dst_width_loop
-
- subs lr, lr, #1
-
- add r2, r2, r6, lsl #1
- add r3, r3, r7, lsl #1
-
- bne extra_cp_src_to_dst_height_loop
-
- b end_of_cp_src_to_dst
-
- ENDP
-
-;===========================================================
-;In vp8cx_pick_filter_level(), call vp8_yv12_copy_frame_yonly
-;without extend_frame_borders.
-|vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon| PROC
- push {r4 - r11, lr}
- vpush {d8-d15}
-
- ldr r4, [r0, #yv12_buffer_config_y_height]
- ldr r5, [r0, #yv12_buffer_config_y_width]
- ldr r6, [r0, #yv12_buffer_config_y_stride]
- ldr r7, [r1, #yv12_buffer_config_y_stride]
- ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
- ldr r3, [r1, #yv12_buffer_config_y_buffer] ;dstptr1
-
- ; copy two rows at one time
- mov lr, r4, lsr #1
-
-cp_src_to_dst_height_loop1
- mov r8, r2
- mov r9, r3
- add r10, r2, r6
- add r11, r3, r7
- mov r12, r5, lsr #7
-
-cp_src_to_dst_width_loop1
- vld1.8 {q0, q1}, [r8]!
- vld1.8 {q8, q9}, [r10]!
- vld1.8 {q2, q3}, [r8]!
- vld1.8 {q10, q11}, [r10]!
- vld1.8 {q4, q5}, [r8]!
- vld1.8 {q12, q13}, [r10]!
- vld1.8 {q6, q7}, [r8]!
- vld1.8 {q14, q15}, [r10]!
-
- subs r12, r12, #1
-
- vst1.8 {q0, q1}, [r9]!
- vst1.8 {q8, q9}, [r11]!
- vst1.8 {q2, q3}, [r9]!
- vst1.8 {q10, q11}, [r11]!
- vst1.8 {q4, q5}, [r9]!
- vst1.8 {q12, q13}, [r11]!
- vst1.8 {q6, q7}, [r9]!
- vst1.8 {q14, q15}, [r11]!
-
- bne cp_src_to_dst_width_loop1
-
- subs lr, lr, #1
- add r2, r2, r6, lsl #1
- add r3, r3, r7, lsl #1
-
- bne cp_src_to_dst_height_loop1
-
- ands r10, r5, #0x7f ;check to see if extra copy is needed
- sub r11, r5, r10
- ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
- ldr r3, [r1, #yv12_buffer_config_y_buffer] ;dstptr1
- bne extra_cp_src_to_dst_width1
-end_of_cp_src_to_dst1
-
- vpop {d8 - d15}
- pop {r4-r11, pc}
-
-;=============================
-extra_cp_src_to_dst_width1
- add r2, r2, r11
- add r3, r3, r11
- add r0, r8, r6
- add r11, r9, r7
-
- mov lr, r4, lsr #1
-extra_cp_src_to_dst_height_loop1
- mov r8, r2
- mov r9, r3
- add r0, r8, r6
- add r11, r9, r7
-
- mov r12, r10
-
-extra_cp_src_to_dst_width_loop1
- vld1.8 {q0}, [r8]!
- vld1.8 {q1}, [r0]!
-
- subs r12, r12, #16
-
- vst1.8 {q0}, [r9]!
- vst1.8 {q1}, [r11]!
- bne extra_cp_src_to_dst_width_loop1
-
- subs lr, lr, #1
-
- add r2, r2, r6, lsl #1
- add r3, r3, r7, lsl #1
-
- bne extra_cp_src_to_dst_height_loop1
-
- b end_of_cp_src_to_dst1
-
- ENDP
-
- END
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
index 8444b8e03..ebc4242b2 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
@@ -75,12 +75,13 @@ copy_left_right_y
mul r8, r4, lr ; plane_height * plane_stride
; copy width is plane_stride
- mov r12, lr, lsr #7 ; plane_stride / 128
+ movs r12, lr, lsr #7 ; plane_stride / 128
sub r1, r1, #32 ; src_ptr1 = y_buffer - Border
add r6, r1, r8 ; dest_ptr2 = src_ptr2 - plane_stride (src_ptr1 + (plane_height * plane_stride))
sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride
sub r5, r1, lr, asl #5 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
+ ble extra_y_copy_needed ; plane stride < 128
copy_top_bottom_y
vld1.8 {q0, q1}, [r1]!
@@ -119,6 +120,7 @@ top_bottom_32
subs r12, r12, #1
bne copy_top_bottom_y
+extra_y_copy_needed
mov r7, lr, lsr #4 ; check to see if extra copy is needed
ands r7, r7, #0x7
bne extra_top_bottom_y
@@ -184,12 +186,13 @@ copy_left_right_uv
;Now copy the top and bottom source lines into each line of the respective borders
mov r1, r7
mul r8, r4, lr ; plane_height * plane_stride
- mov r12, lr, lsr #6 ; plane_stride / 64
+ movs r12, lr, lsr #6 ; plane_stride / 64
sub r1, r1, #16 ; src_ptr1 = u_buffer - Border
add r6, r1, r8 ; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride)
sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride
sub r5, r1, lr, asl #4 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
+ ble extra_uv_copy_needed ; plane_stride < 64
copy_top_bottom_uv
vld1.8 {q0, q1}, [r1]!
@@ -219,7 +222,7 @@ top_bottom_16
subs r12, r12, #1
bne copy_top_bottom_uv
-
+extra_uv_copy_needed
mov r7, lr, lsr #3 ; check to see if extra copy is needed
ands r7, r7, #0x7
bne extra_top_bottom_uv
diff --git a/vpx_scale/arm/neon/yv12extend_arm.c b/vpx_scale/arm/neon/yv12extend_arm.c
index 94f499cf7..eabd4951a 100644
--- a/vpx_scale/arm/neon/yv12extend_arm.c
+++ b/vpx_scale/arm/neon/yv12extend_arm.c
@@ -8,17 +8,14 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "./vpx_rtcd.h"
-#include "vpx_scale/yv12config.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vpx_scale/vpxscale.h"
+extern void vp8_yv12_copy_frame_func_neon(struct yv12_buffer_config *src_ybc,
+ struct yv12_buffer_config *dst_ybc);
-void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
-
-void
-vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) {
+void vp8_yv12_copy_frame_neon(struct yv12_buffer_config *src_ybc,
+ struct yv12_buffer_config *dst_ybc) {
vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc);
- // printf("Border:%d; plane_stride:%d; plane_height:%d; plane_width:%d\n",dst_ybc->border,dst_ybc->y_stride,dst_ybc->y_height,dst_ybc->y_width);
- vp8_yv12_extend_frame_borders_ptr(dst_ybc);
+ vp8_yv12_extend_frame_borders_neon(dst_ybc);
}