diff options
Diffstat (limited to 'vp8/encoder')
-rw-r--r-- | vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm | 262 | ||||
-rw-r--r-- | vp8/encoder/arm/armv6/walsh_v6.asm | 212 | ||||
-rw-r--r-- | vp8/encoder/arm/dct_arm.c | 21 |
3 files changed, 0 insertions, 495 deletions
diff --git a/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm b/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm deleted file mode 100644 index 8034c1db9..000000000 --- a/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm +++ /dev/null @@ -1,262 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - EXPORT |vp8_short_fdct4x4_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY -; void vp8_short_fdct4x4_c(short *input, short *output, int pitch) -|vp8_short_fdct4x4_armv6| PROC - - stmfd sp!, {r4 - r12, lr} - - ; PART 1 - - ; coeffs 0-3 - ldrd r4, r5, [r0] ; [i1 | i0] [i3 | i2] - - ldr r10, c7500 - ldr r11, c14500 - ldr r12, c0x22a453a0 ; [2217*4 | 5352*4] - ldr lr, c0x00080008 - ror r5, r5, #16 ; [i2 | i3] - - qadd16 r6, r4, r5 ; [i1+i2 | i0+i3] = [b1 | a1] without shift - qsub16 r7, r4, r5 ; [i1-i2 | i0-i3] = [c1 | d1] without shift - - add r0, r0, r2 ; update input pointer - - qadd16 r7, r7, r7 ; 2*[c1|d1] --> we can use smlad and smlsd - ; with 2217*4 and 5352*4 without losing the - ; sign bit (overflow) - - smuad r4, r6, lr ; o0 = (i1+i2)*8 + (i0+i3)*8 - smusd r5, r6, lr ; o2 = (i1+i2)*8 - (i0+i3)*8 - - smlad r6, r7, r12, r11 ; o1 = (c1 * 2217 + d1 * 5352 + 14500) - smlsdx r7, r7, r12, r10 ; o3 = (d1 * 2217 - c1 * 5352 + 7500) - - ldrd r8, r9, [r0] ; [i5 | i4] [i7 | i6] - - pkhbt r3, r4, r6, lsl #4 ; [o1 | o0], keep in register for PART 2 - pkhbt r6, r5, r7, lsl #4 ; [o3 | o2] - - str r6, [r1, #4] - - ; coeffs 4-7 - ror r9, r9, #16 ; [i6 | i7] - - qadd16 r6, r8, r9 ; [i5+i6 | i4+i7] = [b1 | a1] without shift - qsub16 r7, r8, r9 ; [i5-i6 | i4-i7] = [c1 | d1] without shift - - add r0, r0, r2 ; update input pointer - - qadd16 r7, r7, r7 ; 2x[c1|d1] --> we can use smlad and smlsd - ; with 2217*4 and 5352*4 without losing the - ; sign bit (overflow) - - smuad r9, r6, lr ; o4 = (i5+i6)*8 + (i4+i7)*8 - smusd r8, r6, lr ; o6 = (i5+i6)*8 - (i4+i7)*8 - - smlad r6, r7, r12, r11 ; o5 = (c1 * 2217 + d1 * 5352 + 14500) - smlsdx r7, r7, r12, r10 ; o7 = (d1 * 2217 - c1 * 5352 + 7500) - - ldrd r4, r5, [r0] ; [i9 | i8] [i11 | i10] - - pkhbt r9, r9, r6, lsl #4 ; [o5 | o4], keep in register for PART 2 - pkhbt r6, r8, r7, lsl #4 ; [o7 | o6] - - str r6, [r1, #12] - - ; coeffs 8-11 - ror r5, r5, #16 ; [i10 | i11] - - qadd16 r6, r4, r5 ; [i9+i10 | i8+i11]=[b1 | a1] without shift - qsub16 r7, r4, r5 ; [i9-i10 | i8-i11]=[c1 | d1] without shift - - add r0, r0, r2 ; update input pointer - - qadd16 r7, r7, r7 ; 2x[c1|d1] --> we can use smlad and smlsd - ; with 2217*4 and 5352*4 without losing the - ; sign bit (overflow) - - smuad r2, r6, lr ; o8 = (i9+i10)*8 + (i8+i11)*8 - smusd r8, r6, lr ; o10 = (i9+i10)*8 - (i8+i11)*8 - - smlad r6, r7, r12, r11 ; o9 = (c1 * 2217 + d1 * 5352 + 14500) - smlsdx r7, r7, r12, r10 ; o11 = (d1 * 2217 - c1 * 5352 + 7500) - - ldrd r4, r5, [r0] ; [i13 | i12] [i15 | i14] - - pkhbt r2, r2, r6, lsl #4 ; [o9 | o8], keep in register for PART 2 - pkhbt r6, r8, r7, lsl #4 ; [o11 | o10] - - str r6, [r1, #20] - - ; coeffs 12-15 - ror r5, r5, #16 ; [i14 | i15] - - qadd16 r6, r4, r5 ; [i13+i14 | i12+i15]=[b1|a1] without shift - qsub16 r7, r4, r5 ; [i13-i14 | i12-i15]=[c1|d1] without shift - - qadd16 r7, r7, r7 ; 2x[c1|d1] --> we can use smlad and smlsd - ; with 2217*4 and 5352*4 without losing the - ; sign bit (overflow) - - smuad r4, r6, lr ; o12 = (i13+i14)*8 + (i12+i15)*8 - smusd r5, r6, lr ; o14 = (i13+i14)*8 - (i12+i15)*8 - - smlad r6, r7, r12, r11 ; o13 = (c1 * 2217 + d1 * 5352 + 14500) - smlsdx r7, r7, r12, r10 ; o15 = (d1 * 2217 - c1 * 5352 + 7500) - - pkhbt r0, r4, r6, lsl #4 ; [o13 | o12], keep in register for PART 2 - pkhbt r6, r5, r7, lsl #4 ; [o15 | o14] - - str r6, [r1, #28] - - - ; PART 2 ------------------------------------------------- - ldr r11, c12000 - ldr r10, c51000 - ldr lr, c0x00070007 - - qadd16 r4, r3, r0 ; a1 = [i1+i13 | i0+i12] - qadd16 r5, r9, r2 ; b1 = [i5+i9 | i4+i8] - qsub16 r6, r9, r2 ; c1 = [i5-i9 | i4-i8] - qsub16 r7, r3, r0 ; d1 = [i1-i13 | i0-i12] - - qadd16 r4, r4, lr ; a1 + 7 - - add r0, r11, #0x10000 ; add (d!=0) - - qadd16 r2, r4, r5 ; a1 + b1 + 7 - qsub16 r3, r4, r5 ; a1 - b1 + 7 - - ldr r12, c0x08a914e8 ; [2217 | 5352] - - lsl r8, r2, #16 ; prepare bottom halfword for scaling - asr r2, r2, #4 ; scale top halfword - lsl r9, r3, #16 ; prepare bottom halfword for scaling - asr r3, r3, #4 ; scale top halfword - pkhtb r4, r2, r8, asr #20 ; pack and scale bottom halfword - pkhtb r5, r3, r9, asr #20 ; pack and scale bottom halfword - - smulbt r2, r6, r12 ; [ ------ | c1*2217] - str r4, [r1, #0] ; [ o1 | o0] - smultt r3, r6, r12 ; [c1*2217 | ------ ] - str r5, [r1, #16] ; [ o9 | o8] - - smlabb r8, r7, r12, r2 ; [ ------ | d1*5352] - smlatb r9, r7, r12, r3 ; [d1*5352 | ------ ] - - smulbb r2, r6, r12 ; [ ------ | c1*5352] - smultb r3, r6, r12 ; [c1*5352 | ------ ] - - lsls r6, r7, #16 ; d1 != 0 ? - addeq r8, r8, r11 ; c1_b*2217+d1_b*5352+12000 + (d==0) - addne r8, r8, r0 ; c1_b*2217+d1_b*5352+12000 + (d!=0) - asrs r6, r7, #16 - addeq r9, r9, r11 ; c1_t*2217+d1_t*5352+12000 + (d==0) - addne r9, r9, r0 ; c1_t*2217+d1_t*5352+12000 + (d!=0) - - smlabt r4, r7, r12, r10 ; [ ------ | d1*2217] + 51000 - smlatt r5, r7, r12, r10 ; [d1*2217 | ------ ] + 51000 - - pkhtb r9, r9, r8, asr #16 - - sub r4, r4, r2 - sub r5, r5, r3 - - ldr r3, [r1, #4] ; [i3 | i2] - - pkhtb r5, r5, r4, asr #16 ; [o13|o12] - - str r9, [r1, #8] ; [o5 | 04] - - ldr r9, [r1, #12] ; [i7 | i6] - ldr r8, [r1, #28] ; [i15|i14] - ldr r2, [r1, #20] ; [i11|i10] - str r5, [r1, #24] ; [o13|o12] - - qadd16 r4, r3, r8 ; a1 = [i3+i15 | i2+i14] - qadd16 r5, r9, r2 ; b1 = [i7+i11 | i6+i10] - - qadd16 r4, r4, lr ; a1 + 7 - - qsub16 r6, r9, r2 ; c1 = [i7-i11 | i6-i10] - qadd16 r2, r4, r5 ; a1 + b1 + 7 - qsub16 r7, r3, r8 ; d1 = [i3-i15 | i2-i14] - qsub16 r3, r4, r5 ; a1 - b1 + 7 - - lsl r8, r2, #16 ; prepare bottom halfword for scaling - asr r2, r2, #4 ; scale top halfword - lsl r9, r3, #16 ; prepare bottom halfword for scaling - asr r3, r3, #4 ; scale top halfword - pkhtb r4, r2, r8, asr #20 ; pack and scale bottom halfword - pkhtb r5, r3, r9, asr #20 ; pack and scale bottom halfword - - smulbt r2, r6, r12 ; [ ------ | c1*2217] - str r4, [r1, #4] ; [ o3 | o2] - smultt r3, r6, r12 ; [c1*2217 | ------ ] - str r5, [r1, #20] ; [ o11 | o10] - - smlabb r8, r7, r12, r2 ; [ ------ | d1*5352] - smlatb r9, r7, r12, r3 ; [d1*5352 | ------ ] - - smulbb r2, r6, r12 ; [ ------ | c1*5352] - smultb r3, r6, r12 ; [c1*5352 | ------ ] - - lsls r6, r7, #16 ; d1 != 0 ? - addeq r8, r8, r11 ; c1_b*2217+d1_b*5352+12000 + (d==0) - addne r8, r8, r0 ; c1_b*2217+d1_b*5352+12000 + (d!=0) - - asrs r6, r7, #16 - addeq r9, r9, r11 ; c1_t*2217+d1_t*5352+12000 + (d==0) - addne r9, r9, r0 ; c1_t*2217+d1_t*5352+12000 + (d!=0) - - smlabt r4, r7, r12, r10 ; [ ------ | d1*2217] + 51000 - smlatt r5, r7, r12, r10 ; [d1*2217 | ------ ] + 51000 - - pkhtb r9, r9, r8, asr #16 - - sub r4, r4, r2 - sub r5, r5, r3 - - str r9, [r1, #12] ; [o7 | o6] - pkhtb r5, r5, r4, asr #16 ; [o15|o14] - - str r5, [r1, #28] ; [o15|o14] - - ldmfd sp!, {r4 - r12, pc} - - ENDP - -; Used constants -c7500 - DCD 7500 -c14500 - DCD 14500 -c0x22a453a0 - DCD 0x22a453a0 -c0x00080008 - DCD 0x00080008 -c12000 - DCD 12000 -c51000 - DCD 51000 -c0x00070007 - DCD 0x00070007 -c0x08a914e8 - DCD 0x08a914e8 - - END diff --git a/vp8/encoder/arm/armv6/walsh_v6.asm b/vp8/encoder/arm/armv6/walsh_v6.asm deleted file mode 100644 index 5eaf3f25a..000000000 --- a/vp8/encoder/arm/armv6/walsh_v6.asm +++ /dev/null @@ -1,212 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - EXPORT |vp8_short_walsh4x4_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY ; name this block of code - -;short vp8_short_walsh4x4_armv6(short *input, short *output, int pitch) -; r0 short *input, -; r1 short *output, -; r2 int pitch -|vp8_short_walsh4x4_armv6| PROC - - stmdb sp!, {r4 - r11, lr} - - ldrd r4, r5, [r0], r2 - ldr lr, c00040004 - ldrd r6, r7, [r0], r2 - - ; 0-3 - qadd16 r3, r4, r5 ; [d1|a1] [1+3 | 0+2] - qsub16 r4, r4, r5 ; [c1|b1] [1-3 | 0-2] - - ldrd r8, r9, [r0], r2 - ; 4-7 - qadd16 r5, r6, r7 ; [d1|a1] [5+7 | 4+6] - qsub16 r6, r6, r7 ; [c1|b1] [5-7 | 4-6] - - ldrd r10, r11, [r0] - ; 8-11 - qadd16 r7, r8, r9 ; [d1|a1] [9+11 | 8+10] - qsub16 r8, r8, r9 ; [c1|b1] [9-11 | 8-10] - - ; 12-15 - qadd16 r9, r10, r11 ; [d1|a1] [13+15 | 12+14] - qsub16 r10, r10, r11 ; [c1|b1] [13-15 | 12-14] - - - lsls r2, r3, #16 - smuad r11, r3, lr ; A0 = a1<<2 + d1<<2 - addne r11, r11, #1 ; A0 += (a1!=0) - - lsls r2, r7, #16 - smuad r12, r7, lr ; C0 = a1<<2 + d1<<2 - addne r12, r12, #1 ; C0 += (a1!=0) - - add r0, r11, r12 ; a1_0 = A0 + C0 - sub r11, r11, r12 ; b1_0 = A0 - C0 - - lsls r2, r5, #16 - smuad r12, r5, lr ; B0 = a1<<2 + d1<<2 - addne r12, r12, #1 ; B0 += (a1!=0) - - lsls r2, r9, #16 - smuad r2, r9, lr ; D0 = a1<<2 + d1<<2 - addne r2, r2, #1 ; D0 += (a1!=0) - - add lr, r12, r2 ; d1_0 = B0 + D0 - sub r12, r12, r2 ; c1_0 = B0 - D0 - - ; op[0,4,8,12] - adds r2, r0, lr ; a2 = a1_0 + d1_0 - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - subs r0, r0, lr ; d2 = a1_0 - d1_0 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1] ; op[0] - - addmi r0, r0, #1 ; += a2 < 0 - add r0, r0, #3 ; += 3 - ldr lr, c00040004 - mov r0, r0, asr #3 ; >> 3 - strh r0, [r1, #24] ; op[12] - - adds r2, r11, r12 ; b2 = b1_0 + c1_0 - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - subs r0, r11, r12 ; c2 = b1_0 - c1_0 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #8] ; op[4] - - addmi r0, r0, #1 ; += a2 < 0 - add r0, r0, #3 ; += 3 - smusd r3, r3, lr ; A3 = a1<<2 - d1<<2 - smusd r7, r7, lr ; C3 = a1<<2 - d1<<2 - mov r0, r0, asr #3 ; >> 3 - strh r0, [r1, #16] ; op[8] - - - ; op[3,7,11,15] - add r0, r3, r7 ; a1_3 = A3 + C3 - sub r3, r3, r7 ; b1_3 = A3 - C3 - - smusd r5, r5, lr ; B3 = a1<<2 - d1<<2 - smusd r9, r9, lr ; D3 = a1<<2 - d1<<2 - add r7, r5, r9 ; d1_3 = B3 + D3 - sub r5, r5, r9 ; c1_3 = B3 - D3 - - adds r2, r0, r7 ; a2 = a1_3 + d1_3 - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - adds r9, r3, r5 ; b2 = b1_3 + c1_3 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #6] ; op[3] - - addmi r9, r9, #1 ; += a2 < 0 - add r9, r9, #3 ; += 3 - subs r2, r3, r5 ; c2 = b1_3 - c1_3 - mov r9, r9, asr #3 ; >> 3 - strh r9, [r1, #14] ; op[7] - - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - subs r9, r0, r7 ; d2 = a1_3 - d1_3 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #22] ; op[11] - - addmi r9, r9, #1 ; += a2 < 0 - add r9, r9, #3 ; += 3 - smuad r3, r4, lr ; A1 = b1<<2 + c1<<2 - smuad r5, r8, lr ; C1 = b1<<2 + c1<<2 - mov r9, r9, asr #3 ; >> 3 - strh r9, [r1, #30] ; op[15] - - ; op[1,5,9,13] - add r0, r3, r5 ; a1_1 = A1 + C1 - sub r3, r3, r5 ; b1_1 = A1 - C1 - - smuad r7, r6, lr ; B1 = b1<<2 + c1<<2 - smuad r9, r10, lr ; D1 = b1<<2 + c1<<2 - add r5, r7, r9 ; d1_1 = B1 + D1 - sub r7, r7, r9 ; c1_1 = B1 - D1 - - adds r2, r0, r5 ; a2 = a1_1 + d1_1 - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - adds r9, r3, r7 ; b2 = b1_1 + c1_1 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #2] ; op[1] - - addmi r9, r9, #1 ; += a2 < 0 - add r9, r9, #3 ; += 3 - subs r2, r3, r7 ; c2 = b1_1 - c1_1 - mov r9, r9, asr #3 ; >> 3 - strh r9, [r1, #10] ; op[5] - - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - subs r9, r0, r5 ; d2 = a1_1 - d1_1 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #18] ; op[9] - - addmi r9, r9, #1 ; += a2 < 0 - add r9, r9, #3 ; += 3 - smusd r4, r4, lr ; A2 = b1<<2 - c1<<2 - smusd r8, r8, lr ; C2 = b1<<2 - c1<<2 - mov r9, r9, asr #3 ; >> 3 - strh r9, [r1, #26] ; op[13] - - - ; op[2,6,10,14] - add r11, r4, r8 ; a1_2 = A2 + C2 - sub r12, r4, r8 ; b1_2 = A2 - C2 - - smusd r6, r6, lr ; B2 = b1<<2 - c1<<2 - smusd r10, r10, lr ; D2 = b1<<2 - c1<<2 - add r4, r6, r10 ; d1_2 = B2 + D2 - sub r8, r6, r10 ; c1_2 = B2 - D2 - - adds r2, r11, r4 ; a2 = a1_2 + d1_2 - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - adds r9, r12, r8 ; b2 = b1_2 + c1_2 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #4] ; op[2] - - addmi r9, r9, #1 ; += a2 < 0 - add r9, r9, #3 ; += 3 - subs r2, r12, r8 ; c2 = b1_2 - c1_2 - mov r9, r9, asr #3 ; >> 3 - strh r9, [r1, #12] ; op[6] - - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - subs r9, r11, r4 ; d2 = a1_2 - d1_2 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #20] ; op[10] - - addmi r9, r9, #1 ; += a2 < 0 - add r9, r9, #3 ; += 3 - mov r9, r9, asr #3 ; >> 3 - strh r9, [r1, #28] ; op[14] - - - ldmia sp!, {r4 - r11, pc} - ENDP ; |vp8_short_walsh4x4_armv6| - -c00040004 - DCD 0x00040004 - - END diff --git a/vp8/encoder/arm/dct_arm.c b/vp8/encoder/arm/dct_arm.c deleted file mode 100644 index 983dd217a..000000000 --- a/vp8/encoder/arm/dct_arm.c +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_config.h" -#include "vp8_rtcd.h" - -#if HAVE_MEDIA - -void vp8_short_fdct8x4_armv6(short *input, short *output, int pitch) { - vp8_short_fdct4x4_armv6(input, output, pitch); - vp8_short_fdct4x4_armv6(input + 4, output + 16, pitch); -} - -#endif /* HAVE_MEDIA */ |