summaryrefslogtreecommitdiff
path: root/vp8/encoder
diff options
context:
space:
mode:
authorTero Rintaluoma <teror@google.com>2011-02-10 16:41:22 +0200
committerTero Rintaluoma <teror@google.com>2011-02-11 11:14:07 +0200
commit1ef86980b9e39ac2775e681f839cdeba77404357 (patch)
tree1dd51e526ca956fd54dfd10714d7f80de9bbed2b /vp8/encoder
parent7d8199f0c3422e6bd63bef4b9992ce0b2e6b74b5 (diff)
downloadlibvpx-1ef86980b9e39ac2775e681f839cdeba77404357.tar
libvpx-1ef86980b9e39ac2775e681f839cdeba77404357.tar.gz
libvpx-1ef86980b9e39ac2775e681f839cdeba77404357.tar.bz2
libvpx-1ef86980b9e39ac2775e681f839cdeba77404357.zip
ARMv6 optimized sad16x16
Adds a new ARMv6 optimized function vp8_sad16x16_armv6 to encoder. Change-Id: Ibbd7edb8b25cb7a5b522d391b1e9a690fe150e57
Diffstat (limited to 'vp8/encoder')
-rw-r--r--vp8/encoder/arm/arm_csystemdependent.c4
-rw-r--r--vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm84
-rw-r--r--vp8/encoder/arm/variance_arm.h4
3 files changed, 90 insertions, 2 deletions
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c
index 6c17a7984..cec35d548 100644
--- a/vp8/encoder/arm/arm_csystemdependent.c
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -29,8 +29,8 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
#if HAVE_ARMV6
if (has_media)
{
- /*cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c;
- cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c;
+ cpi->rtcd.variance.sad16x16 = vp8_sad16x16_armv6;
+ /*cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c;
cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c;
cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c;
cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;*/
diff --git a/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm b/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm
new file mode 100644
index 000000000..c759f7c65
--- /dev/null
+++ b/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm
@@ -0,0 +1,84 @@
+;
+; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_sad16x16_armv6|
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; r0 const unsigned char *src_ptr
+; r1 int src_stride
+; r2 const unsigned char *ref_ptr
+; r3 int ref_stride
+; stack max_sad (not used)
+|vp8_sad16x16_armv6| PROC
+ stmfd sp!, {r4-r12, lr}
+ mov r4, #0 ; sad = 0;
+ mov r5, #8 ; loop count
+
+loop
+ ; 1st row
+ ldr r6, [r0, #0x0] ; load 4 src pixels (1A)
+ ldr r8, [r2, #0x0] ; load 4 ref pixels (1A)
+ ldr r7, [r0, #0x4] ; load 4 src pixels (1A)
+ ldr r9, [r2, #0x4] ; load 4 ref pixels (1A)
+ ldr r10, [r0, #0x8] ; load 4 src pixels (1B)
+ ldr r11, [r0, #0xC] ; load 4 src pixels (1B)
+
+ usada8 r4, r8, r6, r4 ; calculate sad for 4 pixels
+ usad8 r8, r7, r9 ; calculate sad for 4 pixels
+
+ ldr r12, [r2, #0x8] ; load 4 ref pixels (1B)
+ ldr lr, [r2, #0xC] ; load 4 ref pixels (1B)
+
+ add r0, r0, r1 ; set src pointer to next row
+ add r2, r2, r3 ; set dst pointer to next row
+
+ usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels
+ usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels
+
+ ldr r6, [r0, #0x0] ; load 4 src pixels (2A)
+ ldr r7, [r0, #0x4] ; load 4 src pixels (2A)
+ add r4, r4, r8 ; add partial sad values
+
+ ; 2nd row
+ ldr r8, [r2, #0x0] ; load 4 ref pixels (2A)
+ ldr r9, [r2, #0x4] ; load 4 ref pixels (2A)
+ ldr r10, [r0, #0x8] ; load 4 src pixels (2B)
+ ldr r11, [r0, #0xC] ; load 4 src pixels (2B)
+
+ usada8 r4, r6, r8, r4 ; calculate sad for 4 pixels
+ usad8 r8, r7, r9 ; calculate sad for 4 pixels
+
+ ldr r12, [r2, #0x8] ; load 4 ref pixels (2B)
+ ldr lr, [r2, #0xC] ; load 4 ref pixels (2B)
+
+ add r0, r0, r1 ; set src pointer to next row
+ add r2, r2, r3 ; set dst pointer to next row
+
+ usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels
+ usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels
+
+ subs r5, r5, #1 ; decrement loop counter
+ add r4, r4, r8 ; add partial sad values
+
+ bne loop
+
+ mov r0, r4 ; return sad
+ ldmfd sp!, {r4-r12, pc}
+
+ ENDP
+
+ END
+
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index 06d72873e..c807e29c0 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -14,11 +14,15 @@
#if HAVE_ARMV6
+extern prototype_sad(vp8_sad16x16_armv6);
extern prototype_variance(vp8_variance16x16_armv6);
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6);
#if !CONFIG_RUNTIME_CPU_DETECT
+#undef vp8_variance_sad16x16
+#define vp8_variance_sad16x16 vp8_sad16x16_armv6
+
#undef vp8_variance_subpixvar16x16
#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_armv6