author     Attila Nagy <attilanagy@google.com>  2011-03-09 14:26:24 +0200
committer  Attila Nagy <attilanagy@google.com>  2011-03-15 15:50:44 +0200
commit     71bcd9f1af91f2dea22f2e71839039d83e5b1d84 (patch)
tree       b6c2a0a7b76c6025a8baccdfcf596ac495d77c3a /vp8/encoder
parent     6795e256c1b8e2841e19f4650f443e84427c21c6 (diff)
Add vp8_variance8x8_armv6 and vp8_sub_pixel_variance8x8_armv6 functions
Change-Id: I08edaffc62514907fa5e90e1689269e467c857f5
Diffstat (limited to 'vp8/encoder')
-rw-r--r--  vp8/encoder/arm/arm_csystemdependent.c           |  12
-rw-r--r--  vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm  |  95
-rw-r--r--  vp8/encoder/arm/variance_arm.c                   |  28
-rw-r--r--  vp8/encoder/arm/variance_arm.h                   |   8
4 files changed, 137 insertions(+), 6 deletions(-)
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c
index 5ba14f375..a661a89a4 100644
--- a/vp8/encoder/arm/arm_csystemdependent.c
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -35,15 +35,15 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c;
cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;*/
- /*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;
- cpi->rtcd.variance.var8x8 = vp8_variance8x8_c;
- cpi->rtcd.variance.var8x16 = vp8_variance8x16_c;
+ /*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;*/
+ cpi->rtcd.variance.var8x8 = vp8_variance8x8_armv6;
+ /*cpi->rtcd.variance.var8x16 = vp8_variance8x16_c;
cpi->rtcd.variance.var16x8 = vp8_variance16x8_c;*/
cpi->rtcd.variance.var16x16 = vp8_variance16x16_armv6;
- /*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;
- cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c;
- cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
+ /*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;*/
+ cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_armv6;
+ /*cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;*/
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_armv6;
cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_armv6;
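
The hunk above enables the 8x8 slots in the runtime-dispatch (RTCD) table, pointing them at the new ARMv6 routines instead of the commented-out C fallbacks. For orientation, here is a minimal C sketch of what any var8x8 entry in this table computes (mirroring the vp8_variance8x8_c fallback; the function name is illustrative, not part of the patch):

/* Minimal C sketch of an 8x8 variance routine; the ARMv6 version added
 * below computes the same sum and sse with packed-pixel instructions. */
unsigned int variance8x8_sketch(const unsigned char *src_ptr, int source_stride,
                                const unsigned char *ref_ptr, int recon_stride,
                                unsigned int *sse)
{
    int i, j, sum = 0;
    unsigned int sq = 0;

    for (i = 0; i < 8; i++)
    {
        for (j = 0; j < 8; j++)
        {
            int d = src_ptr[j] - ref_ptr[j];

            sum += d;
            sq  += d * d;
        }

        src_ptr += source_stride;
        ref_ptr += recon_stride;
    }

    *sse = sq;
    return sq - ((unsigned int)(sum * sum) >> 6); /* 8x8 = 64 pixels -> >> 6 */
}
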
diff --git a/vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm
new file mode 100644
index 000000000..7daecb925
--- /dev/null
+++ b/vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm
@@ -0,0 +1,95 @@
+;
+; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_variance8x8_armv6|
+
+ ARM
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; r0 unsigned char *src_ptr
+; r1 int source_stride
+; r2 unsigned char *ref_ptr
+; r3 int recon_stride
+; stack unsigned int *sse
+|vp8_variance8x8_armv6| PROC
+
+ push {r4-r10, lr}
+ mov r12, #8 ; set loop counter to 8 (=block height)
+ mov r4, #0 ; initialize sum = 0
+ mov r5, #0 ; initialize sse = 0
+
+loop
+ ; 1st 4 pixels
+ ldr r6, [r0, #0x0] ; load 4 src pixels
+ ldr r7, [r2, #0x0] ; load 4 ref pixels
+
+ mov lr, #0 ; constant zero
+
+ usub8 r8, r6, r7 ; calculate difference
+ sel r10, r8, lr ; select bytes with positive difference
+ usub8 r9, r7, r6 ; calculate difference with reversed operands
+ sel r8, r9, lr ; select bytes with negative difference
+
+ ; calculate partial sums
+ usad8 r6, r10, lr ; calculate sum of positive differences
+ usad8 r7, r8, lr ; calculate sum of negative differences
+ orr r8, r8, r10 ; differences of all 4 pixels
+ ; calculate total sum
+ add r4, r4, r6 ; add positive differences to sum
+ sub r4, r4, r7 ; subtract negative differences from sum
+
+ ; calculate sse
+ uxtb16 r7, r8 ; bytes (two pixels) to halfwords
+ uxtb16 r10, r8, ror #8 ; another two pixels to halfwords
+ smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1)
+
+ ; 2nd 4 pixels
+ ldr r6, [r0, #0x4] ; load 4 src pixels
+ ldr r7, [r2, #0x4] ; load 4 ref pixels
+ smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2)
+
+ usub8 r8, r6, r7 ; calculate difference
+ add r0, r0, r1 ; set src_ptr to next row
+ sel r10, r8, lr ; select bytes with positive difference
+ usub8 r9, r7, r6 ; calculate difference with reversed operands
+ add r2, r2, r3 ; set dst_ptr to next row
+ sel r8, r9, lr ; select bytes with negative difference
+
+ ; calculate partial sums
+ usad8 r6, r10, lr ; calculate sum of positive differences
+ usad8 r7, r8, lr ; calculate sum of negative differences
+ orr r8, r8, r10 ; differences of all 4 pixels
+
+ ; calculate total sum
+ add r4, r4, r6 ; add positive differences to sum
+ sub r4, r4, r7 ; subtract negative differences from sum
+
+ ; calculate sse
+ uxtb16 r7, r8 ; bytes (two pixels) to halfwords
+ uxtb16 r10, r8, ror #8 ; another two pixels to halfwords
+ smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1)
+ subs r12, r12, #1 ; next row
+ smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2)
+
+ bne loop
+
+ ; calculate return value
+ ldr r8, [sp, #32] ; get address of sse
+ mul r1, r4, r4 ; sum * sum
+ str r5, [r8] ; store sse
+ sub r0, r5, r1, ASR #6 ; return (sse - ((sum * sum) >> 6))
+
+ pop {r4-r10, pc}
+
+ ENDP
+
+ END
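
The routine processes the 8x8 block one row per loop iteration, four packed pixels at a time: usub8/sel split the byte differences into positive and negative parts, usad8 folds each part into the running sum, and uxtb16 + smlad widen the absolute differences to halfwords and square-accumulate them into sse. A scalar C model of one 4-pixel step (step4 is a hypothetical helper for illustration; the real code keeps all four pixels packed in a single register):

/* Scalar model of one 4-pixel step of the ARMv6 loop above. */
static void step4(const unsigned char *src, const unsigned char *ref,
                  int *sum, unsigned int *sse)
{
    int k;

    for (k = 0; k < 4; k++)
    {
        int d = src[k] - ref[k]; /* usub8 + sel recover the signed diff  */

        *sum += d;               /* usad8 sums added, then subtracted    */
        *sse += d * d;           /* uxtb16 + smlad square and accumulate */
    }
}
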
diff --git a/vp8/encoder/arm/variance_arm.c b/vp8/encoder/arm/variance_arm.c
index 64d76bcf8..ed1fb16d5 100644
--- a/vp8/encoder/arm/variance_arm.c
+++ b/vp8/encoder/arm/variance_arm.c
@@ -15,6 +15,34 @@
#if HAVE_ARMV6
+unsigned int vp8_sub_pixel_variance8x8_armv6
+(
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse
+)
+{
+ unsigned short first_pass[10*8];
+ unsigned char second_pass[8*8];
+ const short *HFilter, *VFilter;
+
+ HFilter = vp8_bilinear_filters[xoffset];
+ VFilter = vp8_bilinear_filters[yoffset];
+
+ vp8_filter_block2d_bil_first_pass_armv6(src_ptr, first_pass,
+ src_pixels_per_line,
+ 9, 8, HFilter);
+ vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass,
+ 8, 8, 8, VFilter);
+
+ return vp8_variance8x8_armv6(second_pass, 8, dst_ptr,
+ dst_pixels_per_line, sse);
+}
+
unsigned int vp8_sub_pixel_variance16x16_armv6
(
const unsigned char *src_ptr,
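
The new sub-pixel wrapper follows the same two-pass bilinear structure as the existing 16x16 version: a horizontal first pass over 9 source rows (the extra row feeds the vertical taps) into an unsigned-short intermediate, a vertical second pass down to 8x8 bytes, then the new integer-pel variance against the reference block. A sketch of the 2-tap bilinear step each filter pass applies per output pixel (assuming the usual VP8 bilinear coefficients, pairs summing to 128 with a 7-bit shift):

/* One rounding 2-tap bilinear step (sketch; assumes
 * filter[0] + filter[1] == 128, as in vp8_bilinear_filters). */
static unsigned int bilinear_tap(unsigned int a, unsigned int b,
                                 const short *filter)
{
    /* add half of the 2^7 filter sum before shifting, to round */
    return (a * filter[0] + b * filter[1] + 64) >> 7;
}
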
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index 7ad7c76d3..86de27476 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -16,7 +16,9 @@
extern prototype_sad(vp8_sad16x16_armv6);
extern prototype_variance(vp8_variance16x16_armv6);
+extern prototype_variance(vp8_variance8x8_armv6);
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6);
+extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_armv6);
extern prototype_variance(vp8_variance_halfpixvar16x16_h_armv6);
extern prototype_variance(vp8_variance_halfpixvar16x16_v_armv6);
extern prototype_variance(vp8_variance_halfpixvar16x16_hv_armv6);
@@ -30,12 +32,18 @@ extern prototype_variance(vp8_mse16x16_armv6);
#undef vp8_variance_subpixvar16x16
#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_armv6
+#undef vp8_variance_subpixvar8x8
+#define vp8_variance_subpixvar8x8 vp8_sub_pixel_variance8x8_armv6
+
#undef vp8_variance_var16x16
#define vp8_variance_var16x16 vp8_variance16x16_armv6
#undef vp8_variance_mse16x16
#define vp8_variance_mse16x16 vp8_mse16x16_armv6
+#undef vp8_variance_var8x8
+#define vp8_variance_var8x8 vp8_variance8x8_armv6
+
#undef vp8_variance_halfpixvar16x16_h
#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_armv6
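
When the library is built without runtime CPU detection, these #undef/#define remappings resolve the generic RTCD names at compile time, so the same call site reaches the ARMv6 routine directly. Roughly (a sketch of the expansion chain based on the usual RTCD invoke pattern, not code from this patch):

/* Sketch: static dispatch with runtime CPU detection disabled.
 *
 *   VARIANCE_INVOKE(&cpi->rtcd.variance, var8x8)(src, sstride,
 *                                                ref, rstride, &sse)
 *     -> vp8_variance_var8x8(src, sstride, ref, rstride, &sse)
 *     -> vp8_variance8x8_armv6(src, sstride, ref, rstride, &sse)
 */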