diff options
author | John Koleszar <jkoleszar@google.com> | 2011-02-17 00:05:14 -0500 |
---|---|---|
committer | John Koleszar <jkoleszar@google.com> | 2011-02-17 00:05:14 -0500 |
commit | c88dbb2dced70914eb60e9a341c0f219526116aa (patch) | |
tree | fd4b97990352c8cd192e894362a026fefea4d6ef /vp8/encoder | |
parent | eb6d3a1ead99dc62536dd839356af623f138951a (diff) | |
parent | da9402fbf6ffb5d0ea5cb7f07598a81c6fedd7a1 (diff) | |
download | libvpx-c88dbb2dced70914eb60e9a341c0f219526116aa.tar libvpx-c88dbb2dced70914eb60e9a341c0f219526116aa.tar.gz libvpx-c88dbb2dced70914eb60e9a341c0f219526116aa.tar.bz2 libvpx-c88dbb2dced70914eb60e9a341c0f219526116aa.zip |
Merge remote branch 'internal/upstream' into HEAD
Diffstat (limited to 'vp8/encoder')
-rw-r--r-- | vp8/encoder/arm/arm_csystemdependent.c | 4 | ||||
-rw-r--r-- | vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm | 84 | ||||
-rw-r--r-- | vp8/encoder/arm/variance_arm.h | 4 | ||||
-rw-r--r-- | vp8/encoder/onyx_if.c | 6 |
4 files changed, 94 insertions, 4 deletions
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c index 6c17a7984..cec35d548 100644 --- a/vp8/encoder/arm/arm_csystemdependent.c +++ b/vp8/encoder/arm/arm_csystemdependent.c @@ -29,8 +29,8 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi) #if HAVE_ARMV6 if (has_media) { - /*cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c; - cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c; + cpi->rtcd.variance.sad16x16 = vp8_sad16x16_armv6; + /*cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c; cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c; cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c; cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;*/ diff --git a/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm b/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm new file mode 100644 index 000000000..c759f7c65 --- /dev/null +++ b/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm @@ -0,0 +1,84 @@ +; +; Copyright (c) 2011 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + + EXPORT |vp8_sad16x16_armv6| + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; r0 const unsigned char *src_ptr +; r1 int src_stride +; r2 const unsigned char *ref_ptr +; r3 int ref_stride +; stack max_sad (not used) +|vp8_sad16x16_armv6| PROC + stmfd sp!, {r4-r12, lr} + mov r4, #0 ; sad = 0; + mov r5, #8 ; loop count + +loop + ; 1st row + ldr r6, [r0, #0x0] ; load 4 src pixels (1A) + ldr r8, [r2, #0x0] ; load 4 ref pixels (1A) + ldr r7, [r0, #0x4] ; load 4 src pixels (1A) + ldr r9, [r2, #0x4] ; load 4 ref pixels (1A) + ldr r10, [r0, #0x8] ; load 4 src pixels (1B) + ldr r11, [r0, #0xC] ; load 4 src pixels (1B) + + usada8 r4, r8, r6, r4 ; calculate sad for 4 pixels + usad8 r8, r7, r9 ; calculate sad for 4 pixels + + ldr r12, [r2, #0x8] ; load 4 ref pixels (1B) + ldr lr, [r2, #0xC] ; load 4 ref pixels (1B) + + add r0, r0, r1 ; set src pointer to next row + add r2, r2, r3 ; set dst pointer to next row + + usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels + usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels + + ldr r6, [r0, #0x0] ; load 4 src pixels (2A) + ldr r7, [r0, #0x4] ; load 4 src pixels (2A) + add r4, r4, r8 ; add partial sad values + + ; 2nd row + ldr r8, [r2, #0x0] ; load 4 ref pixels (2A) + ldr r9, [r2, #0x4] ; load 4 ref pixels (2A) + ldr r10, [r0, #0x8] ; load 4 src pixels (2B) + ldr r11, [r0, #0xC] ; load 4 src pixels (2B) + + usada8 r4, r6, r8, r4 ; calculate sad for 4 pixels + usad8 r8, r7, r9 ; calculate sad for 4 pixels + + ldr r12, [r2, #0x8] ; load 4 ref pixels (2B) + ldr lr, [r2, #0xC] ; load 4 ref pixels (2B) + + add r0, r0, r1 ; set src pointer to next row + add r2, r2, r3 ; set dst pointer to next row + + usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels + usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels + + subs r5, r5, #1 ; decrement loop counter + add r4, r4, r8 ; add partial sad values + + bne loop + + mov r0, r4 ; return sad + ldmfd sp!, {r4-r12, pc} + + ENDP + + END + diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h index 06d72873e..c807e29c0 100644 --- a/vp8/encoder/arm/variance_arm.h +++ b/vp8/encoder/arm/variance_arm.h @@ -14,11 +14,15 @@ #if HAVE_ARMV6 +extern prototype_sad(vp8_sad16x16_armv6); extern prototype_variance(vp8_variance16x16_armv6); extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6); #if !CONFIG_RUNTIME_CPU_DETECT +#undef vp8_variance_sad16x16 +#define vp8_variance_sad16x16 vp8_sad16x16_armv6 + #undef vp8_variance_subpixvar16x16 #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_armv6 diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 5633f0b81..b0912941e 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -1333,6 +1333,8 @@ void vp8_set_speed_features(VP8_COMP *cpi) static void alloc_raw_frame_buffers(VP8_COMP *cpi) { int i, buffers; + /* allocate source_buffer to be multiples of 16 */ + int width = (cpi->oxcf.Width + 15) & ~15; buffers = cpi->oxcf.lag_in_frames; @@ -1344,7 +1346,7 @@ static void alloc_raw_frame_buffers(VP8_COMP *cpi) for (i = 0; i < buffers; i++) if (vp8_yv12_alloc_frame_buffer(&cpi->src_buffer[i].source_buffer, - cpi->oxcf.Width, cpi->oxcf.Height, + width, cpi->oxcf.Height, 16)) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate lag buffer"); @@ -1352,7 +1354,7 @@ static void alloc_raw_frame_buffers(VP8_COMP *cpi) #if VP8_TEMPORAL_ALT_REF if (vp8_yv12_alloc_frame_buffer(&cpi->alt_ref_buffer.source_buffer, - cpi->oxcf.Width, cpi->oxcf.Height, 16)) + width, cpi->oxcf.Height, 16)) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate altref buffer"); |