diff options
Diffstat (limited to 'vp8/encoder/arm')
31 files changed, 362 insertions, 2024 deletions
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c new file mode 100644 index 000000000..a1f110260 --- /dev/null +++ b/vp8/encoder/arm/arm_csystemdependent.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vpx_ports/config.h" +#include "vpx_ports/arm.h" +#include "variance.h" +#include "onyx_int.h" + +extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); +extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); +extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); + +void vp8_arch_arm_encoder_init(VP8_COMP *cpi) +{ +#if CONFIG_RUNTIME_CPU_DETECT + int flags = cpi->common.rtcd.flags; + int has_edsp = flags & HAS_EDSP; + int has_media = flags & HAS_MEDIA; + int has_neon = flags & HAS_NEON; + +#if HAVE_ARMV6 + if (has_media) + { + /*cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c; + cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c; + cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c; + cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c; + cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;*/ + + /*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c; + cpi->rtcd.variance.var8x8 = vp8_variance8x8_c; + cpi->rtcd.variance.var8x16 = vp8_variance8x16_c; + cpi->rtcd.variance.var16x8 = vp8_variance16x8_c; + cpi->rtcd.variance.var16x16 = vp8_variance16x16_c;*/ + + /*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c; + cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c; + cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; + cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; + cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c;*/ + + /*cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; + cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/ + + /*cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c; + cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; + cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;; + cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;*/ + + /*cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; + cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c; + cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_c; + cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_c;*/ + cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_armv6; + + /*cpi->rtcd.encodemb.berr = vp8_block_error_c; + cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c; + cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c; + cpi->rtcd.encodemb.subb = vp8_subtract_b_c; + cpi->rtcd.encodemb.submby = vp8_subtract_mby_c; + cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c;*/ + + /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; + cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;*/ + } +#endif + +#if HAVE_ARMV7 + if (has_neon) + { + cpi->rtcd.variance.sad16x16 = vp8_sad16x16_neon; + cpi->rtcd.variance.sad16x8 = vp8_sad16x8_neon; + cpi->rtcd.variance.sad8x16 = vp8_sad8x16_neon; + cpi->rtcd.variance.sad8x8 = vp8_sad8x8_neon; + cpi->rtcd.variance.sad4x4 = vp8_sad4x4_neon; + + /*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;*/ + cpi->rtcd.variance.var8x8 = vp8_variance8x8_neon; + cpi->rtcd.variance.var8x16 = vp8_variance8x16_neon; + cpi->rtcd.variance.var16x8 = vp8_variance16x8_neon; + cpi->rtcd.variance.var16x16 = vp8_variance16x16_neon; + + /*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;*/ + cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_neon; + /*cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; + cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;*/ + cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_neon; + cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_neon; + cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_neon; + cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_neon; + + cpi->rtcd.variance.mse16x16 = vp8_mse16x16_neon; + /*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/ + + cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_neon; + /*cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; + cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;*/ + cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_neon; + + cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_neon; + cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_neon; + cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_neon; + cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_neon; + cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_neon; + + /*cpi->rtcd.encodemb.berr = vp8_block_error_c; + cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c; + cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c;*/ + cpi->rtcd.encodemb.subb = vp8_subtract_b_neon; + cpi->rtcd.encodemb.submby = vp8_subtract_mby_neon; + cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_neon; + + /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; + cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;*/ + /* The neon quantizer has not been updated to match the new exact + * quantizer introduced in commit e04e2935 + */ + /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon;*/ + } +#endif + +#if HAVE_ARMV7 +#if CONFIG_RUNTIME_CPU_DETECT + if (has_neon) +#endif + { + vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon; + } +#endif +#endif +} diff --git a/vp8/encoder/arm/neon/boolhuff_armv7.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm index 9a5f36661..e78dc3322 100644 --- a/vp8/encoder/arm/neon/boolhuff_armv7.asm +++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; @@ -204,17 +205,10 @@ token_count_lt_zero_se ldr r5, [r0, #vp8_writer_range] ldr r3, [r0, #vp8_writer_count] - ; reverse the stream of bits to be packed. Normally - ; the most significant bit is peeled off and compared - ; in the form of (v >> --n) & 1. ARM architecture has - ; the ability to set a flag based on the value of the - ; bit shifted off the bottom of the register. To make - ; that happen the bitstream is reversed. - rbit r11, r1 rsb r4, r10, #32 ; 32-n ; v is kept in r1 during the token pack loop - lsr r1, r11, r4 ; v >>= 32 - n + lsl r1, r1, r4 ; r1 = v << 32 - n encode_value_loop sub r7, r5, #1 ; range-1 @@ -222,7 +216,7 @@ encode_value_loop ; Decisions are made based on the bit value shifted ; off of v, so set a flag here based on this. ; This value is refered to as "bb" - lsrs r1, r1, #1 ; bit = v >> n + lsls r1, r1, #1 ; bit = v >> n mov r4, r7, lsl #7 ; ((range-1) * 128) mov r7, #1 diff --git a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm index 9c52c52f6..3233d2a96 100644 --- a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm +++ b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm @@ -1,14 +1,15 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vp8cx_pack_tokens_armv7| + EXPORT |vp8cx_pack_tokens_armv5| INCLUDE vpx_vp8_enc_asm_offsets.asm @@ -24,7 +25,7 @@ ; r3 vp8_coef_encodings ; s0 vp8_extra_bits ; s1 vp8_coef_tree -|vp8cx_pack_tokens_armv7| PROC +|vp8cx_pack_tokens_armv5| PROC push {r4-r11, lr} ; Add size of xcount * sizeof (TOKENEXTRA) to get stop @@ -56,18 +57,11 @@ while_p_lt_stop movne lr, #2 ; i = 2 subne r8, r8, #1 ; --n - ; reverse the stream of bits to be packed. Normally - ; the most significant bit is peeled off and compared - ; in the form of (v >> --n) & 1. ARM architecture has - ; the ability to set a flag based on the value of the - ; bit shifted off the bottom of the register. To make - ; that happen the bitstream is reversed. - rbit r12, r6 rsb r4, r8, #32 ; 32-n ldr r10, [sp, #52] ; vp8_coef_tree ; v is kept in r12 during the token pack loop - lsr r12, r12, r4 ; v >>= 32 - n + lsl r12, r6, r4 ; r12 = v << 32 - n ; loop start token_loop @@ -77,7 +71,7 @@ token_loop ; Decisions are made based on the bit value shifted ; off of v, so set a flag here based on this. ; This value is refered to as "bb" - lsrs r12, r12, #1 ; bb = v >> n + lsls r12, r12, #1 ; bb = v >> n mul r4, r4, r7 ; ((range-1) * pp[i>>1])) ; bb can only be 0 or 1. So only execute this statement @@ -171,16 +165,15 @@ token_count_lt_zero ldr r10, [r12, #vp8_extra_bit_struct_tree] str r10, [sp, #4] ; b->tree - rbit r12, r7 ; reverse v rsb r4, r8, #32 - lsr r12, r12, r4 + lsl r12, r7, r4 mov lr, #0 ; i = 0 extra_bits_loop ldrb r4, [r9, lr, asr #1] ; pp[i>>1] sub r7, r5, #1 ; range-1 - lsrs r12, r12, #1 ; v >> n + lsls r12, r12, #1 ; v >> n mul r4, r4, r7 ; (range-1) * pp[i>>1] addcs lr, lr, #1 ; i + bb diff --git a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm index 92b098909..a9b552ae1 100644 --- a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm +++ b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm @@ -1,14 +1,15 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vp8cx_pack_mb_row_tokens_armv7| + EXPORT |vp8cx_pack_mb_row_tokens_armv5| INCLUDE vpx_vp8_enc_asm_offsets.asm @@ -24,7 +25,7 @@ ; r3 vp8_extra_bits ; s0 vp8_coef_tree -|vp8cx_pack_mb_row_tokens_armv7| PROC +|vp8cx_pack_mb_row_tokens_armv5| PROC push {r4-r11, lr} sub sp, sp, #24 @@ -77,18 +78,11 @@ while_p_lt_stop movne lr, #2 ; i = 2 subne r8, r8, #1 ; --n - ; reverse the stream of bits to be packed. Normally - ; the most significant bit is peeled off and compared - ; in the form of (v >> --n) & 1. ARM architecture has - ; the ability to set a flag based on the value of the - ; bit shifted off the bottom of the register. To make - ; that happen the bitstream is reversed. - rbit r12, r6 rsb r4, r8, #32 ; 32-n ldr r10, [sp, #60] ; vp8_coef_tree ; v is kept in r12 during the token pack loop - lsr r12, r12, r4 ; v >>= 32 - n + lsl r12, r6, r4 ; r12 = v << 32 - n ; loop start token_loop @@ -98,7 +92,7 @@ token_loop ; Decisions are made based on the bit value shifted ; off of v, so set a flag here based on this. ; This value is refered to as "bb" - lsrs r12, r12, #1 ; bb = v >> n + lsls r12, r12, #1 ; bb = v >> n mul r4, r4, r7 ; ((range-1) * pp[i>>1])) ; bb can only be 0 or 1. So only execute this statement @@ -192,16 +186,15 @@ token_count_lt_zero ldr r10, [r12, #vp8_extra_bit_struct_tree] str r10, [sp, #4] ; b->tree - rbit r12, r7 ; reverse v rsb r4, r8, #32 - lsr r12, r12, r4 + lsl r12, r7, r4 mov lr, #0 ; i = 0 extra_bits_loop ldrb r4, [r9, lr, asr #1] ; pp[i>>1] sub r7, r5, #1 ; range-1 - lsrs r12, r12, #1 ; v >> n + lsls r12, r12, #1 ; v >> n mul r4, r4, r7 ; (range-1) * pp[i>>1] addcs lr, lr, #1 ; i + bb diff --git a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm index 6d5f882ed..0835164e5 100644 --- a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm +++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm @@ -1,14 +1,15 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vp8cx_pack_tokens_into_partitions_armv7| + EXPORT |vp8cx_pack_tokens_into_partitions_armv5| INCLUDE vpx_vp8_enc_asm_offsets.asm @@ -26,7 +27,7 @@ ; s1 vp8_extra_bits, ; s2 const vp8_tree_index *, -|vp8cx_pack_tokens_into_partitions_armv7| PROC +|vp8cx_pack_tokens_into_partitions_armv5| PROC push {r4-r11, lr} sub sp, sp, #44 @@ -105,18 +106,11 @@ while_p_lt_stop movne lr, #2 ; i = 2 subne r8, r8, #1 ; --n - ; reverse the stream of bits to be packed. Normally - ; the most significant bit is peeled off and compared - ; in the form of (v >> --n) & 1. ARM architecture has - ; the ability to set a flag based on the value of the - ; bit shifted off the bottom of the register. To make - ; that happen the bitstream is reversed. - rbit r12, r6 rsb r4, r8, #32 ; 32-n ldr r10, [sp, #88] ; vp8_coef_tree ; v is kept in r12 during the token pack loop - lsr r12, r12, r4 ; v >>= 32 - n + lsl r12, r6, r4 ; r12 = v << 32 - n ; loop start token_loop @@ -126,7 +120,7 @@ token_loop ; Decisions are made based on the bit value shifted ; off of v, so set a flag here based on this. ; This value is refered to as "bb" - lsrs r12, r12, #1 ; bb = v >> n + lsls r12, r12, #1 ; bb = v >> n mul r4, r4, r7 ; ((range-1) * pp[i>>1])) ; bb can only be 0 or 1. So only execute this statement @@ -220,16 +214,15 @@ token_count_lt_zero ldr r10, [r12, #vp8_extra_bit_struct_tree] str r10, [sp, #4] ; b->tree - rbit r12, r7 ; reverse v rsb r4, r8, #32 - lsr r12, r12, r4 + lsl r12, r7, r4 mov lr, #0 ; i = 0 extra_bits_loop ldrb r4, [r9, lr, asr #1] ; pp[i>>1] sub r7, r5, #1 ; range-1 - lsrs r12, r12, #1 ; v >> n + lsls r12, r12, #1 ; v >> n mul r4, r4, r7 ; (range-1) * pp[i>>1] addcs lr, lr, #1 ; i + bb diff --git a/vp8/encoder/arm/armv6/walsh_v6.asm b/vp8/encoder/arm/armv6/walsh_v6.asm index 608c9ae65..61ffdb315 100644 --- a/vp8/encoder/arm/armv6/walsh_v6.asm +++ b/vp8/encoder/arm/armv6/walsh_v6.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; EXPORT |vp8_short_walsh4x4_armv6| diff --git a/vp8/encoder/arm/boolhuff_arm.c b/vp8/encoder/arm/boolhuff_arm.c index e70b3ad47..fe8e70c16 100644 --- a/vp8/encoder/arm/boolhuff_arm.c +++ b/vp8/encoder/arm/boolhuff_arm.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/arm/csystemdependent.c b/vp8/encoder/arm/csystemdependent.c deleted file mode 100644 index 003979680..000000000 --- a/vp8/encoder/arm/csystemdependent.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. - */ - - -#include "vpx_ports/config.h" -#include "variance.h" -#include "onyx_int.h" - -void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); -extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); -extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); - -void vp8_cmachine_specific_config(VP8_COMP *cpi) -{ -#if CONFIG_RUNTIME_CPU_DETECT - cpi->rtcd.common = &cpi->common.rtcd; - -#if HAVE_ARMV7 - cpi->rtcd.variance.sad16x16 = vp8_sad16x16_neon; - cpi->rtcd.variance.sad16x8 = vp8_sad16x8_neon; - cpi->rtcd.variance.sad8x16 = vp8_sad8x16_neon; - cpi->rtcd.variance.sad8x8 = vp8_sad8x8_neon; - cpi->rtcd.variance.sad4x4 = vp8_sad4x4_neon; - - cpi->rtcd.variance.var4x4 = vp8_variance4x4_c; - cpi->rtcd.variance.var8x8 = vp8_variance8x8_neon; - cpi->rtcd.variance.var8x16 = vp8_variance8x16_neon; - cpi->rtcd.variance.var16x8 = vp8_variance16x8_neon; - cpi->rtcd.variance.var16x16 = vp8_variance16x16_neon; - - cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c; - cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_neon; - cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_neon; - - cpi->rtcd.variance.mse16x16 = vp8_mse16x16_neon; - cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c; - - cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_neon; - cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; - cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;; - cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_neon; - - cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_neon; - cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_neon; - cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_neon; - cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_neon; - cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_neon; - - cpi->rtcd.encodemb.berr = vp8_block_error_c; - cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c; - cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c; - cpi->rtcd.encodemb.subb = vp8_subtract_b_neon; - cpi->rtcd.encodemb.submby = vp8_subtract_mby_neon; - cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_neon; - - cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; - cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon; -#elif HAVE_ARMV6 - cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c; - cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c; - cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c; - cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c; - cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c; - - cpi->rtcd.variance.var4x4 = vp8_variance4x4_c; - cpi->rtcd.variance.var8x8 = vp8_variance8x8_c; - cpi->rtcd.variance.var8x16 = vp8_variance8x16_c; - cpi->rtcd.variance.var16x8 = vp8_variance16x8_c; - cpi->rtcd.variance.var16x16 = vp8_variance16x16_c; - - cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c; - cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c; - cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c; - - cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; - cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c; - - cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c; - cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; - cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;; - cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c; - - cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; - cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c; - cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_c; - cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_c; - cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_armv6; - - cpi->rtcd.encodemb.berr = vp8_block_error_c; - cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c; - cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c; - cpi->rtcd.encodemb.subb = vp8_subtract_b_c; - cpi->rtcd.encodemb.submby = vp8_subtract_mby_c; - cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c; - - cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; - cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c; -#else - //pure c - cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c; - cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c; - cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c; - cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c; - cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c; - - cpi->rtcd.variance.var4x4 = vp8_variance4x4_c; - cpi->rtcd.variance.var8x8 = vp8_variance8x8_c; - cpi->rtcd.variance.var8x16 = vp8_variance8x16_c; - cpi->rtcd.variance.var16x8 = vp8_variance16x8_c; - cpi->rtcd.variance.var16x16 = vp8_variance16x16_c; - - cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c; - cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c; - cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c; - - cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; - cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c; - - cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c; - cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; - cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;; - cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c; - - cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; - cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c; - cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_c; - cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_c; - cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c; - - cpi->rtcd.encodemb.berr = vp8_block_error_c; - cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c; - cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c; - cpi->rtcd.encodemb.subb = vp8_subtract_b_c; - cpi->rtcd.encodemb.submby = vp8_subtract_mby_c; - cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c; - - cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; - cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c; -#endif -#endif - -#if HAVE_ARMV7 - vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon; -#else - vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame; -#endif -} diff --git a/vp8/encoder/arm/dct_arm.h b/vp8/encoder/arm/dct_arm.h index a671862fb..41fa5d192 100644 --- a/vp8/encoder/arm/dct_arm.h +++ b/vp8/encoder/arm/dct_arm.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -14,9 +15,11 @@ #if HAVE_ARMV6 extern prototype_fdct(vp8_short_walsh4x4_armv6); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_fdct_walsh_short4x4 #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_armv6 #endif +#endif #if HAVE_ARMV7 extern prototype_fdct(vp8_short_fdct4x4_neon); @@ -25,6 +28,7 @@ extern prototype_fdct(vp8_fast_fdct4x4_neon); extern prototype_fdct(vp8_fast_fdct8x4_neon); extern prototype_fdct(vp8_short_walsh4x4_neon); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_fdct_short4x4 #define vp8_fdct_short4x4 vp8_short_fdct4x4_neon @@ -39,6 +43,7 @@ extern prototype_fdct(vp8_short_walsh4x4_neon); #undef vp8_fdct_walsh_short4x4 #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_neon +#endif #endif diff --git a/vp8/encoder/arm/encodemb_arm.c b/vp8/encoder/arm/encodemb_arm.c index 3f1d05391..cc9e014b2 100644 --- a/vp8/encoder/arm/encodemb_arm.c +++ b/vp8/encoder/arm/encodemb_arm.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/arm/encodemb_arm.h b/vp8/encoder/arm/encodemb_arm.h index 28f9e5c5f..8fe453735 100644 --- a/vp8/encoder/arm/encodemb_arm.h +++ b/vp8/encoder/arm/encodemb_arm.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -29,6 +30,7 @@ extern prototype_submbuv(vp8_subtract_mbuv_neon); //#undef vp8_encodemb_mbuverr //#define vp8_encodemb_mbuverr vp8_mbuverror_c +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_encodemb_subb #define vp8_encodemb_subb vp8_subtract_b_neon @@ -37,6 +39,7 @@ extern prototype_submbuv(vp8_subtract_mbuv_neon); #undef vp8_encodemb_submbuv #define vp8_encodemb_submbuv vp8_subtract_mbuv_neon +#endif #endif diff --git a/vp8/encoder/arm/mcomp_arm.c b/vp8/encoder/arm/mcomp_arm.c deleted file mode 100644 index 07f218605..000000000 --- a/vp8/encoder/arm/mcomp_arm.c +++ /dev/null @@ -1,1662 +0,0 @@ -/* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. - */ - - -#include "mcomp.h" -#include "vpx_mem/vpx_mem.h" - -#include <stdio.h> -#include <limits.h> -#include <math.h> - -#ifdef ENTROPY_STATS -static int mv_ref_ct [31] [4] [2]; -static int mv_mode_cts [4] [2]; -#endif - -static int mv_bits_sadcost[256]; - -extern unsigned int vp8_sub_pixel_variance16x16s_neon -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -); -extern unsigned int vp8_sub_pixel_variance16x16s_4_0_neon -( - unsigned char *src_ptr, - int src_pixels_per_line, - unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -); -extern unsigned int vp8_sub_pixel_variance16x16s_0_4_neon -( - unsigned char *src_ptr, - int src_pixels_per_line, - unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -); -extern unsigned int vp8_sub_pixel_variance16x16s_4_4_neon -( - unsigned char *src_ptr, - int src_pixels_per_line, - unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -); - -void vp8cx_init_mv_bits_sadcost() -{ - int i; - - for (i = 0; i < 256; i++) - { - mv_bits_sadcost[i] = (int)sqrt(i * 16); - } -} - - -int vp8_mv_bit_cost(MV *mv, MV *ref, int *mvcost[2], int Weight) -{ - // MV costing is based on the distribution of vectors in the previous frame and as such will tend to - // over state the cost of vectors. In addition coding a new vector can have a knock on effect on the - // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks. - // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors. - return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * Weight) >> 7; -} - -int vp8_mv_err_cost(MV *mv, MV *ref, int *mvcost[2], int error_per_bit) -{ - //int i; - //return ((mvcost[0][(mv->row - ref->row)>>1] + mvcost[1][(mv->col - ref->col)>>1] + 128) * error_per_bit) >> 8; - //return ( (vp8_mv_bit_cost(mv, ref, mvcost, 100) + 128) * error_per_bit) >> 8; - - //i = (vp8_mv_bit_cost(mv, ref, mvcost, 100) * error_per_bit + 128) >> 8; - return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * error_per_bit + 128) >> 8; - //return (vp8_mv_bit_cost(mv, ref, mvcost, 128) * error_per_bit + 128) >> 8; -} - - -static int mv_bits(MV *mv, MV *ref, int *mvcost[2]) -{ - // get the estimated number of bits for a motion vector, to be used for costing in SAD based - // motion estimation - return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col)>> 1]) + 128) >> 8; -} - -void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) -{ - int Len; - int search_site_count = 0; - - - // Generate offsets for 4 search sites per step. - Len = MAX_FIRST_STEP; - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = 0; - search_site_count++; - - while (Len > 0) - { - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = -Len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = Len; - search_site_count++; - - // Contract. - Len /= 2; - } - - x->ss_count = search_site_count; - x->searches_per_step = 4; -} - -void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) -{ - int Len; - int search_site_count = 0; - - // Generate offsets for 8 search sites per step. - Len = MAX_FIRST_STEP; - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = 0; - search_site_count++; - - while (Len > 0) - { - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = -Len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = Len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride - Len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride + Len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride - Len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride + Len; - search_site_count++; - - - // Contract. - Len /= 2; - } - - x->ss_count = search_site_count; - x->searches_per_step = 8; -} - - -#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c) -#define PRE(r,c) (*(d->base_pre) + d->pre + ((r)>>2) * d->pre_stride + ((c)>>2)) // pointer to predictor base of a motionvector -#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc -#define DIST(r,c) svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function. -#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; -#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost -#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best -#define MIN(x,y) (((x)<(y))?(x):(y)) -#define MAX(x,y) (((x)>(y))?(x):(y)) - -//#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; } - -int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2]) -{ - unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col; - unsigned char *z = (*(b->base_src) + b->src); - - int rr = ref_mv->row >> 1, rc = ref_mv->col >> 1; - int br = bestmv->row << 2, bc = bestmv->col << 2; - int tr = br, tc = bc; - unsigned int besterr = INT_MAX; - unsigned int left, right, up, down, diag; - unsigned int sse; - unsigned int whichdir; - unsigned int halfiters = 4; - unsigned int quarteriters = 4; - - int minc = MAX(x->mv_col_min << 2, (ref_mv->col >> 1) - ((1 << mvlong_width) - 1)); - int maxc = MIN(x->mv_col_max << 2, (ref_mv->col >> 1) + ((1 << mvlong_width) - 1)); - int minr = MAX(x->mv_row_min << 2, (ref_mv->row >> 1) - ((1 << mvlong_width) - 1)); - int maxr = MIN(x->mv_row_max << 2, (ref_mv->row >> 1) + ((1 << mvlong_width) - 1)); - - // central mv - bestmv->row <<= 3; - bestmv->col <<= 3; - - // calculate central point error - besterr = vf(y, d->pre_stride, z, b->src_stride, &sse); - besterr += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); - - // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected) - while (--halfiters) - { - // 1/2 pel - CHECK_BETTER(left, tr, tc - 2); - CHECK_BETTER(right, tr, tc + 2); - CHECK_BETTER(up, tr - 2, tc); - CHECK_BETTER(down, tr + 2, tc); - - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - - switch (whichdir) - { - case 0: - CHECK_BETTER(diag, tr - 2, tc - 2); - break; - case 1: - CHECK_BETTER(diag, tr - 2, tc + 2); - break; - case 2: - CHECK_BETTER(diag, tr + 2, tc - 2); - break; - case 3: - CHECK_BETTER(diag, tr + 2, tc + 2); - break; - } - - // no reason to check the same one again. - if (tr == br && tc == bc) - break; - - tr = br; - tc = bc; - } - - // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected) - // 1/4 pel - while (--quarteriters) - { - CHECK_BETTER(left, tr, tc - 1); - CHECK_BETTER(right, tr, tc + 1); - CHECK_BETTER(up, tr - 1, tc); - CHECK_BETTER(down, tr + 1, tc); - - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - - switch (whichdir) - { - case 0: - CHECK_BETTER(diag, tr - 1, tc - 1); - break; - case 1: - CHECK_BETTER(diag, tr - 1, tc + 1); - break; - case 2: - CHECK_BETTER(diag, tr + 1, tc - 1); - break; - case 3: - CHECK_BETTER(diag, tr + 1, tc + 1); - break; - } - - // no reason to check the same one again. - if (tr == br && tc == bc) - break; - - tr = br; - tc = bc; - } - - bestmv->row = br << 1; - bestmv->col = bc << 1; - - if ((abs(bestmv->col - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs(bestmv->row - ref_mv->row) > MAX_FULL_PEL_VAL)) - return INT_MAX; - - return besterr; -} -#undef MVC -#undef PRE -#undef SP -#undef DIST -#undef ERR -#undef CHECK_BETTER -#undef MIN -#undef MAX -int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2]) -{ - int bestmse = INT_MAX; - MV startmv; - //MV this_mv; - MV this_mv; - unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col; - unsigned char *z = (*(b->base_src) + b->src); - int left, right, up, down, diag; - unsigned int sse; - int whichdir ; - - - // Trap uncodable vectors - if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL)) - { - bestmv->row <<= 3; - bestmv->col <<= 3; - return INT_MAX; - } - - // central mv - bestmv->row <<= 3; - bestmv->col <<= 3; - startmv = *bestmv; - - // calculate central point error - bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse); - bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); - - // go left then right and check error - this_mv.row = startmv.row; - this_mv.col = ((startmv.col - 8) | 4); - left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse); - left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (left < bestmse) - { - *bestmv = this_mv; - bestmse = left; - } - - this_mv.col += 8; - right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse); - right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (right < bestmse) - { - *bestmv = this_mv; - bestmse = right; - } - - // go up then down and check error - this_mv.col = startmv.col; - this_mv.row = ((startmv.row - 8) | 4); - up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); - up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (up < bestmse) - { - *bestmv = this_mv; - bestmse = up; - } - - this_mv.row += 8; - down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse); - down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (down < bestmse) - { - *bestmv = this_mv; - bestmse = down; - } - - - // now check 1 more diagonal - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - //for(whichdir =0;whichdir<4;whichdir++) - //{ - this_mv = startmv; - - switch (whichdir) - { - case 0: - this_mv.col = (this_mv.col - 8) | 4; - this_mv.row = (this_mv.row - 8) | 4; - diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); - break; - case 1: - this_mv.col += 4; - this_mv.row = (this_mv.row - 8) | 4; - diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); - break; - case 2: - this_mv.col = (this_mv.col - 8) | 4; - this_mv.row += 4; - diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse); - break; - case 3: - this_mv.col += 4; - this_mv.row += 4; - diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse); - break; - } - - diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (diag < bestmse) - { - *bestmv = this_mv; - bestmse = diag; - } - -// } - - - // time to check quarter pels. - if (bestmv->row < startmv.row) - y -= d->pre_stride; - - if (bestmv->col < startmv.col) - y--; - - startmv = *bestmv; - - - - // go left then right and check error - this_mv.row = startmv.row; - - if (startmv.col & 7) - { - this_mv.col = startmv.col - 2; - left = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); - } - else - { - this_mv.col = (startmv.col - 8) | 6; - left = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse); - } - - left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (left < bestmse) - { - *bestmv = this_mv; - bestmse = left; - } - - this_mv.col += 4; - right = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); - right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (right < bestmse) - { - *bestmv = this_mv; - bestmse = right; - } - - // go up then down and check error - this_mv.col = startmv.col; - - if (startmv.row & 7) - { - this_mv.row = startmv.row - 2; - up = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); - } - else - { - this_mv.row = (startmv.row - 8) | 6; - up = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse); - } - - up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (up < bestmse) - { - *bestmv = this_mv; - bestmse = up; - } - - this_mv.row += 4; - down = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); - down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (down < bestmse) - { - *bestmv = this_mv; - bestmse = down; - } - - - // now check 1 more diagonal - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - -// for(whichdir=0;whichdir<4;whichdir++) -// { - this_mv = startmv; - - switch (whichdir) - { - case 0: - - if (startmv.row & 7) - { - this_mv.row -= 2; - - if (startmv.col & 7) - { - this_mv.col -= 2; - diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); - } - else - { - this_mv.col = (startmv.col - 8) | 6; - diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);; - } - } - else - { - this_mv.row = (startmv.row - 8) | 6; - - if (startmv.col & 7) - { - this_mv.col -= 2; - diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse); - } - else - { - this_mv.col = (startmv.col - 8) | 6; - diag = svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse); - } - } - - break; - case 1: - this_mv.col += 2; - - if (startmv.row & 7) - { - this_mv.row -= 2; - diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); - } - else - { - this_mv.row = (startmv.row - 8) | 6; - diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse); - } - - break; - case 2: - this_mv.row += 2; - - if (startmv.col & 7) - { - this_mv.col -= 2; - diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); - } - else - { - this_mv.col = (startmv.col - 8) | 6; - diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);; - } - - break; - case 3: - this_mv.col += 2; - this_mv.row += 2; - diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); - break; - } - - diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (diag < bestmse) - { - *bestmv = this_mv; - bestmse = diag; - } - -// } - - return bestmse; -} - -int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2]) -{ - int bestmse = INT_MAX; - MV startmv; - //MV this_mv; - MV this_mv; - unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col; - unsigned char *z = (*(b->base_src) + b->src); - int left, right, up, down, diag; - unsigned int sse; - - // Trap uncodable vectors - if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL)) - { - bestmv->row <<= 3; - bestmv->col <<= 3; - return INT_MAX; - } - - // central mv - bestmv->row <<= 3; - bestmv->col <<= 3; - startmv = *bestmv; - - // calculate central point error - bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse); - bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); - - // go left then right and check error - this_mv.row = startmv.row; - this_mv.col = ((startmv.col - 8) | 4); - left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse); - left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (left < bestmse) - { - *bestmv = this_mv; - bestmse = left; - } - - this_mv.col += 8; - right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse); - right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (right < bestmse) - { - *bestmv = this_mv; - bestmse = right; - } - - // go up then down and check error - this_mv.col = startmv.col; - this_mv.row = ((startmv.row - 8) | 4); - up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); - up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (up < bestmse) - { - *bestmv = this_mv; - bestmse = up; - } - - this_mv.row += 8; - down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse); - down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (down < bestmse) - { - *bestmv = this_mv; - bestmse = down; - } - - // somewhat strangely not doing all the diagonals for half pel is slower than doing them. -#if 0 - // now check 1 more diagonal - - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - this_mv = startmv; - - switch (whichdir) - { - case 0: - this_mv.col = (this_mv.col - 8) | 4; - this_mv.row = (this_mv.row - 8) | 4; - diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse); - break; - case 1: - this_mv.col += 4; - this_mv.row = (this_mv.row - 8) | 4; - diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse); - break; - case 2: - this_mv.col = (this_mv.col - 8) | 4; - this_mv.row += 4; - diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse); - break; - case 3: - this_mv.col += 4; - this_mv.row += 4; - diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse); - break; - } - - diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (diag < bestmse) - { - *bestmv = this_mv; - bestmse = diag; - } - -#else - this_mv.col = (this_mv.col - 8) | 4; - this_mv.row = (this_mv.row - 8) | 4; - diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); - diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (diag < bestmse) - { - *bestmv = this_mv; - bestmse = diag; - } - - this_mv.col += 8; - diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); - diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (diag < bestmse) - { - *bestmv = this_mv; - bestmse = diag; - } - - this_mv.col = (this_mv.col - 8) | 4; - this_mv.row = startmv.row + 4; - diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse); - diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (diag < bestmse) - { - *bestmv = this_mv; - bestmse = diag; - } - - this_mv.col += 8; - diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse); - diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (diag < bestmse) - { - *bestmv = this_mv; - bestmse = diag; - } - -#endif - return bestmse; -} - -#if 1 - -#define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c) -#define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector -#define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score. -#define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost -#define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best -const MV next_chkpts[6][3] = -{ - {{ -2, 0}, { -1, -2}, {1, -2}}, - {{ -1, -2}, {1, -2}, {2, 0}}, - {{1, -2}, {2, 0}, {1, 2}}, - {{2, 0}, {1, 2}, { -1, 2}}, - {{1, 2}, { -1, 2}, { -2, 0}}, - {{ -1, 2}, { -2, 0}, { -1, -2}} -}; -int vp8_hex_search -( - MACROBLOCK *x, - BLOCK *b, - BLOCKD *d, - MV *ref_mv, - MV *best_mv, - int search_param, - int error_per_bit, - int *num00, - vp8_variance_fn_t vf, - vp8_sad_fn_t sf, - int *mvsadcost[2], - int *mvcost[2] -) -{ - MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ; - MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ; - int i, j; - unsigned char *src = (*(b->base_src) + b->src); - int src_stride = b->src_stride; - int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc; - unsigned int besterr, thiserr = 0x7fffffff; - int k = -1, tk; - - if (bc < x->mv_col_min) bc = x->mv_col_min; - - if (bc > x->mv_col_max) bc = x->mv_col_max; - - if (br < x->mv_row_min) br = x->mv_row_min; - - if (br > x->mv_row_max) br = x->mv_row_max; - - rr >>= 1; - rc >>= 1; - - besterr = ERR(br, bc, thiserr); - - // hex search - //j=0 - tr = br; - tc = bc; - - for (i = 0; i < 6; i++) - { - int nr = tr + hex[i].row, nc = tc + hex[i].col; - - if (nc < x->mv_col_min) continue; - - if (nc > x->mv_col_max) continue; - - if (nr < x->mv_row_min) continue; - - if (nr > x->mv_row_max) continue; - - //CHECK_BETTER(thiserr,nr,nc); - if ((thiserr = ERR(nr, nc, besterr)) < besterr) - { - besterr = thiserr; - br = nr; - bc = nc; - k = i; - } - } - - if (tr == br && tc == bc) - goto cal_neighbors; - - for (j = 1; j < 127; j++) - { - tr = br; - tc = bc; - tk = k; - - for (i = 0; i < 3; i++) - { - int nr = tr + next_chkpts[tk][i].row, nc = tc + next_chkpts[tk][i].col; - - if (nc < x->mv_col_min) continue; - - if (nc > x->mv_col_max) continue; - - if (nr < x->mv_row_min) continue; - - if (nr > x->mv_row_max) continue; - - //CHECK_BETTER(thiserr,nr,nc); - if ((thiserr = ERR(nr, nc, besterr)) < besterr) - { - besterr = thiserr; - br = nr; - bc = nc; //k=(tk+5+i)%6;} - k = tk + 5 + i; - - if (k >= 12) k -= 12; - else if (k >= 6) k -= 6; - } - } - - if (tr == br && tc == bc) - break; - } - - // check 8 1 away neighbors -cal_neighbors: - tr = br; - tc = bc; - - for (i = 0; i < 8; i++) - { - int nr = tr + neighbors[i].row, nc = tc + neighbors[i].col; - - if (nc < x->mv_col_min) continue; - - if (nc > x->mv_col_max) continue; - - if (nr < x->mv_row_min) continue; - - if (nr > x->mv_row_max) continue; - - CHECK_BETTER(thiserr, nr, nc); - } - - best_mv->row = br; - best_mv->col = bc; - - return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ; -} -#undef MVC -#undef PRE -#undef SP -#undef DIST -#undef ERR -#undef CHECK_BETTER - -#else - -#define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c) -#define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector -#define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score. -#define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost -#define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best - -int vp8_hex_search -( - MACROBLOCK *x, - BLOCK *b, - BLOCKD *d, - MV *ref_mv, - MV *best_mv, - int search_param, - int error_per_bit, - int *num00, - vp8_variance_fn_t vf, - vp8_sad_fn_t sf, - int *mvsadcost[2], - int *mvcost[2] -) -{ - MV hex[6] = { { -2, 0}, { -1, -2}, { -1, 2}, {2, 0}, {1, 2}, {1, -2} } ; - MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ; - int i, j; - unsigned char *src = (*(b->base_src) + b->src); - int src_stride = b->src_stride; - //int rr= ref_mv->row,rc= ref_mv->col,br=rr,bc=rc,tr,tc; - int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc; - unsigned int besterr, thiserr = 0x7fffffff; - - /* - if ( rc < x->mv_col_min) bc = x->mv_col_min; - if ( rc > x->mv_col_max) bc = x->mv_col_max; - if ( rr < x->mv_row_min) br = x->mv_row_min; - if ( rr > x->mv_row_max) br = x->mv_row_max; - rr>>=1; - rc>>=1; - br>>=3; - bc>>=3; - */ - if (bc < x->mv_col_min) bc = x->mv_col_min; - - if (bc > x->mv_col_max) bc = x->mv_col_max; - - if (br < x->mv_row_min) br = x->mv_row_min; - - if (br > x->mv_row_max) br = x->mv_row_max; - - rr >>= 1; - rc >>= 1; - - besterr = ERR(br, bc, thiserr); - - // hex search jbb changed to 127 to avoid max 256 problem steping by 2. - for (j = 0; j < 127; j++) - { - tr = br; - tc = bc; - - for (i = 0; i < 6; i++) - { - int nr = tr + hex[i].row, nc = tc + hex[i].col; - - if (nc < x->mv_col_min) continue; - - if (nc > x->mv_col_max) continue; - - if (nr < x->mv_row_min) continue; - - if (nr > x->mv_row_max) continue; - - CHECK_BETTER(thiserr, nr, nc); - } - - if (tr == br && tc == bc) - break; - } - - // check 8 1 away neighbors - tr = br; - tc = bc; - - for (i = 0; i < 8; i++) - { - int nr = tr + neighbors[i].row, nc = tc + neighbors[i].col; - - if (nc < x->mv_col_min) continue; - - if (nc > x->mv_col_max) continue; - - if (nr < x->mv_row_min) continue; - - if (nr > x->mv_row_max) continue; - - CHECK_BETTER(thiserr, nr, nc); - } - - best_mv->row = br; - best_mv->col = bc; - - return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ; -} -#undef MVC -#undef PRE -#undef SP -#undef DIST -#undef ERR -#undef CHECK_BETTER - -#endif - -int vp8_diamond_search_sad -( - MACROBLOCK *x, - BLOCK *b, - BLOCKD *d, - MV *ref_mv, - MV *best_mv, - int search_param, - int error_per_bit, - int *num00, - vp8_variance_fn_ptr_t *fn_ptr, - int *mvsadcost[2], - int *mvcost[2] -) -{ - int i, j, step; - - unsigned char *what = (*(b->base_src) + b->src); - int what_stride = b->src_stride; - unsigned char *in_what; - int in_what_stride = d->pre_stride; - unsigned char *best_address; - - int tot_steps; - MV this_mv; - - int bestsad = INT_MAX; - int best_site = 0; - int last_site = 0; - - int ref_row = ref_mv->row >> 3; - int ref_col = ref_mv->col >> 3; - int this_row_offset; - int this_col_offset; - search_site *ss; - - unsigned char *check_here; - int thissad; - - // Work out the start point for the search - in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col); - best_address = in_what; - - // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits - if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) && - (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) - { - // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit); - } - - // search_param determines the length of the initial step and hence the number of iterations - // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. - ss = &x->ss[search_param * x->searches_per_step]; - tot_steps = (x->ss_count / x->searches_per_step) - search_param; - - i = 1; - best_mv->row = ref_row; - best_mv->col = ref_col; - - *num00 = 0; - - for (step = 0; step < tot_steps ; step++) - { - for (j = 0 ; j < x->searches_per_step ; j++) - { - // Trap illegal vectors - this_row_offset = best_mv->row + ss[i].mv.row; - this_col_offset = best_mv->col + ss[i].mv.col; - - if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && - (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) - - { - check_here = ss[i].offset + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); - - if (thissad < bestsad) - { - this_mv.row = this_row_offset << 3; - this_mv.col = this_col_offset << 3; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); - - if (thissad < bestsad) - { - bestsad = thissad; - best_site = i; - } - } - } - - i++; - } - - if (best_site != last_site) - { - best_mv->row += ss[best_site].mv.row; - best_mv->col += ss[best_site].mv.col; - best_address += ss[best_site].offset; - last_site = best_site; - } - else if (best_address == in_what) - (*num00)++; - } - - this_mv.row = best_mv->row << 3; - this_mv.col = best_mv->col << 3; - - if (bestsad == INT_MAX) - return INT_MAX; - - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) - + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); -} - -int vp8_diamond_search_sadx4 -( - MACROBLOCK *x, - BLOCK *b, - BLOCKD *d, - MV *ref_mv, - MV *best_mv, - int search_param, - int error_per_bit, - int *num00, - vp8_variance_fn_ptr_t *fn_ptr, - int *mvsadcost[2], - int *mvcost[2] -) -{ - int i, j, step; - - unsigned char *what = (*(b->base_src) + b->src); - int what_stride = b->src_stride; - unsigned char *in_what; - int in_what_stride = d->pre_stride; - unsigned char *best_address; - - int tot_steps; - MV this_mv; - - int bestsad = INT_MAX; - int best_site = 0; - int last_site = 0; - - int ref_row = ref_mv->row >> 3; - int ref_col = ref_mv->col >> 3; - int this_row_offset; - int this_col_offset; - search_site *ss; - - unsigned char *check_here; - int thissad; - - // Work out the start point for the search - in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col); - best_address = in_what; - - // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits - if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) && - (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) - { - // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit); - } - - // search_param determines the length of the initial step and hence the number of iterations - // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. - ss = &x->ss[search_param * x->searches_per_step]; - tot_steps = (x->ss_count / x->searches_per_step) - search_param; - - i = 1; - best_mv->row = ref_row; - best_mv->col = ref_col; - - *num00 = 0; - - for (step = 0; step < tot_steps ; step++) - { - int check_row_min, check_col_min, check_row_max, check_col_max; - - check_row_min = x->mv_row_min - best_mv->row; - check_row_max = x->mv_row_max - best_mv->row; - check_col_min = x->mv_col_min - best_mv->col; - check_col_max = x->mv_col_max - best_mv->col; - - for (j = 0 ; j < x->searches_per_step ; j += 4) - { - char *block_offset[4]; - unsigned int valid_block[4]; - int all_in = 1, t; - - for (t = 0; t < 4; t++) - { - valid_block [t] = (ss[t+i].mv.col > check_col_min); - valid_block [t] &= (ss[t+i].mv.col < check_col_max); - valid_block [t] &= (ss[t+i].mv.row > check_row_min); - valid_block [t] &= (ss[t+i].mv.row < check_row_max); - - all_in &= valid_block[t]; - block_offset[t] = ss[i+t].offset + best_address; - } - - if (all_in) - { - int sad_array[4]; - - fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); - - for (t = 0; t < 4; t++, i++) - { - thissad = sad_array[t]; - - if (thissad < bestsad) - { - this_mv.row = (best_mv->row + ss[i].mv.row) << 3; - this_mv.col = (best_mv->col + ss[i].mv.col) << 3; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); - - if (thissad < bestsad) - { - bestsad = thissad; - best_site = i; - } - } - } - } - else - { - int t; - - for (t = 0; t < 4; i++, t++) - { - // Trap illegal vectors - if (valid_block[t]) - - { - check_here = block_offset[t]; - thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); - - if (thissad < bestsad) - { - this_row_offset = best_mv->row + ss[i].mv.row; - this_col_offset = best_mv->col + ss[i].mv.col; - - this_mv.row = this_row_offset << 3; - this_mv.col = this_col_offset << 3; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); - - if (thissad < bestsad) - { - bestsad = thissad; - best_site = i; - } - } - } - } - } - } - - if (best_site != last_site) - { - best_mv->row += ss[best_site].mv.row; - best_mv->col += ss[best_site].mv.col; - best_address += ss[best_site].offset; - last_site = best_site; - } - else if (best_address == in_what) - (*num00)++; - } - - this_mv.row = best_mv->row << 3; - this_mv.col = best_mv->col << 3; - - if (bestsad == INT_MAX) - return INT_MAX; - - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) - + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); -} - - -#if !(CONFIG_REALTIME_ONLY) -int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2]) -{ - unsigned char *what = (*(b->base_src) + b->src); - int what_stride = b->src_stride; - unsigned char *in_what; - int in_what_stride = d->pre_stride; - int mv_stride = d->pre_stride; - unsigned char *bestaddress; - MV *best_mv = &d->bmi.mv.as_mv; - MV this_mv; - int bestsad = INT_MAX; - int r, c; - - unsigned char *check_here; - int thissad; - - int ref_row = ref_mv->row >> 3; - int ref_col = ref_mv->col >> 3; - - int row_min = ref_row - distance; - int row_max = ref_row + distance; - int col_min = ref_col - distance; - int col_max = ref_col + distance; - - // Work out the mid point for the search - in_what = *(d->base_pre) + d->pre; - bestaddress = in_what + (ref_row * d->pre_stride) + ref_col; - - best_mv->row = ref_row; - best_mv->col = ref_col; - - // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits - if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) && - (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) - { - // Baseline value at the centre - - //bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(vp8_mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14)); - bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit); - } - - // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border - if (col_min < x->mv_col_min) - col_min = x->mv_col_min; - - if (col_max > x->mv_col_max) - col_max = x->mv_col_max; - - if (row_min < x->mv_row_min) - row_min = x->mv_row_min; - - if (row_max > x->mv_row_max) - row_max = x->mv_row_max; - - for (r = row_min; r < row_max ; r++) - { - this_mv.row = r << 3; - check_here = r * mv_stride + in_what + col_min; - - for (c = col_min; c < col_max; c++) - { - thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); - - this_mv.col = c << 3; - //thissad += (int)sqrt(vp8_mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14)); - //thissad += error_per_bit * mv_bits_sadcost[mv_bits(&this_mv, ref_mv, mvcost)]; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost); - - if (thissad < bestsad) - { - bestsad = thissad; - best_mv->row = r; - best_mv->col = c; - bestaddress = check_here; - } - - check_here++; - } - } - - this_mv.row = best_mv->row << 3; - this_mv.col = best_mv->col << 3; - - if (bestsad < INT_MAX) - return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) - + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - else - return INT_MAX; -} - -int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2]) -{ - unsigned char *what = (*(b->base_src) + b->src); - int what_stride = b->src_stride; - unsigned char *in_what; - int in_what_stride = d->pre_stride; - int mv_stride = d->pre_stride; - unsigned char *bestaddress; - MV *best_mv = &d->bmi.mv.as_mv; - MV this_mv; - int bestsad = INT_MAX; - int r, c; - - unsigned char *check_here; - int thissad; - - int ref_row = ref_mv->row >> 3; - int ref_col = ref_mv->col >> 3; - - int row_min = ref_row - distance; - int row_max = ref_row + distance; - int col_min = ref_col - distance; - int col_max = ref_col + distance; - - int sad_array[3]; - - // Work out the mid point for the search - in_what = *(d->base_pre) + d->pre; - bestaddress = in_what + (ref_row * d->pre_stride) + ref_col; - - best_mv->row = ref_row; - best_mv->col = ref_col; - - // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits - if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) && - (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) - { - // Baseline value at the centre - bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit); - } - - // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border - if (col_min < x->mv_col_min) - col_min = x->mv_col_min; - - if (col_max > x->mv_col_max) - col_max = x->mv_col_max; - - if (row_min < x->mv_row_min) - row_min = x->mv_row_min; - - if (row_max > x->mv_row_max) - row_max = x->mv_row_max; - - for (r = row_min; r < row_max ; r++) - { - this_mv.row = r << 3; - check_here = r * mv_stride + in_what + col_min; - c = col_min; - - while ((c + 3) < col_max) - { - int i; - - fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array); - - for (i = 0; i < 3; i++) - { - thissad = sad_array[i]; - - if (thissad < bestsad) - { - this_mv.col = c << 3; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); - - if (thissad < bestsad) - { - bestsad = thissad; - best_mv->row = r; - best_mv->col = c; - bestaddress = check_here; - } - } - - check_here++; - c++; - } - } - - while (c < col_max) - { - thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); - - if (thissad < bestsad) - { - this_mv.col = c << 3; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); - - if (thissad < bestsad) - { - bestsad = thissad; - best_mv->row = r; - best_mv->col = c; - bestaddress = check_here; - } - } - - check_here ++; - c ++; - } - - } - - this_mv.row = best_mv->row << 3; - this_mv.col = best_mv->col << 3; - - if (bestsad < INT_MAX) - return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) - + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - else - return INT_MAX; -} -#endif - -#ifdef ENTROPY_STATS -void print_mode_context(void) -{ - FILE *f = fopen("modecont.c", "w"); - int i, j; - - fprintf(f, "#include \"entropy.h\"\n"); - fprintf(f, "const int vp8_mode_contexts[6][4] =\n"); - fprintf(f, "{\n"); - - for (j = 0; j < 6; j++) - { - fprintf(f, " { // %d \n", j); - fprintf(f, " "); - - for (i = 0; i < 4; i++) - { - int overal_prob; - int this_prob; - int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1]; - - // Overall probs - count = mv_mode_cts[i][0] + mv_mode_cts[i][1]; - - if (count) - overal_prob = 256 * mv_mode_cts[i][0] / count; - else - overal_prob = 128; - - if (overal_prob == 0) - overal_prob = 1; - - // context probs - count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1]; - - if (count) - this_prob = 256 * mv_ref_ct[j][i][0] / count; - else - this_prob = 128; - - if (this_prob == 0) - this_prob = 1; - - fprintf(f, "%5d, ", this_prob); - //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob); - //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob); - } - - fprintf(f, " },\n"); - } - - fprintf(f, "};\n"); - fclose(f); -} - -/* MV ref count ENTROPY_STATS stats code */ -#ifdef ENTROPY_STATS -void init_mv_ref_counts() -{ - vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct)); - vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts)); -} - -void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) -{ - if (m == ZEROMV) - { - ++mv_ref_ct [ct[0]] [0] [0]; - ++mv_mode_cts[0][0]; - } - else - { - ++mv_ref_ct [ct[0]] [0] [1]; - ++mv_mode_cts[0][1]; - - if (m == NEARESTMV) - { - ++mv_ref_ct [ct[1]] [1] [0]; - ++mv_mode_cts[1][0]; - } - else - { - ++mv_ref_ct [ct[1]] [1] [1]; - ++mv_mode_cts[1][1]; - - if (m == NEARMV) - { - ++mv_ref_ct [ct[2]] [2] [0]; - ++mv_mode_cts[2][0]; - } - else - { - ++mv_ref_ct [ct[2]] [2] [1]; - ++mv_mode_cts[2][1]; - - if (m == NEWMV) - { - ++mv_ref_ct [ct[3]] [3] [0]; - ++mv_mode_cts[3][0]; - } - else - { - ++mv_ref_ct [ct[3]] [3] [1]; - ++mv_mode_cts[3][1]; - } - } - } - } -} - -#endif/* END MV ref count ENTROPY_STATS stats code */ - -#endif diff --git a/vp8/encoder/arm/neon/fastfdct4x4_neon.asm b/vp8/encoder/arm/neon/fastfdct4x4_neon.asm index d5dec440d..8c191a753 100644 --- a/vp8/encoder/arm/neon/fastfdct4x4_neon.asm +++ b/vp8/encoder/arm/neon/fastfdct4x4_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/fastfdct8x4_neon.asm b/vp8/encoder/arm/neon/fastfdct8x4_neon.asm index de1c25469..ca351a1c4 100644 --- a/vp8/encoder/arm/neon/fastfdct8x4_neon.asm +++ b/vp8/encoder/arm/neon/fastfdct8x4_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/fastquantizeb_neon.asm b/vp8/encoder/arm/neon/fastquantizeb_neon.asm index 11070377b..ca1ea9c18 100644 --- a/vp8/encoder/arm/neon/fastquantizeb_neon.asm +++ b/vp8/encoder/arm/neon/fastquantizeb_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/sad16_neon.asm b/vp8/encoder/arm/neon/sad16_neon.asm index 6169f10da..d7c590e15 100644 --- a/vp8/encoder/arm/neon/sad16_neon.asm +++ b/vp8/encoder/arm/neon/sad16_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/sad8_neon.asm b/vp8/encoder/arm/neon/sad8_neon.asm index 28604ddeb..23ba6df93 100644 --- a/vp8/encoder/arm/neon/sad8_neon.asm +++ b/vp8/encoder/arm/neon/sad8_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/shortfdct_neon.asm b/vp8/encoder/arm/neon/shortfdct_neon.asm index 26bc0d06c..5af5cb888 100644 --- a/vp8/encoder/arm/neon/shortfdct_neon.asm +++ b/vp8/encoder/arm/neon/shortfdct_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm index 8781ca0cc..3ea00f8b9 100644 --- a/vp8/encoder/arm/neon/subtract_neon.asm +++ b/vp8/encoder/arm/neon/subtract_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/variance_neon.asm b/vp8/encoder/arm/neon/variance_neon.asm index 64b83ca43..e1a46869a 100644 --- a/vp8/encoder/arm/neon/variance_neon.asm +++ b/vp8/encoder/arm/neon/variance_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm index f26b4d7ae..b0450e523 100644 --- a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm +++ b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm index f53596727..6af4e87ba 100644 --- a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm +++ b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm b/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm index 5269c0af8..ba3decf6c 100644 --- a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm +++ b/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm index aec716e3b..1b09cfe4c 100644 --- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm +++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm index 3d02d7c40..0a2b71c49 100644 --- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm +++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm @@ -1,16 +1,17 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vp8_sub_pixel_variance16x16s_4_0_neon| - EXPORT |vp8_sub_pixel_variance16x16s_0_4_neon| - EXPORT |vp8_sub_pixel_variance16x16s_4_4_neon| + EXPORT |vp8_variance_halfpixvar16x16_h_neon| + EXPORT |vp8_variance_halfpixvar16x16_v_neon| + EXPORT |vp8_variance_halfpixvar16x16_hv_neon| EXPORT |vp8_sub_pixel_variance16x16s_neon| ARM REQUIRE8 @@ -19,7 +20,7 @@ AREA ||.text||, CODE, READONLY, ALIGN=2 ;================================================ -;unsigned int vp8_sub_pixel_variance16x16s_4_0_neon +;unsigned int vp8_variance_halfpixvar16x16_h_neon ;( ; unsigned char *src_ptr, r0 ; int src_pixels_per_line, r1 @@ -28,7 +29,7 @@ ; unsigned int *sse ;); ;================================================ -|vp8_sub_pixel_variance16x16s_4_0_neon| PROC +|vp8_variance_halfpixvar16x16_h_neon| PROC push {lr} mov r12, #4 ;loop counter @@ -119,7 +120,7 @@ vp8_filt_fpo16x16s_4_0_loop_neon ENDP ;================================================ -;unsigned int vp8_sub_pixel_variance16x16s_0_4_neon +;unsigned int vp8_variance_halfpixvar16x16_v_neon ;( ; unsigned char *src_ptr, r0 ; int src_pixels_per_line, r1 @@ -128,7 +129,7 @@ vp8_filt_fpo16x16s_4_0_loop_neon ; unsigned int *sse ;); ;================================================ -|vp8_sub_pixel_variance16x16s_0_4_neon| PROC +|vp8_variance_halfpixvar16x16_v_neon| PROC push {lr} mov r12, #4 ;loop counter @@ -215,7 +216,7 @@ vp8_filt_spo16x16s_0_4_loop_neon ENDP ;================================================ -;unsigned int vp8_sub_pixel_variance16x16s_4_4_neon +;unsigned int vp8_variance_halfpixvar16x16_hv_neon ;( ; unsigned char *src_ptr, r0 ; int src_pixels_per_line, r1 @@ -224,7 +225,7 @@ vp8_filt_spo16x16s_0_4_loop_neon ; unsigned int *sse ;); ;================================================ -|vp8_sub_pixel_variance16x16s_4_4_neon| PROC +|vp8_variance_halfpixvar16x16_hv_neon| PROC push {lr} vld1.u8 {d0, d1, d2, d3}, [r0], r1 ;load src data diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm index bd56761fa..cf4da62fa 100644 --- a/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm +++ b/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/picklpf_arm.c b/vp8/encoder/arm/picklpf_arm.c index 0586e55d8..b2d8f2b2c 100644 --- a/vp8/encoder/arm/picklpf_arm.c +++ b/vp8/encoder/arm/picklpf_arm.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/arm/quantize_arm.c b/vp8/encoder/arm/quantize_arm.c index 46906d3a2..65c616614 100644 --- a/vp8/encoder/arm/quantize_arm.c +++ b/vp8/encoder/arm/quantize_arm.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -28,7 +29,7 @@ extern int vp8_fast_quantize_b_neon_func(short *coeff_ptr, short *zbin_ptr, shor void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) { - d->eob = vp8_fast_quantize_b_neon_func(b->coeff, &b->zbin[0][0], d->qcoeff, d->dqcoeff, d->dequant[0], vp8_rvsplus1_default_zig_zag1d, &b->round[0][0], &b->quant[0][0]); + d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant); } /* diff --git a/vp8/encoder/arm/quantize_arm.h b/vp8/encoder/arm/quantize_arm.h index e93f0fef1..5f9155eb1 100644 --- a/vp8/encoder/arm/quantize_arm.h +++ b/vp8/encoder/arm/quantize_arm.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -14,8 +15,11 @@ #if HAVE_ARMV7 extern prototype_quantize_block(vp8_fast_quantize_b_neon); -#undef vp8_quantize_fastquantb -#define vp8_quantize_fastquantb vp8_fast_quantize_b_neon +/* The neon quantizer has not been updated to match the new exact + * quantizer introduced in commit e04e2935 + */ +//#undef vp8_quantize_fastquantb +//#define vp8_quantize_fastquantb vp8_fast_quantize_b_neon #endif diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h index d9fc9b3e0..0e5f62fcf 100644 --- a/vp8/encoder/arm/variance_arm.h +++ b/vp8/encoder/arm/variance_arm.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -29,6 +30,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_neon); //extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_c); //extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_c); extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_neon); +extern prototype_variance(vp8_variance_halfpixvar16x16_h_neon); +extern prototype_variance(vp8_variance_halfpixvar16x16_v_neon); +extern prototype_variance(vp8_variance_halfpixvar16x16_hv_neon); //extern prototype_getmbss(vp8_get_mb_ss_c); extern prototype_variance(vp8_mse16x16_neon); @@ -37,6 +41,7 @@ extern prototype_sad(vp8_get16x16pred_error_neon); //extern prototype_variance2(vp8_get16x16var_c); extern prototype_sad(vp8_get4x4sse_cs_neon); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_variance_sad4x4 #define vp8_variance_sad4x4 vp8_sad4x4_neon @@ -82,6 +87,15 @@ extern prototype_sad(vp8_get4x4sse_cs_neon); #undef vp8_variance_subpixvar16x16 #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_neon +#undef vp8_variance_halfpixvar16x16_h +#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_neon + +#undef vp8_variance_halfpixvar16x16_v +#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_neon + +#undef vp8_variance_halfpixvar16x16_hv +#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_neon + //#undef vp8_variance_getmbss //#define vp8_variance_getmbss vp8_get_mb_ss_c @@ -99,6 +113,7 @@ extern prototype_sad(vp8_get4x4sse_cs_neon); #undef vp8_variance_get4x4sse_cs #define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_neon +#endif #endif diff --git a/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c b/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c index 8cdf0791f..c595ca3c0 100644 --- a/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c +++ b/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ |